Computing GC content
This commit is contained in:
parent
517b6e0070
commit
b340f05ef4
2 changed files with 64 additions and 0 deletions
55
06_gc/gc.nim
Normal file
55
06_gc/gc.nim
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
import os
|
||||||
|
import std/sequtils
|
||||||
|
import std/streams
|
||||||
|
import nre
|
||||||
|
import std/strformat
|
||||||
|
|
||||||
|
type
  DnaStringObj = object
    ## One named sequence record.
    name: string  ## Identifier of the record.
    str: string   ## Sequence text belonging to the record.

  DnaString = ref DnaStringObj
    ## Heap-allocated handle to a `DnaStringObj`; may be `nil`.
|
||||||
|
|
||||||
|
# Header-line pattern: a literal '>' followed by one or more word characters
# (the only capture group), optionally followed by whitespace and free text
# that is matched but not captured.
let firstLinePattern = re">(\w+)(?:\s+.+)?"
|
||||||
|
proc parseNextDnaString(stream: Stream): DnaString =
  ## Reads one record from `stream`.
  ##
  ## The first line read must match `firstLinePattern`; its captured word
  ## becomes the record's `name`. Every following line is appended to `str`
  ## until the next character in the stream is '>' or the stream ends.
  ##
  ## Raises `ValueError` when the stream is already at its end, or when the
  ## first line does not match `firstLinePattern`.
  if stream.atEnd:
    raise newException(ValueError, "Stream is at end")

  let header = stream.readLine()
  let parsed = header.match(firstLinePattern)
  if parsed.isNone:
    raise newException(ValueError, "Invalid line: " & header)

  new(result)
  result.name = parsed.get.captures[0]

  # `or` short-circuits, so peekChar is never called on an exhausted stream.
  while not (stream.atEnd or stream.peekChar == '>'):
    result.str.add stream.readLine()
|
||||||
|
|
||||||
|
func calcGcContent(dnaString: DnaString): float =
  ## Returns the percentage of characters in `dnaString.str` that are
  ## 'G' or 'C' (uppercase only), as a value between 0.0 and 100.0.
  ##
  ## An empty `str` yields 0.0 instead of dividing by zero.
  let total = dnaString.str.len
  if total == 0:
    # Integer `/` yields a float, so 0 / 0 would be NaN; NaN loses every
    # `>` comparison and would print as "nan" if it ever reached the output.
    return 0.0

  let gcCount = dnaString.str.countIt(it == 'G' or it == 'C')
  return (gcCount / total) * 100
|
||||||
|
|
||||||
|
proc calcMaxGcContent(stream: Stream): (DnaString, float) =
  ## Parses records from `stream` until it is exhausted and returns the one
  ## with the highest `calcGcContent` value, together with that value.
  ##
  ## On ties the earliest record wins because the comparison is strict.
  ## When no record beats the initial -1.0 (for example, the stream is
  ## empty), the result is `(nil, -1.0)`.
  result[1] = -1.0  # result[0] is nil by default

  while not stream.atEnd:
    let candidate = stream.parseNextDnaString
    let score = candidate.calcGcContent
    if score > result[1]:
      result = (candidate, score)
|
||||||
|
|
||||||
|
# Script entry point: reads the file named by the first command-line
# argument and prints the name of the record with the highest GC content,
# followed by that GC percentage, each on its own line.
#
# Exits with a failure status and a message on stderr when the argument is
# missing, the file cannot be opened, the input is malformed, or the input
# contains no records.
if paramCount() < 1:
  quit("Usage: " & getAppFilename().extractFilename & " <input-file>",
       QuitFailure)

let fileName = paramStr(1)
let fileStream = newFileStream(fileName)

# newFileStream returns nil rather than raising when the file cannot be opened.
if isNil(fileStream):
  quit("Cannot open file: " & fileName, QuitFailure)

var failed = false
try:
  let maxGcContent = calcMaxGcContent(fileStream)
  if isNil(maxGcContent[0]):
    # calcMaxGcContent yields a nil record when nothing was parsed.
    stderr.writeLine("No DNA strings found in: " & fileName)
    failed = true
  else:
    echo $maxGcContent[0].name
    echo $maxGcContent[1]
except ValueError as e:
  # parseNextDnaString signals a malformed header line with ValueError.
  stderr.writeLine("Malformed input in " & fileName & ": " & e.msg)
  failed = true
finally:
  # Always release the file handle, even when parsing fails.
  fileStream.close()

if failed:
  quit(QuitFailure)
|
9
06_gc/sample.txt
Normal file
9
06_gc/sample.txt
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
>Rosalind_6404
|
||||||
|
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
|
||||||
|
TCCCACTAATAATTCTGAGG
|
||||||
|
>Rosalind_5959
|
||||||
|
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
|
||||||
|
ATATCCATTTGTCAGCAGACACGC
|
||||||
|
>Rosalind_0808
|
||||||
|
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
|
||||||
|
TGGGAACCTGCGGGCAGTAGGTGGAAT
|
Loading…
Add table
Add a link
Reference in a new issue