Computing GC content
This commit is contained in:
parent
517b6e0070
commit
b340f05ef4
2 changed files with 64 additions and 0 deletions
55
06_gc/gc.nim
Normal file
55
06_gc/gc.nim
Normal file
|
@ -0,0 +1,55 @@
|
|||
import os
|
||||
import std/sequtils
|
||||
import std/streams
|
||||
import nre
|
||||
import std/strformat
|
||||
|
||||
type
  DnaStringObj = object
    ## A named DNA sequence: the record identifier plus its sequence text.
    name: string ## Identifier of the record.
    str: string ## Sequence characters of the record.
  DnaString = ref DnaStringObj ## Reference handle to a `DnaStringObj`.
|
||||
|
||||
# Header-line pattern: a '>' followed by a run of word characters, which is
# captured as group 0, optionally followed by whitespace and further text.
let firstLinePattern = re(r">(\w+)(?:\s+.+)?")
|
||||
proc parseNextDnaString(stream: Stream): DnaString =
  ## Reads one record from `stream`: a header line matching
  ## `firstLinePattern`, followed by sequence lines.
  ##
  ## The record name is the pattern's first capture group. Every following
  ## line is appended to the sequence until the stream ends or the next
  ## character to be read is '>' (the start of another header).
  ##
  ## Raises `ValueError` when the stream is already at its end, or when the
  ## first line read does not match `firstLinePattern`.
  if stream.atEnd:
    raise newException(ValueError, "Stream is at end")

  let header = stream.readLine()
  let headerMatch = header.match(firstLinePattern)
  if not headerMatch.isSome:
    raise newException(ValueError, "Invalid line: " & header)

  new(result)
  result.name = headerMatch.get.captures[0]

  while true:
    # Stop at end of input or when the next record's header begins.
    if stream.atEnd or stream.peekChar == '>':
      break
    result.str.add stream.readLine()
|
||||
|
||||
proc calcGcContent(dnaString: DnaString): float =
  ## Returns the percentage of characters in `dnaString.str` that are
  ## 'G' or 'C' (upper-case only), as a value between 0 and 100.
  ##
  ## An empty sequence yields 0.0. Without this guard the computation
  ## would be 0 / 0, which produces NaN rather than a usable percentage.
  let total = dnaString.str.len
  if total == 0:
    return 0.0

  let gcCount = dnaString.str.countIt(it in {'G', 'C'})
  # `/` on two ints performs floating-point division.
  return (gcCount / total) * 100
|
||||
|
||||
proc calcMaxGcContent(stream: Stream): (DnaString, float) =
  ## Parses every record remaining in `stream` and returns the one whose GC
  ## content is highest, together with that GC content.
  ##
  ## On ties the earliest record wins, because a later record replaces the
  ## current best only when its value is strictly greater. If no record
  ## beats the initial value, the result is `(nil, -1.0)`.
  ##
  ## Errors raised by `parseNextDnaString` propagate to the caller.
  result[1] = -1.0 # result[0] starts out as nil
  while true:
    if stream.atEnd:
      break
    let candidate = parseNextDnaString(stream)
    let candidateGc = calcGcContent(candidate)
    if candidateGc > result[1]:
      result[0] = candidate
      result[1] = candidateGc
|
||||
|
||||
# Entry point: reads the file named by the first command-line argument and
# prints the name of the record with the highest GC content, followed by
# that GC content on the next line.

# Without this check, paramStr(1) fails with an index error when no
# argument is supplied.
if paramCount() < 1:
  quit("Usage: gc <fasta-file>", QuitFailure)

let fileName = paramStr(1)
let fileStream = newFileStream(fileName)

# newFileStream returns nil when the file cannot be opened; report it
# instead of exiting silently with a success status.
if isNil(fileStream):
  quit("Cannot open file: " & fileName, QuitFailure)

var exitCode = QuitSuccess
try:
  let maxGcContent = calcMaxGcContent(fileStream)
  if isNil(maxGcContent[0]):
    # No record was selected (e.g. empty file); avoid dereferencing nil.
    stderr.writeLine("No DNA strings found in: " & fileName)
    exitCode = QuitFailure
  else:
    echo maxGcContent[0].name
    echo maxGcContent[1]
finally:
  # Close the stream even if parsing raised.
  fileStream.close()

quit(exitCode)
|
9
06_gc/sample.txt
Normal file
9
06_gc/sample.txt
Normal file
|
@ -0,0 +1,9 @@
|
|||
>Rosalind_6404
|
||||
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
|
||||
TCCCACTAATAATTCTGAGG
|
||||
>Rosalind_5959
|
||||
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
|
||||
ATATCCATTTGTCAGCAGACACGC
|
||||
>Rosalind_0808
|
||||
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
|
||||
TGGGAACCTGCGGGCAGTAGGTGGAAT
|
Loading…
Add table
Add a link
Reference in a new issue