Computing GC content

This commit is contained in:
Alberto Venturini 2024-09-03 14:34:46 +02:00
parent 517b6e0070
commit b340f05ef4
2 changed files with 64 additions and 0 deletions

55
06_gc/gc.nim Normal file
View file

@ -0,0 +1,55 @@
import os
import std/sequtils
import std/streams
import nre
import std/strformat
type
  DnaStringObj = object ## Payload of one named DNA record.
    name: string ## Identifier of the record.
    str: string  ## The sequence characters of the record.
  DnaString = ref DnaStringObj ## Reference handle to a `DnaStringObj`.
# Pattern for a record header line: a '>' immediately followed by one or more
# word characters (the first capture group, used as the record name),
# optionally followed by whitespace and further text, which is not captured.
let firstLinePattern = re">(\w+)(?:\s+.+)?"
proc parseNextDnaString(stream: Stream): DnaString =
  ## Reads a single record from `stream` and returns it as a `DnaString`.
  ##
  ## The first line read must match `firstLinePattern`; its first capture
  ## group becomes the record name. Every following line, up to (but not
  ## including) the next line starting with '>' or the end of the stream, is
  ## appended to the sequence text.
  ##
  ## Raises `ValueError` when the stream is already at its end or when the
  ## first line does not match the header pattern.
  if stream.atEnd():
    raise newException(ValueError, "Stream is at end")

  let header = stream.readLine()
  let headerMatch = header.match(firstLinePattern)
  if headerMatch.isNone:
    raise newException(ValueError, "Invalid line: " & header)

  # Collect the sequence lines; peeking leaves the next header unread so the
  # following call can consume it.
  var bases = ""
  while not stream.atEnd() and stream.peekChar() != '>':
    bases.add(stream.readLine())

  result = DnaString(name: headerMatch.get.captures[0], str: bases)
proc calcGcContent(dnaString: DnaString): float =
  ## Returns the percentage (0..100) of characters in `dnaString.str` that are
  ## 'G' or 'C'.
  ##
  ## Both upper- and lowercase letters are counted. An empty sequence yields
  ## 0.0 rather than the NaN that dividing zero by zero would produce.
  let total = dnaString.str.len
  if total == 0:
    # Guard against 0 / 0, which evaluates to NaN for float division.
    return 0.0
  let gcCount = dnaString.str.countIt(it in {'G', 'C', 'g', 'c'})
  # `/` on two ints performs float division.
  result = (gcCount / total) * 100.0
proc calcMaxGcContent(stream: Stream): (DnaString, float) =
  ## Parses records from `stream` until it is exhausted and returns the one
  ## with the highest GC content together with that value.
  ##
  ## Only a strictly greater value replaces the current best, so the earliest
  ## record wins a tie. If the stream holds no records, the result is
  ## `(nil, -1.0)`.
  result[1] = -1.0 # result[0] starts out as nil
  while not stream.atEnd():
    let candidate = stream.parseNextDnaString()
    let candidateGc = candidate.calcGcContent()
    if candidateGc > result[1]:
      result = (candidate, candidateGc)
# Program entry: reads the file named by the first command-line argument and
# prints the name of the record with the highest GC content, followed by that
# GC content value.
if paramCount() < 1:
  # Without this check paramStr(1) would fail on a missing argument.
  quit("Usage: gc <fasta-file>", QuitFailure)

let fileName = paramStr(1)
let fileStream = newFileStream(fileName)
if isNil(fileStream):
  # newFileStream returns nil when the file cannot be opened; report it
  # instead of exiting silently.
  quit("Cannot open file: " & fileName, QuitFailure)

var foundRecord = false
try:
  let (maxGcString, maxGcContent) = calcMaxGcContent(fileStream)
  # calcMaxGcContent returns a nil record when the input has no records.
  if not isNil(maxGcString):
    foundRecord = true
    echo maxGcString.name
    echo maxGcContent
finally:
  # Close the stream even when parsing raises.
  fileStream.close()

if not foundRecord:
  quit("No DNA strings found in: " & fileName, QuitFailure)

9
06_gc/sample.txt Normal file
View file

@ -0,0 +1,9 @@
>Rosalind_6404
CCTGCGGAAGATCGGCACTAGAATAGCCAGAACCGTTTCTCTGAGGCTTCCGGCCTTCCC
TCCCACTAATAATTCTGAGG
>Rosalind_5959
CCATCGGTAGCGCATCCTTAGTCCAATTAAGTCCCTATCCAGGCGCTCCGCCGAAGGTCT
ATATCCATTTGTCAGCAGACACGC
>Rosalind_0808
CCACCCTCGTGGTATGGCTAGGCATTCAGGAACCGGAGAACGCTTCAGACCAGCCCGGAC
TGGGAACCTGCGGGCAGTAGGTGGAAT