Consensus and Profile

This commit is contained in:
Alberto Venturini 2024-09-07 06:06:36 +02:00
parent e94ff2985c
commit 11aa3919f6

90
12_cons/cons.nim Normal file
View file

@ -0,0 +1,90 @@
import os
import std/sequtils
import std/streams
import nre
import std/enumutils
import std/strutils
type
DnaString = ref DnaStringObj
DnaStringObj = object
name: string
str: string
Base = enum
A, C, G, T
ProfileMatrix = array[Base, seq[int]]
template useStream(stream: Stream, body: untyped) =
if not isNil(stream):
try:
body
finally:
stream.close()
let firstLinePattern = re">(\w+)(?:\s+.+)?"
proc parseNextDnaString(stream: Stream): DnaString =
if atEnd(stream):
raise newException(ValueError, "Stream is at end")
let firstLine = readLine(stream)
let match = firstLine.match(firstLinePattern)
if match.isNone:
raise newException(ValueError, "Invalid line: " & firstLine)
let name = match.get.captures[0]
var str = ""
while not atEnd(stream) and peekChar(stream) != '>':
str &= readLine(stream)
DnaString(name: name, str: str)
proc calcProfileMatrix(stream: Stream): ProfileMatrix =
var m: ProfileMatrix
var firstString: bool = true
useStream(stream):
var dnaString = parseNextDnaString(stream)
m[A] = newSeq[int](dnaString.str.len)
m[C] = newSeq[int](dnaString.str.len)
m[G] = newSeq[int](dnaString.str.len)
m[T] = newSeq[int](dnaString.str.len)
while true:
for i in 0 ..< dnaString.str.len:
let base = parseEnum[Base]($dnaString.str[i])
inc m[base][i]
if atEnd(stream):
break
dnaString = parseNextDnaString(stream)
return m
proc toString(m: ProfileMatrix): string =
echo "A: " & m[A].mapIt($it).join(" ")
echo "C: " & m[C].mapIt($it).join(" ")
echo "G: " & m[G].mapIt($it).join(" ")
echo "T: " & m[T].mapIt($it).join(" ")
proc getOneConsensusString(m: ProfileMatrix): string =
var s = ""
for i in 0 ..< m[A].len:
var maxCount = 0
var maxBase = A
for j in items(Base):
if m[j][i] > maxCount:
maxCount = m[j][i]
maxBase = j
s.add($maxBase)
return s
let fileName = paramStr(1)
let fileStream = newFileStream(fileName)
let profileMatrix = calcProfileMatrix(fileStream)
let consensusString = getOneConsensusString(profileMatrix)
echo consensusString
echo profileMatrix.toString