Consensus and Profile
This commit is contained in:
parent
e94ff2985c
commit
11aa3919f6
1 changed files with 90 additions and 0 deletions
90
12_cons/cons.nim
Normal file
90
12_cons/cons.nim
Normal file
|
@ -0,0 +1,90 @@
|
|||
import os
|
||||
import std/sequtils
|
||||
import std/streams
|
||||
import nre
|
||||
import std/enumutils
|
||||
import std/strutils
|
||||
|
||||
type
|
||||
DnaString = ref DnaStringObj
|
||||
DnaStringObj = object
|
||||
name: string
|
||||
str: string
|
||||
Base = enum
|
||||
A, C, G, T
|
||||
ProfileMatrix = array[Base, seq[int]]
|
||||
|
||||
|
||||
template useStream(stream: Stream, body: untyped) =
|
||||
if not isNil(stream):
|
||||
try:
|
||||
body
|
||||
finally:
|
||||
stream.close()
|
||||
|
||||
let firstLinePattern = re">(\w+)(?:\s+.+)?"
|
||||
proc parseNextDnaString(stream: Stream): DnaString =
|
||||
if atEnd(stream):
|
||||
raise newException(ValueError, "Stream is at end")
|
||||
|
||||
let firstLine = readLine(stream)
|
||||
let match = firstLine.match(firstLinePattern)
|
||||
if match.isNone:
|
||||
raise newException(ValueError, "Invalid line: " & firstLine)
|
||||
|
||||
let name = match.get.captures[0]
|
||||
var str = ""
|
||||
|
||||
while not atEnd(stream) and peekChar(stream) != '>':
|
||||
str &= readLine(stream)
|
||||
|
||||
DnaString(name: name, str: str)
|
||||
|
||||
proc calcProfileMatrix(stream: Stream): ProfileMatrix =
|
||||
var m: ProfileMatrix
|
||||
var firstString: bool = true
|
||||
|
||||
useStream(stream):
|
||||
var dnaString = parseNextDnaString(stream)
|
||||
|
||||
m[A] = newSeq[int](dnaString.str.len)
|
||||
m[C] = newSeq[int](dnaString.str.len)
|
||||
m[G] = newSeq[int](dnaString.str.len)
|
||||
m[T] = newSeq[int](dnaString.str.len)
|
||||
|
||||
while true:
|
||||
for i in 0 ..< dnaString.str.len:
|
||||
let base = parseEnum[Base]($dnaString.str[i])
|
||||
inc m[base][i]
|
||||
if atEnd(stream):
|
||||
break
|
||||
dnaString = parseNextDnaString(stream)
|
||||
|
||||
return m
|
||||
|
||||
|
||||
proc toString(m: ProfileMatrix): string =
|
||||
echo "A: " & m[A].mapIt($it).join(" ")
|
||||
echo "C: " & m[C].mapIt($it).join(" ")
|
||||
echo "G: " & m[G].mapIt($it).join(" ")
|
||||
echo "T: " & m[T].mapIt($it).join(" ")
|
||||
|
||||
proc getOneConsensusString(m: ProfileMatrix): string =
|
||||
var s = ""
|
||||
for i in 0 ..< m[A].len:
|
||||
var maxCount = 0
|
||||
var maxBase = A
|
||||
for j in items(Base):
|
||||
if m[j][i] > maxCount:
|
||||
maxCount = m[j][i]
|
||||
maxBase = j
|
||||
s.add($maxBase)
|
||||
return s
|
||||
|
||||
let fileName = paramStr(1)
|
||||
let fileStream = newFileStream(fileName)
|
||||
let profileMatrix = calcProfileMatrix(fileStream)
|
||||
let consensusString = getOneConsensusString(profileMatrix)
|
||||
|
||||
echo consensusString
|
||||
echo profileMatrix.toString
|
Loading…
Add table
Add a link
Reference in a new issue