diff --git a/12_cons/cons.nim b/12_cons/cons.nim new file mode 100644 index 0000000..ef59693 --- /dev/null +++ b/12_cons/cons.nim @@ -0,0 +1,90 @@ +import os +import std/sequtils +import std/streams +import nre +import std/enumutils +import std/strutils + +type + DnaString = ref DnaStringObj + DnaStringObj = object + name: string + str: string + Base = enum + A, C, G, T + ProfileMatrix = array[Base, seq[int]] + + +template useStream(stream: Stream, body: untyped) = + if not isNil(stream): + try: + body + finally: + stream.close() + +let firstLinePattern = re">(\w+)(?:\s+.+)?" +proc parseNextDnaString(stream: Stream): DnaString = + if atEnd(stream): + raise newException(ValueError, "Stream is at end") + + let firstLine = readLine(stream) + let match = firstLine.match(firstLinePattern) + if match.isNone: + raise newException(ValueError, "Invalid line: " & firstLine) + + let name = match.get.captures[0] + var str = "" + + while not atEnd(stream) and peekChar(stream) != '>': + str &= readLine(stream) + + DnaString(name: name, str: str) + +proc calcProfileMatrix(stream: Stream): ProfileMatrix = + var m: ProfileMatrix + var firstString: bool = true + + useStream(stream): + var dnaString = parseNextDnaString(stream) + + m[A] = newSeq[int](dnaString.str.len) + m[C] = newSeq[int](dnaString.str.len) + m[G] = newSeq[int](dnaString.str.len) + m[T] = newSeq[int](dnaString.str.len) + + while true: + for i in 0 ..< dnaString.str.len: + let base = parseEnum[Base]($dnaString.str[i]) + inc m[base][i] + if atEnd(stream): + break + dnaString = parseNextDnaString(stream) + + return m + + +proc toString(m: ProfileMatrix): string = + echo "A: " & m[A].mapIt($it).join(" ") + echo "C: " & m[C].mapIt($it).join(" ") + echo "G: " & m[G].mapIt($it).join(" ") + echo "T: " & m[T].mapIt($it).join(" ") + +proc getOneConsensusString(m: ProfileMatrix): string = + var s = "" + for i in 0 ..< m[A].len: + var maxCount = 0 + var maxBase = A + for j in items(Base): + if m[j][i] > maxCount: + maxCount = m[j][i] + maxBase = j + s.add($maxBase) + return s + +let fileName = paramStr(1) +let fileStream = newFileStream(fileName) +let profileMatrix = calcProfileMatrix(fileStream) +let consensusString = getOneConsensusString(profileMatrix) + +echo consensusString +echo profileMatrix.toString \ No newline at end of file