diff --git a/06_gc/gc.nim b/06_gc/gc.nim
index cca7270..f0897e1 100644
--- a/06_gc/gc.nim
+++ b/06_gc/gc.nim
@@ -2,7 +2,6 @@ import os
 import std/sequtils
 import std/streams
 import nre
-import std/strformat
 
 type
   DnaString = ref DnaStringObj
diff --git a/07_subs/sample.txt b/07_subs/sample.txt
new file mode 100644
index 0000000..a38c50d
--- /dev/null
+++ b/07_subs/sample.txt
@@ -0,0 +1,2 @@
+GATATATGCATATACTT
+ATAT
diff --git a/07_subs/subs.nim b/07_subs/subs.nim
new file mode 100644
index 0000000..70168dc
--- /dev/null
+++ b/07_subs/subs.nim
@@ -0,0 +1,38 @@
+import os
+import std/streams
+import std/sequtils
+from std/strutils import join
+
+proc findNeedleInHaystack(s, t: string): seq[int] =
+  ## Returns the 1-based start positions of every occurrence of `t` in `s`,
+  ## in increasing order. Overlapping occurrences are all reported.
+  # An empty needle would trivially "match" at every position; report none.
+  if t.len == 0:
+    return
+
+  # Only start offsets that leave room for the whole needle can match.
+  for i in 0 .. s.len - t.len:
+    var j = 0
+    while j < t.len and s[i+j] == t[j]:
+      inc j
+    if j == t.len:
+      result.add(i+1)
+
+proc parseStream(stream: Stream): (string, string) =
+  ## Reads the first two lines of `stream`: the haystack, then the needle.
+  let s = readLine(stream)
+  let t = readLine(stream)
+  return (s, t)
+
+let fileName = paramStr(1)
+let fileStream = newFileStream(fileName)
+if isNil(fileStream):
+  quit("Cannot open file: " & fileName)
+
+# Close the stream even if parsing raises.
+try:
+  let (s, t) = parseStream(fileStream)
+  let occurrences = findNeedleInHaystack(s, t)
+  echo occurrences.mapIt($it).join(" ")
+finally:
+  fileStream.close()
\ No newline at end of file
diff --git a/08_grph/README.md b/08_grph/README.md
new file mode 100644
index 0000000..9420574
--- /dev/null
+++ b/08_grph/README.md
@@ -0,0 +1,3 @@
+# Overlap Graphs
+
+https://rosalind.info/problems/grph/
diff --git a/08_grph/grph.nim b/08_grph/grph.nim
new file mode 100644
index 0000000..9fbfe71
--- /dev/null
+++ b/08_grph/grph.nim
@@ -0,0 +1,103 @@
+import os
+import std/streams
+# import re
+import nre
+import std/tables
+
+const k = 3
+
+# let fileName = paramStr(1)
+# var dna = readFile(fileName)
+
+type
+  Node* = tuple[name: string, dna: string]
+  AdjacencyList = seq[tuple[v: Node, w: Node]]
+  AffixTable* = Table[string, seq[Node]]
+
+var adjList: ref AdjacencyList = seq[tuple[v: Node, w: Node]].new()
+
+var nodesByPrefix: ref AffixTable = newTable[string, seq[Node]]()
+var nodesBySuffix: ref AffixTable = newTable[string, seq[Node]]()
+
+let firstLinePattern = re">(\w+)(?:\s+.+)?"
+
+proc parseNextNode*(stream: Stream): Node =
+  ## Reads one record from `stream`: a `>name` header line followed by the
+  ## sequence lines up to the next `>` or the end of the stream, which are
+  ## concatenated. Raises `ValueError` when the stream is already at its end
+  ## or the header line does not match `firstLinePattern`.
+  if atEnd(stream):
+    raise newException(ValueError, "Stream is at end")
+
+  let firstLine = readLine(stream)
+  let match = firstLine.match(firstLinePattern)
+  if match.isNone:
+    raise newException(ValueError, "Invalid line: " & firstLine)
+
+  let name = match.get.captures[0]
+  var dna = ""
+
+  while not atEnd(stream) and peekChar(stream) != '>':
+    dna &= readLine(stream)
+
+  (name, dna)
+
+func affixesOf(node: Node): tuple[prefix, suffix: string] =
+  ## Returns the first and the last `k` characters of the node's sequence.
+  ## Raises `ValueError` when the sequence is shorter than `k`, instead of
+  ## failing with an index defect on the slices below.
+  if node.dna.len < k:
+    raise newException(ValueError,
+      "Sequence of " & node.name & " is shorter than " & $k)
+  (node.dna[0 ..< k], node.dna[^k .. ^1])
+
+proc addNodeToTables*(node: Node, prefixes, suffixes: ref AffixTable) =
+  ## Registers `node` in `prefixes` under its length-`k` prefix and in
+  ## `suffixes` under its length-`k` suffix.
+  let (prefix, suffix) = affixesOf(node)
+
+  prefixes.mgetOrPut(prefix, newSeq[Node]()).add(node)
+  suffixes.mgetOrPut(suffix, newSeq[Node]()).add(node)
+
+proc updateAdjacencyList*(node: Node, prefixes, suffixes: ref AffixTable, adjList: ref AdjacencyList) =
+  ## Appends to `adjList` every edge between `node` and the nodes present in
+  ## the tables: `(node, n)` when `node`'s suffix equals `n`'s prefix, and
+  ## `(n, node)` when `n`'s suffix equals `node`'s prefix.
+  ## Nodes sharing `node`'s name are skipped, so no self-loop is added.
+  let (prefix, suffix) = affixesOf(node)
+
+  if prefixes.hasKey(suffix):
+    for n in items(prefixes[suffix]):
+      if node.name != n.name:
+        adjList[].add((node, n))
+
+  if suffixes.hasKey(prefix):
+    for n in items(suffixes[prefix]):
+      if node.name != n.name:
+        adjList[].add((n, node))
+
+proc processInputStream(stream: Stream) =
+  ## Parses every record in `stream`, indexing each node and recording its
+  ## edges in the module-level tables and adjacency list.
+  while not atEnd(stream):
+    let node = parseNextNode(stream)
+    addNodeToTables(node, nodesByPrefix, nodesBySuffix)
+    updateAdjacencyList(node, nodesByPrefix, nodesBySuffix, adjList)
+
+
+when isMainModule:
+  # Guarded so that importing this module (as grph_test does) neither reads
+  # a command-line argument nor prints anything.
+  let fileName = paramStr(1)
+  let fileStream = newFileStream(fileName)
+  if isNil(fileStream):
+    quit("Cannot open file: " & fileName)
+
+  # Close the stream even if a record fails to parse.
+  try:
+    processInputStream(fileStream)
+
+    for t in items(adjList[]):
+      echo t.v.name & " " & t.w.name
+  finally:
+    fileStream.close()
\ No newline at end of file
diff --git a/08_grph/grph_test.nim b/08_grph/grph_test.nim
new file mode 100644
index 0000000..a700b79
--- /dev/null
+++ b/08_grph/grph_test.nim
@@ -0,0 +1,46 @@
+import std/sequtils
+import std/streams
+import std/tables
+import unittest
+
+import grph
+
+suite "Overlap Graphs":
+  test "parseNextNode_emptyString_raisesException":
+    expect(ValueError):
+      discard parseNextNode(newStringStream(""))
+
+  test "parseNextNode_withInvalidLine_raisesException":
+    expect(ValueError):
+      discard parseNextNode(newStringStream("Not a valid line"))
+
+  test "parseNextNode_withValidLine_returnsNode":
+    var stream: StringStream = newStringStream("""
+>Rosalind_0498 something
+AAATAAA
+""")
+    check parseNextNode(stream) == ("Rosalind_0498", "AAATAAA")
+
+  test "addNodeToTables_withValidNode_addsNodeToTables":
+    var prefixes = newTable[string, seq[Node]]()
+    var suffixes = newTable[string, seq[Node]]()
+    let node = ("Rosalind_0498", "AAATGGG")
+
+    addNodeToTables(node, prefixes, suffixes)
+
+    check any(prefixes["AAA"], proc(n: Node): bool = n == node)
+    check any(suffixes["GGG"], proc(n: Node): bool = n == node)
+
+  test "updateAdjacencyList_addsNodeCorrectly":
+    let rosalind_0498 = ("Rosalind_0498", "AAATAAA")
+    let rosalind_2391 = ("Rosalind_2391", "AAATTTT")
+    var prefixes = newTable[string, seq[Node]]()
+    var suffixes = newTable[string, seq[Node]]()
+    var adjList = seq[tuple[v: Node, w: Node]].new()
+
+    addNodeToTables(rosalind_0498, prefixes, suffixes)
+    updateAdjacencyList(rosalind_0498, prefixes, suffixes, adjList)
+    addNodeToTables(rosalind_2391, prefixes, suffixes)
+    updateAdjacencyList(rosalind_2391, prefixes, suffixes, adjList)
+
+    check adjList[].any(proc (nodes: tuple[v: Node, w: Node]): bool = nodes.v.name == "Rosalind_0498")
\ No newline at end of file
diff --git a/08_grph/sample.txt b/08_grph/sample.txt
new file mode 100644
index 0000000..68b7cb0
--- /dev/null
+++ b/08_grph/sample.txt
@@ -0,0 +1,10 @@
+>Rosalind_0498
+AAATAAA
+>Rosalind_2391
+AAATTTT
+>Rosalind_2323
+TTTTCCC
+>Rosalind_0442
+AAATCCC
+>Rosalind_5013
+GGGTGGG
\ No newline at end of file