Add subs and grph
This commit is contained in:
parent
b340f05ef4
commit
49127ae4ef
7 changed files with 174 additions and 1 deletions
|
@ -2,7 +2,6 @@ import os
|
|||
import std/sequtils
|
||||
import std/streams
|
||||
import nre
|
||||
import std/strformat
|
||||
|
||||
type
|
||||
DnaString = ref DnaStringObj
|
||||
|
|
2
07_subs/sample.txt
Normal file
2
07_subs/sample.txt
Normal file
|
@ -0,0 +1,2 @@
|
|||
GATATATGCATATACTT
|
||||
ATAT
|
29
07_subs/subs.nim
Normal file
29
07_subs/subs.nim
Normal file
|
@ -0,0 +1,29 @@
|
|||
import os
|
||||
import std/streams
|
||||
import std/sequtils
|
||||
from std/strutils import join
|
||||
|
||||
func findNeedleInHaystack(s, t: string): seq[int] =
  ## Returns the 1-based start position of every occurrence of `t` in `s`,
  ## in ascending order. Overlapping occurrences are all reported.
  ##
  ## An empty `t`, or a `t` longer than `s`, yields an empty result.
  if t.len > 0 and t.len <= s.len:
    # A start beyond `s.len - t.len` cannot hold the whole needle, so the
    # scan stops there and no per-character bounds check is needed.
    for start in 0 .. s.len - t.len:
      var matched = 0
      while matched < t.len and s[start + matched] == t[matched]:
        inc matched
      if matched == t.len:
        result.add(start + 1) # positions are reported 1-based
|
||||
|
||||
proc parseStream(stream: Stream): (string, string) =
  ## Reads the first two lines of `stream` and returns them in order:
  ## the string to search in, then the substring to search for.
  let haystack = readLine(stream)
  let needle = readLine(stream)
  (haystack, needle)
|
||||
|
||||
# Entry point: read the two input lines from the file named by the first
# command-line argument and print the 1-based match positions separated
# by single spaces.
if paramCount() < 1:
  quit("usage: subs <input-file>", QuitFailure)

let fileName = paramStr(1)
let fileStream = newFileStream(fileName)
# newFileStream returns nil when the file cannot be opened.
if isNil(fileStream):
  quit("cannot open file: " & fileName, QuitFailure)

try:
  let (s, t) = parseStream(fileStream)
  let occurrences = findNeedleInHaystack(s, t)
  echo occurrences.mapIt($it).join(" ")
finally:
  # Release the file handle even if parsing or searching raises.
  fileStream.close()
|
3
08_grph/README.md
Normal file
3
08_grph/README.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Overlap Graphs
|
||||
|
||||
https://rosalind.info/problems/grph/
|
84
08_grph/grph.nim
Normal file
84
08_grph/grph.nim
Normal file
|
@ -0,0 +1,84 @@
|
|||
import os
|
||||
import std/streams
|
||||
# import re
|
||||
import nre
|
||||
import std/tables
|
||||
|
||||
# Length of the prefix and suffix slices taken from each node's DNA string
# when indexing nodes and looking for overlaps.
const k = 3
|
||||
|
||||
# let fileName = paramStr(1)
|
||||
# var dna = readFile(fileName)
|
||||
|
||||
type
  # A labelled DNA string: the record name and its sequence.
  Node* = tuple[name: string, dna: string]
  # Directed edges `v -> w` between nodes. Exported because the exported
  # `updateAdjacencyList` takes a `ref AdjacencyList` parameter.
  AdjacencyList* = seq[tuple[v: Node, w: Node]]
  # Maps a prefix or suffix string to the nodes that carry it.
  AffixTable* = Table[string, seq[Node]]

# Module-level state filled in while the input stream is processed.
let adjList: ref AdjacencyList = AdjacencyList.new()

let nodesByPrefix: ref AffixTable = newTable[string, seq[Node]]()
let nodesBySuffix: ref AffixTable = newTable[string, seq[Node]]()

# Header line: '>' followed by the record name, optionally followed by
# whitespace and free text. The name is the first capture group.
let firstLinePattern = re">(\w+)(?:\s+.+)?"
|
||||
|
||||
proc parseNextNode*(stream: Stream): Node =
  ## Reads one record from `stream`: a header line matching
  ## `firstLinePattern`, followed by sequence lines that are concatenated
  ## until the next line starting with '>' or the end of the stream.
  ##
  ## Raises `ValueError` when the stream is already at its end or when the
  ## header line does not match the expected pattern.
  if atEnd(stream):
    raise newException(ValueError, "Stream is at end")

  let firstLine = readLine(stream)
  let match = firstLine.match(firstLinePattern)
  if match.isNone:
    raise newException(ValueError, "Invalid line: " & firstLine)

  let name = match.get.captures[0]
  var dna = ""

  # Peek rather than read so the next record's header stays in the stream.
  while not atEnd(stream) and peekChar(stream) != '>':
    dna &= readLine(stream)

  (name, dna)
|
||||
|
||||
proc addNodeToTables*(node: Node, prefixes, suffixes: ref AffixTable) =
  ## Registers `node` in `prefixes` under the first `k` characters of its
  ## DNA string and in `suffixes` under the last `k` characters.
  ##
  ## A node whose DNA string is shorter than `k` has no such prefix or
  ## suffix and is skipped instead of raising an index defect.
  if node.dna.len < k:
    return

  let prefix = node.dna[0 ..< k]
  let suffix = node.dna[^k .. ^1]

  # mgetOrPut creates the bucket on first use and returns it for appending.
  prefixes.mgetOrPut(prefix, newSeq[Node]()).add(node)
  suffixes.mgetOrPut(suffix, newSeq[Node]()).add(node)
|
||||
|
||||
proc updateAdjacencyList*(node: Node, prefixes, suffixes: ref AffixTable,
                          adjList: ref AdjacencyList) =
  ## Appends to `adjList` every edge between `node` and the nodes already
  ## registered in the tables:
  ##
  ## * `(node, n)` for each `n` whose prefix equals `node`'s suffix;
  ## * `(n, node)` for each `n` whose suffix equals `node`'s prefix.
  ##
  ## Entries with the same name as `node` are ignored, so a node never
  ## links to itself. A node whose DNA string is shorter than `k` cannot
  ## overlap anything and contributes no edges.
  if node.dna.len < k:
    return

  let prefix = node.dna[0 ..< k]
  let suffix = node.dna[^k .. ^1]

  # Outgoing edges: node's tail matches another node's head.
  if suffix in prefixes:
    for successor in items(prefixes[suffix]):
      if node.name != successor.name:
        adjList[].add((node, successor))

  # Incoming edges: another node's tail matches node's head.
  if prefix in suffixes:
    for predecessor in items(suffixes[prefix]):
      if node.name != predecessor.name:
        adjList[].add((predecessor, node))
|
||||
|
||||
proc processInputStream(stream: Stream) =
  ## Parses records from `stream` until it is exhausted. Each node is first
  ## added to the module-level prefix/suffix tables and then linked against
  ## everything registered so far, with edges accumulating in the
  ## module-level `adjList`.
  while not atEnd(stream):
    let node = parseNextNode(stream)
    addNodeToTables(node, nodesByPrefix, nodesBySuffix)
    updateAdjacencyList(node, nodesByPrefix, nodesBySuffix, adjList)
|
||||
|
||||
|
||||
# Entry point. Guarded so that importing this module (as the test file
# does) does not try to read command-line arguments or open a file.
when isMainModule:
  if paramCount() < 1:
    quit("usage: grph <fasta-file>", QuitFailure)

  let fileName = paramStr(1)
  let fileStream = newFileStream(fileName)
  # newFileStream returns nil when the file cannot be opened.
  if isNil(fileStream):
    quit("cannot open file: " & fileName, QuitFailure)

  try:
    processInputStream(fileStream)

    # One edge per line: "<from-name> <to-name>".
    for t in items(adjList[]):
      echo t.v.name & " " & t.w.name
  finally:
    # Release the file handle even if parsing raises.
    fileStream.close()
|
46
08_grph/grph_test.nim
Normal file
46
08_grph/grph_test.nim
Normal file
|
@ -0,0 +1,46 @@
|
|||
import std/sequtils
|
||||
import std/streams
|
||||
import std/tables
|
||||
import unittest
|
||||
|
||||
import grph
|
||||
|
||||
suite "Overlap Graphs":
  test "parseNextNode_emptyString_raisesException":
    expect(ValueError):
      discard parseNextNode(newStringStream(""))

  test "parseNextNode_withInvalidLine_raisesException":
    expect(ValueError):
      discard parseNextNode(newStringStream("Not a valid line"))

  test "parseNextNode_withValidLine_returnsNode":
    # Explicit literal so the fixture cannot pick up stray indentation;
    # the header must start with '>' at the very beginning of the line.
    let stream = newStringStream(">Rosalind_0498 something\nAAATAAA\n")
    check parseNextNode(stream) == ("Rosalind_0498", "AAATAAA")

  test "addNodeToTables_withValidNode_addsNodeToTables":
    let prefixes = newTable[string, seq[Node]]()
    let suffixes = newTable[string, seq[Node]]()
    let node = ("Rosalind_0498", "AAATGGG")

    addNodeToTables(node, prefixes, suffixes)

    check prefixes["AAA"].anyIt(it == node)
    check suffixes["GGG"].anyIt(it == node)

  test "updateAdjacencyList_addsNodeCorrectly":
    let rosalind_0498 = ("Rosalind_0498", "AAATAAA")
    let rosalind_2391 = ("Rosalind_2391", "AAATTTT")
    let prefixes = newTable[string, seq[Node]]()
    let suffixes = newTable[string, seq[Node]]()
    let adjList = seq[tuple[v: Node, w: Node]].new()

    addNodeToTables(rosalind_0498, prefixes, suffixes)
    updateAdjacencyList(rosalind_0498, prefixes, suffixes, adjList)
    addNodeToTables(rosalind_2391, prefixes, suffixes)
    updateAdjacencyList(rosalind_2391, prefixes, suffixes, adjList)

    # The only overlap is the "AAA" suffix of 0498 with the "AAA" prefix
    # of 2391, so exactly one edge 0498 -> 2391 must exist.
    check adjList[].len == 1
    check adjList[][0].v.name == "Rosalind_0498"
    check adjList[][0].w.name == "Rosalind_2391"
|
10
08_grph/sample.txt
Normal file
10
08_grph/sample.txt
Normal file
|
@ -0,0 +1,10 @@
|
|||
>Rosalind_0498
|
||||
AAATAAA
|
||||
>Rosalind_2391
|
||||
AAATTTT
|
||||
>Rosalind_2323
|
||||
TTTTCCC
|
||||
>Rosalind_0442
|
||||
AAATCCC
|
||||
>Rosalind_5013
|
||||
GGGTGGG
|
Loading…
Add table
Add a link
Reference in a new issue