Add subs and grph

This commit is contained in:
Alberto Venturini 2024-09-04 06:54:24 +02:00
parent b340f05ef4
commit 49127ae4ef
7 changed files with 174 additions and 1 deletions

View file

@ -2,7 +2,6 @@ import os
import std/sequtils
import std/streams
import nre
import std/strformat
type
DnaString = ref DnaStringObj

2
07_subs/sample.txt Normal file
View file

@ -0,0 +1,2 @@
GATATATGCATATACTT
ATAT

29
07_subs/subs.nim Normal file
View file

@ -0,0 +1,29 @@
import os
import std/streams
import std/sequtils
from std/strutils import join
proc findNeedleInHaystack(s, t: string): seq[int] =
  ## Returns the 1-based start positions of every occurrence of `t` inside
  ## `s`, in ascending order. Overlapping occurrences are all reported.
  result = @[]
  for start in 0 ..< s.len:
    var matched = 0
    # Advance while the characters agree and neither string has run out.
    while matched < t.len and start + matched < s.len:
      if s[start + matched] != t[matched]:
        break
      inc matched
    # Only a full-length match counts; positions are reported 1-based.
    if matched == t.len:
      result.add(start + 1)
proc parseStream(stream: Stream): (string, string) =
  ## Reads the next two lines from `stream` and returns them in reading
  ## order: the first line read is element 0, the second is element 1.
  result[0] = stream.readLine()
  result[1] = stream.readLine()
# Command-line entry point: reads the haystack and the needle from the file
# named by the first argument and prints the 1-based match positions
# separated by single spaces.
when isMainModule:
  # Guarded so that importing this module never reads arguments or files.
  if paramCount() < 1:
    quit("Usage: subs <input-file>")

  let fileName = paramStr(1)
  let fileStream = newFileStream(fileName)
  if isNil(fileStream):
    # newFileStream yields nil when the file cannot be opened; report it
    # instead of exiting silently with no output.
    quit("Could not open file: " & fileName)

  try:
    let (s, t) = parseStream(fileStream)
    let occurrences = findNeedleInHaystack(s, t)
    echo occurrences.mapIt($it).join(" ")
  finally:
    # Close the stream even if reading or searching raised.
    fileStream.close()

3
08_grph/README.md Normal file
View file

@ -0,0 +1,3 @@
# Overlap Graphs
https://rosalind.info/problems/grph/

84
08_grph/grph.nim Normal file
View file

@ -0,0 +1,84 @@
import os
import std/streams
# import re
import nre
import std/tables
# Number of characters taken from each end of a sequence when comparing
# suffixes against prefixes.
const k = 3
# let fileName = paramStr(1)
# var dna = readFile(fileName)

type
  # A parsed record: the label from a '>' header line and its sequence.
  Node* = tuple[name, dna: string]
  # Directed edges stored as (v, w) pairs.
  AdjacencyList = seq[tuple[v, w: Node]]
  # Maps a k-character prefix or suffix to the nodes that carry it.
  AffixTable* = Table[string, seq[Node]]

# Module-level state filled in while the input stream is processed.
var
  adjList: ref AdjacencyList = AdjacencyList.new()
  nodesByPrefix: ref AffixTable = newTable[string, seq[Node]]()
  nodesBySuffix: ref AffixTable = newTable[string, seq[Node]]()

# Header line: '>' followed by the name, optionally followed by whitespace
# and arbitrary trailing text.
let firstLinePattern = re">(\w+)(?:\s+.+)?"
proc parseNextNode*(stream: Stream): Node =
  ## Reads one record from `stream`: a header line matching
  ## `firstLinePattern`, followed by zero or more sequence lines that are
  ## concatenated into the node's `dna`.
  ##
  ## Raises `ValueError` when the stream is already at its end or when the
  ## header line does not match the pattern.
  if stream.atEnd:
    raise newException(ValueError, "Stream is at end")

  let header = stream.readLine()
  let parsed = header.match(firstLinePattern)
  if parsed.isNone:
    raise newException(ValueError, "Invalid line: " & header)

  result.name = parsed.get.captures[0]
  # Sequence data may span several lines; stop at end of input or when the
  # next line starts with '>' (peeked, so that line is left unread).
  while true:
    if stream.atEnd or stream.peekChar == '>':
      break
    result.dna.add(stream.readLine())
proc addNodeToTables*(node: Node, prefixes, suffixes: ref AffixTable) =
  ## Registers `node` under its first `k` characters in `prefixes` and under
  ## its last `k` characters in `suffixes`, creating the bucket for a key the
  ## first time it is seen.
  let head = node.dna[0 ..< k]
  let tail = node.dna[^k .. ^1]
  # mgetOrPut inserts an empty bucket if the key is new, then hands back the
  # stored seq so the node can be appended in place.
  prefixes.mgetOrPut(head, newSeq[Node]()).add(node)
  suffixes.mgetOrPut(tail, newSeq[Node]()).add(node)
proc updateAdjacencyList*(node: Node,
                          prefixes, suffixes: ref AffixTable,
                          adjList: ref AdjacencyList) =
  ## Appends to `adjList` every edge between `node` and the nodes already
  ## recorded in the tables: `(node, n)` for each `n` whose prefix equals
  ## `node`'s suffix, then `(n, node)` for each `n` whose suffix equals
  ## `node`'s prefix. Entries with the same name as `node` are skipped.
  let head = node.dna[0 ..< k]
  let tail = node.dna[^k .. ^1]

  # Edges leaving `node`: its suffix matches another node's prefix.
  if tail in prefixes:
    for successor in prefixes[tail]:
      if successor.name != node.name:
        adjList[].add((node, successor))

  # Edges entering `node`: another node's suffix matches its prefix.
  if head in suffixes:
    for predecessor in suffixes[head]:
      if predecessor.name != node.name:
        adjList[].add((predecessor, node))
proc processInputStream(stream: Stream) =
  ## Consumes `stream` record by record. Each parsed node is first added to
  ## the module-level prefix/suffix tables and then used to extend the
  ## module-level adjacency list.
  while not stream.atEnd:
    let parsed = stream.parseNextNode()
    parsed.addNodeToTables(nodesByPrefix, nodesBySuffix)
    parsed.updateAdjacencyList(nodesByPrefix, nodesBySuffix, adjList)
# Command-line entry point: builds the overlap graph for the file named by
# the first argument and prints one "tail head" pair of node names per edge.
#
# Guarded with `isMainModule` so that importing this module (for example
# from a unit test) does not read command-line arguments or open files.
when isMainModule:
  if paramCount() < 1:
    quit("Usage: grph <input-file>")

  let fileName = paramStr(1)
  let fileStream = newFileStream(fileName)
  if isNil(fileStream):
    # newFileStream yields nil when the file cannot be opened; report it
    # instead of exiting silently with no output.
    quit("Could not open file: " & fileName)

  try:
    processInputStream(fileStream)
    for t in items(adjList[]):
      echo t.v.name & " " & t.w.name
  finally:
    # Close the stream even if parsing raised.
    fileStream.close()

46
08_grph/grph_test.nim Normal file
View file

@ -0,0 +1,46 @@
import std/sequtils
import std/streams
import std/tables
import unittest
import grph
suite "Overlap Graphs":
  # Unit tests for the parsing and graph-building procs exported by `grph`.

  test "parseNextNode_emptyString_raisesException":
    expect(ValueError):
      discard parseNextNode(newStringStream(""))

  test "parseNextNode_withInvalidLine_raisesException":
    expect(ValueError):
      discard parseNextNode(newStringStream("Not a valid line"))

  test "parseNextNode_withValidLine_returnsNode":
    # The literal's lines deliberately start at column 0: any leading
    # whitespace would become part of the header line being parsed.
    let stream: StringStream = newStringStream("""
>Rosalind_0498 something
AAATAAA
""")
    check parseNextNode(stream) == ("Rosalind_0498", "AAATAAA")

  test "addNodeToTables_withValidNode_addsNodeToTables":
    var prefixes = newTable[string, seq[Node]]()
    var suffixes = newTable[string, seq[Node]]()
    let node = ("Rosalind_0498", "AAATGGG")
    addNodeToTables(node, prefixes, suffixes)
    check any(prefixes["AAA"], proc(n: Node): bool = n == node)
    check any(suffixes["GGG"], proc(n: Node): bool = n == node)

  test "updateAdjacencyList_addsNodeCorrectly":
    let rosalind_0498 = ("Rosalind_0498", "AAATAAA")
    let rosalind_2391 = ("Rosalind_2391", "AAATTTT")
    var prefixes = newTable[string, seq[Node]]()
    var suffixes = newTable[string, seq[Node]]()
    var adjList = seq[tuple[v: Node, w: Node]].new()
    addNodeToTables(rosalind_0498, prefixes, suffixes)
    updateAdjacencyList(rosalind_0498, prefixes, suffixes, adjList)
    addNodeToTables(rosalind_2391, prefixes, suffixes)
    updateAdjacencyList(rosalind_2391, prefixes, suffixes, adjList)
    check adjList[].any(proc (nodes: tuple[v: Node, w: Node]): bool = nodes.v.name == "Rosalind_0498")
    # Pin the exact result: the suffix "AAA" of Rosalind_0498 matches the
    # prefix "AAA" of Rosalind_2391, and nothing else overlaps, so there must
    # be exactly one edge, with no self-loop for Rosalind_0498.
    let edges = adjList[]
    require edges.len == 1
    check edges[0].v.name == "Rosalind_0498"
    check edges[0].w.name == "Rosalind_2391"

10
08_grph/sample.txt Normal file
View file

@ -0,0 +1,10 @@
>Rosalind_0498
AAATAAA
>Rosalind_2391
AAATTTT
>Rosalind_2323
TTTTCCC
>Rosalind_0442
AAATCCC
>Rosalind_5013
GGGTGGG