Consensus and Profile
This commit is contained in:
		
							parent
							
								
									e94ff2985c
								
							
						
					
					
						commit
						11aa3919f6
					
				
					 1 changed files with 90 additions and 0 deletions
				
			
		
							
								
								
									
										90
									
								
								12_cons/cons.nim
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										90
									
								
								12_cons/cons.nim
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,90 @@ | |||
| import os | ||||
| import std/sequtils | ||||
| import std/streams | ||||
| import nre | ||||
| import std/enumutils | ||||
| import std/strutils | ||||
| 
 | ||||
type
    Base = enum
        ## The four nucleotide symbols; also used as the row index of
        ## `ProfileMatrix`.
        A, C, G, T
    ProfileMatrix = array[Base, seq[int]]
        ## One sequence of integer counts per base, indexed by position.
    DnaStringObj = object
        ## A named DNA string as read from the input.
        name: string ## identifier taken from the record's header line
        str: string  ## the sequence text with all of its lines joined
    DnaString = ref DnaStringObj
        ## Reference handle to a `DnaStringObj`.
| 
 | ||||
| 
 | ||||
template useStream(stream: Stream, body: untyped) =
    ## Runs `body` and then closes `stream`, even when `body` raises.
    ## When `stream` is nil, `body` is skipped entirely and nothing is
    ## closed.
    if stream != nil:
        try: body
        finally: close(stream)
| 
 | ||||
let firstLinePattern = re">(\w+)(?:\s+.+)?"
proc parseNextDnaString(stream: Stream): DnaString =
    ## Reads one record from `stream`: a header line matched against
    ## `firstLinePattern` (`>` followed by the name), then every following
    ## line up to the next line starting with `>` or the end of the stream.
    ## The sequence lines are concatenated without separators.
    ##
    ## Raises `ValueError` when the stream is already at its end or when
    ## the header line does not match the pattern.
    if stream.atEnd:
        raise newException(ValueError, "Stream is at end")

    let header = stream.readLine()
    let headerMatch = header.match(firstLinePattern)
    if headerMatch.isNone:
        raise newException(ValueError, "Invalid line: " & header)

    # The first capture group is the record name.
    result = DnaString(name: headerMatch.get.captures[0], str: "")

    # Peek before reading so the next record's header stays in the stream.
    while true:
        if stream.atEnd or stream.peekChar == '>':
            break
        result.str &= stream.readLine()
| 
 | ||||
proc calcProfileMatrix(stream: Stream): ProfileMatrix =
    ## Builds the profile matrix of all DNA strings in `stream`: for every
    ## position, how many strings carry each base at that position.
    ##
    ## The stream is consumed and closed through `useStream`. When `stream`
    ## is nil nothing is read and the returned matrix has empty rows.
    ##
    ## Raises `ValueError` when the stream holds no record, when a record is
    ## malformed, when a character is not one of the `Base` symbols, or when
    ## the records do not all have the same length as the first one.
    useStream(stream):
        var dnaString = parseNextDnaString(stream)

        # The first record fixes the number of columns for every row.
        let expectedLen = dnaString.str.len
        for base in Base:
            result[base] = newSeq[int](expectedLen)

        while true:
            # A longer record would index past the rows and a shorter one
            # would leave columns under-counted, so reject both up front.
            if dnaString.str.len != expectedLen:
                raise newException(ValueError,
                    "DNA string '" & dnaString.name & "' has length " &
                    $dnaString.str.len & ", expected " & $expectedLen)

            for i, ch in dnaString.str:
                inc result[parseEnum[Base]($ch)][i]

            if atEnd(stream):
                break
            dnaString = parseNextDnaString(stream)
| 
 | ||||
| 
 | ||||
proc toString(m: ProfileMatrix): string =
    ## Renders the matrix as four lines, one per base in the order
    ## A, C, G, T, each of the form `X: n1 n2 n3 ...`. Lines are separated
    ## by a newline and there is no trailing newline.
    ##
    ## The text is returned rather than printed, so the caller decides
    ## where it goes.
    var rows = newSeqOfCap[string](4)
    for base in Base:
        rows.add($base & ": " & m[base].mapIt($it).join(" "))
    rows.join("\n")
| 
 | ||||
proc getOneConsensusString(m: ProfileMatrix): string =
    ## Returns one consensus string for `m`: for each column, the base with
    ## the highest count. Ties go to the earliest base in A, C, G, T order,
    ## and a column whose counts are all zero yields `A`.
    ##
    ## The number of columns is taken from the `A` row.
    let columns = m[A].len
    result = newStringOfCap(columns)
    for col in 0 ..< columns:
        var winner = A
        var winnerCount = 0
        for base in Base.low .. Base.high:
            let count = m[base][col]
            # Strict comparison keeps the first base seen on a tie.
            if count > winnerCount:
                winnerCount = count
                winner = base
        result.add($winner)
| 
 | ||||
# Entry point: reads the file named by the first command-line argument,
# then prints one consensus string followed by the profile matrix.

# Without this check paramStr(1) fails when no argument is given.
if paramCount() < 1:
    quit("Usage: cons <fasta-file>", QuitFailure)

let fileName = paramStr(1)
let fileStream = newFileStream(fileName)
# A nil stream would make calcProfileMatrix skip all reading and return an
# empty matrix, so stop here with a clear message instead.
if fileStream.isNil:
    quit("Cannot open file: " & fileName, QuitFailure)

let profileMatrix = calcProfileMatrix(fileStream)
let consensusString = getOneConsensusString(profileMatrix)

echo consensusString
echo profileMatrix.toString
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue