/tools/regVariation/substitutions.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 87 lines · 69 code · 13 blank · 5 comment · 18 complexity · f2dc7ae5ec0865e260dbee185438ff97 MD5 · raw file
- #!/usr/bin/env python
- #Guruprasad ANanda
- """
- Fetches substitutions from pairwise alignments.
- """
- from galaxy import eggs
- from galaxy.tools.util import maf_utilities
- import bx.align.maf
- import sys
- import os, fileinput
- def stop_err(msg):
- sys.stderr.write(msg)
- sys.exit()
- if len(sys.argv) < 3:
- stop_err("Incorrect number of arguments.")
-
- inp_file = sys.argv[1]
- out_file = sys.argv[2]
- fout = open(out_file, 'w')
- def fetchSubs(block):
-
- src1 = block.components[0].src
- sequence1 = block.components[0].text
- start1 = block.components[0].start
- end1 = block.components[0].end
- len1 = int(end1)-int(start1)
- len1_withgap = len(sequence1)
-
- for seq in range (1,len(block.components)):
- src2 = block.components[seq].src
- sequence2 = block.components[seq].text
- start2 = block.components[seq].start
- end2 = block.components[seq].end
- len2 = int(end2)-int(start2)
- sub_begin = None
- sub_end = None
- begin = False
-
- for nt in range(len1_withgap):
- if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
- if sequence1[nt].upper() != sequence2[nt].upper():
- if not(begin):
- sub_begin = nt
- begin = True
- sub_end = nt
- else:
- if begin:
- print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),start1+sub_end-sequence1[0:sub_end].count('-'))
- print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),start2+sub_end-sequence2[0:sub_end].count('-'))
- begin = False
- else:
- if begin:
- print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),end1+sub_end-sequence1[0:sub_end].count('-'))
- print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),end2+sub_end-sequence2[0:sub_end].count('-'))
- begin = False
- ended = False
-
-
- def main():
- skipped = 0
- not_pairwise = 0
- try:
- maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
- except:
- stop_err("Your MAF file appears to be malformed.")
- print >>fout, "#Chr\tStart\tEnd"
- for block in maf_reader:
- if len(block.components) != 2:
- not_pairwise += 1
- continue
- try:
- fetchSubs(block)
- except:
- skipped += 1
-
- if not_pairwise:
- print "Skipped %d non-pairwise blocks" %(not_pairwise)
- if skipped:
- print "Skipped %d blocks" %(skipped)
- if __name__ == "__main__":
- main()