PageRenderTime 55ms CodeModel.GetById 42ms app.highlight 10ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/regVariation/substitutions.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 87 lines | 69 code | 13 blank | 5 comment | 21 complexity | f2dc7ae5ec0865e260dbee185438ff97 MD5 | raw file
 1#!/usr/bin/env python
 2#Guruprasad ANanda
 3"""
 4Fetches substitutions from pairwise alignments.
 5"""
 6
 7from galaxy import eggs
 8
 9from galaxy.tools.util import maf_utilities
10
11import bx.align.maf
12import sys
13import os, fileinput
14def stop_err(msg):
15    sys.stderr.write(msg)
16    sys.exit()
17
18if len(sys.argv) < 3:
19        stop_err("Incorrect number of arguments.")    
20    
21inp_file = sys.argv[1]
22out_file = sys.argv[2]
23fout = open(out_file, 'w')
24
25def fetchSubs(block):
26    
27    src1 = block.components[0].src
28    sequence1 = block.components[0].text
29    start1 = block.components[0].start
30    end1 = block.components[0].end
31    len1 = int(end1)-int(start1)
32    len1_withgap = len(sequence1)
33    
34    for seq in range (1,len(block.components)):
35        src2 = block.components[seq].src
36        sequence2 = block.components[seq].text
37        start2 = block.components[seq].start
38        end2 = block.components[seq].end
39        len2 = int(end2)-int(start2)
40        sub_begin = None
41        sub_end = None
42        begin = False
43        
44        for nt in range(len1_withgap):
45            if sequence1[nt] not in '-#$^*?' and sequence2[nt] not in '-#$^*?': #Not a gap or masked character
46                if sequence1[nt].upper() != sequence2[nt].upper():
47                    if not(begin):
48                        sub_begin = nt
49                        begin = True
50                    sub_end = nt
51                else:
52                    if begin:
53                        print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),start1+sub_end-sequence1[0:sub_end].count('-'))
54                        print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),start2+sub_end-sequence2[0:sub_end].count('-'))    
55                        begin = False
56
57            else:
58                if begin:
59                    print >>fout, "%s\t%s\t%s" %(src1,start1+sub_begin-sequence1[0:sub_begin].count('-'),end1+sub_end-sequence1[0:sub_end].count('-'))
60                    print >>fout, "%s\t%s\t%s" %(src2,start2+sub_begin-sequence2[0:sub_begin].count('-'),end2+sub_end-sequence2[0:sub_end].count('-'))    
61                    begin = False
62                    ended = False
63    
64              
65def main():
66    skipped = 0
67    not_pairwise = 0
68    try:
69        maf_reader = bx.align.maf.Reader( open(inp_file, 'r') )
70    except:
71        stop_err("Your MAF file appears to be malformed.")
72    print >>fout, "#Chr\tStart\tEnd"
73    for block in maf_reader:
74        if len(block.components) != 2:
75            not_pairwise += 1
76            continue
77        try:
78            fetchSubs(block)
79        except:
80            skipped += 1
81    
82    if not_pairwise:
83        print "Skipped %d non-pairwise blocks" %(not_pairwise)
84    if skipped:
85        print "Skipped %d blocks" %(skipped)
86if __name__ == "__main__":
87    main()