PageRenderTime 60ms CodeModel.GetById 40ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/filters/lav_to_bed.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 54 lines | 40 code | 11 blank | 3 comment | 11 complexity | bbfcb4c3d20fad5ce5cae9f937bbc2f3 MD5 | raw file
 1#!/usr/bin/env python
 2#Reads a LAV file and writes two BED files.
 3import sys
 4from galaxy import eggs
 5import pkg_resources
 6pkg_resources.require( "bx-python" )
 7import bx.align.lav
 8
 9assert sys.version_info[:2] >= ( 2, 4 )
10
def stop_err( msg ):
    """Write msg to stderr and terminate the process with a failure status.

    msg is written verbatim; no newline is appended.
    Raises SystemExit with exit code 1.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() exits with status 0, which reports success to any
    # caller that inspects the exit code; a fatal error must exit non-zero.
    sys.exit( 1 )
14
def main():
    """Convert a LAV alignment file into two BED files, one per species.

    Command-line arguments:
        sys.argv[1] -- path of the LAV file to read
        sys.argv[2] -- path of the BED file for the first species seen
        sys.argv[3] -- path of the BED file for the second species seen

    Each component of each alignment block becomes one BED line:
    chrom, start, end, "<species>_<block index>", score 0, strand.
    A "#FILE" line per output file and a final summary go to stdout.
    Argument and file-open errors are reported through stop_err().
    """
    if len( sys.argv ) < 4:
        # Report missing arguments explicitly rather than surfacing the
        # opaque "list index out of range" from indexing sys.argv.
        stop_err( "Usage: %s input.lav output1.bed output2.bed\n" % sys.argv[0] )

    try:
        lav_file = open( sys.argv[1], 'r' )
        bed_file1 = open( sys.argv[2], 'w' )
        bed_file2 = open( sys.argv[3], 'w' )
    except EnvironmentError:
        # sys.exc_info() is used instead of "except ..., e" so this parses
        # on every interpreter from Python 2.4 onwards.
        stop_err( str( sys.exc_info()[1] ) )

    lavsRead = 0
    bedsWritten = 0   # index of the current block; also used in the BED name column
    species = {}      # species name -> output file handle
    try:
        # TODO: this is really bad since everything is read into memory.  Can we eliminate this tool?
        for lavBlock in bx.align.lav.Reader( lav_file ):
            lavsRead += 1
            for c in lavBlock.components:
                spec, chrom = bx.align.lav.src_split( c.src )
                if bedsWritten < 1:
                    # Output files are assigned to species only while
                    # processing the first block.
                    # NOTE(review): if both components of the first block
                    # share one species name, the second assignment replaces
                    # the first and all later records go to bed_file2 --
                    # confirm whether self-alignments must be supported.
                    if len( species ) == 0:
                        species[spec] = bed_file1
                    elif len( species ) == 1:
                        species[spec] = bed_file2
                    else:
                        continue #this is a pairwise alignment...
                if spec in species:
                    species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( bedsWritten ), 0, c.strand ) )
            bedsWritten += 1

        for spec, bed_file in species.items():
            print( "#FILE\t%s\t%s" % ( bed_file.name, spec ) )
    finally:
        # Release the handles even when reading or writing fails part-way.
        lav_file.close()
        bed_file1.close()
        bed_file2.close()

    print( "%d lav blocks read, %d regions written\n" % ( lavsRead, bedsWritten ) )
51
52
53
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()