/tools/filters/lav_to_bed.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 54 lines · 40 code · 11 blank · 3 comment · 11 complexity · bbfcb4c3d20fad5ce5cae9f937bbc2f3 MD5 · raw file
- #!/usr/bin/env python
- #Reads a LAV file and writes two BED files.
- import sys
- from galaxy import eggs
- import pkg_resources
- pkg_resources.require( "bx-python" )
- import bx.align.lav
-
# Refuse to run on interpreters older than Python 2.4. An explicit raise is
# used instead of an assert statement because asserts are stripped when the
# interpreter runs with -O, which would silently disable the check.
if sys.version_info[:2] < ( 2, 4 ):
    raise AssertionError( "Python 2.4 or newer is required to run this tool" )
-
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended), then SystemExit is raised with exit status 1 so that the
    calling process can detect the failure from the return code as well as
    from the stderr output.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() exits with status 0, which would report success.
    sys.exit( 1 )
-
def main():
    """Split a LAV alignment file into two BED files, one per species.

    Command-line arguments (read from sys.argv):
        1 -- path of the LAV file to read
        2 -- path of the BED file for the first species encountered
        3 -- path of the BED file for the second species encountered

    The two species are taken from the components of the first alignment
    block; any further component in that block is skipped. For every block,
    each component whose species is one of those two is written as one BED
    line: chrom, start, end, "<species>_<block index>", score 0, strand.

    One "#FILE" line per output file and a final summary line are printed
    to stdout. A failure to open any of the three files is reported through
    stop_err().
    """
    lav_file = bed_file1 = bed_file2 = None
    try:
        lav_file = open( sys.argv[1], 'r' )
        bed_file1 = open( sys.argv[2], 'w' )
        bed_file2 = open( sys.argv[3], 'w' )
    except Exception:
        # sys.exc_info() is used instead of "except ..., e" (Python 2 only)
        # or "except ... as e" (2.6+) so the code parses on every version.
        err = sys.exc_info()[1]
        # Do not leak handles that were opened before the failure.
        for handle in ( lav_file, bed_file1, bed_file2 ):
            if handle is not None:
                handle.close()
        stop_err( str( err ) )

    lavsRead = 0
    bedsWritten = 0
    species = {}
    try:
        # TODO: this is really bad since everything is read into memory. Can we eliminate this tool?
        for lavBlock in bx.align.lav.Reader( lav_file ):
            lavsRead += 1
            for c in lavBlock.components:
                spec, chrom = bx.align.lav.src_split( c.src )
                if bedsWritten < 1:
                    # Still inside the first block: bind each new species
                    # to its output file, in order of appearance.
                    if not species:
                        species[spec] = bed_file1
                    elif len( species ) == 1:
                        species[spec] = bed_file2
                    else:
                        continue #this is a pairwise alignment...
                if spec in species:
                    species[spec].write( "%s\t%i\t%i\t%s_%s\t%i\t%s\n" % ( chrom, c.start, c.end, spec, str( bedsWritten ), 0, c.strand ) )
            # Advanced once per block, so all components of one alignment
            # share the same index in their BED name column.
            bedsWritten += 1

        for spec, bed_file in species.items():
            print( "#FILE\t%s\t%s" % ( bed_file.name, spec ) )
    finally:
        # Close all handles even when reading or writing fails part-way.
        lav_file.close()
        bed_file1.close()
        bed_file2.close()

    print( "%d lav blocks read, %d regions written\n" % ( lavsRead, bedsWritten ) )
-
-
-
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()