/lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 55 lines · 39 code · 13 blank · 3 comment · 9 complexity · 5312d184e5785849b35e6c0b7f45d39e MD5 · raw file
- #!/usr/bin/env python
- from __future__ import division
- import sys
- from galaxy import eggs
- import pkg_resources; pkg_resources.require( "bx-python" )
- from bx.arrays.array_tree import *
- # from bx.arrays.wiggle import BedReader
- BLOCK_SIZE = 100
class BedGraphReader(object):
    """Iterator over the data lines of a bedGraph file.

    Wraps an open file-like object and yields one tuple per data line:
    ``(chrom, chrom_start, chrom_end, None, score)``. Blank lines, lines
    starting with ``#`` and ``track`` / ``browser`` header lines are skipped.

    The fourth element is always ``None``.
    NOTE(review): presumably a strand placeholder expected by the consumer
    of this reader -- confirm against ``array_tree_dict_from_reader``.
    """

    def __init__(self, f):
        """Store the file-like object *f*; only ``readline()`` is used."""
        self.f = f

    def __iter__(self):
        return self

    def next(self):
        """Return the next parsed data line.

        Raises:
            StopIteration: when the underlying file is exhausted.
            IndexError, ValueError: if a data line has fewer than four
                whitespace-separated fields or non-numeric coordinates/score.
        """
        while True:
            line = self.f.readline()
            if not line:
                # readline() returns an empty string only at end of file.
                raise StopIteration()
            if line.isspace() or line[0] == "#":
                continue
            if line.startswith(("track", "browser")):
                continue
            feature = line.split()
            chrom = feature[0]
            chrom_start = int(feature[1])
            chrom_end = int(feature[2])
            score = float(feature[3])
            return chrom, chrom_start, chrom_end, None, score

    # Python 3 looks up __next__ for the iterator protocol; keep next() for
    # Python 2 callers.
    __next__ = next
def main():
    """Convert a bedGraph file into an array-tree file.

    Command-line arguments:
        sys.argv[1]: path of the bedGraph input file.
        sys.argv[2]: path of the array-tree output file (overwritten).
    """
    input_fname = sys.argv[1]
    out_fname = sys.argv[2]
    # Fill array from reader; the context manager closes the input handle
    # once the whole file has been consumed.
    with open(input_fname) as in_file:
        reader = BedGraphReader(in_file)
        d = array_tree_dict_from_reader(reader, {}, block_size=BLOCK_SIZE)
    # values() works on both Python 2 and 3, unlike itervalues().
    for array_tree in d.values():
        array_tree.root.build_summary()
    # Binary mode: the array-tree file is packed binary data, which text mode
    # can corrupt on platforms that translate line endings.
    with open(out_fname, "wb") as out_file:
        FileArrayTreeDict.dict_to_file(d, out_file)


if __name__ == "__main__":
    main()