/lib/galaxy/datatypes/converters/bedgraph_to_array_tree_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 55 lines · 39 code · 13 blank · 3 comment · 9 complexity · 5312d184e5785849b35e6c0b7f45d39e MD5 · raw file

  1. #!/usr/bin/env python
  2. from __future__ import division
  3. import sys
  4. from galaxy import eggs
  5. import pkg_resources; pkg_resources.require( "bx-python" )
  6. from bx.arrays.array_tree import *
  7. # from bx.arrays.wiggle import BedReader
# Passed as the ``block_size`` argument to array_tree_dict_from_reader() in
# main().  NOTE(review): presumably the number of positions stored per
# array-tree block -- confirm against bx.arrays.array_tree.
BLOCK_SIZE = 100
  9. class BedGraphReader:
  10. def __init__( self, f ):
  11. self.f = f
  12. def __iter__( self ):
  13. return self
  14. def next( self ):
  15. while True:
  16. line = self.f.readline()
  17. if not line:
  18. raise StopIteration()
  19. if line.isspace():
  20. continue
  21. if line[0] == "#":
  22. continue
  23. if line[0].isalpha():
  24. if line.startswith( "track" ) or line.startswith( "browser" ):
  25. continue
  26. feature = line.strip().split()
  27. chrom = feature[0]
  28. chrom_start = int(feature[1])
  29. chrom_end = int(feature[2])
  30. score = float(feature[3])
  31. return chrom, chrom_start, chrom_end, None, score
  32. def main():
  33. input_fname = sys.argv[1]
  34. out_fname = sys.argv[2]
  35. reader = BedGraphReader( open( input_fname ) )
  36. # Fill array from reader
  37. d = array_tree_dict_from_reader( reader, {}, block_size = BLOCK_SIZE )
  38. for array_tree in d.itervalues():
  39. array_tree.root.build_summary()
  40. FileArrayTreeDict.dict_to_file( d, open( out_fname, "w" ) )
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()