/lib/galaxy/datatypes/converters/interval_to_summary_tree_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 53 lines · 36 code · 6 blank · 11 comment · 7 complexity · cd512dff51d116eda5124031089a99f8 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Convert from interval file to summary tree file. Default input file format is BED (0-based, half-open intervals).
  4. usage: %prog in_file out_file
  5. -G, --gff: input is GFF format, meaning start and end coordinates are 1-based, closed interval
  6. """
  7. from __future__ import division
  8. import sys, fileinput
  9. from galaxy import eggs
  10. import pkg_resources; pkg_resources.require( "bx-python" )
  11. from galaxy.visualization.tracks.summary import *
  12. from bx.intervals.io import *
  13. from bx.cookbook import doc_optparse
  14. from galaxy.datatypes.util.gff_util import *
  15. def main():
  16. # Read options, args.
  17. options, args = doc_optparse.parse( __doc__ )
  18. try:
  19. gff_format = bool( options.gff )
  20. input_fname, out_fname = args
  21. except:
  22. doc_optparse.exception()
  23. # Do conversion.
  24. # TODO: take column numbers from command line.
  25. if gff_format:
  26. reader_wrapper_class = GFFReaderWrapper
  27. chr_col, start_col, end_col, strand_col = ( 0, 3, 4, 6 )
  28. else:
  29. reader_wrapper_class = NiceReaderWrapper
  30. chr_col, start_col, end_col, strand_col = ( 0, 1, 2, 5 )
  31. reader_wrapper = reader_wrapper_class( fileinput.FileInput( input_fname ),
  32. chrom_col=chr_col,
  33. start_col=start_col,
  34. end_col=end_col,
  35. strand_col=strand_col,
  36. fix_strand=True )
  37. st = SummaryTree(block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30)
  38. for feature in list( reader_wrapper ):
  39. if isinstance( feature, GenomicInterval ):
  40. # Tree expects BED coordinates.
  41. if type( feature ) is GFFFeature:
  42. convert_gff_coords_to_bed( feature )
  43. st.insert_range( feature.chrom, long( feature.start ), long( feature.end ) )
  44. st.write(out_fname)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()