/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 52 lines · 30 code · 10 blank · 12 comment · 5 complexity · ae272bcc99b71004087c87b287623d89 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Convert from interval file to interval index file.
  4. usage: %prog <options> in_file out_file
  5. -c, --chr-col: chromosome column, default=1
  6. -s, --start-col: start column, default=2
  7. -e, --end-col: end column, default=3
  8. """
  9. from __future__ import division
  10. import optparse
  11. from galaxy import eggs
  12. eggs.require( "bx-python" )
  13. from bx.interval_index_file import Indexes
  14. def main():
  15. # Read options, args.
  16. parser = optparse.OptionParser()
  17. parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col', default=1 )
  18. parser.add_option( '-s', '--start-col', type='int', dest='start_col', default=2 )
  19. parser.add_option( '-e', '--end-col', type='int', dest='end_col', default=3 )
  20. (options, args) = parser.parse_args()
  21. input_fname, output_fname = args
  22. # Make column indices 0-based.
  23. options.chrom_col -= 1
  24. options.start_col -= 1
  25. options.end_col -= 1
  26. # Do conversion.
  27. index = Indexes()
  28. offset = 0
  29. for line in open(input_fname, "r"):
  30. feature = line.strip().split()
  31. if not feature or feature[0].startswith("track") or feature[0].startswith("#"):
  32. offset += len(line)
  33. continue
  34. chrom = feature[ options.chrom_col ]
  35. chrom_start = int( feature[ options.start_col ] )
  36. chrom_end = int( feature[ options.end_col ] )
  37. index.add( chrom, chrom_start, chrom_end, offset )
  38. offset += len(line)
  39. index.write( open(output_fname, "w") )
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()