/lib/galaxy/datatypes/converters/interval_to_tabix_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 38 lines · 22 code · 5 blank · 11 comment · 4 complexity · 92c1952421d6d93c88d319f4d9838542 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Uses pysam to index a bgzipped interval file with tabix
  4. Supported presets: bed, gff, vcf
  5. usage: %prog [options] in_file bgzip_file out_file
  6. """
  7. from galaxy import eggs
  8. import pkg_resources; pkg_resources.require( "pysam" )
  9. import ctabix, subprocess, tempfile, sys, os, optparse
  10. def main():
  11. # Read options, args.
  12. parser = optparse.OptionParser()
  13. parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col' )
  14. parser.add_option( '-s', '--start-col', type='int', dest='start_col' )
  15. parser.add_option( '-e', '--end-col', type='int', dest='end_col' )
  16. parser.add_option( '-P', '--preset', dest='preset' )
  17. (options, args) = parser.parse_args()
  18. input_fname, index_fname, out_fname = args
  19. # Create index.
  20. if options.preset:
  21. # Preset type.
  22. ctabix.tabix_index(filename=index_fname, preset=options.preset, keep_original=True,
  23. already_compressed=True, index_filename=out_fname)
  24. else:
  25. # For interval files; column indices are 0-based.
  26. ctabix.tabix_index(filename=index_fname, seq_col=(options.chrom_col - 1),
  27. start_col=(options.start_col - 1), end_col=(options.end_col - 1),
  28. keep_original=True, already_compressed=True, index_filename=out_fname)
  29. if os.path.getsize(index_fname) == 0:
  30. sys.stderr.write("The converted tabix index file is empty, meaning the input data is invalid.")
  31. if __name__ == "__main__":
  32. main()