/lib/galaxy/datatypes/converters/bgzip.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 47 lines · 32 code · 7 blank · 8 comment · 7 complexity · 86bcce9063f17ffa9ba824298b917b64 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Uses pysam to bgzip a file
  4. usage: %prog in_file out_file
  5. """
  6. from galaxy import eggs
  7. import pkg_resources; pkg_resources.require( "pysam" )
  8. import ctabix, subprocess, tempfile, sys, optparse
  9. def main():
  10. # Read options, args.
  11. parser = optparse.OptionParser()
  12. parser.add_option( '-c', '--chr-col', type='int', dest='chrom_col' )
  13. parser.add_option( '-s', '--start-col', type='int', dest='start_col' )
  14. parser.add_option( '-e', '--end-col', type='int', dest='end_col' )
  15. parser.add_option( '-P', '--preset', dest='preset' )
  16. (options, args) = parser.parse_args()
  17. input_fname, output_fname = args
  18. tmpfile = tempfile.NamedTemporaryFile()
  19. sort_params = None
  20. if options.chrom_col and options.start_col and options.end_col:
  21. sort_params = ["sort",
  22. "-k%(i)s,%(i)s" % { 'i': options.chrom_col },
  23. "-k%(i)i,%(i)in" % { 'i': options.start_col },
  24. "-k%(i)i,%(i)in" % { 'i': options.end_col }
  25. ]
  26. elif options.preset == "bed":
  27. sort_params = ["sort", "-k1,1", "-k2,2n", "-k3,3n"]
  28. elif options.preset == "vcf":
  29. sort_params = ["sort", "-k1,1", "-k2,2n"]
  30. elif options.preset == "gff":
  31. sort_params = ["sort", "-s", "-k1,1", "-k4,4n"] # stable sort on start column
  32. # Skip any lines starting with "#" and "track"
  33. grepped = subprocess.Popen(["grep", "-e", "^\"#\"", "-e", "^track", "-v", input_fname], stderr=subprocess.PIPE, stdout=subprocess.PIPE )
  34. after_sort = subprocess.Popen(sort_params, stdin=grepped.stdout, stderr=subprocess.PIPE, stdout=tmpfile )
  35. grepped.stdout.close()
  36. output, err = after_sort.communicate()
  37. ctabix.tabix_compress(tmpfile.name, output_fname, force=True)
  38. if __name__ == "__main__":
  39. main()