/lib/galaxy/datatypes/indexers/interval.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 66 lines · 54 code · 4 blank · 8 comment · 10 complexity · f4dc977baf9343cfe0775c95413d2cc7 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Generate indices for track browsing of an interval file.
  4. usage: %prog bed_file out_directory
  5. -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in interval file
  6. """
  7. import sys
  8. from galaxy import eggs
  9. import pkg_resources; pkg_resources.require( "bx-python" )
  10. from bx.intervals import io
  11. from bx.cookbook import doc_optparse
  12. import psyco_full
  13. import commands
  14. import os
  15. from os import environ
  16. import tempfile
  17. from bisect import bisect
  18. def divide( intervals, out_path ):
  19. manifest = {}
  20. current_file = None
  21. lastchrom = ""
  22. for line in intervals:
  23. try:
  24. chrom = line.chrom
  25. except AttributeError, e:
  26. continue
  27. manifest[chrom] = max(manifest.get(chrom,0),line.end)
  28. if not lastchrom == chrom:
  29. if current_file:
  30. current_file.close()
  31. current_file = open( os.path.join( out_path, "%s" % chrom), "a" )
  32. print >> current_file, "\t".join(line)
  33. lastchrom = chrom
  34. if current_file:
  35. current_file.close()
  36. return manifest
  37. if __name__ == "__main__":
  38. options, args = doc_optparse.parse( __doc__ )
  39. try:
  40. chr_col_1, start_col_1, end_col_1, strand_col_1 = [int(x)-1 for x in options.cols1.split(',')]
  41. in_fname, out_path = args
  42. except:
  43. doc_optparse.exception()
  44. # Sort through a tempfile first
  45. temp_file = tempfile.NamedTemporaryFile(mode="r")
  46. environ['LC_ALL'] = 'POSIX'
  47. commandline = "sort -f -n -k %d -k %d -k %d -o %s %s" % (chr_col_1+1,start_col_1+1,end_col_1+1, temp_file.name, in_fname)
  48. errorcode, stdout = commands.getstatusoutput(commandline)
  49. temp_file.seek(0)
  50. interval = io.NiceReaderWrapper( temp_file,
  51. chrom_col=chr_col_1,
  52. start_col=start_col_1,
  53. end_col=end_col_1,
  54. strand_col=strand_col_1,
  55. fix_strand=True )
  56. manifest = divide( interval, out_path )
  57. manifest_file = open( os.path.join( out_path, "manifest.tab" ),"w" )
  58. for key, value in manifest.items():
  59. print >> manifest_file, "%s\t%s" % (key, value)
  60. manifest_file.close()
  61. temp_file.close()