/tools/new_operations/gops_merge.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 71 lines · 53 code · 9 blank · 9 comment · 15 complexity · 97476837c90f5d77e4482cc929a54d45 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Merge overlapping regions.
  4. usage: %prog in_file out_file
  5. -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in first file
  6. -m, --mincols=N: Require this much overlap (default 1bp)
  7. -3, --threecol: Output 3 column bed
  8. """
  9. from galaxy import eggs
  10. import pkg_resources
  11. pkg_resources.require( "bx-python" )
  12. import sys, traceback, fileinput
  13. from warnings import warn
  14. from bx.intervals import *
  15. from bx.intervals.io import *
  16. from bx.intervals.operations.merge import *
  17. from bx.cookbook import doc_optparse
  18. from galaxy.tools.util.galaxyops import *
  19. assert sys.version_info[:2] >= ( 2, 4 )
  20. def main():
  21. mincols = 1
  22. upstream_pad = 0
  23. downstream_pad = 0
  24. options, args = doc_optparse.parse( __doc__ )
  25. try:
  26. chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
  27. if options.mincols: mincols = int( options.mincols )
  28. in_fname, out_fname = args
  29. except:
  30. doc_optparse.exception()
  31. g1 = NiceReaderWrapper( fileinput.FileInput( in_fname ),
  32. chrom_col=chr_col_1,
  33. start_col=start_col_1,
  34. end_col=end_col_1,
  35. strand_col = strand_col_1,
  36. fix_strand=True )
  37. out_file = open( out_fname, "w" )
  38. try:
  39. for line in merge(g1,mincols=mincols):
  40. if options.threecol:
  41. if type( line ) is GenomicInterval:
  42. out_file.write( "%s\t%s\t%s\n" % ( line.chrom, str( line.startCol ), str( line.endCol ) ) )
  43. elif type( line ) is list:
  44. out_file.write( "%s\t%s\t%s\n" % ( line[chr_col_1], str( line[start_col_1] ), str( line[end_col_1] ) ) )
  45. else:
  46. out_file.write( "%s\n" % line )
  47. else:
  48. if type( line ) is GenomicInterval:
  49. out_file.write( "%s\n" % "\t".join( line.fields ) )
  50. elif type( line ) is list:
  51. out_file.write( "%s\n" % "\t".join( line ) )
  52. else:
  53. out_file.write( "%s\n" % line )
  54. except ParseError, exc:
  55. out_file.close()
  56. fail( "Invalid file format: %s" % str( exc ) )
  57. out_file.close()
  58. if g1.skipped > 0:
  59. print skipped( g1, filedesc=" of 1st dataset" )
  60. if __name__ == "__main__":
  61. main()