/tools/regVariation/windowSplitter.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 85 lines · 65 code · 12 blank · 8 comment · 20 complexity · 3fd683ad88b4fd3eac0b15149177a23c MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Split into windows.
  4. usage: %prog input size out_file
  5. -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
  6. """
  7. import sys, re, os
  8. from galaxy import eggs
  9. import pkg_resources; pkg_resources.require( "bx-python" )
  10. from bx.cookbook import doc_optparse
  11. from galaxy.tools.util.galaxyops import *
  12. def stop_err( msg ):
  13. sys.stderr.write( msg )
  14. sys.exit()
  15. def main():
  16. # Parsing Command Line here
  17. options, args = doc_optparse.parse( __doc__ )
  18. try:
  19. chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
  20. inp_file, winsize, out_file, makesliding, offset = args
  21. winsize = int(winsize)
  22. offset = int(offset)
  23. makesliding = int(makesliding)
  24. if strand_col_1 <= 0:
  25. strand = "+" #if strand is not defined, default it to +
  26. except:
  27. stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )
  28. fo = open(out_file,'w')
  29. skipped_lines = 0
  30. first_invalid_line = 0
  31. invalid_line = None
  32. if offset == 0:
  33. makesliding = 0
  34. for i, line in enumerate( file( inp_file ) ):
  35. line = line.strip()
  36. if line and line[0:1] != "#":
  37. try:
  38. elems = line.split('\t')
  39. if strand_col_1 != -1:
  40. strand = elems[strand_col_1]
  41. start = int(elems[start_col_1])
  42. end = int(elems[end_col_1])
  43. if makesliding == 0:
  44. numwin = (end - start)/winsize
  45. else:
  46. numwin = (end - start)/offset
  47. if numwin > 0:
  48. for win in range(numwin):
  49. elems_1 = elems
  50. elems_1[start_col_1] = str(start)
  51. elems_1[end_col_1] = str(start + winsize)
  52. fo.write( "%s\n" % '\t'.join( elems_1 ) )
  53. if makesliding == 0:
  54. start = start + winsize
  55. else:
  56. start = start + offset
  57. if start+winsize > end:
  58. break
  59. except:
  60. skipped_lines += 1
  61. if not invalid_line:
  62. first_invalid_line = i + 1
  63. invalid_line = line
  64. fo.close()
  65. if makesliding == 1:
  66. print 'Window size=%d, Sliding=Yes, Offset=%d' %(winsize, offset)
  67. else:
  68. print 'Window size=%d, Sliding=No' %(winsize)
  69. if skipped_lines > 0:
  70. print 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line )
# Run the tool only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()