/tools/regVariation/windowSplitter.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 85 lines · 65 code · 12 blank · 8 comment · 20 complexity · 3fd683ad88b4fd3eac0b15149177a23c MD5 · raw file
- #!/usr/bin/env python
- """
- Split into windows.
- usage: %prog input size out_file makesliding offset
- -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
- """
- import sys, re, os
- from galaxy import eggs
- import pkg_resources; pkg_resources.require( "bx-python" )
- from bx.cookbook import doc_optparse
- from galaxy.tools.util.galaxyops import *
def stop_err(msg):
    """Write *msg* to stderr and terminate the process with a failure status.

    The message is written exactly as given (no newline is appended).
    Exits with status 1 so that shells and job runners that look at the
    exit code, and not only at stderr, see the run as failed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
def main():
    """Split every interval of a tab-separated file into fixed-size windows.

    Positional command-line arguments, in order:
    ``input size out_file makesliding offset`` (plus the ``-l/--cols``
    option giving the chrom, start, end and strand columns).

    * ``size``        -- width of each window.
    * ``makesliding`` -- 0 for adjacent windows (step == size); any other
      value steps by ``offset`` instead.
    * ``offset``      -- step between window starts when sliding; an
      offset of 0 forces non-sliding mode.

    For each data line, one output line is written per window: a copy of
    the input line with the start and end columns replaced by the window
    bounds. Only windows that fit entirely inside the interval are written.
    Blank lines and lines starting with ``#`` are ignored. Lines that
    cannot be parsed are counted and summarised on stdout.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse(__doc__)

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(options.cols)
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int(winsize)
        offset = int(offset)
        makesliding = int(makesliding)
    except Exception:
        stop_err("Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset.")

    # A zero offset cannot advance a sliding window, so fall back to
    # adjacent (non-sliding) windows.
    if offset == 0:
        makesliding = 0

    # Distance between the starts of two consecutive windows.
    if makesliding == 0:
        step = winsize
    else:
        step = offset

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    # The output is opened first (as before), so it is created even when
    # the input turns out to be unreadable.  Both files are closed on
    # every exit path.
    with open(out_file, 'w') as fo:
        with open(inp_file) as fi:
            for i, line in enumerate(fi):
                line = line.strip()
                if not line or line.startswith("#"):
                    continue
                try:
                    elems = line.split('\t')
                    if strand_col_1 != -1:
                        # The strand value itself is not used; the lookup
                        # only rejects lines that lack the strand column.
                        # NOTE(review): -1 is presumably parse_cols_arg's
                        # "no strand column" marker -- confirm there.
                        elems[strand_col_1]
                    start = int(elems[start_col_1])
                    end = int(elems[end_col_1])
                    # Upper bound on the number of windows; a zero step
                    # raises ZeroDivisionError and the line is skipped.
                    numwin = (end - start) // step
                except (IndexError, ValueError, ZeroDivisionError):
                    # Only malformed data is treated as an invalid line;
                    # I/O errors while writing are no longer swallowed.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue

                for _ in range(numwin):
                    # Check *before* writing so that an interval shorter
                    # than the window never yields a window that runs
                    # past the interval end.
                    if start + winsize > end:
                        break
                    elems[start_col_1] = str(start)
                    elems[end_col_1] = str(start + winsize)
                    fo.write("%s\n" % '\t'.join(elems))
                    start += step

    if makesliding == 1:
        print('Window size=%d, Sliding=Yes, Offset=%d' % (winsize, offset))
    else:
        print('Window size=%d, Sliding=No' % (winsize))
    if skipped_lines > 0:
        print('Skipped %d invalid lines starting with #%d: "%s"' % (skipped_lines, first_invalid_line, invalid_line))
-
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()