PageRenderTime 45ms CodeModel.GetById 19ms app.highlight 20ms RepoModel.GetById 1ms app.codeStats 0ms

/tools/regVariation/windowSplitter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 85 lines | 65 code | 12 blank | 8 comment | 18 complexity | 3fd683ad88b4fd3eac0b15149177a23c MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
 4Split into windows.
 5
usage: %prog input size out_file makesliding offset
 7   -l, --cols=N,N,N,N: Columns for chrom, start, end, strand in file
 8"""
 9
10import sys, re, os
11
12from galaxy import eggs
13import pkg_resources; pkg_resources.require( "bx-python" )
14from bx.cookbook import doc_optparse
15from galaxy.tools.util.galaxyops import *
16
def stop_err( msg ):
    """
    Report a fatal error and terminate the script.

    msg is written to standard error exactly as given (no newline is
    appended).  The script then exits by raising SystemExit with status 1,
    so the failure is visible through the process exit code as well as
    through the text on stderr.
    """
    sys.stderr.write( msg )
    # A bare sys.exit() would exit with status 0, which looks like success.
    sys.exit( 1 )
20
def _window_starts( start, end, winsize, step ):
    """
    Yield the start coordinate of each window cut from the interval start..end.

    Every window is winsize long and consecutive windows begin step apart
    (step is the window size for tiling windows, or the offset for sliding
    ones).  At most (end - start) // step windows are produced, and a window
    is only yielded when start + winsize does not exceed end, so no window
    ever extends past the end of the interval.
    """
    remaining = ( end - start ) // step
    while remaining > 0 and start + winsize <= end:
        yield start
        start += step
        remaining -= 1


def main():
    """
    Split the intervals of a tab-separated file into fixed-size windows.

    Options and positional arguments come from doc_optparse.parse( __doc__ ).
    The five positional arguments are, in order: input file, window size,
    output file, sliding flag and offset.  The sliding flag is forced to 0
    when the offset is 0.

    For every non-blank line that does not start with "#", the start and end
    columns are read as integers and one output line is written per window,
    identical to the input line except for the start and end columns.
    Lines whose coordinates cannot be parsed, or that lack a configured
    column, are counted as invalid and reported in the summary printed to
    standard output.

    Exits through stop_err() when the arguments cannot be parsed, when the
    window size is not positive, or when a sliding offset is negative.
    """
    # Parsing Command Line here
    options, args = doc_optparse.parse( __doc__ )

    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols )
        inp_file, winsize, out_file, makesliding, offset = args
        winsize = int( winsize )
        offset = int( offset )
        makesliding = int( makesliding )
    except Exception:
        stop_err( "Data issue, click the pencil icon in the history item to correct the metadata attributes of the input dataset." )

    if offset == 0:
        makesliding = 0

    # Reject parameters that can never produce a sensible window instead of
    # letting every input line fail and be reported as "invalid".
    if winsize <= 0:
        stop_err( "Window size must be a positive integer." )
    if makesliding != 0 and offset < 0:
        stop_err( "Offset must not be negative." )

    # Distance between the starts of two consecutive windows.
    if makesliding == 0:
        step = winsize
    else:
        step = offset

    skipped_lines = 0
    first_invalid_line = 0
    invalid_line = None

    with open( out_file, 'w' ) as fo:
        with open( inp_file ) as fi:
            for i, line in enumerate( fi ):
                line = line.strip()
                if not line or line.startswith( '#' ):
                    continue
                elems = line.split( '\t' )
                try:
                    if strand_col_1 != -1:
                        # The strand value is not needed for the output, but
                        # a line lacking the strand column is invalid.
                        elems[strand_col_1]
                    start = int( elems[start_col_1] )
                    end = int( elems[end_col_1] )
                except ( ValueError, IndexError ):
                    # Only parsing problems mark a line as invalid; I/O
                    # errors while writing must not be hidden here.
                    skipped_lines += 1
                    if invalid_line is None:
                        first_invalid_line = i + 1
                        invalid_line = line
                    continue
                for win_start in _window_starts( start, end, winsize, step ):
                    elems[start_col_1] = str( win_start )
                    elems[end_col_1] = str( win_start + winsize )
                    fo.write( "%s\n" % '\t'.join( elems ) )

    # print( single_string ) behaves the same on Python 2 and Python 3.
    if makesliding == 1:
        print( 'Window size=%d, Sliding=Yes, Offset=%d' % ( winsize, offset ) )
    else:
        print( 'Window size=%d, Sliding=No' % winsize )
    if skipped_lines > 0:
        print( 'Skipped %d invalid lines starting with #%d: "%s"' % ( skipped_lines, first_invalid_line, invalid_line ) )
83    
# Run the window splitter only when this file is executed as a script.
if __name__ == "__main__":
    main()