PageRenderTime 7ms CodeModel.GetById 1ms app.highlight 4ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/datatypes/converters/interval_to_interval_index_converter.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 52 lines | 30 code | 10 blank | 12 comment | 7 complexity | ae272bcc99b71004087c87b287623d89 MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
 4Convert from interval file to interval index file.
 5
 6usage: %prog <options> in_file out_file
 7    -c, --chr-col: chromosome column, default=1
 8    -s, --start-col: start column, default=2
 9    -e, --end-col: end column, default=3
10"""
11
12from __future__ import division
13
14import optparse
15from galaxy import eggs
16eggs.require( "bx-python" )
17from bx.interval_index_file import Indexes
18
def main():
    """Build an interval index for an interval file and write it to disk.

    Command line: ``<options> in_file out_file``.  The ``-c``, ``-s`` and
    ``-e`` options give the 1-based chromosome, start and end columns
    (defaults 1, 2 and 3).

    Every data line is added to a ``bx.interval_index_file.Indexes`` object
    under its chromosome and start/end coordinates, paired with the offset of
    that line from the beginning of the input file.  Blank lines and lines
    whose first field starts with ``track`` or ``#`` are not indexed, but
    they still advance the offset.

    Offsets are accumulated from ``len(line)`` of text-mode lines, so they
    match byte offsets only when the input uses single-byte characters and
    no newline translation takes place.

    Exits through ``parser.error`` (status 2) when the number of positional
    arguments is not exactly two.  Raises ``IndexError`` or ``ValueError``
    when a data line lacks a configured column or has a non-integer
    coordinate.
    """
    # Read options, args.
    parser = optparse.OptionParser()
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col', default=1)
    parser.add_option('-s', '--start-col', type='int', dest='start_col', default=2)
    parser.add_option('-e', '--end-col', type='int', dest='end_col', default=3)
    (options, args) = parser.parse_args()
    if len(args) != 2:
        # Report a usage error instead of failing on tuple unpacking.
        parser.error("expected exactly two arguments: in_file out_file")
    input_fname, output_fname = args

    # Make column indices 0-based.
    chrom_col = options.chrom_col - 1
    start_col = options.start_col - 1
    end_col = options.end_col - 1

    # Do conversion.
    index = Indexes()
    offset = 0
    with open(input_fname, "r") as in_file:
        for line in in_file:
            feature = line.split()
            # Blank, track and comment lines carry no interval; skip indexing
            # them but keep counting their length so later offsets stay right.
            if feature and not feature[0].startswith(("track", "#")):
                index.add(
                    feature[chrom_col],
                    int(feature[start_col]),
                    int(feature[end_col]),
                    offset,
                )
            offset += len(line)

    # The index is binary data: write it through a binary-mode handle and
    # close it deterministically so the file is fully flushed.
    with open(output_fname, "wb") as out_file:
        index.write(out_file)
49
# Script entry point: perform the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
52