PageRenderTime 11ms CodeModel.GetById 1ms app.highlight 7ms RepoModel.GetById 2ms app.codeStats 0ms

/lib/galaxy/datatypes/indexers/interval.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 66 lines | 58 code | 1 blank | 7 comment | 0 complexity | f4dc977baf9343cfe0775c95413d2cc7 MD5 | raw file
 1#!/usr/bin/env python
 2"""
 3Generate indices for track browsing of an interval file.
 4
 5usage: %prog bed_file out_directory
 6    -1, --cols1=N,N,N,N: Columns for chrom, start, end, strand in interval file
 7"""
 8import sys
 9from galaxy import eggs
10import pkg_resources; pkg_resources.require( "bx-python" )
11from bx.intervals import io
12from bx.cookbook import doc_optparse
13import psyco_full
14import commands
15import os
16from os import environ
17import tempfile
18from bisect import bisect
19
20def divide( intervals, out_path ):
21    manifest = {}
22    current_file = None
23    lastchrom = ""
24    for line in intervals:
25        try:
26            chrom = line.chrom
27        except AttributeError, e:
28            continue
29        manifest[chrom] = max(manifest.get(chrom,0),line.end)
30        if not lastchrom == chrom:
31            if current_file:
32                current_file.close()
33            current_file = open( os.path.join( out_path, "%s" % chrom), "a" )
34        print >> current_file, "\t".join(line)
35        lastchrom = chrom
36    if current_file:
37        current_file.close()
38    return manifest
39
40if __name__ == "__main__":
41    options, args = doc_optparse.parse( __doc__ )
42    try:
43        chr_col_1, start_col_1, end_col_1, strand_col_1 = [int(x)-1 for x in options.cols1.split(',')]
44        in_fname, out_path = args
45    except:
46        doc_optparse.exception()
47
48    # Sort through a tempfile first
49    temp_file = tempfile.NamedTemporaryFile(mode="r")
50    environ['LC_ALL'] = 'POSIX'
51    commandline = "sort -f -n -k %d -k %d -k %d -o %s %s" % (chr_col_1+1,start_col_1+1,end_col_1+1, temp_file.name, in_fname)
52    errorcode, stdout = commands.getstatusoutput(commandline)
53
54    temp_file.seek(0)
55    interval = io.NiceReaderWrapper( temp_file, 
56                                     chrom_col=chr_col_1,
57                                     start_col=start_col_1,
58                                     end_col=end_col_1,
59                                     strand_col=strand_col_1,
60                                     fix_strand=True )
61    manifest = divide( interval, out_path )
62    manifest_file = open( os.path.join( out_path, "manifest.tab" ),"w" )
63    for key, value in manifest.items():
64        print >> manifest_file, "%s\t%s" % (key, value)
65    manifest_file.close()
66    temp_file.close()