PageRenderTime 36ms CodeModel.GetById 25ms app.highlight 8ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/datatypes/indexers/wiggle.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 74 lines | 53 code | 12 blank | 9 comment | 13 complexity | 571fb9b2c579891ea91f8b2d186c7647 MD5 | raw file
 1#!/usr/bin/env python
 2
 3"""
 4Read a chromosome of wiggle data, and write it as a npy array, as 
 5well as averages over regions of progressively larger size in powers of 10
 6"""
 7
 8from __future__ import division
 9
10import sys
11from galaxy import eggs
12import pkg_resources; pkg_resources.require( "bx-python" )
13import bx.wiggle
14from bx.cookbook import doc_optparse
15from bx import misc
16max2 = max
17pkg_resources.require("numpy>=1.2.1")
18from numpy import *
19import tempfile
20import os
21from galaxy.tracks.store import sanitize_name
22
23
24def write_chrom(max, out_base, instream):
25   
26    scores = zeros( max, float32 ) * nan
27    # Fill array from wiggle
28    for line in instream:
29        line = line.rstrip("\n\r")
30        (chrom, pos, val) = line.split("\t")
31        pos, val = int(pos), float(val)
32        scores[pos] = val
33    
34    # Write ra
35    fname = "%s_%d" % ( out_base, 1 )
36    save( fname, scores )
37    os.rename( fname+".npy", fname )
38
39    # Write average
40    for window in 10, 100, 1000, 10000, 100000:
41        input = scores.copy()
42        size = len( input )
43        input.resize( ( ( size / window ), window ) )
44        masked = ma.masked_array( input, isnan( input ) )
45        averaged = mean( masked, 1 )
46        averaged.set_fill_value( nan )
47        fname = "%s_%d" % ( out_base, window )
48        save( fname, averaged.filled() )
49        del masked, averaged
50        os.rename( fname+".npy", fname )
51
52def main():
53    max = int( 512*1024*1024 )
54    # get chroms and lengths
55    chroms = {}
56    LEN = {}
57    for (chrom, pos, val) in bx.wiggle.Reader( open(sys.argv[1],"r") ):
58        chrom_file = chroms.get(chrom, None)
59        if not chrom_file:
60            chrom_file = chroms[chrom] =  tempfile.NamedTemporaryFile()
61        chrom_file.write("%s\t%s\t%s\n" % (chrom,pos,val))
62        LEN[chrom] = max2( LEN.get(chrom,0), pos+1 )
63    for chrom, stream in chroms.items():
64        stream.seek(0)
65        prefix = os.path.join(sys.argv[2], sanitize_name(chrom))
66        write_chrom( LEN[chrom], prefix, stream )
67
68    manifest_file = open( os.path.join( sys.argv[2], "manifest.tab" ),"w" )
69    for key, value in LEN.items():
70        print >> manifest_file, "%s\t%s" % (key, value)
71    manifest_file.close()
72    
73    
74if __name__ == "__main__": main()