/lib/galaxy/datatypes/indexers/wiggle.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 74 lines · 53 code · 12 blank · 9 comment · 7 complexity · 571fb9b2c579891ea91f8b2d186c7647 MD5 · raw file

  1. #!/usr/bin/env python
  2. """
  3. Read a chromosome of wiggle data, and write it as a npy array, as
  4. well as averages over regions of progressively larger size in powers of 10
  5. """
  6. from __future__ import division
  7. import sys
  8. from galaxy import eggs
  9. import pkg_resources; pkg_resources.require( "bx-python" )
  10. import bx.wiggle
  11. from bx.cookbook import doc_optparse
  12. from bx import misc
  13. max2 = max
  14. pkg_resources.require("numpy>=1.2.1")
  15. from numpy import *
  16. import tempfile
  17. import os
  18. from galaxy.tracks.store import sanitize_name
  19. def write_chrom(max, out_base, instream):
  20. scores = zeros( max, float32 ) * nan
  21. # Fill array from wiggle
  22. for line in instream:
  23. line = line.rstrip("\n\r")
  24. (chrom, pos, val) = line.split("\t")
  25. pos, val = int(pos), float(val)
  26. scores[pos] = val
  27. # Write ra
  28. fname = "%s_%d" % ( out_base, 1 )
  29. save( fname, scores )
  30. os.rename( fname+".npy", fname )
  31. # Write average
  32. for window in 10, 100, 1000, 10000, 100000:
  33. input = scores.copy()
  34. size = len( input )
  35. input.resize( ( ( size / window ), window ) )
  36. masked = ma.masked_array( input, isnan( input ) )
  37. averaged = mean( masked, 1 )
  38. averaged.set_fill_value( nan )
  39. fname = "%s_%d" % ( out_base, window )
  40. save( fname, averaged.filled() )
  41. del masked, averaged
  42. os.rename( fname+".npy", fname )
  43. def main():
  44. max = int( 512*1024*1024 )
  45. # get chroms and lengths
  46. chroms = {}
  47. LEN = {}
  48. for (chrom, pos, val) in bx.wiggle.Reader( open(sys.argv[1],"r") ):
  49. chrom_file = chroms.get(chrom, None)
  50. if not chrom_file:
  51. chrom_file = chroms[chrom] = tempfile.NamedTemporaryFile()
  52. chrom_file.write("%s\t%s\t%s\n" % (chrom,pos,val))
  53. LEN[chrom] = max2( LEN.get(chrom,0), pos+1 )
  54. for chrom, stream in chroms.items():
  55. stream.seek(0)
  56. prefix = os.path.join(sys.argv[2], sanitize_name(chrom))
  57. write_chrom( LEN[chrom], prefix, stream )
  58. manifest_file = open( os.path.join( sys.argv[2], "manifest.tab" ),"w" )
  59. for key, value in LEN.items():
  60. print >> manifest_file, "%s\t%s" % (key, value)
  61. manifest_file.close()
  62. if __name__ == "__main__": main()