/lib/galaxy/visualization/tracks/summary.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 117 lines · 75 code · 20 blank · 22 comment · 21 complexity · 07722c24791f09228677bdfb86fe40f2 MD5 · raw file

  1. '''
  2. This module cannot be moved due to the use of pickling.
  3. '''
  4. import sys, os
  5. import cPickle
  6. # TODO: What are the performance implications of setting min level to 1? Data
  7. # structure size and/or query speed? It would be nice to have level 1 data
  8. # so that client does not have to compute it.
  9. MIN_LEVEL = 2
  10. class SummaryTree:
  11. '''
  12. Summary tree data structure for feature aggregation across large genomic regions.
  13. '''
  14. def __init__( self, block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30 ):
  15. self.chrom_blocks = {}
  16. self.levels = levels
  17. self.draw_cutoff = draw_cutoff
  18. self.detail_cutoff = detail_cutoff
  19. self.block_size = block_size
  20. self.chrom_stats = {}
  21. def find_block( self, num, level ):
  22. """ Returns block that num is in for level. """
  23. return ( num / self.block_size ** level )
  24. def insert_range( self, chrom, start, end ):
  25. """ Inserts a feature at chrom:start-end into the tree. """
  26. # Get or set up chrom blocks.
  27. if chrom in self.chrom_blocks:
  28. blocks = self.chrom_blocks[ chrom ]
  29. else:
  30. blocks = self.chrom_blocks[ chrom ] = {}
  31. self.chrom_stats[ chrom ] = {}
  32. for level in range( MIN_LEVEL, self.levels + 1 ):
  33. blocks[ level ] = {}
  34. # Insert feature into all matching blocks at all levels.
  35. for level in range( MIN_LEVEL, self.levels + 1 ):
  36. block_level = blocks[ level ]
  37. starting_block = self.find_block( start, level )
  38. ending_block = self.find_block( end, level )
  39. for block in range( starting_block, ending_block + 1 ):
  40. if block in block_level:
  41. block_level[ block ] += 1
  42. else:
  43. block_level[ block ] = 1
  44. def finish( self ):
  45. """ Compute stats for levels. """
  46. for chrom, blocks in self.chrom_blocks.iteritems():
  47. for level in range( self.levels, MIN_LEVEL - 1, -1 ):
  48. # Set level's stats.
  49. max_val = max( blocks[ level ].values() )
  50. self.chrom_stats[ chrom ][ level ] = {}
  51. self.chrom_stats[ chrom ][ level ][ "delta" ] = self.block_size ** level
  52. self.chrom_stats[ chrom ][ level ][ "max" ] = max_val
  53. self.chrom_stats[ chrom ][ level ][ "avg" ] = float( max_val ) / len( blocks[ level ] )
  54. self.chrom_blocks[ chrom ] = dict( [ ( key, value ) for key, value in blocks.iteritems() ] )
  55. def query( self, chrom, start, end, level, draw_cutoff=None, detail_cutoff=None ):
  56. """ Queries tree for data. """
  57. # Set cutoffs to self's attributes if not defined.
  58. if draw_cutoff != 0:
  59. draw_cutoff = self.draw_cutoff
  60. if detail_cutoff != 0:
  61. detail_cutoff = self.detail_cutoff
  62. # Get data.
  63. if chrom in self.chrom_blocks:
  64. stats = self.chrom_stats[ chrom ]
  65. # For backwards compatibility:
  66. if "detail_level" in stats and level <= stats[ "detail_level" ]:
  67. return "detail"
  68. elif "draw_level" in stats and level <= stats[ "draw_level" ]:
  69. return "draw"
  70. # If below draw, detail level, return string to denote this.
  71. max = stats[ level ][ "max" ]
  72. if max < detail_cutoff:
  73. return "detail"
  74. if max < draw_cutoff:
  75. return "draw"
  76. # Return block data.
  77. blocks = self.chrom_blocks[ chrom ]
  78. results = []
  79. multiplier = self.block_size ** level
  80. starting_block = self.find_block( start, level )
  81. ending_block = self.find_block( end, level )
  82. for block in range( starting_block, ending_block + 1 ):
  83. val = 0
  84. if block in blocks[ level ]:
  85. val = blocks[ level ][ block ]
  86. results.append( ( block * multiplier, val ) )
  87. return results
  88. return None
  89. def write( self, filename ):
  90. """ Writes tree to file. """
  91. self.finish()
  92. cPickle.dump( self, open( filename, 'wb' ), 2 )
  93. def summary_tree_from_file( filename ):
  94. st_file = open( filename, "rb" )
  95. st = cPickle.load( st_file )
  96. st_file.close()
  97. return st