PageRenderTime 25ms CodeModel.GetById 10ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 0ms

/lib/galaxy/visualization/tracks/summary.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 117 lines | 75 code | 20 blank | 22 comment | 31 complexity | 07722c24791f09228677bdfb86fe40f2 MD5 | raw file
  1'''
  2This module cannot be moved due to the use of pickling.
  3'''
  4
  5import sys, os
  6import cPickle
  7
  8# TODO: What are the performance implications of setting min level to 1? Data
  9# structure size and/or query speed? It would be nice to have level 1 data
 10# so that client does not have to compute it.
 11MIN_LEVEL = 2
 12
 13class SummaryTree:
 14    '''
 15    Summary tree data structure for feature aggregation across large genomic regions.
 16    '''
 17    def __init__( self, block_size=25, levels=6, draw_cutoff=150, detail_cutoff=30 ):
 18        self.chrom_blocks = {}
 19        self.levels = levels
 20        self.draw_cutoff = draw_cutoff
 21        self.detail_cutoff = detail_cutoff
 22        self.block_size = block_size
 23        self.chrom_stats = {}
 24    
 25    def find_block( self, num, level ):
 26        """ Returns block that num is in for level. """
 27        return ( num / self.block_size ** level )
 28        
 29    def insert_range( self, chrom, start, end ):
 30        """ Inserts a feature at chrom:start-end into the tree. """
 31        
 32        # Get or set up chrom blocks.
 33        if chrom in self.chrom_blocks:
 34            blocks = self.chrom_blocks[ chrom ]
 35        else:
 36            blocks = self.chrom_blocks[ chrom ] = {}
 37            self.chrom_stats[ chrom ] = {}
 38            for level in range( MIN_LEVEL, self.levels + 1 ):
 39                blocks[ level ] = {}
 40            
 41        # Insert feature into all matching blocks at all levels.
 42        for level in range( MIN_LEVEL, self.levels + 1 ):
 43            block_level = blocks[ level ]
 44            starting_block = self.find_block( start, level )
 45            ending_block = self.find_block( end, level )
 46            for block in range( starting_block, ending_block + 1 ):
 47                if block in block_level:
 48                    block_level[ block ] += 1
 49                else:
 50                    block_level[ block ] = 1
 51        
 52    def finish( self ):
 53        """ Compute stats for levels. """
 54        
 55        for chrom, blocks in self.chrom_blocks.iteritems():
 56            for level in range( self.levels, MIN_LEVEL - 1, -1 ):
 57                # Set level's stats.
 58                max_val = max( blocks[ level ].values() )
 59                self.chrom_stats[ chrom ][ level ] = {}
 60                self.chrom_stats[ chrom ][ level ][ "delta" ] = self.block_size ** level
 61                self.chrom_stats[ chrom ][ level ][ "max" ] = max_val
 62                self.chrom_stats[ chrom ][ level ][ "avg" ] = float( max_val ) / len( blocks[ level ] )
 63            
 64            self.chrom_blocks[ chrom ] = dict( [ ( key, value ) for key, value in blocks.iteritems() ] )
 65        
 66    def query( self, chrom, start, end, level, draw_cutoff=None, detail_cutoff=None ):
 67        """ Queries tree for data. """
 68
 69        # Set cutoffs to self's attributes if not defined.
 70        if draw_cutoff != 0:
 71            draw_cutoff = self.draw_cutoff
 72        if detail_cutoff != 0:
 73            detail_cutoff = self.detail_cutoff
 74
 75        # Get data.
 76        if chrom in self.chrom_blocks:
 77            stats = self.chrom_stats[ chrom ]
 78
 79            # For backwards compatibility:
 80            if "detail_level" in stats and level <= stats[ "detail_level" ]:
 81                return "detail"
 82            elif "draw_level" in stats and level <= stats[ "draw_level" ]:
 83                return "draw"
 84
 85            # If below draw, detail level, return string to denote this.
 86            max = stats[ level ][ "max" ]
 87            if max < detail_cutoff:
 88                return "detail"
 89            if max < draw_cutoff:
 90                return "draw"
 91            
 92            # Return block data.
 93            blocks = self.chrom_blocks[ chrom ]
 94            results = []
 95            multiplier = self.block_size ** level
 96            starting_block = self.find_block( start, level )
 97            ending_block = self.find_block( end, level )
 98            for block in range( starting_block, ending_block + 1 ):
 99                val = 0
100                if block in blocks[ level ]:
101                    val = blocks[ level ][ block ]
102                results.append(  ( block * multiplier, val )  )
103            return results
104            
105        return None
106        
107    def write( self, filename ):
108        """ Writes tree to file. """
109        self.finish()
110        cPickle.dump( self, open( filename, 'wb' ), 2 )
111        
112def summary_tree_from_file( filename ):
113    st_file = open( filename, "rb" )
114    st = cPickle.load( st_file )
115    st_file.close()
116    return st
117