PageRenderTime 68ms CodeModel.GetById 48ms app.highlight 11ms RepoModel.GetById 6ms app.codeStats 0ms

/lib/galaxy/tools/data/__init__.py

https://bitbucket.org/h_morita_dbcls/galaxy-central
Python | 131 lines | 108 code | 9 blank | 14 comment | 5 complexity | 3923f738e1e920047c163daf6caa775c MD5 | raw file
  1"""
  2Manage tool data tables, which store (at the application level) data that is
  3used by tools, for example in the generation of dynamic options. Tables are
  4loaded and stored by names which tools use to refer to them. This allows
  5users to configure data tables for a local Galaxy instance without needing
  6to modify the tool configurations. 
  7"""
  8
  9import logging, sys, os.path
 10from galaxy import util
 11
# Module-level logger, named after this module
log = logging.getLogger( __name__ )
 13
 14class ToolDataTableManager( object ):
 15    """
 16    Manages a collection of tool data tables
 17    """
 18    
 19    def __init__( self, config_filename=None ):
 20        self.data_tables = {}
 21        if config_filename:
 22            self.add_from_config_file( config_filename )
 23        
 24    def __getitem__( self, key ):
 25        return self.data_tables.__getitem__( key )
 26        
 27    def __contains__( self, key ):
 28        return self.data_tables.__contains__( key )
 29        
 30    def add_from_config_file( self, config_filename ):
 31        tree = util.parse_xml( config_filename )
 32        root = tree.getroot()
 33        for table_elem in root.findall( 'table' ):
 34            type = table_elem.get( 'type', 'tabular' )
 35            assert type in tool_data_table_types, "Unknown data table type '%s'" % type
 36            table = tool_data_table_types[ type ]( table_elem )
 37            self.data_tables[ table.name ] = table
 38            log.debug( "Loaded tool data table '%s", table.name )
 39    
 40class ToolDataTable( object ):
 41    def __init__( self, config_element ):
 42        self.name = config_element.get( 'name' )
 43    
 44class TabularToolDataTable( ToolDataTable ):
 45    """
 46    Data stored in a tabular / separated value format on disk, allows multiple
 47    files to be merged but all must have the same column definitions.
 48    
 49    <table type="tabular" name="test">
 50        <column name='...' index = '...' />
 51        <file path="..." />
 52        <file path="..." />
 53    </table>
 54    """
 55    
 56    type_key = 'tabular'
 57    
 58    def __init__( self, config_element ):
 59        super( TabularToolDataTable, self ).__init__( config_element )
 60        self.configure_and_load( config_element )
 61    
 62    def configure_and_load( self, config_element ):
 63        """
 64        Configure and load table from an XML element.
 65        """
 66        self.separator = config_element.get( 'separator', '\t' )
 67        self.comment_char = config_element.get( 'comment_char', '#' )
 68        # Configure columns
 69        self.parse_column_spec( config_element )
 70        # Read every file
 71        all_rows = []
 72        for file_element in config_element.findall( 'file' ):
 73            filename = file_element.get( 'path' )
 74            if not os.path.exists( filename ): 
 75                log.warn( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) )
 76            else:
 77                all_rows.extend( self.parse_file_fields( open( filename ) ) )
 78        self.data = all_rows
 79        
 80    def get_fields( self ):
 81        return self.data
 82            
 83    def parse_column_spec( self, config_element ):
 84        """
 85        Parse column definitions, which can either be a set of 'column' elements
 86        with a name and index (as in dynamic options config), or a shorthand
 87        comma separated list of names in order as the text of a 'column_names'
 88        element.
 89        
 90        A column named 'value' is required. 
 91        """
 92        self.columns = {}
 93        if config_element.find( 'columns' ) is not None:
 94            column_names = util.xml_text( config_element.find( 'columns' ) )
 95            column_names = [ n.strip() for n in column_names.split( ',' ) ]
 96            for index, name in enumerate( column_names ):
 97                self.columns[ name ] = index
 98                self.largest_index = index
 99        else:
100            for column_elem in config_element.findall( 'column' ):
101                name = column_elem.get( 'name', None )
102                assert name is not None, "Required 'name' attribute missing from column def"
103                index = column_elem.get( 'index', None )
104                assert index is not None, "Required 'index' attribute missing from column def"
105                index = int( index )
106                self.columns[name] = index
107                if index > self.largest_index:
108                    self.largest_index = index
109        assert 'value' in self.columns, "Required 'value' column missing from column def"
110        if 'name' not in self.columns:
111            self.columns['name'] = self.columns['value']
112        
113    def parse_file_fields( self, reader ):
114        """
115        Parse separated lines from file and return a list of tuples.
116        
117        TODO: Allow named access to fields using the column names.
118        """
119        rval = []
120        for line in reader:
121            if line.lstrip().startswith( self.comment_char ):
122                continue
123            line = line.rstrip( "\n\r" )
124            if line:
125                fields = line.split( self.separator )
126                if self.largest_index < len( fields ):
127                    rval.append( fields )
128        return rval        
129
# Maps each table class's `type_key` to the class itself, so the 'type'
# attribute of a table definition can select the implementation.
tool_data_table_types = dict( ( table_class.type_key, table_class ) for table_class in ( TabularToolDataTable, ) )