/lib/galaxy/tools/data/__init__.py
Python | 131 lines | 108 code | 9 blank | 14 comment | 5 complexity | 3923f738e1e920047c163daf6caa775c MD5 | raw file
"""
Manage tool data tables, which store (at the application level) data that is
used by tools, for example in the generation of dynamic options. Tables are
loaded and stored by names which tools use to refer to them. This allows
users to configure data tables for a local Galaxy instance without needing
to modify the tool configurations.
"""

import logging
import os.path
import sys

from galaxy import util

log = logging.getLogger(__name__)


class ToolDataTableManager(object):
    """
    Manages a collection of tool data tables, keyed by table name.
    """

    def __init__(self, config_filename=None):
        """
        Create an empty manager and, when `config_filename` is given, load
        every table described in that XML config file.
        """
        self.data_tables = {}
        if config_filename:
            self.add_from_config_file(config_filename)

    def __getitem__(self, key):
        """Return the table registered under `key` (KeyError if absent)."""
        return self.data_tables[key]

    def __contains__(self, key):
        """Return True when a table named `key` has been loaded."""
        return key in self.data_tables

    def add_from_config_file(self, config_filename):
        """
        Load every 'table' element found under the root of the XML file
        `config_filename` and register it under its name. A table whose name
        is already registered replaces the earlier one.

        The 'type' attribute selects the table class from
        `tool_data_table_types` and defaults to 'tabular'; an unknown type
        fails the assertion below.
        """
        tree = util.parse_xml(config_filename)
        root = tree.getroot()
        for table_elem in root.findall('table'):
            table_type = table_elem.get('type', 'tabular')
            assert table_type in tool_data_table_types, "Unknown data table type '%s'" % table_type
            table = tool_data_table_types[table_type](table_elem)
            self.data_tables[table.name] = table
            log.debug("Loaded tool data table '%s'", table.name)


class ToolDataTable(object):
    """
    Base class for tool data tables; records the table's 'name' attribute.
    """

    def __init__(self, config_element):
        self.name = config_element.get('name')


class TabularToolDataTable(ToolDataTable):
    """
    Data stored in a tabular / separated value format on disk, allows multiple
    files to be merged but all must have the same column definitions.

    <table type="tabular" name="test">
        <column name='...' index = '...' />
        <file path="..." />
        <file path="..." />
    </table>
    """

    type_key = 'tabular'

    def __init__(self, config_element):
        super(TabularToolDataTable, self).__init__(config_element)
        self.configure_and_load(config_element)

    def configure_and_load(self, config_element):
        """
        Configure and load table from an XML element.

        Reads the optional 'separator' (default tab) and 'comment_char'
        (default '#') attributes, parses the column definitions, then merges
        the rows of every 'file' child into `self.data`. Files that do not
        exist are skipped with a warning.
        """
        self.separator = config_element.get('separator', '\t')
        self.comment_char = config_element.get('comment_char', '#')
        # Configure columns
        self.parse_column_spec(config_element)
        # Read every file
        all_rows = []
        for file_element in config_element.findall('file'):
            filename = file_element.get('path')
            if not os.path.exists(filename):
                log.warning("Cannot find index file '%s' for tool data table '%s'", filename, self.name)
                continue
            # Close the handle as soon as the file has been parsed instead of
            # leaving it to the garbage collector.
            with open(filename) as reader:
                all_rows.extend(self.parse_file_fields(reader))
        self.data = all_rows

    def get_fields(self):
        """Return the rows loaded by `configure_and_load`."""
        return self.data

    def parse_column_spec(self, config_element):
        """
        Parse column definitions, which can either be a set of 'column' elements
        with a name and index (as in dynamic options config), or a shorthand
        comma separated list of names in order as the text of a 'columns'
        element.

        A column named 'value' is required. When no 'name' column is defined
        it is mapped to the same index as 'value'.

        Sets `self.columns` (column name -> field index) and
        `self.largest_index` (highest field index referenced by any column).
        """
        self.columns = {}
        # Must exist before the comparisons in the 'column' branch below;
        # without it that branch failed with AttributeError.
        self.largest_index = 0
        columns_elem = config_element.find('columns')
        if columns_elem is not None:
            column_names = util.xml_text(columns_elem)
            column_names = [n.strip() for n in column_names.split(',')]
            for index, name in enumerate(column_names):
                self.columns[name] = index
                self.largest_index = index
        else:
            for column_elem in config_element.findall('column'):
                name = column_elem.get('name', None)
                assert name is not None, "Required 'name' attribute missing from column def"
                index = column_elem.get('index', None)
                assert index is not None, "Required 'index' attribute missing from column def"
                index = int(index)
                self.columns[name] = index
                if index > self.largest_index:
                    self.largest_index = index
        assert 'value' in self.columns, "Required 'value' column missing from column def"
        if 'name' not in self.columns:
            self.columns['name'] = self.columns['value']

    def parse_file_fields(self, reader):
        """
        Parse separated lines from `reader` (any iterable of lines) and return
        a list of rows, each row being the list of fields of one line.

        Lines starting with the comment character (after leading whitespace)
        and empty lines are skipped, as are lines with too few fields to
        reach `self.largest_index`.

        TODO: Allow named access to fields using the column names.
        """
        rval = []
        for line in reader:
            if line.lstrip().startswith(self.comment_char):
                continue
            line = line.rstrip("\n\r")
            if line:
                fields = line.split(self.separator)
                # Keep only rows long enough for every configured column.
                if self.largest_index < len(fields):
                    rval.append(fields)
        return rval


# Registry of tool data types by type_key
tool_data_table_types = dict((cls.type_key, cls) for cls in [TabularToolDataTable])