/lib/galaxy/tools/data/__init__.py

https://bitbucket.org/h_morita_dbcls/galaxy-central · Python · 131 lines · 97 code · 14 blank · 20 comment · 5 complexity · 3923f738e1e920047c163daf6caa775c MD5 · raw file

  1. """
  2. Manage tool data tables, which store (at the application level) data that is
  3. used by tools, for example in the generation of dynamic options. Tables are
  4. loaded and stored by names which tools use to refer to them. This allows
  5. users to configure data tables for a local Galaxy instance without needing
  6. to modify the tool configurations.
  7. """
  8. import logging, sys, os.path
  9. from galaxy import util
  10. log = logging.getLogger( __name__ )
  11. class ToolDataTableManager( object ):
  12. """
  13. Manages a collection of tool data tables
  14. """
  15. def __init__( self, config_filename=None ):
  16. self.data_tables = {}
  17. if config_filename:
  18. self.add_from_config_file( config_filename )
  19. def __getitem__( self, key ):
  20. return self.data_tables.__getitem__( key )
  21. def __contains__( self, key ):
  22. return self.data_tables.__contains__( key )
  23. def add_from_config_file( self, config_filename ):
  24. tree = util.parse_xml( config_filename )
  25. root = tree.getroot()
  26. for table_elem in root.findall( 'table' ):
  27. type = table_elem.get( 'type', 'tabular' )
  28. assert type in tool_data_table_types, "Unknown data table type '%s'" % type
  29. table = tool_data_table_types[ type ]( table_elem )
  30. self.data_tables[ table.name ] = table
  31. log.debug( "Loaded tool data table '%s", table.name )
  32. class ToolDataTable( object ):
  33. def __init__( self, config_element ):
  34. self.name = config_element.get( 'name' )
  35. class TabularToolDataTable( ToolDataTable ):
  36. """
  37. Data stored in a tabular / separated value format on disk, allows multiple
  38. files to be merged but all must have the same column definitions.
  39. <table type="tabular" name="test">
  40. <column name='...' index = '...' />
  41. <file path="..." />
  42. <file path="..." />
  43. </table>
  44. """
  45. type_key = 'tabular'
  46. def __init__( self, config_element ):
  47. super( TabularToolDataTable, self ).__init__( config_element )
  48. self.configure_and_load( config_element )
  49. def configure_and_load( self, config_element ):
  50. """
  51. Configure and load table from an XML element.
  52. """
  53. self.separator = config_element.get( 'separator', '\t' )
  54. self.comment_char = config_element.get( 'comment_char', '#' )
  55. # Configure columns
  56. self.parse_column_spec( config_element )
  57. # Read every file
  58. all_rows = []
  59. for file_element in config_element.findall( 'file' ):
  60. filename = file_element.get( 'path' )
  61. if not os.path.exists( filename ):
  62. log.warn( "Cannot find index file '%s' for tool data table '%s'" % ( filename, self.name ) )
  63. else:
  64. all_rows.extend( self.parse_file_fields( open( filename ) ) )
  65. self.data = all_rows
  66. def get_fields( self ):
  67. return self.data
  68. def parse_column_spec( self, config_element ):
  69. """
  70. Parse column definitions, which can either be a set of 'column' elements
  71. with a name and index (as in dynamic options config), or a shorthand
  72. comma separated list of names in order as the text of a 'column_names'
  73. element.
  74. A column named 'value' is required.
  75. """
  76. self.columns = {}
  77. if config_element.find( 'columns' ) is not None:
  78. column_names = util.xml_text( config_element.find( 'columns' ) )
  79. column_names = [ n.strip() for n in column_names.split( ',' ) ]
  80. for index, name in enumerate( column_names ):
  81. self.columns[ name ] = index
  82. self.largest_index = index
  83. else:
  84. for column_elem in config_element.findall( 'column' ):
  85. name = column_elem.get( 'name', None )
  86. assert name is not None, "Required 'name' attribute missing from column def"
  87. index = column_elem.get( 'index', None )
  88. assert index is not None, "Required 'index' attribute missing from column def"
  89. index = int( index )
  90. self.columns[name] = index
  91. if index > self.largest_index:
  92. self.largest_index = index
  93. assert 'value' in self.columns, "Required 'value' column missing from column def"
  94. if 'name' not in self.columns:
  95. self.columns['name'] = self.columns['value']
  96. def parse_file_fields( self, reader ):
  97. """
  98. Parse separated lines from file and return a list of tuples.
  99. TODO: Allow named access to fields using the column names.
  100. """
  101. rval = []
  102. for line in reader:
  103. if line.lstrip().startswith( self.comment_char ):
  104. continue
  105. line = line.rstrip( "\n\r" )
  106. if line:
  107. fields = line.split( self.separator )
  108. if self.largest_index < len( fields ):
  109. rval.append( fields )
  110. return rval
  111. # Registry of tool data types by type_key
  112. tool_data_table_types = dict( [ ( cls.type_key, cls ) for cls in [ TabularToolDataTable ] ] )