
/lib/galaxy/tools/parameters/dynamic_options.py

https://bitbucket.org/cistrome/cistrome-harvard/
  1"""
  2Support for generating the options for a SelectToolParameter dynamically (based
  3on the values of other parameters or other aspects of the current state)
  4"""
  5
  6import operator, sys, os, logging
  7import basic, validation
  8from galaxy.util import string_as_bool
  9from galaxy.model import User
 10import galaxy.tools
 11
 12log = logging.getLogger(__name__)
 13
 14class Filter( object ):
 15    """
 16    A filter takes the current options list and modifies it.
 17    """
 18    @classmethod
 19    def from_element( cls, d_option, elem ):
 20        """Loads the proper filter by the type attribute of elem"""
 21        type = elem.get( 'type', None )
 22        assert type is not None, "Required 'type' attribute missing from filter"
 23        return filter_types[type.strip()]( d_option, elem )
 24    def __init__( self, d_option, elem ):
 25        self.dynamic_option = d_option
 26        self.elem = elem
 27    def get_dependency_name( self ):
 28        """Returns the name of any depedencies, otherwise None"""
 29        return None
 30    def filter_options( self, options, trans, other_values ):
 31        """Returns a list of options after the filter is applied"""
 32        raise TypeError( "Abstract Method" )
 33
class StaticValueFilter( Filter ):
    """
    Filters a list of options on a column by a static value.

    Type: static_value

    Required Attributes:
        value: static value to compare to
        column: column in options to compare with
    Optional Attributes:
        keep: Keep options matching value (True)
              Discard options matching value (False)
    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.value = elem.get( "value", None )
        assert self.value is not None, "Required 'value' attribute missing from filter"
        column = elem.get( "column", None )
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index( column )
        self.keep = string_as_bool( elem.get( "keep", 'True' ) )
    def filter_options( self, options, trans, other_values ):
        rval = []
        filter_value = self.value
        try:
            filter_value = User.expand_user_properties( trans.user, filter_value )
        except Exception:
            pass
        for fields in options:
            if ( self.keep and fields[self.column] == filter_value ) or ( not self.keep and fields[self.column] != filter_value ):
                rval.append( fields )
        return rval

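# Illustrative sketch only (not part of the original module): a static_value
# filter as it might appear in a tool's <options> block. The .loc file name and
# column layout below are hypothetical.
#
#     <options from_file="my_index.loc">
#         <column name="name" index="0"/>
#         <column name="value" index="1"/>
#         <column name="dbkey" index="2"/>
#         <filter type="static_value" column="dbkey" value="hg18" keep="true"/>
#     </options>
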
class DataMetaFilter( Filter ):
    """
    Filters a list of options on a column by a dataset metadata value.

    Type: data_meta

    When no 'from' source has been specified in the <options> tag, this will populate the options list with (meta_value, meta_value, False).
    Otherwise, options which do not match the metadata value in the column are discarded.

    Required Attributes:

        - ref: Name of input dataset
        - key: Metadata key to use for comparison
        - column: column in options to compare with (not required when not associated with input options)

    Optional Attributes:

        - multiple: Option values are multiple, split column by separator (False)
        - separator: When multiple, split by this (,)

    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.ref_name = elem.get( "ref", None )
        assert self.ref_name is not None, "Required 'ref' attribute missing from filter"
        d_option.has_dataset_dependencies = True
        self.key = elem.get( "key", None )
        assert self.key is not None, "Required 'key' attribute missing from filter"
        self.column = elem.get( "column", None )
        if self.column is None:
            assert self.dynamic_option.file_fields is None and self.dynamic_option.dataset_ref_name is None, "Required 'column' attribute missing from filter, when loading from file"
        else:
            self.column = d_option.column_spec_to_index( self.column )
        self.multiple = string_as_bool( elem.get( "multiple", "False" ) )
        self.separator = elem.get( "separator", "," )
    def get_dependency_name( self ):
        return self.ref_name
    def filter_options( self, options, trans, other_values ):
        def compare_meta_value( file_value, dataset_value ):
            if isinstance( dataset_value, list ):
                if self.multiple:
                    file_value = file_value.split( self.separator )
                    for value in dataset_value:
                        if value not in file_value:
                            return False
                    return True
                return file_value in dataset_value
            if self.multiple:
                return dataset_value in file_value.split( self.separator )
            return file_value == dataset_value
        assert self.ref_name in other_values or ( trans is not None and trans.workflow_building_mode ), "Required dependency '%s' not found in incoming values" % self.ref_name
        ref = other_values.get( self.ref_name, None )
        if not isinstance( ref, self.dynamic_option.tool_param.tool.app.model.HistoryDatasetAssociation ) and not isinstance( ref, galaxy.tools.DatasetFilenameWrapper ):
            return [] #not a valid dataset
        meta_value = ref.metadata.get( self.key, None )
        if meta_value is None: #assert meta_value is not None, "Required metadata value '%s' not found in referenced dataset" % self.key
            return [ ( disp_name, basic.UnvalidatedValue( optval ), selected ) for disp_name, optval, selected in options ]

        if self.column is not None:
            rval = []
            for fields in options:
                if compare_meta_value( fields[self.column], meta_value ):
                    rval.append( fields )
            return rval
        else:
            if not self.dynamic_option.columns:
                self.dynamic_option.columns = {
                    "name" : 0,
                    "value" : 1,
                    "selected" : 2
                }
                self.dynamic_option.largest_index = 2
            if not isinstance( meta_value, list ):
                meta_value = [meta_value]
            for value in meta_value:
                options.append( ( value, value, False ) )
            return options

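# Illustrative sketch only (not from the original source): a data_meta filter
# that keeps only option rows whose dbkey column matches the build of a
# referenced input dataset. The parameter name and column layout are
# hypothetical.
#
#     <options from_file="my_index.loc">
#         <column name="name" index="0"/>
#         <column name="value" index="1"/>
#         <column name="dbkey" index="2"/>
#         <filter type="data_meta" ref="input1" key="dbkey" column="dbkey"/>
#     </options>
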
class ParamValueFilter( Filter ):
    """
    Filters a list of options on a column by the value of another input.

    Type: param_value

    Required Attributes:

        - ref: Name of input value
        - column: column in options to compare with

    Optional Attributes:

        - keep: Keep options matching value (True)
                Discard options matching value (False)
        - ref_attribute: Period (.) separated attribute chain of input (ref) to use as value for filter

    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.ref_name = elem.get( "ref", None )
        assert self.ref_name is not None, "Required 'ref' attribute missing from filter"
        column = elem.get( "column", None )
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index( column )
        self.keep = string_as_bool( elem.get( "keep", 'True' ) )
        self.ref_attribute = elem.get( "ref_attribute", None )
        if self.ref_attribute:
            self.ref_attribute = self.ref_attribute.split( '.' )
        else:
            self.ref_attribute = []
    def get_dependency_name( self ):
        return self.ref_name
    def filter_options( self, options, trans, other_values ):
        if trans is not None and trans.workflow_building_mode: return []
        assert self.ref_name in other_values, "Required dependency '%s' not found in incoming values" % self.ref_name
        ref = other_values.get( self.ref_name, None )
        for ref_attribute in self.ref_attribute:
            if not hasattr( ref, ref_attribute ):
                return [] #ref does not have attribute, so we cannot filter, return empty list
            ref = getattr( ref, ref_attribute )
        ref = str( ref )
        rval = []
        for fields in options:
            if ( self.keep and fields[self.column] == ref ) or ( not self.keep and fields[self.column] != ref ):
                rval.append( fields )
        return rval

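# Illustrative sketch only (hypothetical parameter name and column index): a
# param_value filter that keeps rows whose third column equals the current
# value of another tool input named "genome".
#
#     <filter type="param_value" ref="genome" column="2" keep="true"/>
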
class UniqueValueFilter( Filter ):
    """
    Filters a list of options to be unique by a column value.

    Type: unique_value

    Required Attributes:
        column: column in options to compare with
    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        column = elem.get( "column", None )
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index( column )
    def get_dependency_name( self ):
        return self.dynamic_option.dataset_ref_name
    def filter_options( self, options, trans, other_values ):
        rval = []
        skip_list = []
        for fields in options:
            if fields[self.column] not in skip_list:
                rval.append( fields )
                skip_list.append( fields[self.column] )
        return rval

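# Illustrative sketch only (column index is hypothetical): keep the first row
# seen for each distinct value of column 1, dropping later duplicates.
#
#     <filter type="unique_value" column="1"/>
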
class MultipleSplitterFilter( Filter ):
    """
    Turns a single line of options into multiple lines, by splitting a column and creating a line for each item.

    Type: multiple_splitter

    Required Attributes:
        column: column(s) in options to split, comma separated
    Optional Attributes:
        separator: Split column by this (,)
    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.separator = elem.get( "separator", "," )
        columns = elem.get( "column", None )
        assert columns is not None, "Required 'column' attribute missing from filter"
        self.columns = [ d_option.column_spec_to_index( column ) for column in columns.split( "," ) ]
    def filter_options( self, options, trans, other_values ):
        rval = []
        for fields in options:
            for column in self.columns:
                for field in fields[column].split( self.separator ):
                    rval.append( fields[0:column] + [field] + fields[column+1:] )
        return rval

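# Illustrative sketch only (hypothetical layout): if column 2 holds a
# comma-separated list such as "hg18,hg19", this expands the row into one
# option line per item.
#
#     <filter type="multiple_splitter" column="2" separator=","/>
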
class AttributeValueSplitterFilter( Filter ):
    """
    Filters a list of attribute-value pairs to be unique attribute names.

    Type: attribute_value_splitter

    Required Attributes:
        column: column(s) in options containing attribute-value pairs, comma separated
    Optional Attributes:
        pair_separator: Split column by this (,)
        name_val_separator: Split name-value pair by this ( whitespace )
    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.pair_separator = elem.get( "pair_separator", "," )
        self.name_val_separator = elem.get( "name_val_separator", None )
        self.columns = elem.get( "column", None )
        assert self.columns is not None, "Required 'column' attribute missing from filter"
        self.columns = [ int( column ) for column in self.columns.split( "," ) ]
    def filter_options( self, options, trans, other_values ):
        attr_names = []
        rval = []
        for fields in options:
            for column in self.columns:
                for pair in fields[column].split( self.pair_separator ):
                    ary = pair.split( self.name_val_separator )
                    if len( ary ) == 2:
                        name, value = ary
                        if name not in attr_names:
                            rval.append( fields[0:column] + [name] + fields[column:] )
                            attr_names.append( name )
        return rval

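# Illustrative sketch only (hypothetical column contents): if column 3 holds
# pairs such as "strain=C57BL/6,sex=female", this yields one option per unique
# attribute name.
#
#     <filter type="attribute_value_splitter" column="3" pair_separator="," name_val_separator="="/>
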
class AdditionalValueFilter( Filter ):
    """
    Adds a single static value to an options list.

    Type: add_value

    Required Attributes:
        value: value to appear in select list
    Optional Attributes:
        name: Display name to appear in select list (value)
        index: Index of option list to add value (APPEND)
    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.value = elem.get( "value", None )
        assert self.value is not None, "Required 'value' attribute missing from filter"
        self.name = elem.get( "name", None )
        if self.name is None:
            self.name = self.value
        self.index = elem.get( "index", None )
        if self.index is not None:
            self.index = int( self.index )
    def filter_options( self, options, trans, other_values ):
        rval = list( options )
        add_value = []
        for i in range( self.dynamic_option.largest_index + 1 ):
            add_value.append( "" )
        add_value[self.dynamic_option.columns['value']] = self.value
        add_value[self.dynamic_option.columns['name']] = self.name
        if self.index is not None:
            rval.insert( self.index, add_value )
        else:
            rval.append( add_value )
        return rval

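# Illustrative sketch only (the name and value shown are hypothetical): prepend
# an extra "All" entry to the generated select list; index 0 places it first,
# and omitting index appends it.
#
#     <filter type="add_value" name="All" value="ALL" index="0"/>
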
class RemoveValueFilter( Filter ):
    """
    Removes a value from an options list.

    Type: remove_value

    Required Attributes::

        value: value to remove from select list
            or
        ref: param to refer to
            or
        meta_ref: dataset to refer to
        key: metadata key to compare to

    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        self.value = elem.get( "value", None )
        self.ref_name = elem.get( "ref", None )
        self.meta_ref = elem.get( "meta_ref", None )
        self.metadata_key = elem.get( "key", None )
        assert self.value is not None or ( ( self.ref_name is not None or self.meta_ref is not None ) and self.metadata_key is not None ), "Required 'value' or 'ref' and 'key' attributes missing from filter"
        self.multiple = string_as_bool( elem.get( "multiple", "False" ) )
        self.separator = elem.get( "separator", "," )
    def filter_options( self, options, trans, other_values ):
        if trans is not None and trans.workflow_building_mode: return options
        assert self.value is not None or ( self.ref_name is not None and self.ref_name in other_values ) or ( self.meta_ref is not None and self.meta_ref in other_values ) or ( trans is not None and trans.workflow_building_mode ), "Required dependency '%s' or '%s' not found in incoming values" % ( self.ref_name, self.meta_ref )
        def compare_value( option_value, filter_value ):
            if isinstance( filter_value, list ):
                if self.multiple:
                    option_value = option_value.split( self.separator )
                    for value in filter_value:
                        if value not in option_value:
                            return False
                    return True
                return option_value in filter_value
            if self.multiple:
                return filter_value in option_value.split( self.separator )
            return option_value == filter_value
        value = self.value
        if value is None:
            if self.ref_name is not None:
                value = other_values.get( self.ref_name )
            else:
                data_ref = other_values.get( self.meta_ref )
                if not isinstance( data_ref, self.dynamic_option.tool_param.tool.app.model.HistoryDatasetAssociation ) and not isinstance( data_ref, galaxy.tools.DatasetFilenameWrapper ):
                    return options #cannot modify options
                value = data_ref.metadata.get( self.metadata_key, None )
        return [ ( disp_name, optval, selected ) for disp_name, optval, selected in options if not compare_value( optval, value ) ]

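# Illustrative sketch only (hypothetical dataset parameter name): drop from the
# select list any option whose value matches the dbkey metadata of a referenced
# input dataset.
#
#     <filter type="remove_value" meta_ref="input1" key="dbkey"/>
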
class SortByColumnFilter( Filter ):
    """
    Sorts an options list by a column.

    Type: sort_by

    Required Attributes:
        column: column to sort by
    """
    def __init__( self, d_option, elem ):
        Filter.__init__( self, d_option, elem )
        column = elem.get( "column", None )
        assert column is not None, "Required 'column' attribute missing from filter"
        self.column = d_option.column_spec_to_index( column )
    def filter_options( self, options, trans, other_values ):
        rval = []
        for i, fields in enumerate( options ):
            for j in range( 0, len( rval ) ):
                if fields[self.column] < rval[j][self.column]:
                    rval.insert( j, fields )
                    break
            else:
                rval.append( fields )
        return rval

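# Illustrative sketch only (column index is hypothetical): order the final
# option list by its display-name column.
#
#     <filter type="sort_by" column="0"/>
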
filter_types = dict( data_meta = DataMetaFilter,
                     param_value = ParamValueFilter,
                     static_value = StaticValueFilter,
                     unique_value = UniqueValueFilter,
                     multiple_splitter = MultipleSplitterFilter,
                     attribute_value_splitter = AttributeValueSplitterFilter,
                     add_value = AdditionalValueFilter,
                     remove_value = RemoveValueFilter,
                     sort_by = SortByColumnFilter )

class DynamicOptions( object ):
    """Handles dynamically generated SelectToolParameter options"""
    def __init__( self, elem, tool_param ):
        def load_from_parameter( from_parameter, transform_lines = None ):
            obj = self.tool_param
            for field in from_parameter.split( '.' ):
                obj = getattr( obj, field )
            if transform_lines:
                obj = eval( transform_lines )
            return self.parse_file_fields( obj )
        self.tool_param = tool_param
        self.columns = {}
        self.filters = []
        self.file_fields = None
        self.largest_index = 0
        self.dataset_ref_name = None
        # True if the options generation depends on one or more other parameters
        # that are dataset inputs
        self.has_dataset_dependencies = False
        self.validators = []
        self.converter_safe = True

        # Parse the <options> tag
        self.separator = elem.get( 'separator', '\t' )
        self.line_startswith = elem.get( 'startswith', None )
        data_file = elem.get( 'from_file', None )
        self.index_file = None
        self.missing_index_file = None
        dataset_file = elem.get( 'from_dataset', None )
        from_parameter = elem.get( 'from_parameter', None )
        tool_data_table_name = elem.get( 'from_data_table', None )
        # Options are defined from a data table loaded by the app
        self.tool_data_table = None
        self.missing_tool_data_table_name = None
        if tool_data_table_name:
            app = tool_param.tool.app
            if tool_data_table_name in app.tool_data_tables:
                self.tool_data_table = app.tool_data_tables[ tool_data_table_name ]
                # Column definitions are optional, but if provided override those from the table
                if elem.find( "column" ) is not None:
                    self.parse_column_definitions( elem )
                else:
                    self.columns = self.tool_data_table.columns
                # Set self.missing_index_file if the index file to
                # which the tool_data_table refers does not exist.
                if self.tool_data_table.missing_index_file:
                    self.missing_index_file = self.tool_data_table.missing_index_file
            else:
                self.missing_tool_data_table_name = tool_data_table_name
                log.warn( "Data table named '%s' is required by tool but not configured" % tool_data_table_name )
        # Options are defined by parsing tabular text data from a data file
        # on disk, a dataset, or the value of another parameter
        elif data_file is not None or dataset_file is not None or from_parameter is not None:
            self.parse_column_definitions( elem )
            if data_file is not None:
                data_file = data_file.strip()
                if not os.path.isabs( data_file ):
                    full_path = os.path.join( self.tool_param.tool.app.config.tool_data_path, data_file )
                    if os.path.exists( full_path ):
                        self.index_file = data_file
                        self.file_fields = self.parse_file_fields( open( full_path ) )
                    else:
                        self.missing_index_file = data_file
            elif dataset_file is not None:
                self.dataset_ref_name = dataset_file
                self.has_dataset_dependencies = True
                self.converter_safe = False
            elif from_parameter is not None:
                transform_lines = elem.get( 'transform_lines', None )
                self.file_fields = list( load_from_parameter( from_parameter, transform_lines ) )

        # Load filters
        for filter_elem in elem.findall( 'filter' ):
            self.filters.append( Filter.from_element( self, filter_elem ) )

        # Load validators
        for validator in elem.findall( 'validator' ):
            self.validators.append( validation.Validator.from_element( self.tool_param, validator ) )

        if self.dataset_ref_name:
            tool_param.data_ref = self.dataset_ref_name

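    # Illustrative sketch only (the data table and parameter names below are
    # hypothetical): a complete <options> block as a tool might declare it,
    # combining a from_data_table source with a data_meta filter and a sort.
    #
    #     <options from_data_table="bowtie_indexes">
    #         <filter type="data_meta" ref="input_fastq" key="dbkey" column="dbkey"/>
    #         <filter type="sort_by" column="name"/>
    #     </options>
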
    def parse_column_definitions( self, elem ):
        for column_elem in elem.findall( 'column' ):
            name = column_elem.get( 'name', None )
            assert name is not None, "Required 'name' attribute missing from column def"
            index = column_elem.get( 'index', None )
            assert index is not None, "Required 'index' attribute missing from column def"
            index = int( index )
            self.columns[name] = index
            if index > self.largest_index:
                self.largest_index = index
        assert 'value' in self.columns, "Required 'value' column missing from column def"
        if 'name' not in self.columns:
            self.columns['name'] = self.columns['value']

    def parse_file_fields( self, reader ):
        rval = []
        field_count = None
        for line in reader:
            if line.startswith( '#' ) or ( self.line_startswith and not line.startswith( self.line_startswith ) ):
                continue
            line = line.rstrip( "\n\r" )
            if line:
                fields = line.split( self.separator )
                if self.largest_index < len( fields ):
                    if not field_count:
                        field_count = len( fields )
                    elif field_count != len( fields ):
                        try:
                            name = reader.name
                        except AttributeError:
                            name = "a configuration file"
                        # Perhaps this should be an error, but even a warning is useful.
                        log.warn( "Inconsistent number of fields (%i vs %i) in %s using separator %r, check line: %r" % \
                                  ( field_count, len( fields ), name, self.separator, line ) )
                    rval.append( fields )
        return rval

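    # Illustrative sketch only (made-up file contents): given a tab-separated
    # index file whose lines look like
    #
    #     #<name>  <value>  <dbkey>
    #     17mer reads<TAB>17mer<TAB>hg18
    #     hg19 full<TAB>hg19full<TAB>hg19
    #
    # parse_file_fields skips the '#' comment line and returns
    # [ ['17mer reads', '17mer', 'hg18'], ['hg19 full', 'hg19full', 'hg19'] ].
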
    def get_dependency_names( self ):
        """
        Return the names of parameters these options depend on -- both data
        and other param types.
        """
        rval = []
        if self.dataset_ref_name:
            rval.append( self.dataset_ref_name )
        for filter in self.filters:
            depend = filter.get_dependency_name()
            if depend:
                rval.append( depend )
        return rval

    def get_fields( self, trans, other_values ):
        if self.dataset_ref_name:
            dataset = other_values.get( self.dataset_ref_name, None )
            assert dataset is not None, "Required dataset '%s' missing from input" % self.dataset_ref_name
            if not dataset: return [] #no valid dataset in history
            # Ensure parsing dynamic options does not consume more than a megabyte's worth of memory.
            path = dataset.file_name
            file_size = os.path.getsize( path )
            if file_size < 1048576:
                options = self.parse_file_fields( open( path ) )
            else:
                # Pass just the first megabyte to parse_file_fields.
                import StringIO
                log.warn( "Attempting to load options from large file, reading just first megabyte" )
                contents = open( path, 'r' ).read( 1048576 )
                options = self.parse_file_fields( StringIO.StringIO( contents ) )
        elif self.tool_data_table:
            options = self.tool_data_table.get_fields()
        else:
            options = list( self.file_fields )
        for filter in self.filters:
            options = filter.filter_options( options, trans, other_values )
        return options

    def get_fields_by_value( self, value, trans, other_values ):
        """
        Return a list of fields with column 'value' matching provided value.
        """
        rval = []
        val_index = self.columns[ 'value' ]
        for fields in self.get_fields( trans, other_values ):
            if fields[ val_index ] == value:
                rval.append( fields )
        return rval

    def get_field_by_name_for_value( self, field_name, value, trans, other_values ):
        """
        Get contents of field by name for specified value.
        """
        rval = []
        if isinstance( field_name, int ):
            field_index = field_name
        else:
            assert field_name in self.columns, "Requested '%s' column missing from column def" % field_name
            field_index = self.columns[ field_name ]
        if not isinstance( value, list ):
            value = [value]
        for val in value:
            for fields in self.get_fields_by_value( val, trans, other_values ):
                rval.append( fields[ field_index ] )
        return rval

    def get_options( self, trans, other_values ):
        rval = []
        if self.file_fields is not None or self.tool_data_table is not None or self.dataset_ref_name is not None:
            options = self.get_fields( trans, other_values )
            for fields in options:
                rval.append( ( fields[self.columns['name']], fields[self.columns['value']], False ) )
        else:
            for filter in self.filters:
                rval = filter.filter_options( rval, trans, other_values )
        return rval

    def column_spec_to_index( self, column_spec ):
        """
        Convert a column specification (as read from the config file), to an
        index. A column specification can just be a number, a column name, or
        a column alias.
        """
        # Name?
        if column_spec in self.columns:
            return self.columns[column_spec]
        # Int?
        return int( column_spec )