PageRenderTime 2654ms CodeModel.GetById 2610ms RepoModel.GetById 0ms app.codeStats 1ms

/lib/galaxy/tools/parameters/dynamic_options.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 605 lines | 562 code | 8 blank | 35 comment | 22 complexity | 0854e00720dc6a111da92df783a1f067 MD5 | raw file
  1. """
  2. Support for generating the options for a SelectToolParameter dynamically (based
  3. on the values of other parameters or other aspects of the current state)
  4. """
  5. import operator, sys, os, logging
  6. import basic, validation
  7. from galaxy.util import string_as_bool
  8. from galaxy.model import User
  9. import galaxy.tools
  10. log = logging.getLogger(__name__)
  11. class Filter( object ):
  12. """
  13. A filter takes the current options list and modifies it.
  14. """
  15. @classmethod
  16. def from_element( cls, d_option, elem ):
  17. """Loads the proper filter by the type attribute of elem"""
  18. type = elem.get( 'type', None )
  19. assert type is not None, "Required 'type' attribute missing from filter"
  20. return filter_types[type.strip()]( d_option, elem )
  21. def __init__( self, d_option, elem ):
  22. self.dynamic_option = d_option
  23. self.elem = elem
  24. def get_dependency_name( self ):
  25. """Returns the name of any depedencies, otherwise None"""
  26. return None
  27. def filter_options( self, options, trans, other_values ):
  28. """Returns a list of options after the filter is applied"""
  29. raise TypeError( "Abstract Method" )
  30. class StaticValueFilter( Filter ):
  31. """
  32. Filters a list of options on a column by a static value.
  33. Type: static_value
  34. Required Attributes:
  35. value: static value to compare to
  36. column: column in options to compare with
  37. Optional Attributes:
  38. keep: Keep columns matching value (True)
  39. Discard columns matching value (False)
  40. """
  41. def __init__( self, d_option, elem ):
  42. Filter.__init__( self, d_option, elem )
  43. self.value = elem.get( "value", None )
  44. assert self.value is not None, "Required 'value' attribute missing from filter"
  45. column = elem.get( "column", None )
  46. assert column is not None, "Required 'column' attribute missing from filter, when loading from file"
  47. self.column = d_option.column_spec_to_index( column )
  48. self.keep = string_as_bool( elem.get( "keep", 'True' ) )
  49. def filter_options( self, options, trans, other_values ):
  50. rval = []
  51. filter_value = self.value
  52. try:
  53. filter_value = User.expand_user_properties( trans.user, filter_value)
  54. except:
  55. pass
  56. for fields in options:
  57. if ( self.keep and fields[self.column] == filter_value ) or ( not self.keep and fields[self.column] != filter_value ):
  58. rval.append( fields )
  59. return rval
  60. class DataMetaFilter( Filter ):
  61. """
  62. Filters a list of options on a column by a dataset metadata value.
  63. Type: data_meta
  64. When no 'from' source has been specified in the <options> tag, this will populate the options list with (meta_value, meta_value, False).
  65. Otherwise, options which do not match the metadata value in the column are discarded.
  66. Required Attributes:
  67. - ref: Name of input dataset
  68. - key: Metadata key to use for comparison
  69. - column: column in options to compare with (not required when not associated with input options)
  70. Optional Attributes:
  71. - multiple: Option values are multiple, split column by separator (True)
  72. - separator: When multiple split by this (,)
  73. """
  74. def __init__( self, d_option, elem ):
  75. Filter.__init__( self, d_option, elem )
  76. self.ref_name = elem.get( "ref", None )
  77. assert self.ref_name is not None, "Required 'ref' attribute missing from filter"
  78. d_option.has_dataset_dependencies = True
  79. self.key = elem.get( "key", None )
  80. assert self.key is not None, "Required 'key' attribute missing from filter"
  81. self.column = elem.get( "column", None )
  82. if self.column is None:
  83. assert self.dynamic_option.file_fields is None and self.dynamic_option.dataset_ref_name is None, "Required 'column' attribute missing from filter, when loading from file"
  84. else:
  85. self.column = d_option.column_spec_to_index( self.column )
  86. self.multiple = string_as_bool( elem.get( "multiple", "False" ) )
  87. self.separator = elem.get( "separator", "," )
  88. def get_dependency_name( self ):
  89. return self.ref_name
  90. def filter_options( self, options, trans, other_values ):
  91. def compare_meta_value( file_value, dataset_value ):
  92. if isinstance( dataset_value, list ):
  93. if self.multiple:
  94. file_value = file_value.split( self.separator )
  95. for value in dataset_value:
  96. if value not in file_value:
  97. return False
  98. return True
  99. return file_value in dataset_value
  100. if self.multiple:
  101. return dataset_value in file_value.split( self.separator )
  102. return file_value == dataset_value
  103. assert self.ref_name in other_values or ( trans is not None and trans.workflow_building_mode), "Required dependency '%s' not found in incoming values" % self.ref_name
  104. ref = other_values.get( self.ref_name, None )
  105. if not isinstance( ref, self.dynamic_option.tool_param.tool.app.model.HistoryDatasetAssociation ) and not ( isinstance( ref, galaxy.tools.DatasetFilenameWrapper ) ):
  106. return [] #not a valid dataset
  107. meta_value = ref.metadata.get( self.key, None )
  108. if meta_value is None: #assert meta_value is not None, "Required metadata value '%s' not found in referenced dataset" % self.key
  109. return [ ( disp_name, basic.UnvalidatedValue( optval ), selected ) for disp_name, optval, selected in options ]
  110. if self.column is not None:
  111. rval = []
  112. for fields in options:
  113. if compare_meta_value( fields[self.column], meta_value ):
  114. rval.append( fields )
  115. return rval
  116. else:
  117. if not self.dynamic_option.columns:
  118. self.dynamic_option.columns = {
  119. "name" : 0,
  120. "value" : 1,
  121. "selected" : 2
  122. }
  123. self.dynamic_option.largest_index = 2
  124. if not isinstance( meta_value, list ):
  125. meta_value = [meta_value]
  126. for value in meta_value:
  127. options.append( ( value, value, False ) )
  128. return options
  129. class ParamValueFilter( Filter ):
  130. """
  131. Filters a list of options on a column by the value of another input.
  132. Type: param_value
  133. Required Attributes:
  134. - ref: Name of input value
  135. - column: column in options to compare with
  136. Optional Attributes:
  137. - keep: Keep columns matching value (True)
  138. Discard columns matching value (False)
  139. - ref_attribute: Period (.) separated attribute chain of input (ref) to use as value for filter
  140. """
  141. def __init__( self, d_option, elem ):
  142. Filter.__init__( self, d_option, elem )
  143. self.ref_name = elem.get( "ref", None )
  144. assert self.ref_name is not None, "Required 'ref' attribute missing from filter"
  145. column = elem.get( "column", None )
  146. assert column is not None, "Required 'column' attribute missing from filter"
  147. self.column = d_option.column_spec_to_index( column )
  148. self.keep = string_as_bool( elem.get( "keep", 'True' ) )
  149. self.ref_attribute = elem.get( "ref_attribute", None )
  150. if self.ref_attribute:
  151. self.ref_attribute = self.ref_attribute.split( '.' )
  152. else:
  153. self.ref_attribute = []
  154. def get_dependency_name( self ):
  155. return self.ref_name
  156. def filter_options( self, options, trans, other_values ):
  157. if trans is not None and trans.workflow_building_mode: return []
  158. assert self.ref_name in other_values, "Required dependency '%s' not found in incoming values" % self.ref_name
  159. ref = other_values.get( self.ref_name, None )
  160. for ref_attribute in self.ref_attribute:
  161. if not hasattr( ref, ref_attribute ):
  162. return [] #ref does not have attribute, so we cannot filter, return empty list
  163. ref = getattr( ref, ref_attribute )
  164. ref = str( ref )
  165. rval = []
  166. for fields in options:
  167. if ( self.keep and fields[self.column] == ref ) or ( not self.keep and fields[self.column] != ref ):
  168. rval.append( fields )
  169. return rval
  170. class UniqueValueFilter( Filter ):
  171. """
  172. Filters a list of options to be unique by a column value.
  173. Type: unique_value
  174. Required Attributes:
  175. column: column in options to compare with
  176. """
  177. def __init__( self, d_option, elem ):
  178. Filter.__init__( self, d_option, elem )
  179. column = elem.get( "column", None )
  180. assert column is not None, "Required 'column' attribute missing from filter"
  181. self.column = d_option.column_spec_to_index( column )
  182. def get_dependency_name( self ):
  183. return self.dynamic_option.dataset_ref_name
  184. def filter_options( self, options, trans, other_values ):
  185. rval = []
  186. skip_list = []
  187. for fields in options:
  188. if fields[self.column] not in skip_list:
  189. rval.append( fields )
  190. skip_list.append( fields[self.column] )
  191. return rval
  192. class MultipleSplitterFilter( Filter ):
  193. """
  194. Turns a single line of options into multiple lines, by splitting a column and creating a line for each item.
  195. Type: multiple_splitter
  196. Required Attributes:
  197. column: column in options to compare with
  198. Optional Attributes:
  199. separator: Split column by this (,)
  200. """
  201. def __init__( self, d_option, elem ):
  202. Filter.__init__( self, d_option, elem )
  203. self.separator = elem.get( "separator", "," )
  204. columns = elem.get( "column", None )
  205. assert columns is not None, "Required 'columns' attribute missing from filter"
  206. self.columns = [ d_option.column_spec_to_index( column ) for column in columns.split( "," ) ]
  207. def filter_options( self, options, trans, other_values ):
  208. rval = []
  209. for fields in options:
  210. for column in self.columns:
  211. for field in fields[column].split( self.separator ):
  212. rval.append( fields[0:column] + [field] + fields[column+1:] )
  213. return rval
  214. class AttributeValueSplitterFilter( Filter ):
  215. """
  216. Filters a list of attribute-value pairs to be unique attribute names.
  217. Type: attribute_value_splitter
  218. Required Attributes:
  219. column: column in options to compare with
  220. Optional Attributes:
  221. pair_separator: Split column by this (,)
  222. name_val_separator: Split name-value pair by this ( whitespace )
  223. """
  224. def __init__( self, d_option, elem ):
  225. Filter.__init__( self, d_option, elem )
  226. self.pair_separator = elem.get( "pair_separator", "," )
  227. self.name_val_separator = elem.get( "name_val_separator", None )
  228. self.columns = elem.get( "column", None )
  229. assert self.columns is not None, "Required 'columns' attribute missing from filter"
  230. self.columns = [ int ( column ) for column in self.columns.split( "," ) ]
  231. def filter_options( self, options, trans, other_values ):
  232. attr_names = []
  233. rval = []
  234. for fields in options:
  235. for column in self.columns:
  236. for pair in fields[column].split( self.pair_separator ):
  237. ary = pair.split( self.name_val_separator )
  238. if len( ary ) == 2:
  239. name, value = ary
  240. if name not in attr_names:
  241. rval.append( fields[0:column] + [name] + fields[column:] )
  242. attr_names.append( name )
  243. return rval
  244. class AdditionalValueFilter( Filter ):
  245. """
  246. Adds a single static value to an options list.
  247. Type: add_value
  248. Required Attributes:
  249. value: value to appear in select list
  250. Optional Attributes:
  251. name: Display name to appear in select list (value)
  252. index: Index of option list to add value (APPEND)
  253. """
  254. def __init__( self, d_option, elem ):
  255. Filter.__init__( self, d_option, elem )
  256. self.value = elem.get( "value", None )
  257. assert self.value is not None, "Required 'value' attribute missing from filter"
  258. self.name = elem.get( "name", None )
  259. if self.name is None:
  260. self.name = self.value
  261. self.index = elem.get( "index", None )
  262. if self.index is not None:
  263. self.index = int( self.index )
  264. def filter_options( self, options, trans, other_values ):
  265. rval = list( options )
  266. add_value = []
  267. for i in range( self.dynamic_option.largest_index + 1 ):
  268. add_value.append( "" )
  269. add_value[self.dynamic_option.columns['value']] = self.value
  270. add_value[self.dynamic_option.columns['name']] = self.name
  271. if self.index is not None:
  272. rval.insert( self.index, add_value )
  273. else:
  274. rval.append( add_value )
  275. return rval
  276. class RemoveValueFilter( Filter ):
  277. """
  278. Removes a value from an options list.
  279. Type: remove_value
  280. Required Attributes::
  281. value: value to remove from select list
  282. or
  283. ref: param to refer to
  284. or
  285. meta_ref: dataset to refer to
  286. key: metadata key to compare to
  287. """
  288. def __init__( self, d_option, elem ):
  289. Filter.__init__( self, d_option, elem )
  290. self.value = elem.get( "value", None )
  291. self.ref_name = elem.get( "ref", None )
  292. self.meta_ref = elem.get( "meta_ref", None )
  293. self.metadata_key = elem.get( "key", None )
  294. assert self.value is not None or ( ( self.ref_name is not None or self.meta_ref is not None )and self.metadata_key is not None ), ValueError( "Required 'value' or 'ref' and 'key' attributes missing from filter" )
  295. self.multiple = string_as_bool( elem.get( "multiple", "False" ) )
  296. self.separator = elem.get( "separator", "," )
  297. def filter_options( self, options, trans, other_values ):
  298. if trans is not None and trans.workflow_building_mode: return options
  299. assert self.value is not None or ( self.ref_name is not None and self.ref_name in other_values ) or (self.meta_ref is not None and self.meta_ref in other_values ) or ( trans is not None and trans.workflow_building_mode), Exception( "Required dependency '%s' or '%s' not found in incoming values" % ( self.ref_name, self.meta_ref ) )
  300. def compare_value( option_value, filter_value ):
  301. if isinstance( filter_value, list ):
  302. if self.multiple:
  303. option_value = option_value.split( self.separator )
  304. for value in filter_value:
  305. if value not in filter_value:
  306. return False
  307. return True
  308. return option_value in filter_value
  309. if self.multiple:
  310. return filter_value in option_value.split( self.separator )
  311. return option_value == filter_value
  312. value = self.value
  313. if value is None:
  314. if self.ref_name is not None:
  315. value = other_values.get( self.ref_name )
  316. else:
  317. data_ref = other_values.get( self.meta_ref )
  318. if not isinstance( data_ref, self.dynamic_option.tool_param.tool.app.model.HistoryDatasetAssociation ) and not ( isinstance( data_ref, galaxy.tools.DatasetFilenameWrapper ) ):
  319. return options #cannot modify options
  320. value = data_ref.metadata.get( self.metadata_key, None )
  321. return [ ( disp_name, optval, selected ) for disp_name, optval, selected in options if not compare_value( optval, value ) ]
  322. class SortByColumnFilter( Filter ):
  323. """
  324. Sorts an options list by a column
  325. Type: sort_by
  326. Required Attributes:
  327. column: column to sort by
  328. """
  329. def __init__( self, d_option, elem ):
  330. Filter.__init__( self, d_option, elem )
  331. column = elem.get( "column", None )
  332. assert column is not None, "Required 'column' attribute missing from filter"
  333. self.column = d_option.column_spec_to_index( column )
  334. def filter_options( self, options, trans, other_values ):
  335. rval = []
  336. for i, fields in enumerate( options ):
  337. for j in range( 0, len( rval ) ):
  338. if fields[self.column] < rval[j][self.column]:
  339. rval.insert( j, fields )
  340. break
  341. else:
  342. rval.append( fields )
  343. return rval
  344. filter_types = dict( data_meta = DataMetaFilter,
  345. param_value = ParamValueFilter,
  346. static_value = StaticValueFilter,
  347. unique_value = UniqueValueFilter,
  348. multiple_splitter = MultipleSplitterFilter,
  349. attribute_value_splitter = AttributeValueSplitterFilter,
  350. add_value = AdditionalValueFilter,
  351. remove_value = RemoveValueFilter,
  352. sort_by = SortByColumnFilter )
  353. class DynamicOptions( object ):
  354. """Handles dynamically generated SelectToolParameter options"""
  355. def __init__( self, elem, tool_param ):
  356. def load_from_parameter( from_parameter, transform_lines = None ):
  357. obj = self.tool_param
  358. for field in from_parameter.split( '.' ):
  359. obj = getattr( obj, field )
  360. if transform_lines:
  361. obj = eval( transform_lines )
  362. return self.parse_file_fields( obj )
  363. self.tool_param = tool_param
  364. self.columns = {}
  365. self.filters = []
  366. self.file_fields = None
  367. self.largest_index = 0
  368. self.dataset_ref_name = None
  369. # True if the options generation depends on one or more other parameters
  370. # that are dataset inputs
  371. self.has_dataset_dependencies = False
  372. self.validators = []
  373. self.converter_safe = True
  374. # Parse the <options> tag
  375. self.separator = elem.get( 'separator', '\t' )
  376. self.line_startswith = elem.get( 'startswith', None )
  377. data_file = elem.get( 'from_file', None )
  378. self.index_file = None
  379. self.missing_index_file = None
  380. dataset_file = elem.get( 'from_dataset', None )
  381. from_parameter = elem.get( 'from_parameter', None )
  382. tool_data_table_name = elem.get( 'from_data_table', None )
  383. # Options are defined from a data table loaded by the app
  384. self.tool_data_table = None
  385. self.missing_tool_data_table_name = None
  386. if tool_data_table_name:
  387. app = tool_param.tool.app
  388. if tool_data_table_name in app.tool_data_tables:
  389. self.tool_data_table = app.tool_data_tables[ tool_data_table_name ]
  390. # Column definitions are optional, but if provided override those from the table
  391. if elem.find( "column" ) is not None:
  392. self.parse_column_definitions( elem )
  393. else:
  394. self.columns = self.tool_data_table.columns
  395. # Set self.missing_index_file if the index file to
  396. # which the tool_data_table refers does not exist.
  397. if self.tool_data_table.missing_index_file:
  398. self.missing_index_file = self.tool_data_table.missing_index_file
  399. else:
  400. self.missing_tool_data_table_name = tool_data_table_name
  401. log.warn( "Data table named '%s' is required by tool but not configured" % tool_data_table_name )
  402. # Options are defined by parsing tabular text data from a data file
  403. # on disk, a dataset, or the value of another parameter
  404. elif data_file is not None or dataset_file is not None or from_parameter is not None:
  405. self.parse_column_definitions( elem )
  406. if data_file is not None:
  407. data_file = data_file.strip()
  408. if not os.path.isabs( data_file ):
  409. full_path = os.path.join( self.tool_param.tool.app.config.tool_data_path, data_file )
  410. if os.path.exists( full_path ):
  411. self.index_file = data_file
  412. self.file_fields = self.parse_file_fields( open( full_path ) )
  413. else:
  414. self.missing_index_file = data_file
  415. elif dataset_file is not None:
  416. self.dataset_ref_name = dataset_file
  417. self.has_dataset_dependencies = True
  418. self.converter_safe = False
  419. elif from_parameter is not None:
  420. transform_lines = elem.get( 'transform_lines', None )
  421. self.file_fields = list( load_from_parameter( from_parameter, transform_lines ) )
  422. # Load filters
  423. for filter_elem in elem.findall( 'filter' ):
  424. self.filters.append( Filter.from_element( self, filter_elem ) )
  425. # Load Validators
  426. for validator in elem.findall( 'validator' ):
  427. self.validators.append( validation.Validator.from_element( self.tool_param, validator ) )
  428. if self.dataset_ref_name:
  429. tool_param.data_ref = self.dataset_ref_name
  430. def parse_column_definitions( self, elem ):
  431. for column_elem in elem.findall( 'column' ):
  432. name = column_elem.get( 'name', None )
  433. assert name is not None, "Required 'name' attribute missing from column def"
  434. index = column_elem.get( 'index', None )
  435. assert index is not None, "Required 'index' attribute missing from column def"
  436. index = int( index )
  437. self.columns[name] = index
  438. if index > self.largest_index:
  439. self.largest_index = index
  440. assert 'value' in self.columns, "Required 'value' column missing from column def"
  441. if 'name' not in self.columns:
  442. self.columns['name'] = self.columns['value']
  443. def parse_file_fields( self, reader ):
  444. rval = []
  445. field_count = None
  446. for line in reader:
  447. if line.startswith( '#' ) or ( self.line_startswith and not line.startswith( self.line_startswith ) ):
  448. continue
  449. line = line.rstrip( "\n\r" )
  450. if line:
  451. fields = line.split( self.separator )
  452. if self.largest_index < len( fields ):
  453. if not field_count:
  454. field_count = len( fields )
  455. elif field_count != len( fields ):
  456. try:
  457. name = reader.name
  458. except AttributeError:
  459. name = "a configuration file"
  460. # Perhaps this should be an error, but even a warning is useful.
  461. log.warn( "Inconsistent number of fields (%i vs %i) in %s using separator %r, check line: %r" % \
  462. ( field_count, len( fields ), name, self.separator, line ) )
  463. rval.append( fields )
  464. return rval
  465. def get_dependency_names( self ):
  466. """
  467. Return the names of parameters these options depend on -- both data
  468. and other param types.
  469. """
  470. rval = []
  471. if self.dataset_ref_name:
  472. rval.append( self.dataset_ref_name )
  473. for filter in self.filters:
  474. depend = filter.get_dependency_name()
  475. if depend:
  476. rval.append( depend )
  477. return rval
  478. def get_fields( self, trans, other_values ):
  479. if self.dataset_ref_name:
  480. dataset = other_values.get( self.dataset_ref_name, None )
  481. assert dataset is not None, "Required dataset '%s' missing from input" % self.dataset_ref_name
  482. if not dataset: return [] #no valid dataset in history
  483. # Ensure parsing dynamic options does not consume more than a megabyte worth memory.
  484. path = dataset.file_name
  485. file_size = os.path.getsize( path )
  486. if os.path.getsize( path ) < 1048576:
  487. options = self.parse_file_fields( open( path ) )
  488. else:
  489. # Pass just the first megabyte to parse_file_fields.
  490. import StringIO
  491. log.warn( "Attempting to load options from large file, reading just first megabyte" )
  492. contents = open( path, 'r' ).read( 1048576 )
  493. options = self.parse_file_fields( StringIO.StringIO( contents ) )
  494. elif self.tool_data_table:
  495. options = self.tool_data_table.get_fields()
  496. else:
  497. options = list( self.file_fields )
  498. for filter in self.filters:
  499. options = filter.filter_options( options, trans, other_values )
  500. return options
  501. def get_fields_by_value( self, value, trans, other_values ):
  502. """
  503. Return a list of fields with column 'value' matching provided value.
  504. """
  505. rval = []
  506. val_index = self.columns[ 'value' ]
  507. for fields in self.get_fields( trans, other_values ):
  508. if fields[ val_index ] == value:
  509. rval.append( fields )
  510. return rval
  511. def get_field_by_name_for_value( self, field_name, value, trans, other_values ):
  512. """
  513. Get contents of field by name for specified value.
  514. """
  515. rval = []
  516. if isinstance( field_name, int ):
  517. field_index = field_name
  518. else:
  519. assert field_name in self.columns, "Requested '%s' column missing from column def" % field_name
  520. field_index = self.columns[ field_name ]
  521. if not isinstance( value, list ):
  522. value = [value]
  523. for val in value:
  524. for fields in self.get_fields_by_value( val, trans, other_values ):
  525. rval.append( fields[ field_index ] )
  526. return rval
  527. def get_options( self, trans, other_values ):
  528. rval = []
  529. if self.file_fields is not None or self.tool_data_table is not None or self.dataset_ref_name is not None:
  530. options = self.get_fields( trans, other_values )
  531. for fields in options:
  532. rval.append( ( fields[self.columns['name']], fields[self.columns['value']], False ) )
  533. else:
  534. for filter in self.filters:
  535. rval = filter.filter_options( rval, trans, other_values )
  536. return rval
  537. def column_spec_to_index( self, column_spec ):
  538. """
  539. Convert a column specification (as read from the config file), to an
  540. index. A column specification can just be a number, a column name, or
  541. a column alias.
  542. """
  543. # Name?
  544. if column_spec in self.columns:
  545. return self.columns[column_spec]
  546. # Int?
  547. return int( column_spec )