/lib/galaxy/tools/actions/__init__.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python

import os

from galaxy.exceptions import ObjectInvalid
from galaxy.model import LibraryDatasetDatasetAssociation
from galaxy.tools.parameters import DataToolParameter
from galaxy.tools.parameters.wrapped import WrappedParameters
from galaxy.util.json import from_json_string
from galaxy.util.json import to_json_string
from galaxy.util.none_like import NoneDataset
from galaxy.util.odict import odict
from galaxy.util.template import fill_template
from galaxy.web import url_for

import logging
log = logging.getLogger( __name__ )

class ToolAction( object ):
    """
    The actions to be taken when a tool is run (after parameters have
    been converted and validated).
    """
    def execute( self, tool, trans, incoming={}, set_output_hid=True ):
        raise TypeError( "Abstract method" )

class DefaultToolAction( object ):
    """Default tool action is to run an external command"""

    def collect_input_datasets( self, tool, param_values, trans ):
        """
        Collect any dataset inputs from incoming. Returns a mapping from
        parameter name to Dataset instance for each tool parameter that is
        of the DataToolParameter type.
        """
        input_datasets = dict()

        def visitor( prefix, input, value, parent=None ):
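            # process_dataset handles one incoming dataset: if its datatype does
            # not match the formats this parameter accepts, an implicit datatype
            # conversion is looked up (or run), and dataset access permissions
            # are checked before the (possibly converted) dataset is returned.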
            def process_dataset( data, formats=None ):
                if not data:
                    return data
                if formats is None:
                    formats = input.formats
                if not data.datatype.matches_any( formats ):
                    # Need to refresh in case this conversion just took place, i.e. input above in tool performed the same conversion
                    trans.sa_session.refresh( data )
                    target_ext, converted_dataset = data.find_conversion_destination( formats )
                    if target_ext:
                        if converted_dataset:
                            data = converted_dataset
                        else:
                            # run converter here
                            new_data = data.datatype.convert_dataset( trans, data, target_ext, return_output=True, visible=False ).values()[0]
                            new_data.hid = data.hid
                            new_data.name = data.name
                            trans.sa_session.add( new_data )
                            assoc = trans.app.model.ImplicitlyConvertedDatasetAssociation( parent=data, file_type=target_ext, dataset=new_data, metadata_safe=False )
                            trans.sa_session.add( assoc )
                            trans.sa_session.flush()
                            data = new_data
                current_user_roles = trans.get_current_user_roles()
                if not trans.app.security_agent.can_access_dataset( current_user_roles, data.dataset ):
                    raise Exception( "User does not have permission to use a dataset (%s) provided for input." % data.id )
                return data
            if isinstance( input, DataToolParameter ):
                if isinstance( value, list ):
                    # If there are multiple inputs with the same name, they
                    # are stored as name1, name2, ...
                    for i, v in enumerate( value ):
                        processed_dataset = process_dataset( v )
                        if i == 0:
                            # Allow copying metadata to output, first item will be source.
                            input_datasets[ prefix + input.name ] = processed_dataset
                        input_datasets[ prefix + input.name + str( i + 1 ) ] = processed_dataset
                        conversions = []
                        for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
                            new_data = process_dataset( input_datasets[ prefix + input.name + str( i + 1 ) ], conversion_datatypes )
                            if not new_data or new_data.datatype.matches_any( conversion_datatypes ):
                                input_datasets[ prefix + conversion_name + str( i + 1 ) ] = new_data
                                conversions.append( ( conversion_name, new_data ) )
                            else:
                                raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s' % ( input_datasets[ prefix + input.name + str( i + 1 ) ].extension, conversion_extensions ) )
                        if parent:
                            parent[ input.name ][ i ] = input_datasets[ prefix + input.name + str( i + 1 ) ]
                            for conversion_name, conversion_data in conversions:
                                # allow explicit conversion to be stored in job_parameter table
                                parent[ conversion_name ][ i ] = conversion_data.id  # a more robust way to determine JSONable value is desired
                        else:
                            param_values[ input.name ][ i ] = input_datasets[ prefix + input.name + str( i + 1 ) ]
                            for conversion_name, conversion_data in conversions:
                                # allow explicit conversion to be stored in job_parameter table
                                param_values[ conversion_name ][ i ] = conversion_data.id  # a more robust way to determine JSONable value is desired
                else:
                    input_datasets[ prefix + input.name ] = process_dataset( value )
                    conversions = []
                    for conversion_name, conversion_extensions, conversion_datatypes in input.conversions:
                        new_data = process_dataset( input_datasets[ prefix + input.name ], conversion_datatypes )
                        if not new_data or new_data.datatype.matches_any( conversion_datatypes ):
                            input_datasets[ prefix + conversion_name ] = new_data
                            conversions.append( ( conversion_name, new_data ) )
                        else:
                            raise Exception( 'A path for explicit datatype conversion has not been found: %s --/--> %s' % ( input_datasets[ prefix + input.name ].extension, conversion_extensions ) )
                    target_dict = parent
                    if not target_dict:
                        target_dict = param_values
                    target_dict[ input.name ] = input_datasets[ prefix + input.name ]
                    for conversion_name, conversion_data in conversions:
                        # allow explicit conversion to be stored in job_parameter table
                        target_dict[ conversion_name ] = conversion_data.id  # a more robust way to determine JSONable value is desired

        tool.visit_inputs( param_values, visitor )
        return input_datasets

    def execute( self, tool, trans, incoming={}, return_job=False, set_output_hid=True, set_output_history=True, history=None, job_params=None, rerun_remap_job_id=None ):
        """
        Executes a tool, creating job and tool outputs, associating them, and
        submitting the job to the job queue. If history is not specified, use
        trans.history as destination for tool's output datasets.
        """
        # Set history.
        if not history:
            history = tool.get_default_history_by_trans( trans, create=True )

        out_data = odict()
        # Collect any input datasets from the incoming parameters
        inp_data = self.collect_input_datasets( tool, incoming, trans )

        # Deal with input dataset names, 'dbkey' and types
        input_names = []
        input_ext = 'data'
        input_dbkey = incoming.get( "dbkey", "?" )
        for name, data in inp_data.items():
            if not data:
                data = NoneDataset( datatypes_registry=trans.app.datatypes_registry )
                continue

            # Convert LDDA to an HDA.
            if isinstance( data, LibraryDatasetDatasetAssociation ):
                data = data.to_history_dataset_association( None )
                inp_data[ name ] = data
            else:  # HDA
                if data.hid:
                    input_names.append( 'data %s' % data.hid )
            input_ext = data.ext

            if data.dbkey not in [ None, '?' ]:
                input_dbkey = data.dbkey
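        # chromInfo gives the tool a ".len" file (a chromosome name / length
        # table) for the selected build, taken from a per-dbkey database
        # dataset, the user's custom build, or the built-in len files.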
        # Collect chromInfo dataset and add as parameters to incoming
        db_datasets = {}
        db_dataset = trans.db_dataset_for( input_dbkey )
        if db_dataset:
            db_datasets[ "chromInfo" ] = db_dataset
            incoming[ "chromInfo" ] = db_dataset.file_name
        else:
            # -- Get chrom_info (len file) from either a custom or built-in build. --
            chrom_info = None
            if trans.user and ( 'dbkeys' in trans.user.preferences ) and ( input_dbkey in from_json_string( trans.user.preferences[ 'dbkeys' ] ) ):
                # Custom build.
                custom_build_dict = from_json_string( trans.user.preferences[ 'dbkeys' ] )[ input_dbkey ]
                # HACK: the attempt to get chrom_info below will trigger the
                # fasta-to-len converter if the dataset is not available yet,
                # which would in turn create a recursive loop when running the
                # fasta-to-len tool itself. So, use the second condition below
                # to avoid getting chrom_info when running the fasta-to-len
                # converter.
                if 'fasta' in custom_build_dict and tool.id != 'CONVERTER_fasta_to_len':
                    # Build is defined by fasta; get len file, which is obtained from converting fasta.
                    build_fasta_dataset = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( custom_build_dict[ 'fasta' ] )
                    chrom_info = build_fasta_dataset.get_converted_dataset( trans, 'len' ).file_name
                elif 'len' in custom_build_dict:
                    # Build is defined by len file, so use it.
                    chrom_info = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( custom_build_dict[ 'len' ] ).file_name
            if not chrom_info:
                # Default to built-in build.
                chrom_info = os.path.join( trans.app.config.len_file_path, "%s.len" % input_dbkey )
            incoming[ "chromInfo" ] = os.path.abspath( chrom_info )
        inp_data.update( db_datasets )
        # Determine output dataset permission/roles list
        existing_datasets = [ inp for inp in inp_data.values() if inp ]
        if existing_datasets:
            output_permissions = trans.app.security_agent.guess_derived_permissions_for_datasets( existing_datasets )
        else:
            # No valid inputs, we will use history defaults
            output_permissions = trans.app.security_agent.history_get_default_permissions( history )

        # Build name for output datasets based on tool name and input names
        on_text = on_text_for_names( input_names )

        # Add the dbkey to the incoming parameters
        incoming[ "dbkey" ] = input_dbkey
        # wrapped params are used by change_format action and by output.label; only perform this wrapping once, as needed
        wrapped_params = WrappedParameters( trans, tool, incoming )

        # Keep track of parent / child relationships, we'll create all the
        # datasets first, then create the associations
        parent_to_child_pairs = []
        child_dataset_names = set()
        object_store_id = None
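        # Note: the "else" on the inner filter loop below is Python's for/else;
        # it runs only when no filter triggered "break", i.e. the output passed
        # all of its filters and should actually be created.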
        for name, output in tool.outputs.items():
            for filter in output.filters:
                try:
                    if not eval( filter.text.strip(), globals(), incoming ):
                        break  # do not create this dataset
                except Exception, e:
                    log.debug( 'Dataset output filter failed: %s' % e )
            else:  # all filters passed
                if output.parent:
                    parent_to_child_pairs.append( ( output.parent, name ) )
                    child_dataset_names.add( name )
                ## What is the following hack for? Need to document under what
                ## conditions can the following occur? (james@bx.psu.edu)
                # HACK: the output data has already been created;
                #       this happens i.e. as a result of the async controller
                if name in incoming:
                    dataid = incoming[ name ]
                    data = trans.sa_session.query( trans.app.model.HistoryDatasetAssociation ).get( dataid )
                    assert data is not None
                    out_data[ name ] = data
                else:
                    # the type should match the input
                    ext = output.format
                    if ext == "input":
                        ext = input_ext
                    if output.format_source is not None and output.format_source in inp_data:
                        try:
                            input_dataset = inp_data[ output.format_source ]
                            input_extension = input_dataset.ext
                            ext = input_extension
                        except Exception, e:
                            pass

                    # process change_format tags
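                    # Each <when> either fills a Cheetah-style template (the
                    # 'input' attribute) against the wrapped parameters, or
                    # inspects an attribute of a named input dataset (the
                    # 'input_dataset' / 'attribute' pair); on a match the
                    # output extension is switched to the given format.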
                    if output.change_format:
                        for change_elem in output.change_format:
                            for when_elem in change_elem.findall( 'when' ):
                                check = when_elem.get( 'input', None )
                                if check is not None:
                                    try:
                                        if '$' not in check:
                                            # allow a simple name or more complex specifications
                                            check = '${%s}' % check
                                        if str( fill_template( check, context=wrapped_params.params ) ) == when_elem.get( 'value', None ):
                                            ext = when_elem.get( 'format', ext )
                                    except:  # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct
                                        continue
                                else:
                                    check = when_elem.get( 'input_dataset', None )
                                    if check is not None:
                                        check = inp_data.get( check, None )
                                        if check is not None:
                                            if str( getattr( check, when_elem.get( 'attribute' ) ) ) == when_elem.get( 'value', None ):
                                                ext = when_elem.get( 'format', ext )
                    data = trans.app.model.HistoryDatasetAssociation( extension=ext, create_dataset=True, sa_session=trans.sa_session )
                    if output.hidden:
                        data.visible = False
                    # Commit the dataset immediately so it gets a database-assigned unique id
                    trans.sa_session.add( data )
                    trans.sa_session.flush()
                    trans.app.security_agent.set_all_dataset_permissions( data.dataset, output_permissions )
                # Create an empty file immediately. The first dataset will be
                # created in the "default" store, all others will be created in
                # the same store as the first.
                data.dataset.object_store_id = object_store_id
                try:
                    trans.app.object_store.create( data.dataset )
                except ObjectInvalid:
                    raise Exception( 'Unable to create output dataset: object store is full' )
                object_store_id = data.dataset.object_store_id  # these will be the same thing after the first output
                # This may not be necessary with the new parent/child associations
                data.designation = name
                # Copy metadata from one of the inputs if requested.
                if output.metadata_source:
                    data.init_meta( copy_from=inp_data[ output.metadata_source ] )
                else:
                    data.init_meta()
                # Take dbkey from LAST input
                data.dbkey = str( input_dbkey )
                # Set state
                # FIXME: shouldn't this be NEW until the job runner changes it?
                data.state = data.states.QUEUED
                data.blurb = "queued"
                # Set output label
                data.name = self.get_output_name( output, data, tool, on_text, trans, incoming, history, wrapped_params.params, job_params )
                # Store output
                out_data[ name ] = data
                if output.actions:
                    # Apply pre-job tool-output-dataset actions; e.g. setting metadata, changing format
                    output_action_params = dict( out_data )
                    output_action_params.update( incoming )
                    output.actions.apply_action( data, output_action_params )
                # Store all changes to database
                trans.sa_session.flush()

        # Add all the top-level (non-child) datasets to the history unless otherwise specified
        for name in out_data.keys():
            if name not in child_dataset_names and name not in incoming:  # don't add children, or already-existing datasets, i.e. async created
                data = out_data[ name ]
                if set_output_history:
                    history.add_dataset( data, set_hid=set_output_hid )
                trans.sa_session.add( data )
                trans.sa_session.flush()

        # Add all the children to their parents
        for parent_name, child_name in parent_to_child_pairs:
            parent_dataset = out_data[ parent_name ]
            child_dataset = out_data[ child_name ]
            parent_dataset.children.append( child_dataset )

        # Store data after custom code runs
        trans.sa_session.flush()

        # Create the job object
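        # The Job row ties together the tool id and version, the stringified
        # parameters, and the input/output dataset associations that the job
        # runner later consumes.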
        job = trans.app.model.Job()
        galaxy_session = trans.get_galaxy_session()
        # If we're submitting from the API, there won't be a session.
        if type( galaxy_session ) == trans.model.GalaxySession:
            job.session_id = galaxy_session.id
        if trans.user is not None:
            job.user_id = trans.user.id
        job.history_id = history.id
        job.tool_id = tool.id
        try:
            # For backward compatibility, some tools may not have versions yet.
            job.tool_version = tool.version
        except:
            job.tool_version = "1.0.0"
        # FIXME: Don't need all of incoming here, just the defined parameters
        #        from the tool. We need to deal with tools that pass all post
        #        parameters to the command as a special case.
        for name, value in tool.params_to_strings( incoming, trans.app ).iteritems():
            job.add_parameter( name, value )
        current_user_roles = trans.get_current_user_roles()
        for name, dataset in inp_data.iteritems():
            if dataset:
                if not trans.app.security_agent.can_access_dataset( current_user_roles, dataset.dataset ):
                    raise Exception( "User does not have permission to use a dataset (%s) provided for input." % dataset.id )
                job.add_input_dataset( name, dataset )
            else:
                job.add_input_dataset( name, None )
        for name, dataset in out_data.iteritems():
            job.add_output_dataset( name, dataset )
        job.object_store_id = object_store_id
        if job_params:
            job.params = to_json_string( job_params )
        job.set_handler( tool.get_job_handler( job_params ) )
        trans.sa_session.add( job )

        # Now that we have a job id, we can remap any outputs if this is a rerun and the user chose to continue dependent jobs.
        # This functionality requires tracking jobs in the database.
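        # When a rerun requests that dependent jobs be resumed, paused downstream
        # jobs that consumed the old job's outputs are re-pointed at this job's
        # new output datasets and their state is reset to NEW.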
        if trans.app.config.track_jobs_in_database and rerun_remap_job_id is not None:
            try:
                old_job = trans.sa_session.query( trans.app.model.Job ).get( rerun_remap_job_id )
                assert old_job is not None, '(%s/%s): Old job id is invalid' % ( rerun_remap_job_id, job.id )
                assert old_job.tool_id == job.tool_id, '(%s/%s): Old tool id (%s) does not match rerun tool id (%s)' % ( old_job.id, job.id, old_job.tool_id, job.tool_id )
                if trans.user is not None:
                    assert old_job.user_id == trans.user.id, '(%s/%s): Old user id (%s) does not match rerun user id (%s)' % ( old_job.id, job.id, old_job.user_id, trans.user.id )
                elif trans.user is None and type( galaxy_session ) == trans.model.GalaxySession:
                    assert old_job.session_id == galaxy_session.id, '(%s/%s): Old session id (%s) does not match rerun session id (%s)' % ( old_job.id, job.id, old_job.session_id, galaxy_session.id )
                else:
                    raise Exception( '(%s/%s): Remapping via the API is not (yet) supported' % ( old_job.id, job.id ) )
                for jtod in old_job.output_datasets:
                    for ( job_to_remap, jtid ) in [ ( jtid.job, jtid ) for jtid in jtod.dataset.dependent_jobs ]:
                        if ( trans.user is not None and job_to_remap.user_id == trans.user.id ) or ( trans.user is None and job_to_remap.session_id == galaxy_session.id ):
                            if job_to_remap.state == job_to_remap.states.PAUSED:
                                job_to_remap.state = job_to_remap.states.NEW
                            for hda in [ dep_jtod.dataset for dep_jtod in job_to_remap.output_datasets ]:
                                if hda.state == hda.states.PAUSED:
                                    hda.state = hda.states.NEW
                                    hda.info = None
                            for p in job_to_remap.parameters:
                                if p.name == jtid.name and p.value == str( jtod.dataset.id ):
                                    p.value = str( out_data[ jtod.name ].id )
                            jtid.dataset = out_data[ jtod.name ]
                            jtid.dataset.hid = jtod.dataset.hid
                            log.info( 'Job %s input HDA %s remapped to new HDA %s' % ( job_to_remap.id, jtod.dataset.id, jtid.dataset.id ) )
                            trans.sa_session.add( job_to_remap )
                            trans.sa_session.add( jtid )
                    jtod.dataset.visible = False
                    trans.sa_session.add( jtod )
            except Exception, e:
                log.exception( 'Cannot remap rerun dependencies.' )

        trans.sa_session.flush()
        # Some tools are not really executable, but jobs are still created for them ( for record keeping ).
        # Examples include tools that redirect to other applications ( epigraph ). These special tools must
        # include something that can be retrieved from the params ( e.g., REDIRECT_URL ) to keep the job
        # from being queued.
        if 'REDIRECT_URL' in incoming:
            # Get the dataset - there should only be 1
            for name in inp_data.keys():
                dataset = inp_data[ name ]
            redirect_url = tool.parse_redirect_url( dataset, incoming )
            # GALAXY_URL should be included in the tool params to enable the external application
            # to send back to the current Galaxy instance
            GALAXY_URL = incoming.get( 'GALAXY_URL', None )
            assert GALAXY_URL is not None, "GALAXY_URL parameter missing in tool config."
            redirect_url += "&GALAXY_URL=%s" % GALAXY_URL
            # Job should not be queued, so set state to ok
            job.state = trans.app.model.Job.states.OK
            job.info = "Redirected to: %s" % redirect_url
            trans.sa_session.add( job )
            trans.sa_session.flush()
            trans.response.send_redirect( url_for( controller='tool_runner', action='redirect', redirect_url=redirect_url ) )
        else:
            # Put the job in the queue if tracking in memory
            trans.app.job_queue.put( job.id, job.tool_id )
            trans.log_event( "Added job to the job queue, id: %s" % str( job.id ), tool_id=job.tool_id )
            return job, out_data

    def get_output_name( self, output, dataset, tool, on_text, trans, incoming, history, params, job_params ):
        if output.label:
            params['tool'] = tool
            params['on_string'] = on_text
            return fill_template( output.label, context=params )
        else:
            return self._get_default_data_name( dataset, tool, on_text=on_text, trans=trans, incoming=incoming, history=history, params=params, job_params=job_params )

    def _get_default_data_name( self, dataset, tool, on_text=None, trans=None, incoming=None, history=None, params=None, job_params=None, **kwd ):
        name = tool.name
        if on_text:
            name += ( " on " + on_text )
        return name

def on_text_for_names( input_names ):
    # Build the "on ..." text for output dataset names from the input dataset names
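    # For example: one name gives "data 1"; two give "data 1 and data 2"; three
    # give "data 1, data 2, and data 3"; more than three give "data 1, data 2, and others".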
    if len( input_names ) == 1:
        on_text = input_names[ 0 ]
    elif len( input_names ) == 2:
        on_text = '%s and %s' % tuple( input_names[ 0:2 ] )
    elif len( input_names ) == 3:
        on_text = '%s, %s, and %s' % tuple( input_names[ 0:3 ] )
    elif len( input_names ) > 3:
        on_text = '%s, %s, and others' % tuple( input_names[ 0:2 ] )
    else:
        on_text = ""
    return on_text
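
# Rough usage sketch (for orientation only; the surrounding wiring is an
# assumption about how Galaxy connects tools to their actions, not part of
# this module):
#
#   action = DefaultToolAction()
#   job, out_data = action.execute( tool, trans, incoming=params, history=history )
#
# In practice a Tool instance carries a tool_action attribute and its own
# execute() delegates here, so this module is not normally invoked directly.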