/lib/galaxy/workflow/modules.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 446 lines · 308 code · 87 blank · 51 comment · 59 complexity · 6b177ea5b3494e9200e8bf06c9a571e3 MD5 · raw file

  1. """
  2. Modules used in building workflows
  3. """
  4. import logging
  5. import re
  6. from elementtree.ElementTree import Element
  7. import galaxy.tools
  8. from galaxy import web
  9. from galaxy.jobs.actions.post import ActionBox
  10. from galaxy.model import PostJobAction
  11. from galaxy.tools.parameters import check_param, DataToolParameter, DummyDataset, RuntimeValue, visit_input_values
  12. from galaxy.util.bunch import Bunch
  13. from galaxy.util.json import from_json_string, to_json_string
  14. log = logging.getLogger( __name__ )
  15. class WorkflowModule( object ):
  16. def __init__( self, trans ):
  17. self.trans = trans
  18. ## ---- Creating modules from various representations ---------------------
  19. @classmethod
  20. def new( Class, trans, tool_id=None ):
  21. """
  22. Create a new instance of the module with default state
  23. """
  24. return Class( trans )
  25. @classmethod
  26. def from_dict( Class, trans, d ):
  27. """
  28. Create a new instance of the module initialized from values in the
  29. dictionary `d`.
  30. """
  31. return Class( trans )
  32. @classmethod
  33. def from_workflow_step( Class, trans, step ):
  34. return Class( trans )
  35. ## ---- Saving in various forms ------------------------------------------
  36. def save_to_step( self, step ):
  37. step.type = self.type
  38. ## ---- General attributes -----------------------------------------------
  39. def get_type( self ):
  40. return self.type
  41. def get_name( self ):
  42. return self.name
  43. def get_tool_id( self ):
  44. return None
  45. def get_tooltip( self, static_path='' ):
  46. return None
  47. ## ---- Configuration time -----------------------------------------------
  48. def get_state( self ):
  49. return None
  50. def get_errors( self ):
  51. return None
  52. def get_data_inputs( self ):
  53. return []
  54. def get_data_outputs( self ):
  55. return []
  56. def update_state( self ):
  57. pass
  58. def get_config_form( self ):
  59. raise TypeError( "Abstract method" )
  60. def check_and_update_state( self ):
  61. """
  62. If the state is not in sync with the current implementation of the
  63. module, try to update. Returns a list of messages to be displayed
  64. """
  65. pass
  66. ## ---- Run time ---------------------------------------------------------
  67. def get_runtime_inputs( self ):
  68. raise TypeError( "Abstract method" )
  69. def get_runtime_state( self ):
  70. raise TypeError( "Abstract method" )
  71. def encode_runtime_state( self, trans, state ):
  72. raise TypeError( "Abstract method" )
  73. def decode_runtime_state( self, trans, string ):
  74. raise TypeError( "Abstract method" )
  75. def update_runtime_state( self, trans, state, values ):
  76. raise TypeError( "Abstract method" )
  77. def execute( self, trans, state ):
  78. raise TypeError( "Abstract method" )
  79. class InputDataModule( WorkflowModule ):
  80. type = "data_input"
  81. name = "Input dataset"
  82. @classmethod
  83. def new( Class, trans, tool_id=None ):
  84. module = Class( trans )
  85. module.state = dict( name="Input Dataset" )
  86. return module
  87. @classmethod
  88. def from_dict( Class, trans, d, secure=True ):
  89. module = Class( trans )
  90. state = from_json_string( d["tool_state"] )
  91. module.state = dict( name=state.get( "name", "Input Dataset" ) )
  92. return module
  93. @classmethod
  94. def from_workflow_step( Class, trans, step ):
  95. module = Class( trans )
  96. module.state = dict( name="Input Dataset" )
  97. if step.tool_inputs and "name" in step.tool_inputs:
  98. module.state['name'] = step.tool_inputs[ 'name' ]
  99. return module
  100. def save_to_step( self, step ):
  101. step.type = self.type
  102. step.tool_id = None
  103. step.tool_inputs = self.state
  104. def get_data_inputs( self ):
  105. return []
  106. def get_data_outputs( self ):
  107. return [ dict( name='output', extensions=['input'] ) ]
  108. def get_config_form( self ):
  109. form = web.FormBuilder( title=self.name ) \
  110. .add_text( "name", "Name", value=self.state['name'] )
  111. return self.trans.fill_template( "workflow/editor_generic_form.mako",
  112. module=self, form=form )
  113. def get_state( self, secure=True ):
  114. return to_json_string( self.state )
  115. def update_state( self, incoming ):
  116. self.state['name'] = incoming.get( 'name', 'Input Dataset' )
  117. def get_runtime_inputs( self, filter_set=['data'] ):
  118. label = self.state.get( "name", "Input Dataset" )
  119. return dict( input=DataToolParameter( None, Element( "param", name="input", label=label, multiple=True, type="data", format=', '.join(filter_set) ), self.trans ) )
  120. def get_runtime_state( self ):
  121. state = galaxy.tools.DefaultToolState()
  122. state.inputs = dict( input=None )
  123. return state
  124. def encode_runtime_state( self, trans, state ):
  125. fake_tool = Bunch( inputs=self.get_runtime_inputs() )
  126. return state.encode( fake_tool, trans.app )
  127. def decode_runtime_state( self, trans, string ):
  128. fake_tool = Bunch( inputs=self.get_runtime_inputs() )
  129. state = galaxy.tools.DefaultToolState()
  130. state.decode( string, fake_tool, trans.app )
  131. return state
  132. def update_runtime_state( self, trans, state, values ):
  133. errors = {}
  134. for name, param in self.get_runtime_inputs().iteritems():
  135. value, error = check_param( trans, param, values.get( name, None ), values )
  136. state.inputs[ name ] = value
  137. if error:
  138. errors[ name ] = error
  139. return errors
  140. def execute( self, trans, state ):
  141. return None, dict( output=state.inputs['input'])
  142. class ToolModule( WorkflowModule ):
  143. type = "tool"
  144. def __init__( self, trans, tool_id ):
  145. self.trans = trans
  146. self.tool_id = tool_id
  147. self.tool = trans.app.toolbox.get_tool( tool_id )
  148. self.post_job_actions = {}
  149. self.workflow_outputs = []
  150. self.state = None
  151. self.version_changes = []
  152. if self.tool:
  153. self.errors = None
  154. else:
  155. self.errors = {}
  156. self.errors[ tool_id ] = 'Tool unavailable'
  157. @classmethod
  158. def new( Class, trans, tool_id=None ):
  159. module = Class( trans, tool_id )
  160. module.state = module.tool.new_state( trans, all_pages=True )
  161. return module
  162. @classmethod
  163. def from_dict( Class, trans, d, secure=True ):
  164. tool_id = d[ 'tool_id' ]
  165. module = Class( trans, tool_id )
  166. module.state = galaxy.tools.DefaultToolState()
  167. if module.tool is not None:
  168. if d.get('tool_version', 'Unspecified') != module.get_tool_version():
  169. module.version_changes.append( "%s: using version '%s' instead of version '%s' indicated in this workflow." % ( tool_id, d.get( 'tool_version', 'Unspecified' ), module.get_tool_version() ) )
  170. module.state.decode( d[ "tool_state" ], module.tool, module.trans.app, secure=secure )
  171. module.errors = d.get( "tool_errors", None )
  172. module.post_job_actions = d.get( "post_job_actions", {} )
  173. module.workflow_outputs = d.get( "workflow_outputs", [] )
  174. return module
  175. @classmethod
  176. def from_workflow_step( Class, trans, step ):
  177. tool_id = step.tool_id
  178. if trans.app.toolbox and tool_id not in trans.app.toolbox.tools_by_id:
  179. # See if we have access to a different version of the tool.
  180. # TODO: If workflows are ever enhanced to use tool version
  181. # in addition to tool id, enhance the selection process here
  182. # to retrieve the correct version of the tool.
  183. tool = trans.app.toolbox.get_tool( tool_id )
  184. if tool:
  185. tool_id = tool.id
  186. if ( trans.app.toolbox and tool_id in trans.app.toolbox.tools_by_id ):
  187. if step.config:
  188. # This step has its state saved in the config field due to the
  189. # tool being previously unavailable.
  190. return module_factory.from_dict(trans, from_json_string(step.config), secure=False)
  191. module = Class( trans, tool_id )
  192. module.state = galaxy.tools.DefaultToolState()
  193. if step.tool_version and (step.tool_version != module.tool.version):
  194. module.version_changes.append("%s: using version '%s' instead of version '%s' indicated in this workflow." % (tool_id, module.tool.version, step.tool_version))
  195. module.state.inputs = module.tool.params_from_strings( step.tool_inputs, trans.app, ignore_errors=True )
  196. module.errors = step.tool_errors
  197. module.workflow_outputs = step.workflow_outputs
  198. pjadict = {}
  199. for pja in step.post_job_actions:
  200. pjadict[pja.action_type] = pja
  201. module.post_job_actions = pjadict
  202. return module
  203. return None
  204. @classmethod
  205. def __get_tool_version( cls, trans, tool_id ):
  206. # Return a ToolVersion if one exists for tool_id.
  207. return trans.install_model.context.query( trans.install_model.ToolVersion ) \
  208. .filter( trans.install_model.ToolVersion.table.c.tool_id == tool_id ) \
  209. .first()
  210. def save_to_step( self, step ):
  211. step.type = self.type
  212. step.tool_id = self.tool_id
  213. if self.tool:
  214. step.tool_version = self.get_tool_version()
  215. step.tool_inputs = self.tool.params_to_strings( self.state.inputs, self.trans.app )
  216. else:
  217. step.tool_version = None
  218. step.tool_inputs = None
  219. step.tool_errors = self.errors
  220. for k, v in self.post_job_actions.iteritems():
  221. # Must have action_type, step. output and a_args are optional.
  222. if 'output_name' in v:
  223. output_name = v['output_name']
  224. else:
  225. output_name = None
  226. if 'action_arguments' in v:
  227. action_arguments = v['action_arguments']
  228. else:
  229. action_arguments = None
  230. self.trans.sa_session.add(PostJobAction(v['action_type'], step, output_name, action_arguments))
  231. def get_name( self ):
  232. if self.tool:
  233. return self.tool.name
  234. return 'unavailable'
  235. def get_tool_id( self ):
  236. return self.tool_id
  237. def get_tool_version( self ):
  238. return self.tool.version
  239. def get_state( self, secure=True ):
  240. return self.state.encode( self.tool, self.trans.app, secure=secure )
  241. def get_errors( self ):
  242. return self.errors
  243. def get_tooltip( self, static_path='' ):
  244. if self.tool.help:
  245. return self.tool.help.render( static_path=static_path )
  246. else:
  247. return None
  248. def get_data_inputs( self ):
  249. data_inputs = []
  250. def callback( input, value, prefixed_name, prefixed_label ):
  251. if isinstance( input, DataToolParameter ):
  252. data_inputs.append( dict(
  253. name=prefixed_name,
  254. label=prefixed_label,
  255. multiple=input.multiple,
  256. extensions=input.extensions ) )
  257. visit_input_values( self.tool.inputs, self.state.inputs, callback )
  258. return data_inputs
  259. def get_data_outputs( self ):
  260. data_outputs = []
  261. data_inputs = None
  262. for name, tool_output in self.tool.outputs.iteritems():
  263. if tool_output.format_source != None:
  264. formats = [ 'input' ] # default to special name "input" which remove restrictions on connections
  265. if data_inputs == None:
  266. data_inputs = self.get_data_inputs()
  267. # find the input parameter referenced by format_source
  268. for di in data_inputs:
  269. # input names come prefixed with conditional and repeat names separated by '|'
  270. # remove prefixes when comparing with format_source
  271. if di['name'] != None and di['name'].split('|')[-1] == tool_output.format_source:
  272. formats = di['extensions']
  273. else:
  274. formats = [ tool_output.format ]
  275. for change_elem in tool_output.change_format:
  276. for when_elem in change_elem.findall( 'when' ):
  277. format = when_elem.get( 'format', None )
  278. if format and format not in formats:
  279. formats.append( format )
  280. data_outputs.append( dict( name=name, extensions=formats ) )
  281. return data_outputs
  282. def get_post_job_actions( self ):
  283. return self.post_job_actions
  284. def get_config_form( self ):
  285. self.add_dummy_datasets()
  286. return self.trans.fill_template( "workflow/editor_tool_form.mako",
  287. tool=self.tool, values=self.state.inputs, errors=( self.errors or {} ) )
  288. def update_state( self, incoming ):
  289. # Build a callback that handles setting an input to be required at
  290. # runtime. We still process all other parameters the user might have
  291. # set. We also need to make sure all datasets have a dummy value
  292. # for dependencies to see
  293. self.post_job_actions = ActionBox.handle_incoming(incoming)
  294. make_runtime_key = incoming.get( 'make_runtime', None )
  295. make_buildtime_key = incoming.get( 'make_buildtime', None )
  296. def item_callback( trans, key, input, value, error, old_value, context ):
  297. # Dummy value for Data parameters
  298. if isinstance( input, DataToolParameter ):
  299. return DummyDataset(), None
  300. # Deal with build/runtime (does not apply to Data parameters)
  301. if key == make_buildtime_key:
  302. return input.get_initial_value( trans, context ), None
  303. elif isinstance( old_value, RuntimeValue ):
  304. return old_value, None
  305. elif key == make_runtime_key:
  306. return RuntimeValue(), None
  307. elif isinstance(value, basestring) and re.search("\$\{.+?\}", str(value)):
  308. # Workflow Parameter Replacement, so suppress error from going to the workflow level.
  309. return value, None
  310. else:
  311. return value, error
  312. # Update state using incoming values
  313. errors = self.tool.update_state( self.trans, self.tool.inputs, self.state.inputs, incoming, item_callback=item_callback )
  314. self.errors = errors or None
  315. def check_and_update_state( self ):
  316. return self.tool.check_and_update_param_values( self.state.inputs, self.trans, allow_workflow_parameters=True )
  317. def add_dummy_datasets( self, connections=None):
  318. if connections:
  319. # Store onnections by input name
  320. input_connections_by_name = \
  321. dict( ( conn.input_name, conn ) for conn in connections )
  322. else:
  323. input_connections_by_name = {}
  324. # Any connected input needs to have value DummyDataset (these
  325. # are not persisted so we need to do it every time)
  326. def callback( input, value, prefixed_name, prefixed_label ):
  327. replacement = None
  328. if isinstance( input, DataToolParameter ):
  329. if connections is None or prefixed_name in input_connections_by_name:
  330. if input.multiple:
  331. replacement = [] if not connections else [DummyDataset() for conn in connections]
  332. else:
  333. replacement = DummyDataset()
  334. return replacement
  335. visit_input_values( self.tool.inputs, self.state.inputs, callback )
  336. class WorkflowModuleFactory( object ):
  337. def __init__( self, module_types ):
  338. self.module_types = module_types
  339. def new( self, trans, type, tool_id=None ):
  340. """
  341. Return module for type and (optional) tool_id intialized with
  342. new / default state.
  343. """
  344. assert type in self.module_types
  345. return self.module_types[type].new( trans, tool_id )
  346. def from_dict( self, trans, d, **kwargs ):
  347. """
  348. Return module initialized from the data in dictionary `d`.
  349. """
  350. type = d['type']
  351. assert type in self.module_types
  352. return self.module_types[type].from_dict( trans, d, **kwargs )
  353. def from_workflow_step( self, trans, step ):
  354. """
  355. Return module initializd from the WorkflowStep object `step`.
  356. """
  357. type = step.type
  358. return self.module_types[type].from_workflow_step( trans, step )
  359. module_factory = WorkflowModuleFactory( dict( data_input=InputDataModule, tool=ToolModule ) )