PageRenderTime 66ms CodeModel.GetById 33ms app.highlight 27ms RepoModel.GetById 1ms app.codeStats 1ms

/lib/galaxy/workflow/modules.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 446 lines | 415 code | 18 blank | 13 comment | 0 complexity | 6b177ea5b3494e9200e8bf06c9a571e3 MD5 | raw file
  1"""
  2Modules used in building workflows
  3"""
  4
  5import logging
  6import re
  7
  8from elementtree.ElementTree import Element
  9
 10import galaxy.tools
 11from galaxy import web
 12from galaxy.jobs.actions.post import ActionBox
 13from galaxy.model import PostJobAction
 14from galaxy.tools.parameters import check_param, DataToolParameter, DummyDataset, RuntimeValue, visit_input_values
 15from galaxy.util.bunch import Bunch
 16from galaxy.util.json import from_json_string, to_json_string
 17
 18log = logging.getLogger( __name__ )
 19
 20
 21class WorkflowModule( object ):
 22
 23    def __init__( self, trans ):
 24        self.trans = trans
 25
 26    ## ---- Creating modules from various representations ---------------------
 27
 28    @classmethod
 29    def new( Class, trans, tool_id=None ):
 30        """
 31        Create a new instance of the module with default state
 32        """
 33        return Class( trans )
 34
 35    @classmethod
 36    def from_dict( Class, trans, d ):
 37        """
 38        Create a new instance of the module initialized from values in the
 39        dictionary `d`.
 40        """
 41        return Class( trans )
 42
 43    @classmethod
 44    def from_workflow_step( Class, trans, step ):
 45        return Class( trans )
 46
 47    ## ---- Saving in various forms ------------------------------------------
 48
 49    def save_to_step( self, step ):
 50        step.type = self.type
 51
 52    ## ---- General attributes -----------------------------------------------
 53
 54    def get_type( self ):
 55        return self.type
 56
 57    def get_name( self ):
 58        return self.name
 59
 60    def get_tool_id( self ):
 61        return None
 62
 63    def get_tooltip( self, static_path='' ):
 64        return None
 65
 66    ## ---- Configuration time -----------------------------------------------
 67
 68    def get_state( self ):
 69        return None
 70
 71    def get_errors( self ):
 72        return None
 73
 74    def get_data_inputs( self ):
 75        return []
 76
 77    def get_data_outputs( self ):
 78        return []
 79
 80    def update_state( self ):
 81        pass
 82
 83    def get_config_form( self ):
 84        raise TypeError( "Abstract method" )
 85
 86    def check_and_update_state( self ):
 87        """
 88        If the state is not in sync with the current implementation of the
 89        module, try to update. Returns a list of messages to be displayed
 90        """
 91        pass
 92
 93    ## ---- Run time ---------------------------------------------------------
 94
 95    def get_runtime_inputs( self ):
 96        raise TypeError( "Abstract method" )
 97
 98    def get_runtime_state( self ):
 99        raise TypeError( "Abstract method" )
100
101    def encode_runtime_state( self, trans, state ):
102        raise TypeError( "Abstract method" )
103
104    def decode_runtime_state( self, trans, string ):
105        raise TypeError( "Abstract method" )
106
107    def update_runtime_state( self, trans, state, values ):
108        raise TypeError( "Abstract method" )
109
110    def execute( self, trans, state ):
111        raise TypeError( "Abstract method" )
112
113
class InputDataModule( WorkflowModule ):
    """
    Workflow step standing in for a dataset supplied when the workflow runs.

    The only configurable state is a ``name`` entry kept in the dict
    ``self.state``; it is used as the label of the run-time input.
    """
    type = "data_input"
    name = "Input dataset"
    # Value used for state['name'] whenever no name has been supplied.
    default_name = "Input Dataset"

    @classmethod
    def new( Class, trans, tool_id=None ):
        """
        Create a module whose state holds the default name. `tool_id` is
        ignored.
        """
        module = Class( trans )
        module.state = dict( name=Class.default_name )
        return module

    @classmethod
    def from_dict( Class, trans, d, secure=True ):
        """
        Create a module from `d`, whose "tool_state" entry is a JSON string.
        Only the "name" key of the decoded state is kept. `secure` is unused.
        """
        module = Class( trans )
        state = from_json_string( d["tool_state"] )
        module.state = dict( name=state.get( "name", Class.default_name ) )
        return module

    @classmethod
    def from_workflow_step( Class, trans, step ):
        """
        Create a module from `step`, taking the name from `step.tool_inputs`
        when it has one.
        """
        module = Class( trans )
        module.state = dict( name=Class.default_name )
        if step.tool_inputs and "name" in step.tool_inputs:
            module.state['name'] = step.tool_inputs[ 'name' ]
        return module

    def save_to_step( self, step ):
        """
        Store this module on `step`; the state dict goes into `tool_inputs`
        and `tool_id` is cleared.
        """
        step.type = self.type
        step.tool_id = None
        step.tool_inputs = self.state

    def get_data_inputs( self ):
        """An input step consumes no data, so this is always empty."""
        return []

    def get_data_outputs( self ):
        """Return the single output, named 'output', with extension 'input'."""
        return [ dict( name='output', extensions=['input'] ) ]

    def get_config_form( self ):
        """Render the editor form holding the single "name" text field."""
        form = web.FormBuilder( title=self.name ) \
            .add_text( "name", "Name", value=self.state['name'] )
        return self.trans.fill_template( "workflow/editor_generic_form.mako",
                                         module=self, form=form )

    def get_state( self, secure=True ):
        """Return the state dict as a JSON string. `secure` is unused."""
        return to_json_string( self.state )

    def update_state( self, incoming ):
        """Set the name from `incoming`, falling back to the default name."""
        self.state['name'] = incoming.get( 'name', self.default_name )

    def get_runtime_inputs( self, filter_set=( 'data', ) ):
        """
        Return a dict with a single 'input' DataToolParameter labelled with
        this step's name. `filter_set` is an iterable of format names that is
        comma-joined into the parameter's ``format`` attribute.

        The default is a tuple rather than a list so that no mutable object
        is shared between calls.
        """
        label = self.state.get( "name", self.default_name )
        return dict( input=DataToolParameter( None, Element( "param", name="input", label=label, multiple=True, type="data", format=', '.join(filter_set) ), self.trans ) )

    def get_runtime_state( self ):
        """Return a fresh DefaultToolState whose only input, 'input', is None."""
        state = galaxy.tools.DefaultToolState()
        state.inputs = dict( input=None )
        return state

    def encode_runtime_state( self, trans, state ):
        """Encode `state` against a stand-in tool exposing the run-time inputs."""
        fake_tool = Bunch( inputs=self.get_runtime_inputs() )
        return state.encode( fake_tool, trans.app )

    def decode_runtime_state( self, trans, string ):
        """Decode `string` into a new DefaultToolState and return it."""
        fake_tool = Bunch( inputs=self.get_runtime_inputs() )
        state = galaxy.tools.DefaultToolState()
        state.decode( string, fake_tool, trans.app )
        return state

    def update_runtime_state( self, trans, state, values ):
        """
        Check each run-time input against `values`, store the checked value
        in `state.inputs`, and return a dict of errors keyed by input name.
        """
        errors = {}
        for name, param in self.get_runtime_inputs().items():
            value, error = check_param( trans, param, values.get( name, None ), values )
            state.inputs[ name ] = value
            if error:
                errors[ name ] = error
        return errors

    def execute( self, trans, state ):
        """
        Return ``( None, outputs )`` where `outputs` maps 'output' to the
        value held in the state's 'input' entry.
        """
        return None, dict( output=state.inputs['input'])
193
class ToolModule( WorkflowModule ):
    """
    Workflow step that wraps a tool looked up in the application's toolbox.

    `self.tool` is whatever ``trans.app.toolbox.get_tool( tool_id )``
    returned. When that is falsy the module is still created, with
    `self.errors` set to ``{ tool_id: 'Tool unavailable' }``.
    """

    type = "tool"

    def __init__( self, trans, tool_id ):
        self.trans = trans
        self.tool_id = tool_id
        self.tool = trans.app.toolbox.get_tool( tool_id )
        self.post_job_actions = {}
        self.workflow_outputs = []
        self.state = None
        self.version_changes = []
        if self.tool:
            self.errors = None
        else:
            self.errors = { tool_id: 'Tool unavailable' }

    @classmethod
    def new( Class, trans, tool_id=None ):
        """
        Create a module for `tool_id` with the tool's new (default) state
        covering all pages.
        """
        module = Class( trans, tool_id )
        module.state = module.tool.new_state( trans, all_pages=True )
        return module

    @classmethod
    def from_dict( Class, trans, d, secure=True ):
        """
        Create a module from the dictionary `d`.

        `d` must contain 'tool_id' and, when the tool is available,
        'tool_state'. 'tool_version', 'tool_errors', 'post_job_actions' and
        'workflow_outputs' are optional. `secure` is passed on to the state
        decoder. A message is appended to `version_changes` when the version
        recorded in `d` differs from the available tool's version.
        """
        tool_id = d[ 'tool_id' ]
        module = Class( trans, tool_id )
        module.state = galaxy.tools.DefaultToolState()
        if module.tool is not None:
            workflow_version = d.get( 'tool_version', 'Unspecified' )
            current_version = module.get_tool_version()
            if workflow_version != current_version:
                # "using" is the tool we actually have; "indicated" is what the
                # workflow recorded (same order as from_workflow_step).
                module.version_changes.append( "%s: using version '%s' instead of version '%s' indicated in this workflow." % ( tool_id, current_version, workflow_version ) )
            module.state.decode( d[ "tool_state" ], module.tool, module.trans.app, secure=secure )
        module.errors = d.get( "tool_errors", None )
        module.post_job_actions = d.get( "post_job_actions", {} )
        module.workflow_outputs = d.get( "workflow_outputs", [] )
        return module

    @classmethod
    def from_workflow_step( Class, trans, step ):
        """
        Create a module from a workflow step, or return None when no tool for
        the step can be found in the toolbox.
        """
        tool_id = step.tool_id
        if trans.app.toolbox and tool_id not in trans.app.toolbox.tools_by_id:
            # See if we have access to a different version of the tool.
            # TODO: If workflows are ever enhanced to use tool version
            # in addition to tool id, enhance the selection process here
            # to retrieve the correct version of the tool.
            tool = trans.app.toolbox.get_tool( tool_id )
            if tool:
                tool_id = tool.id
        if ( trans.app.toolbox and tool_id in trans.app.toolbox.tools_by_id ):
            if step.config:
                # This step has its state saved in the config field due to the
                # tool being previously unavailable.
                return module_factory.from_dict(trans, from_json_string(step.config), secure=False)
            module = Class( trans, tool_id )
            module.state = galaxy.tools.DefaultToolState()
            if step.tool_version and (step.tool_version != module.tool.version):
                module.version_changes.append("%s: using version '%s' instead of version '%s' indicated in this workflow." % (tool_id, module.tool.version, step.tool_version))
            module.state.inputs = module.tool.params_from_strings( step.tool_inputs, trans.app, ignore_errors=True )
            module.errors = step.tool_errors
            module.workflow_outputs = step.workflow_outputs
            # Index the step's post job actions by their action type.
            pjadict = {}
            for pja in step.post_job_actions:
                pjadict[pja.action_type] = pja
            module.post_job_actions = pjadict
            return module
        return None

    @classmethod
    def __get_tool_version( cls, trans, tool_id ):
        # Return a ToolVersion if one exists for tool_id.
        return trans.install_model.context.query( trans.install_model.ToolVersion ) \
                               .filter( trans.install_model.ToolVersion.table.c.tool_id == tool_id ) \
                               .first()

    def save_to_step( self, step ):
        """
        Store this module on `step` and add one PostJobAction per entry of
        `post_job_actions` to the SQLAlchemy session.
        """
        step.type = self.type
        step.tool_id = self.tool_id
        if self.tool:
            step.tool_version = self.get_tool_version()
            step.tool_inputs = self.tool.params_to_strings( self.state.inputs, self.trans.app )
        else:
            step.tool_version = None
            step.tool_inputs = None
        step.tool_errors = self.errors
        for pja in self.post_job_actions.values():
            # Must have action_type, step.  output and a_args are optional.
            output_name = pja['output_name'] if 'output_name' in pja else None
            action_arguments = pja['action_arguments'] if 'action_arguments' in pja else None
            self.trans.sa_session.add(PostJobAction(pja['action_type'], step, output_name, action_arguments))

    def get_name( self ):
        """Return the tool's name, or 'unavailable' when there is no tool."""
        if self.tool:
            return self.tool.name
        return 'unavailable'

    def get_tool_id( self ):
        """Return the tool id this module was created with."""
        return self.tool_id

    def get_tool_version( self ):
        """Return the tool's version, or None when there is no tool."""
        if self.tool is None:
            return None
        return self.tool.version

    def get_state( self, secure=True ):
        """Return the module state encoded against the tool."""
        return self.state.encode( self.tool, self.trans.app, secure=secure )

    def get_errors( self ):
        """Return the errors recorded for this module (may be None)."""
        return self.errors

    def get_tooltip( self, static_path='' ):
        """
        Return the tool's rendered help, or None when the tool is missing or
        has no help.
        """
        if self.tool is not None and self.tool.help:
            return self.tool.help.render( static_path=static_path )
        return None

    def get_data_inputs( self ):
        """
        Return one dict (name, label, multiple, extensions) for every
        DataToolParameter visited in the tool's inputs.
        """
        data_inputs = []

        def callback( input, value, prefixed_name, prefixed_label ):
            if isinstance( input, DataToolParameter ):
                data_inputs.append( dict(
                    name=prefixed_name,
                    label=prefixed_label,
                    multiple=input.multiple,
                    extensions=input.extensions ) )

        visit_input_values( self.tool.inputs, self.state.inputs, callback )
        return data_inputs

    def get_data_outputs( self ):
        """
        Return one dict (name, extensions) per tool output.

        The extensions come from the output's own format or, when it names a
        `format_source`, from the matching data input; formats listed in the
        output's `change_format` 'when' elements are then appended.
        """
        data_outputs = []
        data_inputs = None  # computed lazily, at most once
        for name, tool_output in self.tool.outputs.items():
            if tool_output.format_source is not None:
                formats = [ 'input' ]  # default to special name "input" which remove restrictions on connections
                if data_inputs is None:
                    data_inputs = self.get_data_inputs()
                # find the input parameter referenced by format_source
                for di in data_inputs:
                    # input names come prefixed with conditional and repeat names separated by '|'
                    # remove prefixes when comparing with format_source
                    if di['name'] is not None and di['name'].split('|')[-1] == tool_output.format_source:
                        # Copy: formats may be appended to below, and the
                        # input parameter's own list must not be modified.
                        formats = list( di['extensions'] )
            else:
                formats = [ tool_output.format ]
            for change_elem in tool_output.change_format:
                for when_elem in change_elem.findall( 'when' ):
                    format = when_elem.get( 'format', None )
                    if format and format not in formats:
                        formats.append( format )
            data_outputs.append( dict( name=name, extensions=formats ) )
        return data_outputs

    def get_post_job_actions( self ):
        """Return the post job actions attached to this module."""
        return self.post_job_actions

    def get_config_form( self ):
        """Render the tool's editor form after filling in dummy datasets."""
        self.add_dummy_datasets()
        return self.trans.fill_template( "workflow/editor_tool_form.mako",
            tool=self.tool, values=self.state.inputs, errors=( self.errors or {} ) )

    def update_state( self, incoming ):
        """
        Update the post job actions and the tool state from `incoming`, and
        store the resulting errors (None when there are none).
        """
        # Build a callback that handles setting an input to be required at
        # runtime. We still process all other parameters the user might have
        # set. We also need to make sure all datasets have a dummy value
        # for dependencies to see

        self.post_job_actions = ActionBox.handle_incoming(incoming)

        make_runtime_key = incoming.get( 'make_runtime', None )
        make_buildtime_key = incoming.get( 'make_buildtime', None )

        def item_callback( trans, key, input, value, error, old_value, context ):
            # Dummy value for Data parameters
            if isinstance( input, DataToolParameter ):
                return DummyDataset(), None
            # Deal with build/runtime (does not apply to Data parameters)
            if key == make_buildtime_key:
                return input.get_initial_value( trans, context ), None
            elif isinstance( old_value, RuntimeValue ):
                return old_value, None
            elif key == make_runtime_key:
                return RuntimeValue(), None
            elif isinstance( value, basestring ) and re.search( r"\$\{.+?\}", value ):
                # Workflow Parameter Replacement, so suppress error from going to the workflow level.
                # The value is searched as-is: str() on a non-ASCII unicode
                # value would raise UnicodeEncodeError.
                return value, None
            else:
                return value, error

        # Update state using incoming values
        errors = self.tool.update_state( self.trans, self.tool.inputs, self.state.inputs, incoming, item_callback=item_callback )
        self.errors = errors or None

    def check_and_update_state( self ):
        """
        Delegate to the tool's `check_and_update_param_values`, allowing
        workflow parameters, and return its result.
        """
        return self.tool.check_and_update_param_values( self.state.inputs, self.trans, allow_workflow_parameters=True )

    def add_dummy_datasets( self, connections=None):
        """
        Replace data input values in the state with DummyDataset placeholders.

        With `connections` left as None every data input is replaced;
        otherwise only the inputs named by a connection's `input_name` are.
        """
        if connections:
            # Store connections by input name
            input_connections_by_name = \
                dict( ( conn.input_name, conn ) for conn in connections )
        else:
            input_connections_by_name = {}
        # Any connected input needs to have value DummyDataset (these
        # are not persisted so we need to do it every time)

        def callback( input, value, prefixed_name, prefixed_label ):
            replacement = None
            if isinstance( input, DataToolParameter ):
                if connections is None or prefixed_name in input_connections_by_name:
                    if input.multiple:
                        replacement = [] if not connections else [ DummyDataset() for _ in connections ]
                    else:
                        replacement = DummyDataset()
            return replacement

        visit_input_values( self.tool.inputs, self.state.inputs, callback )
417
class WorkflowModuleFactory( object ):
    """
    Builds workflow modules by dispatching on a type name.

    `module_types` maps each type name to the module class that handles it.
    """

    def __init__( self, module_types ):
        self.module_types = module_types

    def new( self, trans, type, tool_id=None ):
        """
        Return a module of the given type, for the (optional) tool_id,
        initialized with new / default state.
        """
        registry = self.module_types
        assert type in registry
        module_class = registry[ type ]
        return module_class.new( trans, tool_id )

    def from_dict( self, trans, d, **kwargs ):
        """
        Return a module initialized from the data in dictionary `d`; the
        class is chosen by ``d['type']`` and extra keyword arguments are
        forwarded to it.
        """
        module_type = d['type']
        registry = self.module_types
        assert module_type in registry
        module_class = registry[ module_type ]
        return module_class.from_dict( trans, d, **kwargs )

    def from_workflow_step( self, trans, step ):
        """
        Return a module initialized from the WorkflowStep object `step`; the
        class is chosen by ``step.type``.
        """
        module_class = self.module_types[ step.type ]
        return module_class.from_workflow_step( trans, step )
445
# Shared factory instance, mapping each step type name to its module class.
module_factory = WorkflowModuleFactory( {
    'data_input': InputDataModule,
    'tool': ToolModule,
} )