modules.py - This is a factory class that creates instances…

/lib/galaxy/workflow/modules.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 446 lines · 308 code · 87 blank · 51 comment · 59 complexity · 6b177ea5b3494e9200e8bf06c9a571e3 MD5 · raw file

"""
Modules used in building workflows
"""

import logging
import re

from elementtree.ElementTree import Element

import galaxy.tools
from galaxy import web
from galaxy.jobs.actions.post import ActionBox
from galaxy.model import PostJobAction
from galaxy.tools.parameters import check_param, DataToolParameter, DummyDataset, RuntimeValue, visit_input_values
from galaxy.util.bunch import Bunch
from galaxy.util.json import from_json_string, to_json_string

# Module-level logger, named after this module.
log = logging.getLogger( __name__ )


class WorkflowModule( object ):
    """
    Base class for the modules (steps) a workflow is built from.

    Subclasses are expected to provide the class attributes `type` and
    `name` (read by `get_type`, `get_name` and `save_to_step`) and to
    override the methods that raise ``TypeError( "Abstract method" )``.
    The optional arguments accepted here (`secure`, `incoming`) are ignored
    by the base implementations; they exist so that every module can be
    called the same way regardless of its concrete class.
    """

    def __init__( self, trans ):
        self.trans = trans

    ## ---- Creating modules from various representations ---------------------

    @classmethod
    def new( Class, trans, tool_id=None ):
        """
        Create a new instance of the module with default state
        """
        return Class( trans )

    @classmethod
    def from_dict( Class, trans, d, secure=True ):
        """
        Create a new instance of the module initialized from values in the
        dictionary `d`.

        `secure` is accepted for signature compatibility with subclasses
        and is not used by this base implementation.
        """
        return Class( trans )

    @classmethod
    def from_workflow_step( Class, trans, step ):
        """
        Create a new instance of the module from the workflow step `step`.
        The base implementation does not read anything from `step`.
        """
        return Class( trans )

    ## ---- Saving in various forms ------------------------------------------

    def save_to_step( self, step ):
        """
        Store this module on `step`; the base class only records the type.
        """
        step.type = self.type

    ## ---- General attributes -----------------------------------------------

    def get_type( self ):
        return self.type

    def get_name( self ):
        return self.name

    def get_tool_id( self ):
        return None

    def get_tooltip( self, static_path='' ):
        return None

    ## ---- Configuration time -----------------------------------------------

    def get_state( self, secure=True ):
        """
        Return the module state; the base class has none and returns None.
        `secure` is accepted for signature compatibility with subclasses.
        """
        return None

    def get_errors( self ):
        return None

    def get_data_inputs( self ):
        return []

    def get_data_outputs( self ):
        return []

    def update_state( self, incoming=None ):
        """
        Update the module state from the `incoming` values. The base class
        keeps no state, so this is a no-op.
        """
        pass

    def get_config_form( self ):
        raise TypeError( "Abstract method" )

    def check_and_update_state( self ):
        """
        If the state is not in sync with the current implementation of the
        module, try to update. Subclasses return the messages to be
        displayed; this base implementation does nothing and returns None.
        """
        pass

    ## ---- Run time ---------------------------------------------------------

    def get_runtime_inputs( self ):
        raise TypeError( "Abstract method" )

    def get_runtime_state( self ):
        raise TypeError( "Abstract method" )

    def encode_runtime_state( self, trans, state ):
        raise TypeError( "Abstract method" )

    def decode_runtime_state( self, trans, string ):
        raise TypeError( "Abstract method" )

    def update_runtime_state( self, trans, state, values ):
        raise TypeError( "Abstract method" )

    def execute( self, trans, state ):
        raise TypeError( "Abstract method" )


class InputDataModule( WorkflowModule ):
    """
    Workflow module standing for a dataset that is supplied as input.

    Its only configuration state is a display name, kept as
    ``self.state['name']``.
    """
    type = "data_input"
    name = "Input dataset"
    # Step name used wherever no explicit name is available.
    default_name = "Input Dataset"

    @classmethod
    def new( Class, trans, tool_id=None ):
        """
        Create a module carrying the default name; `tool_id` is unused.
        """
        module = Class( trans )
        module.state = dict( name=Class.default_name )
        return module

    @classmethod
    def from_dict( Class, trans, d, secure=True ):
        """
        Create a module from `d`, whose "tool_state" entry is a JSON string;
        only the "name" key of the decoded state is kept. `secure` is unused.
        """
        module = Class( trans )
        state = from_json_string( d["tool_state"] )
        module.state = dict( name=state.get( "name", Class.default_name ) )
        return module

    @classmethod
    def from_workflow_step( Class, trans, step ):
        """
        Create a module from `step`, taking the name from `step.tool_inputs`
        when one is stored there.
        """
        module = Class( trans )
        module.state = dict( name=Class.default_name )
        if step.tool_inputs and "name" in step.tool_inputs:
            module.state['name'] = step.tool_inputs[ 'name' ]
        return module

    def save_to_step( self, step ):
        """
        Write type and state onto `step`; input steps have no tool id.
        """
        step.type = self.type
        step.tool_id = None
        step.tool_inputs = self.state

    def get_data_inputs( self ):
        return []

    def get_data_outputs( self ):
        # Single output named 'output' carrying the special 'input' extension.
        return [ dict( name='output', extensions=['input'] ) ]

    def get_config_form( self ):
        """
        Render the editor form: a single text field for the step name.
        """
        form = web.FormBuilder( title=self.name ) \
            .add_text( "name", "Name", value=self.state['name'] )
        return self.trans.fill_template( "workflow/editor_generic_form.mako",
                                         module=self, form=form )

    def get_state( self, secure=True ):
        """
        Return the state serialized as a JSON string; `secure` is unused.
        """
        return to_json_string( self.state )

    def update_state( self, incoming ):
        self.state['name'] = incoming.get( 'name', self.default_name )

    def get_runtime_inputs( self, filter_set=( 'data', ) ):
        """
        Return the runtime parameters as a dict with one data parameter
        called "input". `filter_set` is an iterable of format names that is
        joined into the parameter's `format` attribute.

        The default is an immutable tuple so that it cannot be modified
        between calls.
        """
        label = self.state.get( "name", self.default_name )
        param_elem = Element( "param", name="input", label=label, multiple=True, type="data", format=', '.join( filter_set ) )
        return dict( input=DataToolParameter( None, param_elem, self.trans ) )

    def get_runtime_state( self ):
        """
        Return a fresh tool state whose single input is not yet set.
        """
        state = galaxy.tools.DefaultToolState()
        state.inputs = dict( input=None )
        return state

    def encode_runtime_state( self, trans, state ):
        # DefaultToolState.encode expects a tool; a Bunch exposing only
        # `inputs` stands in for one here.
        fake_tool = Bunch( inputs=self.get_runtime_inputs() )
        return state.encode( fake_tool, trans.app )

    def decode_runtime_state( self, trans, string ):
        # Same stand-in tool as in encode_runtime_state.
        fake_tool = Bunch( inputs=self.get_runtime_inputs() )
        state = galaxy.tools.DefaultToolState()
        state.decode( string, fake_tool, trans.app )
        return state

    def update_runtime_state( self, trans, state, values ):
        """
        Check every runtime input against `values`, store the checked value
        in `state.inputs` and return a dict of errors keyed by input name
        (empty when nothing failed).
        """
        errors = {}
        for name, param in self.get_runtime_inputs().items():
            value, error = check_param( trans, param, values.get( name, None ), values )
            state.inputs[ name ] = value
            if error:
                errors[ name ] = error
        return errors

    def execute( self, trans, state ):
        """
        Return ``( None, outputs )`` where `outputs` maps 'output' to the
        value chosen for the 'input' parameter.
        """
        return None, dict( output=state.inputs['input'])


class ToolModule( WorkflowModule ):
    """
    Workflow module wrapping a tool looked up in `trans.app.toolbox`.

    When the lookup yields no tool, `self.tool` is falsy and `self.errors`
    maps the tool id to 'Tool unavailable'. Methods that dereference
    `self.tool` without a guard (e.g. `get_tool_version`, `get_state`,
    `get_data_inputs`) cannot be used in that situation.
    """

    type = "tool"

    def __init__( self, trans, tool_id ):
        self.trans = trans
        self.tool_id = tool_id
        self.tool = trans.app.toolbox.get_tool( tool_id )
        self.post_job_actions = {}
        self.workflow_outputs = []
        self.state = None
        self.version_changes = []
        if self.tool:
            self.errors = None
        else:
            self.errors = { tool_id: 'Tool unavailable' }

    @staticmethod
    def _version_change_message( tool_id, used_version, requested_version ):
        """
        Build the notice recorded in `version_changes` when the tool version
        in use differs from the one stored with the workflow.
        """
        return "%s: using version '%s' instead of version '%s' indicated in this workflow." % ( tool_id, used_version, requested_version )

    @classmethod
    def new( Class, trans, tool_id=None ):
        """
        Create a module for `tool_id` with the tool's new / default state.

        NOTE: this dereferences `module.tool` unconditionally, so it fails
        when the tool could not be found in the toolbox.
        """
        module = Class( trans, tool_id )
        module.state = module.tool.new_state( trans, all_pages=True )
        return module

    @classmethod
    def from_dict( Class, trans, d, secure=True ):
        """
        Create a module from the dictionary `d` (keys read: 'tool_id',
        'tool_version', 'tool_state', 'tool_errors', 'post_job_actions',
        'workflow_outputs'). When the tool is unavailable the state is left
        as an empty `DefaultToolState`.
        """
        tool_id = d[ 'tool_id' ]
        module = Class( trans, tool_id )
        module.state = galaxy.tools.DefaultToolState()
        if module.tool is not None:
            requested_version = d.get( 'tool_version', 'Unspecified' )
            used_version = module.get_tool_version()
            if requested_version != used_version:
                # The version in use comes first, the version recorded in
                # the workflow second, as in from_workflow_step.
                module.version_changes.append( Class._version_change_message( tool_id, used_version, requested_version ) )
            module.state.decode( d[ "tool_state" ], module.tool, module.trans.app, secure=secure )
        module.errors = d.get( "tool_errors", None )
        module.post_job_actions = d.get( "post_job_actions", {} )
        module.workflow_outputs = d.get( "workflow_outputs", [] )
        return module

    @classmethod
    def from_workflow_step( Class, trans, step ):
        """
        Create a module from the workflow step `step`, or return None when
        no matching tool is available in the toolbox.
        """
        tool_id = step.tool_id
        if trans.app.toolbox and tool_id not in trans.app.toolbox.tools_by_id:
            # See if we have access to a different version of the tool.
            # TODO: If workflows are ever enhanced to use tool version
            # in addition to tool id, enhance the selection process here
            # to retrieve the correct version of the tool.
            tool = trans.app.toolbox.get_tool( tool_id )
            if tool:
                tool_id = tool.id
        if ( trans.app.toolbox and tool_id in trans.app.toolbox.tools_by_id ):
            if step.config:
                # This step has its state saved in the config field due to the
                # tool being previously unavailable.
                return module_factory.from_dict(trans, from_json_string(step.config), secure=False)
            module = Class( trans, tool_id )
            module.state = galaxy.tools.DefaultToolState()
            if step.tool_version and (step.tool_version != module.tool.version):
                module.version_changes.append( Class._version_change_message( tool_id, module.tool.version, step.tool_version ) )
            module.state.inputs = module.tool.params_from_strings( step.tool_inputs, trans.app, ignore_errors=True )
            module.errors = step.tool_errors
            module.workflow_outputs = step.workflow_outputs
            # Index the step's post job actions by their action type.
            pjadict = {}
            for pja in step.post_job_actions:
                pjadict[pja.action_type] = pja
            module.post_job_actions = pjadict
            return module
        return None

    @classmethod
    def __get_tool_version( cls, trans, tool_id ):
        # Return a ToolVersion if one exists for tool_id.
        return trans.install_model.context.query( trans.install_model.ToolVersion ) \
                               .filter( trans.install_model.ToolVersion.table.c.tool_id == tool_id ) \
                               .first()

    def save_to_step( self, step ):
        """
        Write this module's type, tool id, version, inputs and errors onto
        `step`, and add one `PostJobAction` per entry of
        `self.post_job_actions` to the SQLAlchemy session.
        """
        step.type = self.type
        step.tool_id = self.tool_id
        if self.tool:
            step.tool_version = self.get_tool_version()
            step.tool_inputs = self.tool.params_to_strings( self.state.inputs, self.trans.app )
        else:
            step.tool_version = None
            step.tool_inputs = None
        step.tool_errors = self.errors
        for action in self.post_job_actions.values():
            # Must have action_type, step.  output and a_args are optional.
            output_name = action['output_name'] if 'output_name' in action else None
            action_arguments = action['action_arguments'] if 'action_arguments' in action else None
            self.trans.sa_session.add(PostJobAction(action['action_type'], step, output_name, action_arguments))

    def get_name( self ):
        if self.tool:
            return self.tool.name
        return 'unavailable'

    def get_tool_id( self ):
        return self.tool_id

    def get_tool_version( self ):
        return self.tool.version

    def get_state( self, secure=True ):
        return self.state.encode( self.tool, self.trans.app, secure=secure )

    def get_errors( self ):
        return self.errors

    def get_tooltip( self, static_path='' ):
        """
        Return the rendered tool help, or None when the tool is unavailable
        or has no help.
        """
        if self.tool and self.tool.help:
            return self.tool.help.render( static_path=static_path )
        return None

    def get_data_inputs( self ):
        """
        Return one dict (name, label, multiple, extensions) per data
        parameter found while visiting the tool's inputs.
        """
        data_inputs = []

        def callback( input, value, prefixed_name, prefixed_label ):
            if isinstance( input, DataToolParameter ):
                data_inputs.append( dict(
                    name=prefixed_name,
                    label=prefixed_label,
                    multiple=input.multiple,
                    extensions=input.extensions ) )

        visit_input_values( self.tool.inputs, self.state.inputs, callback )
        return data_inputs

    def get_data_outputs( self ):
        """
        Return one dict (name, extensions) per tool output. The extensions
        come from the output's own format, or from the input named by
        `format_source`, plus any formats listed in `change_format`.
        """
        data_outputs = []
        data_inputs = None  # computed lazily, only if some output needs it
        for name, tool_output in self.tool.outputs.items():
            if tool_output.format_source is not None:
                formats = [ 'input' ]  # default to special name "input" which remove restrictions on connections
                if data_inputs is None:
                    data_inputs = self.get_data_inputs()
                # find the input parameter referenced by format_source
                for di in data_inputs:
                    # input names come prefixed with conditional and repeat names separated by '|'
                    # remove prefixes when comparing with format_source
                    if di['name'] is not None and di['name'].split('|')[-1] == tool_output.format_source:
                        # Copy the list: it is the input parameter's own
                        # `extensions` object and is appended to below.
                        formats = list( di['extensions'] )
            else:
                formats = [ tool_output.format ]
            for change_elem in tool_output.change_format:
                for when_elem in change_elem.findall( 'when' ):
                    format = when_elem.get( 'format', None )
                    if format and format not in formats:
                        formats.append( format )
            data_outputs.append( dict( name=name, extensions=formats ) )
        return data_outputs

    def get_post_job_actions( self ):
        return self.post_job_actions

    def get_config_form( self ):
        """
        Render the tool form for the workflow editor.
        """
        self.add_dummy_datasets()
        return self.trans.fill_template( "workflow/editor_tool_form.mako",
            tool=self.tool, values=self.state.inputs, errors=( self.errors or {} ) )

    def update_state( self, incoming ):
        """
        Update the post job actions and the tool state from the `incoming`
        values; `self.errors` ends up as the resulting errors or None.
        """
        # Build a callback that handles setting an input to be required at
        # runtime. We still process all other parameters the user might have
        # set. We also need to make sure all datasets have a dummy value
        # for dependencies to see

        self.post_job_actions = ActionBox.handle_incoming(incoming)

        make_runtime_key = incoming.get( 'make_runtime', None )
        make_buildtime_key = incoming.get( 'make_buildtime', None )

        def item_callback( trans, key, input, value, error, old_value, context ):
            # Dummy value for Data parameters
            if isinstance( input, DataToolParameter ):
                return DummyDataset(), None
            # Deal with build/runtime (does not apply to Data parameters)
            if key == make_buildtime_key:
                return input.get_initial_value( trans, context ), None
            elif isinstance( old_value, RuntimeValue ):
                return old_value, None
            elif key == make_runtime_key:
                return RuntimeValue(), None
            # Search the value as-is: it is already a string here, and
            # str() would fail on unicode text containing non-ASCII
            # characters.
            elif isinstance(value, basestring) and re.search( r"\$\{.+?\}", value ):
                # Workflow Parameter Replacement, so suppress error from going to the workflow level.
                return value, None
            else:
                return value, error

        # Update state using incoming values
        errors = self.tool.update_state( self.trans, self.tool.inputs, self.state.inputs, incoming, item_callback=item_callback )
        self.errors = errors or None

    def check_and_update_state( self ):
        return self.tool.check_and_update_param_values( self.state.inputs, self.trans, allow_workflow_parameters=True )

    def add_dummy_datasets( self, connections=None):
        """
        Give data parameters a `DummyDataset` placeholder value: all of them
        when `connections` is None, otherwise only those whose prefixed name
        matches the `input_name` of one of the `connections`.
        """
        if connections:
            # Store connections by input name
            input_connections_by_name = \
                dict( ( conn.input_name, conn ) for conn in connections )
        else:
            input_connections_by_name = {}
        # Any connected input needs to have value DummyDataset (these
        # are not persisted so we need to do it every time)

        def callback( input, value, prefixed_name, prefixed_label ):
            replacement = None
            if isinstance( input, DataToolParameter ):
                if connections is None or prefixed_name in input_connections_by_name:
                    if input.multiple:
                        # One placeholder per connection passed in.
                        replacement = [] if not connections else [DummyDataset() for _ in connections]
                    else:
                        replacement = DummyDataset()
            return replacement

        visit_input_values( self.tool.inputs, self.state.inputs, callback )


class WorkflowModuleFactory( object ):
    """
    Builds workflow modules by looking up their type name.

    `module_types` maps a type name to the module class handling it; each
    method here delegates to the classmethod of the same name on that
    class.
    """

    def __init__( self, module_types ):
        self.module_types = module_types

    def new( self, trans, type, tool_id=None ):
        """
        Return a module of the given `type` (for the optional `tool_id`)
        initialized with new / default state.
        """
        registry = self.module_types
        assert type in registry
        module_class = registry[ type ]
        return module_class.new( trans, tool_id )

    def from_dict( self, trans, d, **kwargs ):
        """
        Return a module initialized from the dictionary `d`; the module
        type is read from ``d['type']`` and any extra keyword arguments are
        passed through to the module class.
        """
        module_type = d[ 'type' ]
        registry = self.module_types
        assert module_type in registry
        module_class = registry[ module_type ]
        return module_class.from_dict( trans, d, **kwargs )

    def from_workflow_step( self, trans, step ):
        """
        Return a module initialized from the WorkflowStep object `step`;
        the module type is read from `step.type`.
        """
        module_class = self.module_types[ step.type ]
        return module_class.from_workflow_step( trans, step )

# Shared factory instance: maps each workflow step type name to the module
# class that implements it.
module_factory = WorkflowModuleFactory( { 'data_input': InputDataModule, 'tool': ToolModule } )
Summary ✨

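This file defines the building blocks of Galaxy workflows: a base class (WorkflowModule), two concrete module types (InputDataModule and ToolModule), and a factory class (WorkflowModuleFactory) that creates instances of the different module types based on the type specified. The factory has three methods: new, from_dict and from_workflow_step. The new method takes a trans object, a type string and an optional tool_id, and returns an instance of the corresponding module with default state. The from_dict method takes a trans object, a dictionary containing the data for the module, and optional keyword arguments, and returns an instance of the corresponding module initialized from the data in the dictionary. The from_workflow_step method takes a trans object and a workflow step, and returns an instance of the corresponding module initialized from that step.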
Tech Fingerprint

Alerts (64)

'def' Ensure functions have docstrings for documentation
44 49 54 57 60 63 68 71 74 77 80 83 95 98 101 104 107 110 119 125 132 139 144 147 150 156 159 162 166 171 175 181 190 213 219 233 269 291 296 299 302 305 308 314 317 328 352 355 360 371 392 395 405
'isinstance(' Overuse may indicate design issues; consider polymorphism
318 373 378 382 407
'== None' Use 'is' for None comparisons (e.g., x is None)
334
Complexity hotspot; lines 407 to 411 (total complexity: 8)
407 408 409 410 411