/tools/new_operations/operation_filter.py
https://bitbucket.org/cistrome/cistrome-harvard/
# runs after the job (and after the default post-filter)
import os

from galaxy import eggs
from galaxy import jobs
from galaxy.tools.parameters import DataToolParameter
from galaxy.jobs.handler import JOB_ERROR

# Older Python compatibility: fall back to the sets module when the
# built-in set type is unavailable (pre-2.4 interpreters).
try:
    set()
except NameError:
    from sets import Set as set
#def exec_before_process(app, inp_data, out_data, param_dict, tool=None):
#    """Sets the name of the data"""
#    dbkeys = sets.Set( [data.dbkey for data in inp_data.values()] )
#    if len(dbkeys) != 1:
#        raise Exception, '<p><font color="yellow">Both Queries must be from the same genome build</font></p>'
def validate_input( trans, error_map, param_values, page_param_map ):
    dbkeys = set()
    data_param_names = set()
    data_params = 0
    for name, param in page_param_map.iteritems():
        if isinstance( param, DataToolParameter ):
            # for each dataset parameter
            if param_values.get( name, None ) is not None:
                dbkeys.add( param_values[name].dbkey )
                data_params += 1
                # check metadata
                try:
                    dataset = param_values[name]  # separate name, to avoid shadowing the loop's 'param'
                    if isinstance( dataset.datatype, trans.app.datatypes_registry.get_datatype_by_extension( 'gff' ).__class__ ):
                        # TODO: currently cannot validate GFF inputs b/c they are not derived from interval.
                        pass
                    else:  # Validate interval datatype.
                        startCol = int( dataset.metadata.startCol )
                        endCol = int( dataset.metadata.endCol )
                        chromCol = int( dataset.metadata.chromCol )
                        if dataset.metadata.strandCol is not None:
                            strandCol = int( dataset.metadata.strandCol )
                        else:
                            strandCol = 0
                except Exception:
                    error_msg = "The attributes of this dataset are not properly set. " + \
                                "Click the pencil icon in the history item to set the chrom, start, end and strand columns."
                    error_map[name] = error_msg
            data_param_names.add( name )
    if len( dbkeys ) > 1:
        for name in data_param_names:
            error_map[name] = "All datasets must belong to the same genomic build; " \
                              "this dataset is linked to build '%s'" % param_values[name].dbkey
    if data_params != len( data_param_names ):
        for name in data_param_names:
            error_map[name] = "A dataset of the appropriate type is required"
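
# --- Illustrative sketch, not part of the tool: how the dbkey-consistency
# branch above behaves. 'StubDataset' and the parameter names are hypothetical
# stand-ins for Galaxy's dataset objects, with just enough shape (a .dbkey
# attribute) to exercise the check.
def _example_dbkey_mismatch():
    class StubDataset( object ):
        def __init__( self, dbkey ):
            self.dbkey = dbkey
    # two inputs from different builds: both should be flagged in error_map
    param_values = { 'input1': StubDataset( 'hg18' ), 'input2': StubDataset( 'hg19' ) }
    error_map = {}
    dbkeys = set( d.dbkey for d in param_values.values() )
    if len( dbkeys ) > 1:
        for name in param_values:
            error_map[name] = "All datasets must belong to the same genomic build; " \
                              "this dataset is linked to build '%s'" % param_values[name].dbkey
    return error_map  # both keys map to a build-mismatch message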
# Commented out by INS, 5/30/2007. What is the PURPOSE of this?
def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    """Verify the output data after each run: any stderr output marks the outputs as errored."""
    for name, data in out_data.items():
        if stderr and len( stderr ) > 0:
            data.blurb = JOB_ERROR
            data.state = JOB_ERROR
## def exec_after_process(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
##     pass
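
# --- Illustrative sketch, not part of the tool: what exec_after_process does to
# an output when the job wrote to stderr. 'StubData' is a hypothetical stand-in
# for a Galaxy output dataset, providing only the .blurb/.state attributes the
# hook touches.
def _example_stderr_marks_error():
    class StubData( object ):
        blurb = None
        state = 'ok'
    out_data = { 'output1': StubData() }
    stderr = 'segmentation fault'
    for name, data in out_data.items():
        if stderr and len( stderr ) > 0:
            data.blurb = JOB_ERROR  # JOB_ERROR is the constant imported above
            data.state = JOB_ERROR
    return out_data['output1'].state  # now JOB_ERROR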
def exec_after_merge(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    exec_after_process(
        app, inp_data, out_data, param_dict, tool=tool, stdout=stdout, stderr=stderr )

    # reset column metadata when the merge collapsed the output to chrom/start/end
    items = out_data.items()
    for name, data in items:
        if param_dict['returntype'] == True:
            data.metadata.chromCol = 1
            data.metadata.startCol = 2
            data.metadata.endCol = 3
        # merge always clobbers strand
        data.metadata.strandCol = None
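# --- Illustrative sketch, not part of the tool: the column metadata a merged
# output ends up with. The stub classes are hypothetical stand-ins for a Galaxy
# dataset and its metadata bundle.
def _example_merge_metadata():
    class StubMetadata( object ):
        chromCol, startCol, endCol, strandCol = 1, 2, 3, 6
    class StubData( object ):
        metadata = StubMetadata()
    data = StubData()
    param_dict = { 'returntype': True }
    if param_dict['returntype'] == True:
        # collapsed output is plain chrom/start/end
        data.metadata.chromCol = 1
        data.metadata.startCol = 2
        data.metadata.endCol = 3
    data.metadata.strandCol = None  # merge always clobbers strand
    return data.metadata.strandCol  # None
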
def exec_after_cluster(app, inp_data, out_data, param_dict, tool=None, stdout=None, stderr=None):
    exec_after_process(
        app, inp_data, out_data, param_dict, tool=tool, stdout=stdout, stderr=stderr )
    # strip strand column if clusters were merged
    if param_dict["returntype"] == '1':
        items = out_data.items()
        for name, data in items:
            data.metadata.strandCol = None
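
# --- Note (an assumption about the surrounding tool wrappers, not something
# shown in this file): Galaxy tool XML of this era typically pulls such hooks in
# with a <code> tag and names the post-job hook explicitly, along the lines of:
#
#     <code file="operation_filter.py">
#         <hook exec_after_process="exec_after_merge" />
#     </code>
#
# The exact tag and attribute names above are a sketch of that convention; the
# wrappers in this repository are the authoritative reference for the wiring.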