/tools/expression/go_analysis_code.py
https://bitbucket.org/cistrome/cistrome-harvard/
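# Galaxy tool "code file": loaded through the tool XML's <code> tag. It
# supplies getSampleNames() as a dynamic options generator for a select
# parameter, and exec_after_process() as the post-job hook that registers
# the job's output files as new history datasets.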
# build list of available data
import glob
import os
import re
import shutil
import string
import sys

import galaxy.util
from galaxy import datatypes, config
from galaxy.model import Dataset

states = Dataset.states

repository = "/usr/local/galaxy/data/rg/library"


def getSampleNames(sampleName):
    """Build (name, value, selected) tuples for a Galaxy dynamic select
    list from the header row of the chosen expression file. The header
    is quoted and space separated, e.g. "s1" "s2" "s3".
    """
    fileName = sampleName.file_name
    f = open(fileName, 'r')
    sampleData = f.readlines()
    f.close()
    header = sampleData[0].strip('\n')
    samples = []
    x_re = re.compile(r'" "')  # column names are quoted, separated by '" "'
    headerItems = x_re.split(header)
    for i, sample in enumerate(headerItems):
        sample = sample.strip('"')
        samples.append((sample, str(i + 1), False))  # values are 1-based column indices
    return samples
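

# exec_after_process is the hook Galaxy runs after the tool's job finishes:
# app is the Galaxy application object, out_data maps output names to
# HistoryDatasetAssociations, and param_dict carries the tool's parameter
# values.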
def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """Tricky. We wrote
        outlist = ['%s\t%s\t%s' % (mng, newfiles[i], newnames[i]) for i in range(len(newfiles))]
    to the end of the log, giving tab separated file paths and types which
    we now need to create in the history.
    This code was written for the security branch.
    """
    mng = '### makenewgalaxy'
    tmpString = '### tmp'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    job_name = param_dict.get('title', 'makeAffyBatch').translate(trantab)
    dbkey = param_dict.get('dbkey', 'hg18')
    if dbkey == "fly.db0.db":
        dbkey = "fly.db0"
    base_dataset = out_data.items()[0][1]
    history = base_dataset.history
    if history is None:
        print "unknown history!"
        return
    logpath = out_data['logmeta'].file_name
    lf = open(logpath, 'r')
    loglist = lf.readlines()
    lf.close()
    # parse out the encoded new datasets for galaxy
    newfiles = [x for x in loglist if x.split('\t')[0] == mng]
    newfiles = [x.strip().split('\t')[1:] for x in newfiles]  # drop the leading '### makenewgalaxy' marker
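    # Each retained log line has the form (values here are illustrative only):
    #   ### makenewgalaxy<TAB>/tmp/rgtmpXYZ/affybatch.rda<TAB>affybatch<TAB>rdata
    # so after splitting, each entry is a [file_path, file_name, file_type] triple.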
    for file_path, file_name, file_type in newfiles:
        # note, this gets created - pass extra args?
        # For reference, the constructor ultimately being called is:
        # class DatasetInstance( object ):
        #     """A base class for all 'dataset instances', HDAs, LDAs, etc"""
        #     states = Dataset.states
        #     permitted_actions = Dataset.permitted_actions
        #     def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
        #                   dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
        #                   parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
        file_path = file_path.strip()
        newname = file_name.strip()
        info = '%s, %s' % (job_name, newname)
        iphenofile = file_path
        pk = ''  # could peek here instead, e.g. open(iphenofile, 'r').read(512)
        newdata = app.model.HistoryDatasetAssociation(extension=file_type, dbkey=dbkey, info=info,
                                                      name=newname, peek=pk, create_dataset=True,
                                                      sa_session=app.model.context)
        # as noted in encode_import_code.py on which this was based :)
        # This import should become a library
        # newdata.metadata.base_name = geoid
        efp = newdata.extra_files_path
        try:
            os.makedirs(efp)
        except OSError:
            pass  # already exists
        phenoname = os.path.split(iphenofile)[-1]  # file name only
        iid = os.path.splitext(phenoname)[0]
        newppath = os.path.join(efp, phenoname)
        shutil.copy(iphenofile, newppath)  # save pheno for metadata
        newdata.metadata.pheno_path = newppath
        newdata.metadata.base_name = iid
        # newdata.metadata.pheno = 'Name\tGroup\na.cel\t1\nb.cel\t1\nc.cel\t0\nd.cel\t0\n'
        try:
            app.security_agent.set_dataset_permissions(newdata.dataset, base_dataset.dataset.groups)
        except Exception:
            pass  # old pre-security?
        app.model.context.add(newdata)
        app.model.context.flush()
        try:
            shutil.copyfile(file_path, newdata.file_name)
            newdata.set_dataset_state(states.OK)
        except (IOError, OSError):
            s = "The requested file %s is missing from the system." % file_path
            lf = open(logpath, 'a')
            lf.write(s)
            lf.write('\n')
            lf.write('Trying to write to %s\n' % newdata.file_name)
            lf.close()
            newdata.info = s
            newdata.set_dataset_state(states.ERROR)
        newdata.dbkey = dbkey
        newdata.set_peek()
        newdata.set_meta()  # must set peek first
        lf = open(logpath, 'a')
        lf.write('## saving %s as %s\n' % (newname, newdata.file_name))
        lf.write('# newdata %s peek = %s\n' % (newname, newdata.peek))
        lf.write('# newdata %s metadata pheno_path = %s\n' % (newname, newdata.metadata.pheno_path))
        lf.write('\n')
        lf.close()
        newdata.set_size()
        history.add_dataset(newdata)
        app.model.context.flush()
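    # The script can also log '### tmp<TAB>/path/to/scratch' lines naming
    # temporary directories; now that their contents have been copied into
    # the new datasets they can be removed.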
    tmpDir = [x for x in loglist if x.split('\t')[0] == tmpString]
    if len(tmpDir) > 0:
        tmpDir = [x.strip().split('\t')[1:] for x in tmpDir]
        for tdir in tmpDir[0]:
            for f in os.listdir(tdir):
                os.unlink(os.path.join(tdir, f))
            os.rmdir(tdir)
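
# A minimal sketch (hypothetical tool id and parameter names) of how a tool
# XML might wire this code file in: <code> loads this module, getSampleNames
# feeds a dynamic select list, and exec_after_process runs automatically
# after the job because of its reserved name.
#
#   <tool id="go_analysis" name="GO analysis">
#     <code file="go_analysis_code.py"/>
#     <inputs>
#       <param name="sampleName" type="data" format="tabular" label="Expression file"/>
#       <param name="sample" type="select" dynamic_options="getSampleNames(sampleName=sampleName)"/>
#     </inputs>
#   </tool>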