/tools/expression/go_analysis_code.py
https://bitbucket.org/cistrome/cistrome-harvard/
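# Galaxy tool "code file": loaded through the tool XML's <code> tag. It
# supplies getSampleNames() as a dynamic options generator for a select
# parameter, and exec_after_process() as the post-job hook that registers
# the job's output files as new history datasets.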
# build list of available data
import glob
import os
import re
import shutil
import string
import sys

import galaxy.util
from galaxy import datatypes, config
from galaxy.model import Dataset

states = Dataset.states

repository = "/usr/local/galaxy/data/rg/library"


def getSampleNames(sampleName):
    """Build (name, value, selected) tuples for a Galaxy dynamic select
    list from the header row of the chosen expression file. The header
    is quoted and space separated, e.g. "s1" "s2" "s3".
    """
    fileName = sampleName.file_name
    f = open(fileName, 'r')
    sampleData = f.readlines()
    f.close()
    header = sampleData[0].strip('\n')
    samples = []
    x_re = re.compile(r'" "')  # column names are quoted, separated by '" "'
    headerItems = x_re.split(header)
    for i, sample in enumerate(headerItems):
        sample = sample.strip('"')
        samples.append((sample, str(i + 1), False))  # values are 1-based column indices
    return samples
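

# exec_after_process is the hook Galaxy runs after the tool's job finishes:
# app is the Galaxy application object, out_data maps output names to
# HistoryDatasetAssociations, and param_dict carries the tool's parameter
# values.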
def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """Tricky. We wrote
        outlist = ['%s\t%s\t%s' % (mng, newfiles[i], newnames[i]) for i in range(len(newfiles))]
    to the end of the log, giving tab separated file paths and types which
    we now need to create in the history.
    This code was written for the security branch.
    """
    mng = '### makenewgalaxy'
    tmpString = '### tmp'
    killme = string.punctuation + string.whitespace
    trantab = string.maketrans(killme, '_' * len(killme))
    job_name = param_dict.get('title', 'makeAffyBatch').translate(trantab)
    dbkey = param_dict.get('dbkey', 'hg18')
    if dbkey == "fly.db0.db":
        dbkey = "fly.db0"
    base_dataset = out_data.items()[0][1]
    history = base_dataset.history
    if history is None:
        print "unknown history!"
        return
    logpath = out_data['logmeta'].file_name
    lf = open(logpath, 'r')
    loglist = lf.readlines()
    lf.close()
    # parse out the encoded new datasets for galaxy
    newfiles = [x for x in loglist if x.split('\t')[0] == mng]
    newfiles = [x.strip().split('\t')[1:] for x in newfiles]  # drop the leading '### makenewgalaxy' marker
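    # Each retained log line has the form (values here are illustrative only):
    #   ### makenewgalaxy<TAB>/tmp/rgtmpXYZ/affybatch.rda<TAB>affybatch<TAB>rdata
    # so after splitting, each entry is a [file_path, file_name, file_type] triple.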
    for file_path, file_name, file_type in newfiles:
        # note, this gets created - pass extra args?
        # For reference, the constructor ultimately being called is:
        # class DatasetInstance( object ):
        #     """A base class for all 'dataset instances', HDAs, LDAs, etc"""
        #     states = Dataset.states
        #     permitted_actions = Dataset.permitted_actions
        #     def __init__( self, id=None, hid=None, name=None, info=None, blurb=None, peek=None, extension=None,
        #                   dbkey=None, metadata=None, history=None, dataset=None, deleted=False, designation=None,
        #                   parent_id=None, validation_errors=None, visible=True, create_dataset = False ):
        file_path = file_path.strip()
        newname = file_name.strip()
        info = '%s, %s' % (job_name, newname)
        iphenofile = file_path
        pk = ''  # could peek here instead, e.g. open(iphenofile, 'r').read(512)
        newdata = app.model.HistoryDatasetAssociation(extension=file_type, dbkey=dbkey, info=info,
                                                      name=newname, peek=pk, create_dataset=True,
                                                      sa_session=app.model.context)
        # as noted in encode_import_code.py on which this was based :)
        # This import should become a library
        # newdata.metadata.base_name = geoid
        efp = newdata.extra_files_path
        try:
            os.makedirs(efp)
        except OSError:
            pass  # already exists
        phenoname = os.path.split(iphenofile)[-1]  # file name only
        iid = os.path.splitext(phenoname)[0]
        newppath = os.path.join(efp, phenoname)
        shutil.copy(iphenofile, newppath)  # save pheno for metadata
        newdata.metadata.pheno_path = newppath
        newdata.metadata.base_name = iid
        # newdata.metadata.pheno = 'Name\tGroup\na.cel\t1\nb.cel\t1\nc.cel\t0\nd.cel\t0\n'
        try:
            app.security_agent.set_dataset_permissions(newdata.dataset, base_dataset.dataset.groups)
        except Exception:
            pass  # old pre-security?
        app.model.context.add(newdata)
        app.model.context.flush()
        try:
            shutil.copyfile(file_path, newdata.file_name)
            newdata.set_dataset_state(states.OK)
        except (IOError, OSError):
            s = "The requested file %s is missing from the system." % file_path
            lf = open(logpath, 'a')
            lf.write(s)
            lf.write('\n')
            lf.write('Trying to write to %s\n' % newdata.file_name)
            lf.close()
            newdata.info = s
            newdata.set_dataset_state(states.ERROR)
        newdata.dbkey = dbkey
        newdata.set_peek()
        newdata.set_meta()  # must set peek first
        lf = open(logpath, 'a')
        lf.write('## saving %s as %s\n' % (newname, newdata.file_name))
        lf.write('# newdata %s peek = %s\n' % (newname, newdata.peek))
        lf.write('# newdata %s metadata pheno_path = %s\n' % (newname, newdata.metadata.pheno_path))
        lf.write('\n')
        lf.close()
        newdata.set_size()
        history.add_dataset(newdata)
        app.model.context.flush()
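    # The script can also log '### tmp<TAB>/path/to/scratch' lines naming
    # temporary directories; now that their contents have been copied into
    # the new datasets they can be removed.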
    tmpDir = [x for x in loglist if x.split('\t')[0] == tmpString]
    if len(tmpDir) > 0:
        tmpDir = [x.strip().split('\t')[1:] for x in tmpDir]
        for tdir in tmpDir[0]:
            for f in os.listdir(tdir):
                os.unlink(os.path.join(tdir, f))
            os.rmdir(tdir)
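
# A minimal sketch (hypothetical tool id and parameter names) of how a tool
# XML might wire this code file in: <code> loads this module, getSampleNames
# feeds a dynamic select list, and exec_after_process runs automatically
# after the job because of its reserved name.
#
#   <tool id="go_analysis" name="GO analysis">
#     <code file="go_analysis_code.py"/>
#     <inputs>
#       <param name="sampleName" type="data" format="tabular" label="Expression file"/>
#       <param name="sample" type="select" dynamic_options="getSampleNames(sampleName=sampleName)"/>
#     </inputs>
#   </tool>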