/tools/expression/corrtf_code.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 162 lines · 132 code · 28 blank · 2 comment · 46 complexity · af525d4f234de80a46f4eada5981dbac MD5 · raw file
- #build list of available data
- import string, os, sys, glob, shutil
- import galaxy.util
- from galaxy import datatypes, config, jobs
- from com.lilly.cistrome.gs import getGeneSymbols
- repository = "/usr/local/galaxy/data/rg/library"
- def mapProbesetToGene(db, filename, coe, geneName):
- if (not db.endswith(".db")):
- db += ".db"
- f = file(filename, 'r')
- lines = f.readlines()
- f.close()
- if (len(lines) == 0):
- return
- elif (len(lines) == 1 and lines[0].strip() == '""'):
- try:
- f = file(filename, 'w')
- f.write("Gene %s not found in expression set\n" % (geneName))
- return
- finally:
- f.close()
- header = "Gene\tCorrelation"
- lines = lines[1:]
- probesetIds = []
- for (line) in lines:
- tokens = line.split()
- probesetId = tokens[0].strip('"')
- if (len(probesetId) > 0):
- probesetIds.append(probesetId)
- if (len(probesetIds) > 0):
- geneSymbols = getGeneSymbols(db, probesetIds)
- else:
- geneSymbols = {}
- if (len(lines) == 1):
- try:
- f = file(filename, 'w')
- f.write("No gene correlated to %s with correlation coefficient of %s\n" % (geneSymbols[probesetIds[0]], coe))
- return
- finally:
- f.close()
- f = file(filename, 'w')
- f.write("%s\n" % (header))
- for (line) in lines:
- tokens = line.split()
- probesetId = tokens[0]
- gene = geneSymbols[probesetId.strip('"')]
- tokens[0] = gene
- f.write(" ".join(tokens) + '\n')
- f.close()
- def filter(outfile, log, species, coA, coR, tf, tr):
- if (species == 'Nil'):
- return
- logLines = log.split("\n")
- s = [x for x in logLines if x.split('\t')[0] == "### cwd"]
- if (len(s)) > 0:
- s = [x.strip().split('\t')[1:] for x in s]
- cwd = s[0][0].strip('\n')
- header = None
- hasFilter = False
- if (coA):
- (header,coAMap) = getMap(os.path.join(cwd, 'coA' + species))
- hasFilter = True
- if (coR):
- (header,coRMap) = getMap(os.path.join(cwd, 'coR' + species))
- hasFilter = True
- if (tf):
- (header,tfMap) = getMap(os.path.join(cwd, 'tf' + species))
- hasFilter = True
- if (tr):
- (header,trMap) = getMap(os.path.join(cwd, 'tr' + species))
- hasFilter = True
- if (hasFilter):
- header = "Gene\tCorrelation\t" + header
- else:
- return
- try:
- f = file(outfile, 'r')
- lines = f.readlines()
- finally:
- f.close()
- try:
- f = file(outfile, 'w')
- f.write(header)
- for (line) in lines[1:]:
- gene = line.split()[0].strip()
- s = None
- if (coA and gene in coAMap):
- s = "%s\t%s" % ("\t".join(line.split()), coAMap[gene])
- elif (coR and gene in coRMap):
- s = "%s\t%s" % ("\t".join(line.split()), coRMap[gene])
- elif (tf and gene in tfMap):
- s = "%s\t%s" % ("\t".join(line.split()), tfMap[gene])
- elif (tr and gene in trMap):
- s = "%s\t%s" % ("\t".join(line.split()), trMap[gene])
- if (s <> None):
- f.write(s)
- finally:
- f.close()
- def getMap(which):
- if (not os.path.isfile(which)):
- return (which, {})
- f = file(which, 'r')
- try:
- lines = f.readlines()
- finally:
- f.close()
- header = lines[0]
- headerTokens = header.split('\t')
- header = "\t".join(headerTokens[1:])
- lines = lines[1:]
- map = {}
- for (line) in lines:
- tokens = line.split('\t')
- map[tokens[0].strip()] = '\t'.join(tokens[1:])
- return (header,map)
- def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
- infile = inp_data['i1'].file_name
- db = inp_data['i1'].dbkey
- outfile = out_data['logmeta'].file_name
- coe = inp_data.get('y')
- param_dict = param_dict.get("opMode")
- geneName = param_dict.get('geneName')
- if (db == '?'):
- return
- mapProbesetToGene(db, outfile, coe, geneName)
- filter(outfile, out_data['logmeta'].info, param_dict.get('filterSpecies'), param_dict.get('coA'), param_dict.get('coR'), param_dict.get('tf'), param_dict.get('tr'))
- # First column has changed from probesetId to gene
- try:
- f = file(outfile, 'r')
- pk = f.read()
- finally:
- f.close()
- out_data['logmeta'].peek = pk
- out_data['logmeta'].set_peek()