PageRenderTime 27ms CodeModel.GetById 14ms app.highlight 10ms RepoModel.GetById 2ms app.codeStats 0ms

/tools/expression/corrtf_code.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 162 lines | 132 code | 28 blank | 2 comment | 38 complexity | af525d4f234de80a46f4eada5981dbac MD5 | raw file
  1#build list of available data
  2import string, os, sys, glob, shutil
  3import galaxy.util
  4from galaxy import datatypes, config, jobs
  5from com.lilly.cistrome.gs import getGeneSymbols
  6
  7repository = "/usr/local/galaxy/data/rg/library"
  8
  9def mapProbesetToGene(db, filename, coe, geneName):
 10    if (not db.endswith(".db")):
 11        db += ".db"
 12
 13    f = file(filename, 'r')
 14    lines = f.readlines()
 15    f.close()
 16
 17    if (len(lines) == 0):
 18        return
 19    elif (len(lines) == 1 and lines[0].strip() == '""'):
 20        try:
 21            f = file(filename, 'w')
 22            f.write("Gene %s not found in expression set\n" % (geneName))
 23            return
 24        finally:
 25            f.close()
 26
 27    header = "Gene\tCorrelation"
 28
 29    lines = lines[1:]
 30    probesetIds = []
 31    for (line) in lines:
 32        tokens = line.split()
 33        probesetId = tokens[0].strip('"')
 34        if (len(probesetId) > 0):
 35            probesetIds.append(probesetId)
 36
 37    if (len(probesetIds) > 0):
 38        geneSymbols = getGeneSymbols(db, probesetIds)
 39    else:
 40        geneSymbols = {}
 41
 42    if (len(lines) == 1):
 43        try:
 44            f = file(filename, 'w')
 45            f.write("No gene correlated to %s with correlation coefficient of %s\n" % (geneSymbols[probesetIds[0]], coe))
 46            return
 47        finally:
 48            f.close()
 49
 50    f = file(filename, 'w')
 51    f.write("%s\n" % (header))
 52    for (line) in lines:
 53        tokens = line.split()
 54        probesetId = tokens[0]
 55        gene = geneSymbols[probesetId.strip('"')]
 56        tokens[0] = gene
 57        f.write(" ".join(tokens) + '\n')
 58    f.close()
 59
 60def filter(outfile, log, species, coA, coR, tf, tr):
 61    if (species == 'Nil'):
 62        return
 63
 64    logLines = log.split("\n")
 65
 66    s = [x for x in logLines if x.split('\t')[0] == "### cwd"]
 67    if (len(s)) > 0:
 68        s = [x.strip().split('\t')[1:] for x in s]
 69        cwd = s[0][0].strip('\n')
 70
 71    header = None
 72    hasFilter = False
 73    if (coA):
 74        (header,coAMap) = getMap(os.path.join(cwd, 'coA' + species))
 75        hasFilter = True
 76    if (coR):
 77        (header,coRMap) = getMap(os.path.join(cwd, 'coR' + species))
 78        hasFilter = True
 79    if (tf):
 80        (header,tfMap) = getMap(os.path.join(cwd, 'tf' + species))
 81        hasFilter = True
 82    if (tr):
 83        (header,trMap) = getMap(os.path.join(cwd, 'tr' + species))
 84        hasFilter = True
 85
 86    if (hasFilter):
 87        header = "Gene\tCorrelation\t" + header
 88    else:
 89        return
 90
 91    try:
 92        f = file(outfile, 'r')
 93        lines = f.readlines()
 94    finally:
 95        f.close()
 96
 97    try:
 98        f = file(outfile, 'w')
 99        f.write(header)
100        for (line) in lines[1:]:
101            gene = line.split()[0].strip()
102            s = None
103            if (coA and gene in coAMap):
104                s = "%s\t%s" % ("\t".join(line.split()), coAMap[gene])
105            elif (coR and gene in coRMap):
106                s = "%s\t%s" % ("\t".join(line.split()), coRMap[gene])
107            elif (tf and gene in tfMap):
108                s = "%s\t%s" % ("\t".join(line.split()), tfMap[gene])
109            elif (tr and gene in trMap):
110                s = "%s\t%s" % ("\t".join(line.split()), trMap[gene])
111
112            if (s <> None):
113                f.write(s)
114    finally:
115        f.close()
116
117def getMap(which):
118    if (not os.path.isfile(which)):
119        return (which, {})
120
121    f = file(which, 'r')
122    try:
123        lines = f.readlines()
124    finally:
125        f.close()
126
127    header = lines[0]
128    headerTokens = header.split('\t')
129    header = "\t".join(headerTokens[1:])
130    lines = lines[1:]
131    map = {}
132    for (line) in lines:
133        tokens = line.split('\t')
134        map[tokens[0].strip()] = '\t'.join(tokens[1:])
135
136    return (header,map)
137
138def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
139    infile = inp_data['i1'].file_name
140    db = inp_data['i1'].dbkey
141    outfile = out_data['logmeta'].file_name
142
143    coe = inp_data.get('y')
144
145    param_dict = param_dict.get("opMode")
146    geneName = param_dict.get('geneName')
147
148    if (db == '?'):
149        return
150
151    mapProbesetToGene(db, outfile, coe, geneName)
152    filter(outfile, out_data['logmeta'].info, param_dict.get('filterSpecies'), param_dict.get('coA'), param_dict.get('coR'), param_dict.get('tf'), param_dict.get('tr'))
153
154    # First column has changed from probesetId to gene
155    try:
156        f = file(outfile, 'r')
157        pk = f.read()
158    finally:
159        f.close()
160    out_data['logmeta'].peek = pk
161    out_data['logmeta'].set_peek()
162