PageRenderTime 33ms CodeModel.GetById 12ms app.highlight 15ms RepoModel.GetById 2ms app.codeStats 0ms

/lib/com/lilly/cistrome/gs.py

https://bitbucket.org/cistrome/cistrome-harvard/
Python | 80 lines | 74 code | 1 blank | 5 comment | 0 complexity | 0cf6f805b72016b193cf05c60732ef13 MD5 | raw file
 1"""
 2gs.py
 3
 4A reusable program to fetch gene symbols using probe set IDs
 5"""
 6import sys, string, os, subprocess, tempfile
 7
 8rprog="""getGeneSymbol=function(probesetIds, database)
 9{
10    library(annaffy)
11
12    gene <- aaf.handler(probesetIds, database, "Symbol")
13    #geneSymbol <- getText(gene)
14    #geneSymbol <- paste("###geneSymbol", geneSymbol, sep='=')
15    #geneSymbol
16    return (getText(gene))
17}
18"""
19
20rcall="""
21  symbols = getGeneSymbol(c(%s), "%s")
22  for (i in 1:length(symbols)) {
23    s = paste("CISTROME_SYM:", symbols[i], "\\n")
24    cat(s)
25  }
26"""
27
28def main():
29    """called as 
30getGeneSymbol.py database probesetId1 [probesetId2, ..., probesetIdn]
31
32eg
33getGeneSymbol.py hgu133a.db 1053_at 1007_s_at
34    """
35    nparm = len(sys.argv) - 1
36    appname = sys.argv[0]
37    if (3 > len(sys.argv)):
38        print '%s needs at least 2 parameters' % (appname)
39        print 'usage: %s database probesetId1 [probesetId2, ..., probesetIdn]' % (appname)
40        print 'eg: %s hgu133a.db 1053_at 1007_s_at' % (appname)
41        sys.exit(1)
42        # short command line error
43    database = sys.argv[1].strip()
44    probesetIds = sys.argv[2:]
45    print getGeneSymbols(database, probesetIds)
46
47def getGeneSymbols(database, probesetIds):
48    ids = ""
49    for (id) in probesetIds:
50        ids += '"' + id + '",'
51    ids = ids.rstrip(',')
52
53    r = rprog + (rcall % (ids, database))
54
55    tempdir = tempfile.mkdtemp()
56    tmpfilename = os.path.join(tempdir, "stdout.txt")
57    tmpfile = open(tmpfilename, "w+b")
58    p = subprocess.Popen("R --vanilla", shell=True, executable="/bin/bash", stdin=subprocess.PIPE, stdout=tmpfile)
59    p.communicate(r)
60    tmpfile.close()
61    tmpfile = open(tmpfilename, "r")
62    symbols = tmpfile.readlines()
63    tmpfile.close()
64    os.unlink(tmpfilename)
65    os.removedirs(tempdir)
66
67    symbols = [x for x in symbols if x.startswith("CISTROME_SYM:")]
68    symbols = [x.split("CISTROME_SYM:")[1].strip() for x in symbols]
69    mappings = {}
70    for i in range(0, len(probesetIds)):
71        symbol = symbols[i]
72        if (symbol == ''):
73            symbol = probesetIds[i]
74        mappings[probesetIds[i]] = symbol
75
76    return mappings
77
78if __name__ == "__main__":
79    main()
80