/lib/com/lilly/cistrome/gs.py
https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 80 lines · 58 code · 10 blank · 12 comment · 8 complexity · 0cf6f805b72016b193cf05c60732ef13 MD5 · raw file
- """
- gs.py
- A reusable program to fetch gene symbols using probe set IDs
- """
- import sys, string, os, subprocess, tempfile
- rprog="""getGeneSymbol=function(probesetIds, database)
- {
- library(annaffy)
- gene <- aaf.handler(probesetIds, database, "Symbol")
- #geneSymbol <- getText(gene)
- #geneSymbol <- paste("###geneSymbol", geneSymbol, sep='=')
- #geneSymbol
- return (getText(gene))
- }
- """
- rcall="""
- symbols = getGeneSymbol(c(%s), "%s")
- for (i in 1:length(symbols)) {
- s = paste("CISTROME_SYM:", symbols[i], "\\n")
- cat(s)
- }
- """
- def main():
- """called as
- getGeneSymbol.py database probesetId1 [probesetId2, ..., probesetIdn]
- eg
- getGeneSymbol.py hgu133a.db 1053_at 1007_s_at
- """
- nparm = len(sys.argv) - 1
- appname = sys.argv[0]
- if (3 > len(sys.argv)):
- print '%s needs at least 2 parameters' % (appname)
- print 'usage: %s database probesetId1 [probesetId2, ..., probesetIdn]' % (appname)
- print 'eg: %s hgu133a.db 1053_at 1007_s_at' % (appname)
- sys.exit(1)
- # short command line error
- database = sys.argv[1].strip()
- probesetIds = sys.argv[2:]
- print getGeneSymbols(database, probesetIds)
- def getGeneSymbols(database, probesetIds):
- ids = ""
- for (id) in probesetIds:
- ids += '"' + id + '",'
- ids = ids.rstrip(',')
- r = rprog + (rcall % (ids, database))
- tempdir = tempfile.mkdtemp()
- tmpfilename = os.path.join(tempdir, "stdout.txt")
- tmpfile = open(tmpfilename, "w+b")
- p = subprocess.Popen("R --vanilla", shell=True, executable="/bin/bash", stdin=subprocess.PIPE, stdout=tmpfile)
- p.communicate(r)
- tmpfile.close()
- tmpfile = open(tmpfilename, "r")
- symbols = tmpfile.readlines()
- tmpfile.close()
- os.unlink(tmpfilename)
- os.removedirs(tempdir)
- symbols = [x for x in symbols if x.startswith("CISTROME_SYM:")]
- symbols = [x.split("CISTROME_SYM:")[1].strip() for x in symbols]
- mappings = {}
- for i in range(0, len(probesetIds)):
- symbol = symbols[i]
- if (symbol == ''):
- symbol = probesetIds[i]
- mappings[probesetIds[i]] = symbol
- return mappings
- if __name__ == "__main__":
- main()