/lib/com/lilly/cistrome/gs.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 80 lines · 58 code · 10 blank · 12 comment · 8 complexity · 0cf6f805b72016b193cf05c60732ef13 MD5 · raw file

  1. """
  2. gs.py
  3. A reusable program to fetch gene symbols using probe set IDs
  4. """
  5. import sys, string, os, subprocess, tempfile
  6. rprog="""getGeneSymbol=function(probesetIds, database)
  7. {
  8. library(annaffy)
  9. gene <- aaf.handler(probesetIds, database, "Symbol")
  10. #geneSymbol <- getText(gene)
  11. #geneSymbol <- paste("###geneSymbol", geneSymbol, sep='=')
  12. #geneSymbol
  13. return (getText(gene))
  14. }
  15. """
  16. rcall="""
  17. symbols = getGeneSymbol(c(%s), "%s")
  18. for (i in 1:length(symbols)) {
  19. s = paste("CISTROME_SYM:", symbols[i], "\\n")
  20. cat(s)
  21. }
  22. """
  23. def main():
  24. """called as
  25. getGeneSymbol.py database probesetId1 [probesetId2, ..., probesetIdn]
  26. eg
  27. getGeneSymbol.py hgu133a.db 1053_at 1007_s_at
  28. """
  29. nparm = len(sys.argv) - 1
  30. appname = sys.argv[0]
  31. if (3 > len(sys.argv)):
  32. print '%s needs at least 2 parameters' % (appname)
  33. print 'usage: %s database probesetId1 [probesetId2, ..., probesetIdn]' % (appname)
  34. print 'eg: %s hgu133a.db 1053_at 1007_s_at' % (appname)
  35. sys.exit(1)
  36. # short command line error
  37. database = sys.argv[1].strip()
  38. probesetIds = sys.argv[2:]
  39. print getGeneSymbols(database, probesetIds)
  40. def getGeneSymbols(database, probesetIds):
  41. ids = ""
  42. for (id) in probesetIds:
  43. ids += '"' + id + '",'
  44. ids = ids.rstrip(',')
  45. r = rprog + (rcall % (ids, database))
  46. tempdir = tempfile.mkdtemp()
  47. tmpfilename = os.path.join(tempdir, "stdout.txt")
  48. tmpfile = open(tmpfilename, "w+b")
  49. p = subprocess.Popen("R --vanilla", shell=True, executable="/bin/bash", stdin=subprocess.PIPE, stdout=tmpfile)
  50. p.communicate(r)
  51. tmpfile.close()
  52. tmpfile = open(tmpfilename, "r")
  53. symbols = tmpfile.readlines()
  54. tmpfile.close()
  55. os.unlink(tmpfilename)
  56. os.removedirs(tempdir)
  57. symbols = [x for x in symbols if x.startswith("CISTROME_SYM:")]
  58. symbols = [x.split("CISTROME_SYM:")[1].strip() for x in symbols]
  59. mappings = {}
  60. for i in range(0, len(probesetIds)):
  61. symbol = symbols[i]
  62. if (symbol == ''):
  63. symbol = probesetIds[i]
  64. mappings[probesetIds[i]] = symbol
  65. return mappings
  66. if __name__ == "__main__":
  67. main()