/tools/expression/corrtf_code.py

https://bitbucket.org/cistrome/cistrome-harvard/ · Python · 162 lines · 132 code · 28 blank · 2 comment · 46 complexity · af525d4f234de80a46f4eada5981dbac MD5 · raw file

  1. #build list of available data
  2. import string, os, sys, glob, shutil
  3. import galaxy.util
  4. from galaxy import datatypes, config, jobs
  5. from com.lilly.cistrome.gs import getGeneSymbols
  6. repository = "/usr/local/galaxy/data/rg/library"
  7. def mapProbesetToGene(db, filename, coe, geneName):
  8. if (not db.endswith(".db")):
  9. db += ".db"
  10. f = file(filename, 'r')
  11. lines = f.readlines()
  12. f.close()
  13. if (len(lines) == 0):
  14. return
  15. elif (len(lines) == 1 and lines[0].strip() == '""'):
  16. try:
  17. f = file(filename, 'w')
  18. f.write("Gene %s not found in expression set\n" % (geneName))
  19. return
  20. finally:
  21. f.close()
  22. header = "Gene\tCorrelation"
  23. lines = lines[1:]
  24. probesetIds = []
  25. for (line) in lines:
  26. tokens = line.split()
  27. probesetId = tokens[0].strip('"')
  28. if (len(probesetId) > 0):
  29. probesetIds.append(probesetId)
  30. if (len(probesetIds) > 0):
  31. geneSymbols = getGeneSymbols(db, probesetIds)
  32. else:
  33. geneSymbols = {}
  34. if (len(lines) == 1):
  35. try:
  36. f = file(filename, 'w')
  37. f.write("No gene correlated to %s with correlation coefficient of %s\n" % (geneSymbols[probesetIds[0]], coe))
  38. return
  39. finally:
  40. f.close()
  41. f = file(filename, 'w')
  42. f.write("%s\n" % (header))
  43. for (line) in lines:
  44. tokens = line.split()
  45. probesetId = tokens[0]
  46. gene = geneSymbols[probesetId.strip('"')]
  47. tokens[0] = gene
  48. f.write(" ".join(tokens) + '\n')
  49. f.close()
  50. def filter(outfile, log, species, coA, coR, tf, tr):
  51. if (species == 'Nil'):
  52. return
  53. logLines = log.split("\n")
  54. s = [x for x in logLines if x.split('\t')[0] == "### cwd"]
  55. if (len(s)) > 0:
  56. s = [x.strip().split('\t')[1:] for x in s]
  57. cwd = s[0][0].strip('\n')
  58. header = None
  59. hasFilter = False
  60. if (coA):
  61. (header,coAMap) = getMap(os.path.join(cwd, 'coA' + species))
  62. hasFilter = True
  63. if (coR):
  64. (header,coRMap) = getMap(os.path.join(cwd, 'coR' + species))
  65. hasFilter = True
  66. if (tf):
  67. (header,tfMap) = getMap(os.path.join(cwd, 'tf' + species))
  68. hasFilter = True
  69. if (tr):
  70. (header,trMap) = getMap(os.path.join(cwd, 'tr' + species))
  71. hasFilter = True
  72. if (hasFilter):
  73. header = "Gene\tCorrelation\t" + header
  74. else:
  75. return
  76. try:
  77. f = file(outfile, 'r')
  78. lines = f.readlines()
  79. finally:
  80. f.close()
  81. try:
  82. f = file(outfile, 'w')
  83. f.write(header)
  84. for (line) in lines[1:]:
  85. gene = line.split()[0].strip()
  86. s = None
  87. if (coA and gene in coAMap):
  88. s = "%s\t%s" % ("\t".join(line.split()), coAMap[gene])
  89. elif (coR and gene in coRMap):
  90. s = "%s\t%s" % ("\t".join(line.split()), coRMap[gene])
  91. elif (tf and gene in tfMap):
  92. s = "%s\t%s" % ("\t".join(line.split()), tfMap[gene])
  93. elif (tr and gene in trMap):
  94. s = "%s\t%s" % ("\t".join(line.split()), trMap[gene])
  95. if (s <> None):
  96. f.write(s)
  97. finally:
  98. f.close()
  99. def getMap(which):
  100. if (not os.path.isfile(which)):
  101. return (which, {})
  102. f = file(which, 'r')
  103. try:
  104. lines = f.readlines()
  105. finally:
  106. f.close()
  107. header = lines[0]
  108. headerTokens = header.split('\t')
  109. header = "\t".join(headerTokens[1:])
  110. lines = lines[1:]
  111. map = {}
  112. for (line) in lines:
  113. tokens = line.split('\t')
  114. map[tokens[0].strip()] = '\t'.join(tokens[1:])
  115. return (header,map)
  116. def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
  117. infile = inp_data['i1'].file_name
  118. db = inp_data['i1'].dbkey
  119. outfile = out_data['logmeta'].file_name
  120. coe = inp_data.get('y')
  121. param_dict = param_dict.get("opMode")
  122. geneName = param_dict.get('geneName')
  123. if (db == '?'):
  124. return
  125. mapProbesetToGene(db, outfile, coe, geneName)
  126. filter(outfile, out_data['logmeta'].info, param_dict.get('filterSpecies'), param_dict.get('coA'), param_dict.get('coR'), param_dict.get('tf'), param_dict.get('tr'))
  127. # First column has changed from probesetId to gene
  128. try:
  129. f = file(outfile, 'r')
  130. pk = f.read()
  131. finally:
  132. f.close()
  133. out_data['logmeta'].peek = pk
  134. out_data['logmeta'].set_peek()