PageRenderTime 35ms CodeModel.GetById 12ms RepoModel.GetById 0ms app.codeStats 0ms

/bin/analyze_lincs_treatments.py

https://bitbucket.org/kljensen/kmj_lincs
Python | 138 lines | 98 code | 12 blank | 28 comment | 10 complexity | b218937e6415428e5e26c1acfd182da2 MD5 | raw file
  1. """ Analyze LINCS Experiment.
  2. Usage:
  3. analyze_lincs_treatments.py [options] -c <config_file> -o <output_directory>
  4. analyze_lincs_treatments.py -h | --help
  5. analyze_lincs_treatments.py --version
  6. Options:
  7. -h --help Show this screen.
  8. -v --verbose Use verbose logging.
  9. -g --histograms Make histograms
  10. --version Show version.
  11. """
  12. from docopt import docopt
  13. import os
  14. import logging
  15. from lincs.io.config_stats import read_config
  16. from lincs.io.stats import read_data_file
  17. from lincs.stats.stats import extract_stats
  18. import pandas
  19. def setup_logging(verbose=False):
  20. """ Configure logging.
  21. :param verbose: Whether or not to use DEBUG log level
  22. :type verbose: bool.
  23. :returns: None.
  24. """
  25. logging.basicConfig(format='%(filename)s %(levelname)s ' \
  26. + 'line %(lineno)d --- %(message)s',)
  27. if verbose:
  28. logging.root.setLevel(logging.DEBUG)
  29. else:
  30. logging.root.setLevel(logging.INFO)
  31. def main(config, output_directory, histograms=False):
  32. """ Main routine
  33. :param config: Dictionary with analysis configuration
  34. :type config: dict
  35. :param output_directory: Path to file where .xlsx output should
  36. be written
  37. :type output_directory: str
  38. :returns: None.
  39. """
  40. if not os.path.exists(output_directory):
  41. os.makedirs(output_directory)
  42. output_file = os.path.join(output_directory, "output.xlsx")
  43. treatment_data = {}
  44. for t, v in config["treatments"].iteritems():
  45. data = read_data_file(
  46. v["path"],
  47. v["sheet"],
  48. config["signals"],
  49. v["cell_count_column"],
  50. )
  51. treatment_data[t] = data
  52. (cleaned_data,
  53. means,
  54. medians,
  55. stddevs,
  56. thresholds,
  57. oncounts,
  58. cprobs,
  59. minfo,
  60. onstats,
  61. thresholded_data,
  62. signature_data,
  63. normalized_treatments,
  64. normalized_treatments_clipped) \
  65. = extract_stats(
  66. treatment_data,
  67. config["signals"],
  68. config["on_threshold"]
  69. )
  70. if histograms:
  71. from lincs.stats.plots import makeIntensityHistograms
  72. makeIntensityHistograms(
  73. cleaned_data,
  74. config["signals"],
  75. thresholds,
  76. output_directory
  77. )
  78. writer = pandas.ExcelWriter(output_file)
  79. means.to_excel(writer, sheet_name="means")
  80. medians.to_excel(writer, sheet_name="medians")
  81. stddevs.to_excel(writer, sheet_name="stddevs")
  82. thresholds.to_excel(writer, sheet_name="thresholds")
  83. oncounts.to_excel(writer, sheet_name="oncounts")
  84. cprobs.to_excel(writer, sheet_name="cprobs")
  85. minfo.to_excel(writer, sheet_name="minfo")
  86. onstats.to_excel(writer, sheet_name="onstats")
  87. thresholded_data.to_excel(writer, sheet_name="thresholded")
  88. signature_data.to_excel(writer, sheet_name="signatures")
  89. normalized_versions = (
  90. ('norm', normalized_treatments),
  91. ('norm_clipped', normalized_treatments_clipped),
  92. )
  93. for (name, nd) in normalized_versions:
  94. for cell_count, normalized_data in nd.iteritems():
  95. for t, v in normalized_data.iteritems():
  96. sheet_name = "{0}_{1}_{2}cell".format(t, name, cell_count)
  97. v.to_excel(writer, sheet_name=sheet_name)
  98. writer.save()
  99. return (
  100. means,
  101. medians,
  102. stddevs,
  103. oncounts,
  104. cprobs,
  105. minfo,
  106. onstats,
  107. thresholded_data,
  108. signature_data,
  109. normalized_treatments
  110. )
  111. if __name__ == '__main__':
  112. arguments = docopt(__doc__, version='Analyze LINCS Treatment 0.1')
  113. setup_logging(verbose=arguments["--verbose"])
  114. histograms = False
  115. if arguments.get('--histograms'):
  116. histograms = True
  117. config = read_config(arguments["<config_file>"])
  118. main(
  119. config,
  120. arguments["<output_directory>"],
  121. histograms=histograms
  122. )