/bin/analyze_lincs_treatments.py
Python | 138 lines | 98 code | 12 blank | 28 comment | 10 complexity | b218937e6415428e5e26c1acfd182da2 MD5 | raw file
- """ Analyze LINCS Experiment.
- Usage:
- analyze_lincs_treatments.py [options] -c <config_file> -o <output_directory>
- analyze_lincs_treatments.py -h | --help
- analyze_lincs_treatments.py --version
- Options:
- -h --help Show this screen.
- -v --verbose Use verbose logging.
- -g --histograms Make histograms
- --version Show version.
- """
- from docopt import docopt
- import os
- import logging
- from lincs.io.config_stats import read_config
- from lincs.io.stats import read_data_file
- from lincs.stats.stats import extract_stats
- import pandas
def setup_logging(verbose=False):
    """ Set up the root logger for this script.

    :param verbose: If true, log at DEBUG level; otherwise log at INFO
    :type verbose: bool.
    :returns: None.
    """
    # Adjacent literals are joined at compile time into one format string.
    message_format = ('%(filename)s %(levelname)s '
                      'line %(lineno)d --- %(message)s')
    logging.basicConfig(format=message_format)

    level = logging.DEBUG if verbose else logging.INFO
    logging.root.setLevel(level)
def _iter_pairs(container):
    """ Iterate over the (key, value) pairs of a dict-like container.

    :param container: Object offering ``iteritems()`` or ``items()``
    :returns: Whatever the container's pair-iteration method returns.
    """
    # Python 3 dicts and pandas >= 2.0 objects no longer have iteritems();
    # older containers that still provide it keep using it unchanged.
    if hasattr(container, "iteritems"):
        return container.iteritems()
    return container.items()


def main(config, output_directory, histograms=False):
    """ Main routine

    Reads the data of every configured treatment, passes it to
    extract_stats and writes the resulting tables as sheets of
    ``output.xlsx`` inside ``output_directory``.

    :param config: Dictionary with analysis configuration. The keys
        "treatments", "signals" and "on_threshold" are read here; every
        treatment entry must provide "path", "sheet" and
        "cell_count_column".
    :type config: dict
    :param output_directory: Path to the directory in which output.xlsx
        is written; it is created if it does not exist. It is also
        handed to makeIntensityHistograms when histograms are requested.
    :type output_directory: str
    :param histograms: Whether to call makeIntensityHistograms
    :type histograms: bool
    :returns: Tuple of (means, medians, stddevs, oncounts, cprobs, minfo,
        onstats, thresholded_data, signature_data, normalized_treatments).
    """
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    output_file = os.path.join(output_directory, "output.xlsx")

    treatment_data = {}
    for t, v in _iter_pairs(config["treatments"]):
        treatment_data[t] = read_data_file(
            v["path"],
            v["sheet"],
            config["signals"],
            v["cell_count_column"],
        )

    (cleaned_data,
     means,
     medians,
     stddevs,
     thresholds,
     oncounts,
     cprobs,
     minfo,
     onstats,
     thresholded_data,
     signature_data,
     normalized_treatments,
     normalized_treatments_clipped) = extract_stats(
        treatment_data,
        config["signals"],
        config["on_threshold"]
    )

    if histograms:
        # Imported lazily so the plotting module is only needed on request.
        from lincs.stats.plots import makeIntensityHistograms
        makeIntensityHistograms(
            cleaned_data,
            config["signals"],
            thresholds,
            output_directory
        )

    # (sheet name, table) pairs, in the order the sheets are written.
    summary_sheets = (
        ("means", means),
        ("medians", medians),
        ("stddevs", stddevs),
        ("thresholds", thresholds),
        ("oncounts", oncounts),
        ("cprobs", cprobs),
        ("minfo", minfo),
        ("onstats", onstats),
        ("thresholded", thresholded_data),
        ("signatures", signature_data),
    )
    normalized_versions = (
        ('norm', normalized_treatments),
        ('norm_clipped', normalized_treatments_clipped),
    )

    # The context manager saves and closes the workbook on exit, also when
    # writing a sheet raises; ExcelWriter.save() is gone in pandas >= 2.0.
    with pandas.ExcelWriter(output_file) as writer:
        for sheet_name, table in summary_sheets:
            table.to_excel(writer, sheet_name=sheet_name)
        for (name, nd) in normalized_versions:
            for cell_count, normalized_data in _iter_pairs(nd):
                for t, v in _iter_pairs(normalized_data):
                    sheet_name = "{0}_{1}_{2}cell".format(t, name, cell_count)
                    v.to_excel(writer, sheet_name=sheet_name)

    return (
        means,
        medians,
        stddevs,
        oncounts,
        cprobs,
        minfo,
        onstats,
        thresholded_data,
        signature_data,
        normalized_treatments
    )
if __name__ == '__main__':
    # Parse the command line against the usage text in the module docstring.
    arguments = docopt(__doc__, version='Analyze LINCS Treatment 0.1')
    setup_logging(verbose=arguments["--verbose"])

    # Normalise the optional flag to a plain boolean.
    make_histograms = bool(arguments.get('--histograms'))

    analysis_config = read_config(arguments["<config_file>"])
    main(
        analysis_config,
        arguments["<output_directory>"],
        histograms=make_histograms,
    )