# analyze_lincs_treatments.py
#
# /bin/analyze_lincs_treatments.py
#
# https://bitbucket.org/kljensen/kmj_lincs · Python · 138 lines · 98 code · 12 blank · 28 comment · 10 complexity · b218937e6415428e5e26c1acfd182da2 MD5 · raw file

""" Analyze LINCS Experiment.

Usage:
  analyze_lincs_treatments.py [options] -c <config_file> -o <output_directory>
  analyze_lincs_treatments.py -h | --help
  analyze_lincs_treatments.py --version

Options:
  -h --help             Show this screen.
  -v --verbose          Use verbose logging.
  -g --histograms       Make histograms
  --version             Show version.
"""
from docopt import docopt
import os
import logging
from lincs.io.config_stats import read_config
from lincs.io.stats import read_data_file
from lincs.stats.stats import extract_stats
import pandas


def setup_logging(verbose=False):
    """ Set up the root logger for this script.

        Installs the basic logging configuration with a format showing the
        file name, level name, line number and message, then sets the root
        logger level.

        :param verbose: When true the root level is DEBUG, otherwise INFO
        :type verbose: bool.
        :returns:  None.
    """
    log_format = '%(filename)s %(levelname)s line %(lineno)d --- %(message)s'
    logging.basicConfig(format=log_format)

    # The level is set on the root logger explicitly, after basicConfig.
    level = logging.DEBUG if verbose else logging.INFO
    logging.getLogger().setLevel(level)


def _iter_items(container):
    """ Iterate over the (key, value) pairs of a mapping-like object.

        Uses ``iteritems()`` when the object provides it and falls back to
        ``items()`` otherwise (plain dicts on Python 3 have no
        ``iteritems``).

        :param container: Object exposing ``iteritems()`` or ``items()``
        :returns:  Iterable of (key, value) pairs.
    """
    iteritems = getattr(container, "iteritems", None)
    if iteritems is not None:
        return iteritems()
    return container.items()


def main(config, output_directory, histograms=False):
    """ Main routine

        Reads every treatment listed in the configuration, passes the data
        to ``extract_stats``, optionally calls ``makeIntensityHistograms``,
        and writes the resulting tables to ``output.xlsx`` inside
        ``output_directory``.

        :param config: Dictionary with analysis configuration.  The keys
         read here are "treatments", "signals" and "on_threshold".
        :type config: dict
        :param output_directory: Path to the directory in which output.xlsx
         is written; created if it does not exist.  It is also passed to
         makeIntensityHistograms when histograms are requested.
        :type output_directory: str
        :param histograms: Whether to call makeIntensityHistograms
        :type histograms: bool.
        :returns:  Tuple of (means, medians, stddevs, oncounts, cprobs,
         minfo, onstats, thresholded_data, signature_data,
         normalized_treatments).
    """

    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    output_file = os.path.join(output_directory, "output.xlsx")

    # Load one data set per configured treatment, keyed by treatment name.
    treatment_data = {}
    for t, v in _iter_items(config["treatments"]):
        treatment_data[t] = read_data_file(
            v["path"],
            v["sheet"],
            config["signals"],
            v["cell_count_column"],
        )

    (
        cleaned_data,
        means,
        medians,
        stddevs,
        thresholds,
        oncounts,
        cprobs,
        minfo,
        onstats,
        thresholded_data,
        signature_data,
        normalized_treatments,
        normalized_treatments_clipped,
    ) = extract_stats(
        treatment_data,
        config["signals"],
        config["on_threshold"]
    )

    if histograms:
        # Imported only on demand -- presumably so the plotting dependencies
        # are needed only when histograms are requested (TODO confirm).
        from lincs.stats.plots import makeIntensityHistograms
        makeIntensityHistograms(
            cleaned_data,
            config["signals"],
            thresholds,
            output_directory
        )

    # (sheet name, table) pairs, in the order the sheets are written.
    summary_sheets = (
        ("means", means),
        ("medians", medians),
        ("stddevs", stddevs),
        ("thresholds", thresholds),
        ("oncounts", oncounts),
        ("cprobs", cprobs),
        ("minfo", minfo),
        ("onstats", onstats),
        ("thresholded", thresholded_data),
        ("signatures", signature_data),
    )
    normalized_versions = (
        ('norm', normalized_treatments),
        ('norm_clipped', normalized_treatments_clipped),
    )

    # The context manager finalises the workbook even if writing a sheet
    # raises, and avoids the explicit ExcelWriter.save() call, which newer
    # pandas releases no longer provide.
    with pandas.ExcelWriter(output_file) as writer:
        for sheet_name, table in summary_sheets:
            table.to_excel(writer, sheet_name=sheet_name)

        # One sheet per (treatment, normalization variant, cell count).
        for (name, nd) in normalized_versions:
            for cell_count, normalized_data in _iter_items(nd):
                for t, v in _iter_items(normalized_data):
                    sheet_name = "{0}_{1}_{2}cell".format(t, name, cell_count)
                    v.to_excel(writer, sheet_name=sheet_name)

    # NOTE: thresholds and normalized_treatments_clipped are written to the
    # workbook above but are not part of the returned tuple.
    return (
        means,
        medians,
        stddevs,
        oncounts,
        cprobs,
        minfo,
        onstats,
        thresholded_data,
        signature_data,
        normalized_treatments
    )


if __name__ == '__main__':
    # Parse the command line against the usage text in the module docstring.
    cli_args = docopt(__doc__, version='Analyze LINCS Treatment 0.1')
    setup_logging(verbose=cli_args["--verbose"])

    # Normalise the histogram flag to a strict bool before handing it on.
    want_histograms = bool(cli_args.get('--histograms'))

    analysis_config = read_config(cli_args["<config_file>"])
    main(
        analysis_config,
        cli_args["<output_directory>"],
        histograms=want_histograms
    )
# Tech Fingerprint
#
# Alerts (3)
#
# Complexity hotspot; lines 106 to 108 (total complexity: 3)
# 106 107 108