plots.py | searchcode

/lincs/stats/plots.py

https://bitbucket.org/kljensen/kmj_lincs · Python · 91 lines · 64 code · 19 blank · 8 comment · 5 complexity · fe2ea0e10fd8cc5db69b7c37c08072d3 MD5 · raw file

import pandas
from matplotlib import pyplot
import numpy
import os
import logging


def get_best_bins_for_signal(treatment_data, signal, num_bins=100):
    """ Find a shared set of histogram bin edges for a signal across treatments.

        For every treatment, the wells with ``cell_count == 0`` contribute an
        upper limit of ``max + 10 * std`` of the signal, and the wells with
        ``cell_count == 1`` contribute their maximum.  The top bin edge is
        the smallest value among all the 1-cell maxima and the largest of the
        0-cell limits.

        Limits that come out as NaN (e.g. a treatment without any matching
        wells) are ignored: the builtin ``min``/``max`` give order-dependent
        results when NaN is present, which could turn every bin edge into NaN.

        Args:
            treatment_data: mapping of treatment name to a table with a
                ``cell_count`` column and a column named by ``signal``
                (presumably pandas DataFrames -- confirm against the caller).
            signal: name of the column to compute the bins for.
            num_bins: number of evenly spaced edges to generate.

        Returns:
            ``numpy.linspace(0, top_edge, num_bins)``.

        Raises:
            ValueError: if no treatment yields a usable (non-NaN) limit.
    """
    # Python 2 dicts (and old pandas containers) provide iteritems();
    # Python 3 dicts only provide items().
    iter_treatments = getattr(treatment_data, "iteritems", None)
    if iter_treatments is None:
        iter_treatments = treatment_data.items

    zero_cell_limits = []
    one_cell_maxes = []
    for treatment, data in iter_treatments():
        logging.info("Computing bin limits for %s/%s", treatment, signal)

        zero_cell = data[data.cell_count == 0][signal]
        one_cell = data[data.cell_count == 1][signal]

        zero_cell_limits.append(zero_cell.max() + 10 * zero_cell.std())
        one_cell_maxes.append(one_cell.max())

    # Drop undefined limits before comparing anything.
    candidates = [m for m in one_cell_maxes if not numpy.isnan(m)]
    usable_limits = [s for s in zero_cell_limits if not numpy.isnan(s)]
    if usable_limits:
        candidates.append(max(usable_limits))
    if not candidates:
        raise ValueError(
            "No usable 0-cell or 1-cell data to compute bins for "
            "signal {0!r}".format(signal)
        )

    return numpy.linspace(0, min(candidates), num_bins)


def _fraction_above(values, threshold):
    """ Return the fraction of non-null ``values`` strictly above ``threshold``.

        Returns NaN when there are no non-null values, instead of dividing
        by zero.
    """
    total = values.count()
    if total == 0:
        return float("nan")
    return values[values > threshold].count() / float(total)


def makeIntensityHistograms(treatment_data, signals, thresholds, output_dir):
    """ Make plots that show the overlap between the 0 and
        1-cell wells for each cytokine.

        One PNG named ``<treatment>-<signal>.png`` is written per
        treatment/signal pair into ``<output_dir>/images`` (created if
        missing).  Each figure overlays the normalised histograms of the
        signal for wells with ``cell_count == 0`` and ``cell_count == 1``,
        using the same bins for every treatment of a signal, and reports
        the percentage of wells of each kind above the threshold.

        Args:
            treatment_data: mapping of treatment name to a table with a
                ``cell_count`` column and one column per signal
                (presumably pandas DataFrames -- confirm against the caller).
            signals: iterable of signal (column) names to plot.
            thresholds: object indexed first by signal and then by the
                label ``(treatment, 'mid98')`` to obtain the on/off
                threshold for that pair.
            output_dir: directory under which ``images`` is created.
    """

    basedir = os.path.join(output_dir, "images")
    if not os.path.exists(basedir):
        os.makedirs(basedir)

    # Python 2 dicts (and old pandas containers) provide iteritems();
    # Python 3 dicts only provide items().
    iter_treatments = getattr(treatment_data, "iteritems", None)
    if iter_treatments is None:
        iter_treatments = treatment_data.items

    for signal in signals:

        bins = get_best_bins_for_signal(treatment_data, signal)
        top_edge = bins[-1]

        for treatment, data in iter_treatments():

            logging.info("Creating 0/1 histogram for {0}/{1}".format(
                treatment, signal
            ))

            c0s = data[data.cell_count == 0][signal]
            c1s = data[data.cell_count == 1][signal]

            # .loc is the label-based replacement for the removed .ix.
            threshold = thresholds[signal].loc[(treatment, 'mid98')]

            # Computed on the raw values, before clipping for display.
            c0s_on_percent = _fraction_above(c0s, threshold)
            c1s_on_percent = _fraction_above(c1s, threshold)

            output_file = os.path.join(
                basedir, "{0}-{1}.png".format(treatment, signal)
            )

            fig, ax = pyplot.subplots()
            try:
                # Anything greater than the greatest bin edge is clipped
                # so that it is counted in the greatest bin.
                ax.hist(c0s.clip(upper=top_edge), bins, alpha=0.5,
                        density=True)
                ax.hist(c1s.clip(upper=top_edge), bins, alpha=0.5,
                        density=True)

                if threshold < top_edge:
                    ax.axvline(x=threshold)

                # Position the summary in axes-fraction coordinates so it
                # stays in the upper-right corner whatever the data range.
                ax.text(
                    0.90, 0.90,
                    "0: {0:.2f} %on\n1: {1:.2f} %on".format(
                        c0s_on_percent * 100,
                        c1s_on_percent * 100
                    ),
                    transform=ax.transAxes,
                    horizontalalignment='right',
                    verticalalignment='top',
                )
                ax.set_title(
                    "{0} {1}\n(0={2:.2f}% on; 1={3:.2f}% on)".format(
                        treatment,
                        signal,
                        c0s_on_percent * 100,
                        c1s_on_percent * 100
                    )
                )

                fig.savefig(output_file)
            finally:
                # Always release the figure, even if plotting or saving
                # fails, so later plots do not draw onto a stale figure.
                pyplot.close(fig)

    # import IPython; IPython.embed()