elements.py | searchcode

/lincs/analysis/elements.py

https://bitbucket.org/kljensen/kmj_lincs
Python | 117 lines | 85 code | 15 blank | 17 comment | 11 complexity | 920c4660581a6217809b57a6d8ecace1 MD5 | raw file

import numpy
import logging
import pandas
from collections import Counter
import pandas, numpy


def join_custom_aligned_counts(joined_intensities, cell_counts, mappings):
    """ Add a ``cell_count_a`` column of custom-aligned cell counts.

        One value is produced per (column, well) pair for every column key
        in `mappings`:

        * when the column's mapping is empty/falsy, the value already stored
          in the ``cell_count`` column of `joined_intensities` is reused;
        * otherwise each ``intensity well -> count well`` entry of the
          mapping selects ``cell_counts[column][count well]``.

        :param joined_intensities: Frame indexed by (column, well) that has
            a ``cell_count`` column.
        :type joined_intensities: pandas.DataFrame
        :param cell_counts: Mapping from column key to a mapping of
            well -> cell count.
        :type cell_counts: dict
        :param mappings: Mapping from column key to either a falsy value or
            a mapping of intensity well -> count well.
        :type mappings: dict
        :returns: `joined_intensities` joined (on its index) with the new
            ``cell_count_a`` column.
        :rtype: pandas.DataFrame
    """
    mapped_counts_index = []
    mapped_counts_values = []
    # `.items()` works for dicts and pandas Series on both Python 2 and 3.
    for k, v in mappings.items():
        if not v:
            # No custom alignment for this column: carry over the counts
            # that are already in the frame.  `.loc` is the label-based
            # replacement for the removed `.ix` indexer.
            existing_counts = joined_intensities.loc[k]['cell_count']
            for iw, c in existing_counts.items():
                mapped_counts_index.append((k, iw))
                mapped_counts_values.append(c)
        else:
            for iw, cw in v.items():
                mapped_counts_index.append((k, iw))
                mapped_counts_values.append(cell_counts[k][cw])
    s = pandas.DataFrame(
        mapped_counts_values,
        index=pandas.MultiIndex.from_tuples(mapped_counts_index),
        columns=["cell_count_a"]
    )
    return joined_intensities.join(s)


def counts_to_dataframe(cell_counts, index):
    """ Build a one-column (``cell_count``) frame of counts for `index`.

        `index` should be a pandas.MultiIndex, or a list of tuples, whose
        entries are (column, well) pairs.  Each pair is looked up as
        ``cell_counts[column][well]``; pairs that are missing from
        `cell_counts` get a count of 0.  Values are stored as floats.
    """
    no_wells = {}
    looked_up = [
        cell_counts.get(col, no_wells).get(well, 0)
        for col, well in index
    ]
    values = numpy.array(looked_up, dtype=float)
    return pandas.DataFrame(values, index=index, columns=["cell_count"])


def count_cells_per_roi(coldf):
    """ Tally how many rows (cells) fall into each ROI of one device
        column.

        :param coldf: Data for this column from Elements software; must
            have a ``RoiID`` column.
        :type coldf: pandas.DataFrame.
        :returns:  Counter mapping 0-based ROI index to number of cells.
    """
    rows_per_roi = coldf.groupby('RoiID').size()

    tally = Counter()
    for roi_id, n_rows in rows_per_roi.items():
        # RoiID values are 1-based in the data; shift to 0-based keys.
        tally[int(roi_id) - 1] += int(n_rows)
    return tally


def describe_cell_distribution(cell_counts, num_wells):
    """ Log summary statistics and a cells-per-ROI histogram.

        :param cell_counts: Mapping from ROI index to number of cells in
            that ROI (e.g. the Counter from `count_cells_per_roi`).
        :type cell_counts: dict
        :param num_wells: Total number of wells; used to derive how many
            wells have no entry in `cell_counts`.
        :type num_wells: int
        :returns:  None.  All output goes to the `logging` module at INFO
            level.
    """
    # Built-in sum() handles dict views on Python 3, where
    # numpy.asarray(dict.values()) does not produce a numeric array.
    total_cells = sum(cell_counts.values())
    # max() of an empty sequence raises, so report None when no ROI has
    # any cells.
    max_roi = max(cell_counts) if cell_counts else None
    logging.info("\t{0} total cells".format(total_cells))
    logging.info("\t{0} max ROI index".format(max_roi))
    logging.info("\t{0} ROIs with cells".format(len(cell_counts)))
    logging.info("\t{0} wells".format(num_wells))
    logging.info("\t---------- cell per ROI histogram")

    # histogram[n] == number of ROIs that contain exactly n cells.
    histogram = Counter(cell_counts.values())
    # Wells without an entry in cell_counts are reported in the zero bin.
    empty_wells = num_wells - sum(histogram.values())
    largest_bin = max(histogram) if histogram else 0

    for i in range(0, largest_bin + 1):
        # Counter returns 0 for bins that never occurred.
        v = empty_wells if i == 0 else histogram[i]
        logging.info("\t{0:3d} = {1:d}".format(i, v))


def extract_cell_counts(sheetdict, coldfdict):
    """ Count the number of cells in each well of each column.  If data for
        a column is split across multiple sheets of data in the spreadsheet,
        this function will combine the data and re-number the RoiID's so
        that each sheet's ROIs follow the ROIs of the sheets before it.

        :param sheetdict: Mapping from col number to a list of sheet names
        :type sheetdict: dict
        :param coldfdict: Mapping from sheet name to sheet data as DataFrames
        :type coldfdict: dict
        :returns:  Mapping from col number to a Counter of
            0-based ROI index -> number of cells.
    """
    all_cell_counts = {}
    # `.items()` works on both Python 2 and Python 3 dicts.
    for col_num, sheets in sheetdict.items():
        logging.info("Processing col {0}, sheets: {1}".format(
            col_num,
            ", ".join(sheets)
        ))
        this_col_cell_counts = Counter()
        # Amount added to each sheet's 0-based ROI indices so they continue
        # after the ROIs already collected for this column.
        roi_offset = 0
        for sheet in sheets:
            coldf = coldfdict[sheet]
            this_sheet_cell_counts = count_cells_per_roi(coldf)
            for roi_num, num_cells in this_sheet_cell_counts.items():
                this_col_cell_counts[roi_num + roi_offset] = num_cells
            # The next sheet must start one past the highest index used so
            # far; reusing the highest index itself would overwrite that
            # ROI's count.  Skip while nothing has been counted yet, since
            # max() of an empty Counter raises ValueError.
            if this_col_cell_counts:
                roi_offset = max(this_col_cell_counts) + 1

        all_cell_counts[col_num] = this_col_cell_counts
    return all_cell_counts


def describe_cell_distributions(coldfdict, num_wells):
    """ Log the cell distribution of every entry in `coldfdict`.

        :param coldfdict: Mapping from a name to that entry's data.
            NOTE(review): values are assumed to be DataFrames with a
            ``RoiID`` column, as in `extract_cell_counts` -- confirm
            against callers.
        :type coldfdict: dict
        :param num_wells: Number of wells in a single column
        :type num_wells: int
        :returns:  None.
    """
    for colname, coldf in coldfdict.items():
        logging.info("Cell distribution for {0}".format(colname))
        # describe_cell_distribution takes (cell_counts, num_wells); the
        # previous three-argument call always raised TypeError.  Convert
        # the raw data into per-ROI counts before describing it.
        describe_cell_distribution(count_cells_per_roi(coldf), num_wells)