kmj_lincs /lincs/analysis/elements.py

Language Python Lines 118
MD5 Hash 920c4660581a6217809b57a6d8ecace1 Estimated Cost $1,755 (why?)
Repository https://bitbucket.org/kljensen/kmj_lincs.git View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import numpy
import logging
import pandas
from collections import Counter
import pandas, numpy


def join_custom_aligned_counts(joined_intensities, cell_counts, mappings):
    """ Add a ``cell_count_a`` column of re-aligned cell counts.

        For every key ``k`` of `mappings`:

        * if ``mappings[k]`` is falsy (e.g. an empty dict) the values already
          present in ``joined_intensities.loc[k]['cell_count']`` are reused;
        * otherwise ``mappings[k]`` maps an intensity well ``iw`` to a count
          well ``cw`` and ``cell_counts[k][cw]`` is used for ``(k, iw)``.

        :param joined_intensities: Frame with a ``cell_count`` column whose
            index is joined against ``(k, iw)`` tuples.
        :type joined_intensities: pandas.DataFrame
        :param cell_counts: Mapping ``k -> {cw: count}``
        :type cell_counts: dict
        :param mappings: Mapping ``k -> {iw: cw}`` (or a falsy value)
        :type mappings: dict
        :returns: `joined_intensities` joined with the ``cell_count_a`` column.
    """
    aligned_index = []
    aligned_values = []
    # ``.items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for k, well_map in mappings.items():
        if not well_map:
            # No custom alignment for this key: keep the existing counts.
            # ``.loc`` replaces the removed ``.ix`` indexer; zipping the index
            # with the series is what ``Series.iteritems`` did internally.
            existing = joined_intensities.loc[k]['cell_count']
            for iw, count in zip(existing.index, existing):
                aligned_index.append((k, iw))
                aligned_values.append(count)
        else:
            for iw, cw in well_map.items():
                aligned_index.append((k, iw))
                aligned_values.append(cell_counts[k][cw])

    aligned = pandas.DataFrame(
        aligned_values,
        index=pandas.MultiIndex.from_tuples(aligned_index),
        columns=["cell_count_a"]
    )
    return joined_intensities.join(aligned)


def counts_to_dataframe(cell_counts, index):
    """ Look up the cell count for every ``(column, well)`` pair in `index`.

        `index` should be a pandas.MultiIndex, or a list of tuples,
        specifying column, well.  Pairs that are missing from `cell_counts`
        (unknown column or unknown well) get a count of 0.

        :param cell_counts: Mapping ``column -> {well: count}``
        :type cell_counts: dict
        :returns: Frame indexed by `index` with one float column,
            ``cell_count``.
    """
    looked_up = (
        cell_counts.get(column, {}).get(well_id, 0)
        for column, well_id in index
    )
    # Float storage, one slot per index entry.
    values = numpy.fromiter(looked_up, dtype=float, count=len(index))
    return pandas.DataFrame(values, index=index, columns=["cell_count"])


def count_cells_per_roi(coldf):
    """ Tally how many rows (cells) belong to each ROI of one column on
        the device.

        :param coldf: Data for this column from Elements software; must
            have a ``RoiID`` column.
        :type coldf: pandas.DataFrame.
        :returns:  collections.Counter mapping 0-based ROI index to the
            number of rows with that ROI.
    """
    tallies = Counter()
    for raw_roi_id, members in coldf.groupby('RoiID'):
        # The spreadsheet numbers ROIs from 1; shift to 0-based indexing.
        zero_based = int(raw_roi_id) - 1
        tallies[zero_based] = tallies[zero_based] + len(members)
    return tallies


def describe_cell_distribution(cell_counts, num_wells):
    """ Log summary statistics and a cells-per-ROI histogram at INFO level.

        :param cell_counts: Mapping from ROI index to number of cells
        :type cell_counts: dict or collections.Counter
        :param num_wells: Total number of wells; wells that do not appear in
            `cell_counts` are reported in the histogram's 0 bin.
        :type num_wells: int
        :returns:  None.
    """
    # Builtin sum works on dict views (Python 3) as well as lists (Python 2);
    # numpy.asarray() on a view yields a 0-d object array, not the numbers.
    total_cells = sum(cell_counts.values())
    # max() of an empty sequence raises, so report a placeholder instead.
    max_roi = max(cell_counts) if cell_counts else "n/a"

    logging.info("\t{0} total cells".format(total_cells))
    logging.info("\t{0} max ROI index".format(max_roi))
    logging.info("\t{0} ROIs with cells".format(len(cell_counts)))
    logging.info("\t{0} wells".format(num_wells))
    logging.info("\t---------- cell per ROI histogram")

    # histogram[n] == number of ROIs that hold exactly n cells.
    histogram = Counter(cell_counts.values())
    # Every well not accounted for above is an empty well.
    histogram[0] = num_wells - sum(histogram.values())

    # Key 0 is always present now, so max() is safe for empty input too.
    for i in range(0, max(histogram) + 1):
        logging.info("\t{0:3d} = {1:d}".format(i, histogram[i]))


def extract_cell_counts(sheetdict, coldfdict):
    """ Count the number of cells in each well of each column.  If data for
        a column is split across multiple sheets of data in the spreadsheet,
        this function will combine the data and re-number the RoiID's so
        that each sheet continues after the highest ROI seen so far.

        :param sheetdict: Mapping from col number to sheet names
        :type sheetdict: dict
        :param coldfdict: Mapping from sheet name to sheet data as DataFrames
        :type coldfdict: dict
        :returns:  dict mapping col number to a collections.Counter of
            ``{roi index: number of cells}``.
    """
    all_cell_counts = {}
    # ``.items()`` works on both Python 2 and 3 (``iteritems`` is 2-only).
    for col_num, sheets in sheetdict.items():
        logging.info("Processing col {0}, sheets: {1}".format(
            col_num,
            ", ".join(sheets)
        ))
        this_col_cell_counts = Counter()
        # Offset added to the 0-based ROI indices of the current sheet.
        roi_offset = 0
        for sheet in sheets:
            this_sheet_cell_counts = count_cells_per_roi(coldfdict[sheet])
            for roi_num, num_cells in this_sheet_cell_counts.items():
                this_col_cell_counts[roi_num + roi_offset] = num_cells
            # ROI indices are 0-based, so the next sheet must start one past
            # the highest index used so far; reusing the maximum itself would
            # overwrite the last ROI of the previous sheet.  Skip the update
            # while nothing has been counted (max() of empty input raises).
            # NOTE: only ROIs containing cells are visible here, so trailing
            # empty ROIs of a sheet cannot be accounted for in the offset.
            if this_col_cell_counts:
                roi_offset = max(this_col_cell_counts) + 1

        all_cell_counts[col_num] = this_col_cell_counts
    return all_cell_counts


def describe_cell_distributions(coldfdict, num_wells):
    """ Log the cell distribution of every entry in `coldfdict`.

        :param coldfdict: Mapping from a name to its data.  The values are
            assumed to be DataFrames with a ``RoiID`` column, as accepted by
            ``count_cells_per_roi`` -- TODO confirm against callers.
        :type coldfdict: dict
        :param num_wells: Number of wells, forwarded to
            ``describe_cell_distribution``
        :type num_wells: int
        :returns:  None.
    """
    for colname, coldf in coldfdict.items():
        # describe_cell_distribution() takes (cell_counts, num_wells); the
        # name is logged here instead of being passed as an extra argument.
        logging.info("Cell distribution for {0}".format(colname))
        describe_cell_distribution(count_cells_per_roi(coldf), num_wells)
Back to Top