/lincs/analysis/elements.py
Python | 117 lines | 85 code | 15 blank | 17 comment | 11 complexity | 920c4660581a6217809b57a6d8ecace1 MD5 | raw file
- import numpy
- import logging
- import pandas
- from collections import Counter
- import pandas, numpy
def join_custom_aligned_counts(joined_intensities, cell_counts, mappings):
    """ Add a ``cell_count_a`` column built from per-key well mappings.

    For every ``(key, mapping)`` pair in `mappings`:

    * if `mapping` is falsy (``None`` or empty), the existing
      ``cell_count`` values stored under ``key`` in
      `joined_intensities` are reused unchanged;
    * otherwise `mapping` is iterated as ``(iw, cw)`` pairs and the value
      ``cell_counts[key][cw]`` is stored under index ``(key, iw)``.
      (presumably ``iw`` is the intensity well and ``cw`` the matching
      cell-count well -- confirm against the caller)

    :param joined_intensities: Frame indexed by a two-level MultiIndex
        and holding a ``cell_count`` column.
    :type joined_intensities: pandas.DataFrame
    :param cell_counts: Nested mapping, ``cell_counts[key][cw]`` -> count.
    :type cell_counts: dict
    :param mappings: Mapping from key to an ``{iw: cw}`` dict (or a falsy
        value to keep the existing counts for that key).
    :type mappings: dict
    :returns: `joined_intensities` joined with the new ``cell_count_a``
        column.
    """
    aligned_index = []
    aligned_values = []
    # ``.items()`` exists on Python 2 and 3 dicts and on pandas Series,
    # whereas ``iteritems`` has been removed from both.
    for key, well_map in mappings.items():
        if not well_map:
            # No custom alignment: carry over the existing counts.
            # ``.loc`` replaces the removed ``.ix`` indexer.
            existing = joined_intensities.loc[key]['cell_count']
            for iw, count in existing.items():
                aligned_index.append((key, iw))
                aligned_values.append(count)
        else:
            for iw, cw in well_map.items():
                aligned_index.append((key, iw))
                aligned_values.append(cell_counts[key][cw])
    aligned = pandas.DataFrame(
        aligned_values,
        index=pandas.MultiIndex.from_tuples(aligned_index),
        columns=["cell_count_a"]
    )
    return joined_intensities.join(aligned)
def counts_to_dataframe(cell_counts, index):
    """ Build a single-column ``cell_count`` DataFrame for `index`.

    `index` should be a pandas.MultiIndex, or a list of tuples, where
    each entry is a ``(column, well)`` pair. Each pair is looked up as
    ``cell_counts[column][well]``; a missing column or well yields 0.
    Values are stored as floats.
    """
    looked_up = [
        cell_counts.get(col, {}).get(well, 0)
        for col, well in index
    ]
    values = numpy.array(looked_up, dtype=numpy.float64)
    return pandas.DataFrame(values, index=index, columns=["cell_count"])
def count_cells_per_roi(coldf):
    """ Count the rows (cells) belonging to each ROI of one device column.

    :param coldf: Data for this column from Elements software; must
        contain a ``RoiID`` column.
    :type coldf: pandas.DataFrame.
    :returns: Counter mapping zero-based ROI index to the number of rows
        with that ROI.
    """
    tally = Counter()
    for raw_roi_id, rows in coldf.groupby('RoiID'):
        # RoiID values in the data start at 1; shift them to start at 0.
        zero_based = int(raw_roi_id) - 1
        tally.update({zero_based: len(rows)})
    return tally
def describe_cell_distribution(cell_counts, num_wells):
    """ Log summary statistics and a cells-per-ROI histogram.

    Everything is emitted through ``logging.info``; nothing is returned.

    :param cell_counts: Mapping from ROI index to the number of cells in
        that ROI (e.g. a ``collections.Counter``). May be empty.
    :type cell_counts: dict
    :param num_wells: Total number of wells; used to derive how many
        wells hold zero cells.
    :type num_wells: int
    :returns: None.
    """
    # Builtin ``sum`` works on dict views; wrapping a Python 3 view in
    # ``numpy.asarray`` yields a 0-d object array and does not sum it.
    total_cells = sum(cell_counts.values())
    # ``max`` raises on an empty sequence, so report None when no ROI
    # contains any cells.
    max_roi = max(cell_counts) if cell_counts else None
    logging.info("\t{0} total cells".format(total_cells))
    logging.info("\t{0} max ROI index".format(max_roi))
    logging.info("\t{0} ROIs with cells".format(len(cell_counts)))
    logging.info("\t{0} wells".format(num_wells))
    logging.info("\t---------- cell per ROI histogram")

    # histogram[c] == number of ROIs that contain exactly c cells.
    histogram = Counter(cell_counts.values())
    # Wells that never appear in cell_counts are reported as the
    # zero-cell bucket.
    histogram[0] = num_wells - sum(histogram.values())
    for cells in range(0, max(histogram) + 1):
        logging.info("\t{0:3d} = {1:d}".format(cells, histogram[cells]))
def extract_cell_counts(sheetdict, coldfdict):
    """ Count the number of cells in each well of each column. If data for
    a column is split across multiple sheets of data in the spreadsheet,
    this function will combine the data and re-number the RoiID's so
    that each sheet's ROIs follow those of the previous sheet.

    :param sheetdict: Mapping from col number to a sequence of sheet names
    :type sheetdict: dict
    :param coldfdict: Mapping from sheet name to sheet data as DataFrames
    :type coldfdict: dict
    :returns: Dict mapping col number to a Counter of
        ``{zero-based ROI index: cell count}``.
    """
    all_cell_counts = {}
    for col_num, sheets in sheetdict.items():
        logging.info("Processing col {0}, sheets: {1}".format(
            col_num,
            ", ".join(sheets)
        ))
        this_col_cell_counts = Counter()
        # Offset added to the ROI indices of the sheet being processed.
        roi_offset = 0
        for sheet in sheets:
            sheet_counts = count_cells_per_roi(coldfdict[sheet])
            for roi_num, num_cells in sheet_counts.items():
                this_col_cell_counts[roi_num + roi_offset] = num_cells
            # ROI indices are zero-based, so the next sheet must start
            # one past the highest index used so far; otherwise its
            # ROI 0 would overwrite the previous sheet's last ROI.
            # Guarded because a sheet may contain no cells at all.
            # NOTE(review): the offset is derived from the highest ROI
            # that has cells, so trailing empty ROIs on a sheet are not
            # accounted for -- confirm this matches the device layout.
            if this_col_cell_counts:
                roi_offset = max(this_col_cell_counts) + 1
        all_cell_counts[col_num] = this_col_cell_counts
    return all_cell_counts
def describe_cell_distributions(coldfdict, num_wells):
    """ Log the cell distribution of every entry in `coldfdict`.

    NOTE(review): the values are assumed to be per-column DataFrames with
    a ``RoiID`` column (as `coldfdict` is used in extract_cell_counts),
    so they are converted with count_cells_per_roi before being
    described -- confirm against the caller.

    :param coldfdict: Mapping from name to column data.
    :type coldfdict: dict
    :param num_wells: Number of wells, forwarded to
        describe_cell_distribution.
    :type num_wells: int
    :returns: None.
    """
    for colname, colddf in coldfdict.items():
        # describe_cell_distribution takes (cell_counts, num_wells); the
        # name is logged here instead of being passed as an extra
        # positional argument.
        logging.info("Cell distribution for {0}".format(colname))
        describe_cell_distribution(count_cells_per_roi(colddf), num_wells)