PageRenderTime 52ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/lincs/analysis/elements.py

https://bitbucket.org/kljensen/kmj_lincs
Python | 117 lines | 85 code | 15 blank | 17 comment | 11 complexity | 920c4660581a6217809b57a6d8ecace1 MD5 | raw file
  1. import numpy
  2. import logging
  3. import pandas
  4. from collections import Counter
  5. import pandas, numpy
  6. def join_custom_aligned_counts(joined_intensities, cell_counts, mappings):
  7. mapped_counts_values = []
  8. mapped_counts_index = []
  9. for k, v in mappings.iteritems():
  10. if not v:
  11. for iw, c in joined_intensities.ix[k]['cell_count'].iteritems():
  12. mapped_counts_index.append((k, iw))
  13. mapped_counts_values.append(c)
  14. else:
  15. for iw, cw in v.iteritems():
  16. mapped_counts_index.append((k, iw))
  17. mapped_counts_values.append(cell_counts[k][cw])
  18. len(mapped_counts_index)
  19. len(mapped_counts_values)
  20. s = pandas.DataFrame(
  21. mapped_counts_values,
  22. index=pandas.MultiIndex.from_tuples(mapped_counts_index),
  23. columns=["cell_count_a"]
  24. )
  25. return joined_intensities.join(s)
  26. def counts_to_dataframe(cell_counts, index):
  27. """ `index` should be a pandas.MultiIndex, or a list of tuples,
  28. specifying column, well.
  29. """
  30. data = numpy.zeros(len(index))
  31. for (i, (col, well)) in enumerate(index):
  32. data[i] = cell_counts.get(col, {}).get(well, 0)
  33. s = pandas.DataFrame(data, index=index, columns=["cell_count"])
  34. return s
  35. def count_cells_per_roi(coldf):
  36. """ Count the number of cells in each ROI for a column on the
  37. device.
  38. :param coldf: Data for this column from Elements software
  39. :type coldf: pandas.DataFrame.
  40. :returns: None.
  41. """
  42. cell_counts = Counter()
  43. for roi_num, group in coldf.groupby('RoiID'):
  44. # RoiID is 1-indexed, but we're using 0-indexing,
  45. # so subtract one.
  46. roi = int(roi_num) - 1
  47. cell_counts[roi] += len(group)
  48. return cell_counts
  49. def describe_cell_distribution(cell_counts, num_wells):
  50. total_cells = numpy.asarray(cell_counts.values()).sum()
  51. logging.info("\t{0} total cells".format(total_cells))
  52. logging.info("\t{0} max ROI index".format(max(cell_counts.keys())))
  53. logging.info("\t{0} ROIs with cells".format(len(cell_counts.keys())))
  54. logging.info("\t{0} wells".format(num_wells))
  55. logging.info("\t---------- cell per ROI histogram")
  56. histogram = Counter()
  57. for v in cell_counts.values():
  58. histogram[v] += 1
  59. for i in range(0, max(histogram.keys()) + 1):
  60. if i == 0:
  61. v = num_wells - sum(histogram.values())
  62. histogram[0] = v
  63. else:
  64. v = histogram[i]
  65. logging.info("\t{0:3d} = {1:d}".format(i, v))
  66. def extract_cell_counts(sheetdict, coldfdict):
  67. """ Count the number of cells in each well of each column. If data for
  68. a column is split across multiple sheets of data in the spreadsheet,
  69. this function will combine the data and re-number the RoiID's
  70. appropriately.
  71. :param sheetdict: Mapping from col number to sheet names
  72. :type sheetdict: dict
  73. :param sheetdict: Mapping from sheet name to sheet data as DataFrames
  74. :type sheetdict: dict
  75. :param num_wells: Number of wells in a single column
  76. :type num_wells: int
  77. :returns: None.
  78. """
  79. all_cell_counts = {}
  80. for col_num, sheets in sheetdict.iteritems():
  81. logging.info("Processing col {0}, sheets: {1}".format(
  82. col_num,
  83. ", ".join(sheets)
  84. ))
  85. this_col_cell_counts = Counter()
  86. max_roi = 0
  87. for sheet in sheets:
  88. coldf = coldfdict[sheet]
  89. this_sheet_cell_counts = count_cells_per_roi(coldf)
  90. for roi_num, num_cells in this_sheet_cell_counts.iteritems():
  91. this_col_cell_counts[roi_num + max_roi] = num_cells
  92. max_roi = max(this_col_cell_counts.keys())
  93. all_cell_counts[col_num] = this_col_cell_counts
  94. return all_cell_counts
  95. def describe_cell_distributions(coldfdict, num_wells):
  96. for colname, colddf in coldfdict.iteritems():
  97. describe_cell_distribution(colname, colddf, num_wells)