PageRenderTime 49ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/lincs/io/config.py

https://bitbucket.org/kljensen/kmj_lincs
Python | 157 lines | 154 code | 3 blank | 0 comment | 4 complexity | f5ec6890db0c8e202e48b0105400db7b MD5 | raw file
  1. import yaml
  2. import logging
  3. import itertools
  4. import pandas
  5. from collections import defaultdict
  6. REQUIRED_FILES = ('elements_file', 'genepix_file')
  7. REQUIRED_FIELDS = (
  8. 'experiment_name',
  9. 'num_columns',
  10. 'num_wells',
  11. )
  12. class ConfigParseError(Exception):
  13. """docstring for ConfigParseError"""
  14. def check_config(config):
  15. """ Check to make sure all config values
  16. are present and in the correct format.
  17. """
  18. for field in REQUIRED_FIELDS + REQUIRED_FILES:
  19. if field not in config:
  20. raise ConfigParseError("Config is missing {0} field".format(field))
  21. for field in REQUIRED_FIELDS:
  22. if field not in config:
  23. raise ConfigParseError
  24. if field.startswith('num_') and not isinstance(config[field], int):
  25. raise ConfigParseError("Config {0} should be an int".format(field))
  26. config = fill_default_sheets(config)
  27. required_cols = set(range(config['num_columns']))
  28. for file_type in REQUIRED_FILES:
  29. missing_cols = required_cols - set(config[file_type]["sheets"].keys())
  30. if missing_cols:
  31. msg = "{0} is missing col specifications for cols: {1}".format(
  32. file_type,
  33. missing_cols
  34. )
  35. raise ConfigParseError(msg)
  36. # Check for missing sheets
  37. #
  38. find_missing_sheets(config, file_type)
  39. def find_missing_sheets(config, file_type):
  40. """ Ensure all the requested sheets actually exist
  41. """
  42. requested_sheets = set(itertools.chain.from_iterable(
  43. config[file_type]['sheets'].values()
  44. ))
  45. xls_file = pandas.ExcelFile(config[file_type]['path'])
  46. missing_sheets = set(requested_sheets) - set(xls_file.sheet_names)
  47. if missing_sheets:
  48. msg = "Missing sheets in {0} file: {1}".format(
  49. file_type, missing_sheets
  50. )
  51. raise ConfigParseError(msg)
  52. else:
  53. print "No missing sheets for {0}".format(file_type)
  54. return missing_sheets
  55. def fill_default_sheets(config):
  56. for file_type in REQUIRED_FILES:
  57. fill_default = config[file_type].get('default_sheets')
  58. if fill_default:
  59. msg = "default_sheets specified for {0}, inspecting {1}".format(
  60. file_type,
  61. config[file_type]['path'],
  62. )
  63. logging.debug(msg)
  64. xls_file = pandas.ExcelFile(config[file_type]['path'])
  65. sheet_names = xls_file.sheet_names
  66. config[file_type]["sheets"] = {}
  67. for i in range(config['num_columns']):
  68. config[file_type]["sheets"][i] = [sheet_names[i]]
  69. return config
  70. # def check_inversion(config):
  71. # """ Invert columns-sheet mapping if inversion is specified. This means
  72. # that, if there are 12 columns, each mapped to a sheet in sequence,
  73. # we will instead map sheet 0 to column 11, sheet 1 to column 10, etc.
  74. # NOTE: this does not do the inversion of rows! That is done when the
  75. # data is actually read-in.
  76. # """
  77. # for file_type in REQUIRED_FILES:
  78. # if config[file_type].get('inverted'):
  79. # logging.info("Inverting sheets for {0}".format(file_type))
  80. # num_columns = config['num_columns']
  81. # sheets = config[file_type]["sheets"]
  82. # new_sheets = {}
  83. # for i in range(num_columns):
  84. # j = num_columns - (i + 1)
  85. # new_sheets[j] = sheets[i]
  86. # logging.info("\t{0} -> {1}".format(i, j))
  87. # config[file_type]["sheets"] = new_sheets
  88. def process_default_channels(config):
  89. """ If the user specified a default channel in the ab_channels
  90. parameter, fill in the channels for antibodies that are in the
  91. flow pattern for which a channel was not specified in the
  92. config. Alters the config in place.
  93. """
  94. channels = config['genepix_file']['ab_channels']
  95. default_channel = channels.get('default')
  96. if default_channel:
  97. del channels['default']
  98. for ab in config['genepix_file']['flow_pattern'].iterkeys():
  99. if ab not in channels:
  100. channels[ab] = default_channel
  101. def handle_manual_cell_counting(config):
  102. """ The configuration file may specific that some columns of the
  103. device were counted manually rather than using the Elements
  104. software. Setting `manual` to True means that all the columns
  105. were counted manually. Or, it can be a list of columns that
  106. were counted manually.
  107. """
  108. manual_value = config['elements_file'].get('manual')
  109. if manual_value is True:
  110. manual_value = config['elements_file']['sheets'].keys()
  111. elif manual_value in [None, False]:
  112. manual_value = []
  113. elif isinstance(manual_value, list):
  114. pass
  115. else:
  116. msg = "config['elements_file']['sheets'] has bad value!"
  117. raise ConfigParseError(msg)
  118. config['elements_file']['manual'] = manual_value
  119. def read_config(file_path):
  120. with open(file_path) as fh:
  121. config = yaml.safe_load(fh)
  122. config = fill_default_sheets(config)
  123. process_default_channels(config)
  124. handle_manual_cell_counting(config)
  125. check_config(config)
  126. return config