/lincs/io/config.py
Python | 157 lines | 154 code | 3 blank | 0 comment | 4 complexity | f5ec6890db0c8e202e48b0105400db7b MD5 | raw file
- import yaml
- import logging
- import itertools
- import pandas
- from collections import defaultdict
# Config sections that must be present; each one carries the 'path' of an
# Excel workbook plus a column -> sheet-names mapping under 'sheets'.
REQUIRED_FILES = ('elements_file', 'genepix_file')

# Top-level keys that must be present; the ones prefixed with 'num_' are
# additionally required to be integers by check_config.
REQUIRED_FIELDS = ('experiment_name', 'num_columns', 'num_wells')
class ConfigParseError(Exception):
    """Raised when a configuration is incomplete or holds an invalid value."""
def check_config(config):
    """Validate a loaded configuration, raising on the first problem found.

    The checks run in this order:

    1. every key in REQUIRED_FIELDS and REQUIRED_FILES is present;
    2. every required field whose name starts with ``num_`` is an integer;
    3. after default sheets have been filled in, each file section maps
       every column index in ``range(config['num_columns'])`` to sheets;
    4. every sheet named in that mapping exists in the workbook.

    Args:
        config: the configuration mapping. It may be modified in place,
            because ``fill_default_sheets`` writes the ``sheets`` entries.

    Raises:
        ConfigParseError: if any of the checks above fails.
    """
    for field in REQUIRED_FIELDS + REQUIRED_FILES:
        if field not in config:
            raise ConfigParseError("Config is missing {0} field".format(field))

    # Presence is guaranteed by the loop above, so only types are checked here.
    for field in REQUIRED_FIELDS:
        if not field.startswith('num_'):
            continue
        value = config[field]
        # bool is a subclass of int; reject it so `num_columns: true` fails.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ConfigParseError("Config {0} should be an int".format(field))

    config = fill_default_sheets(config)
    required_cols = set(range(config['num_columns']))
    for file_type in REQUIRED_FILES:
        # A missing or empty 'sheets' section counts as "no columns specified"
        # so the user gets a ConfigParseError instead of a KeyError.
        sheets = config[file_type].get("sheets") or {}
        missing_cols = required_cols - set(sheets.keys())
        if missing_cols:
            msg = "{0} is missing col specifications for cols: {1}".format(
                file_type,
                missing_cols
            )
            raise ConfigParseError(msg)
        # Every column is mapped; now make sure the sheets really exist.
        find_missing_sheets(config, file_type)
def find_missing_sheets(config, file_type):
    """Ensure every sheet requested for ``file_type`` exists in its workbook.

    Args:
        config: configuration mapping; ``config[file_type]['sheets']`` maps
            columns to lists of sheet names and ``config[file_type]['path']``
            is the workbook to inspect.
        file_type: key of the file section to check.

    Returns:
        The set of missing sheet names, which is always empty on return
        because a non-empty set raises instead.

    Raises:
        ConfigParseError: if a requested sheet is not in the workbook.
    """
    requested_sheets = set(itertools.chain.from_iterable(
        config[file_type]['sheets'].values()
    ))
    # The context manager closes the workbook handle once names are read.
    with pandas.ExcelFile(config[file_type]['path']) as xls_file:
        available_sheets = set(xls_file.sheet_names)

    missing_sheets = requested_sheets - available_sheets
    if missing_sheets:
        msg = "Missing sheets in {0} file: {1}".format(
            file_type, missing_sheets
        )
        raise ConfigParseError(msg)

    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print("No missing sheets for {0}".format(file_type))
    return missing_sheets
def fill_default_sheets(config):
    """Populate ``sheets`` for file sections that set ``default_sheets``.

    For each section named in REQUIRED_FILES with a truthy
    ``default_sheets`` value, the workbook at ``path`` is opened and column
    ``i`` is mapped to a one-element list holding the workbook's ``i``-th
    sheet name, for ``i`` in ``range(config['num_columns'])``. Any existing
    ``sheets`` entry for that section is replaced.

    Args:
        config: configuration mapping; modified in place.

    Returns:
        The same ``config`` object.

    Raises:
        ConfigParseError: if a workbook has fewer sheets than
            ``config['num_columns']``.
    """
    for file_type in REQUIRED_FILES:
        section = config[file_type]
        if not section.get('default_sheets'):
            continue

        msg = "default_sheets specified for {0}, inspecting {1}".format(
            file_type,
            section['path'],
        )
        logging.debug(msg)

        # The context manager closes the workbook handle once names are read.
        with pandas.ExcelFile(section['path']) as xls_file:
            sheet_names = list(xls_file.sheet_names)

        num_columns = config['num_columns']
        if len(sheet_names) < num_columns:
            # Report the mismatch explicitly rather than as an IndexError.
            raise ConfigParseError(
                "{0} has {1} sheets but num_columns is {2}".format(
                    file_type, len(sheet_names), num_columns
                )
            )

        section["sheets"] = dict(
            (col, [sheet_names[col]]) for col in range(num_columns)
        )
    return config
- # def check_inversion(config):
- # """ Invert columns-sheet mapping if inversion is specified. This means
- # that, if there are 12 columns, each mapped to a sheet in sequence,
- # we will instead map sheet 0 to column 11, sheet 1 to column 10, etc.
- # NOTE: this does not do the inversion of rows! That is done when the
- # data is actually read-in.
- # """
- # for file_type in REQUIRED_FILES:
- # if config[file_type].get('inverted'):
- # logging.info("Inverting sheets for {0}".format(file_type))
- # num_columns = config['num_columns']
- # sheets = config[file_type]["sheets"]
- # new_sheets = {}
- # for i in range(num_columns):
- # j = num_columns - (i + 1)
- # new_sheets[j] = sheets[i]
- # logging.info("\t{0} -> {1}".format(i, j))
- # config[file_type]["sheets"] = new_sheets
def process_default_channels(config):
    """Apply the ``default`` entry of ``ab_channels`` to unlisted antibodies.

    If ``config['genepix_file']['ab_channels']`` has a truthy ``'default'``
    value, that key is removed and every key of
    ``config['genepix_file']['flow_pattern']`` that has no channel yet is
    assigned the default. Without a truthy default nothing is changed.

    Args:
        config: configuration mapping; ``ab_channels`` is altered in place.

    Returns:
        None.
    """
    channels = config['genepix_file']['ab_channels']
    default_channel = channels.get('default')
    if not default_channel:
        return

    # 'default' is a directive, not an antibody, so drop it before filling.
    del channels['default']
    # Iterating the mapping directly yields its keys on Python 2 and 3;
    # dict.iterkeys() exists only on Python 2.
    for ab in config['genepix_file']['flow_pattern']:
        channels.setdefault(ab, default_channel)
def handle_manual_cell_counting(config):
    """Normalise ``config['elements_file']['manual']`` to a list of columns.

    The configuration file may specify that some columns of the device
    were counted manually rather than with the Elements software:

    * ``True`` means every column, so the value becomes the list of keys
      of ``config['elements_file']['sheets']``;
    * a missing value, ``None`` or ``False`` becomes an empty list;
    * a list is kept as given.

    Args:
        config: configuration mapping; ``manual`` is rewritten in place.

    Returns:
        None.

    Raises:
        ConfigParseError: if ``manual`` holds any other kind of value.
    """
    elements = config['elements_file']
    manual_value = elements.get('manual')
    if manual_value is True:
        # list() keeps the result a real list on Python 3, where keys()
        # returns a view object.
        manual_value = list(elements['sheets'].keys())
    elif manual_value in [None, False]:
        manual_value = []
    elif not isinstance(manual_value, list):
        msg = "config['elements_file']['manual'] has bad value!"
        raise ConfigParseError(msg)
    elements['manual'] = manual_value
def read_config(file_path):
    """Load, normalise and validate the YAML configuration at ``file_path``.

    After parsing, default sheets are filled in, default antibody channels
    are applied, the ``manual`` setting is normalised, and the result is
    checked with ``check_config``.

    Args:
        file_path: path of the YAML configuration file.

    Returns:
        The processed configuration mapping.

    Raises:
        ConfigParseError: if the file does not contain a mapping, or if
            validation fails.
    """
    with open(file_path) as fh:
        config = yaml.safe_load(fh)

    # safe_load returns None for an empty file and may return a list or a
    # scalar; fail with a clear error before the helpers index into it.
    if not isinstance(config, dict):
        raise ConfigParseError(
            "Config file {0} does not contain a mapping".format(file_path)
        )

    config = fill_default_sheets(config)
    process_default_channels(config)
    handle_manual_cell_counting(config)
    check_config(config)
    return config