kmj_lincs /lincs/io/config.py

Language Python Lines 158
MD5 Hash f5ec6890db0c8e202e48b0105400db7b Estimated Cost $2,131 (why?)
Repository https://bitbucket.org/kljensen/kmj_lincs.git View Raw File View Project SPDX
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
import yaml
import logging
import itertools
import pandas
from collections import defaultdict

# Config sections describing the input workbooks.  Each one is looked up
# as config[<name>] and is expected to provide 'path' and 'sheets' entries.
REQUIRED_FILES = (
    'elements_file',
    'genepix_file',
)

# Top-level settings that must be present; the ones whose name starts
# with 'num_' are additionally required to be integers by check_config.
REQUIRED_FIELDS = ('experiment_name', 'num_columns', 'num_wells')


class ConfigParseError(Exception):
    """ Raised when the configuration is missing a required value,
        holds a value of the wrong type, or refers to workbook sheets
        that do not exist.
    """


def check_config(config):
    """ Check to make sure all config values
        are present and in the correct format.

        Verifies that every name in REQUIRED_FIELDS and REQUIRED_FILES is
        present, that the 'num_*' fields are integers, that each required
        file section maps every column index in range(num_columns) to
        sheets, and (via find_missing_sheets, which opens the workbook)
        that the requested sheets exist.

        Raises ConfigParseError on the first problem found.  Returns None.
    """
    for field in REQUIRED_FIELDS + REQUIRED_FILES:
        if field not in config:
            raise ConfigParseError("Config is missing {0} field".format(field))

    for field in REQUIRED_FIELDS:
        if not field.startswith('num_'):
            continue
        value = config[field]
        # bool is a subclass of int, so True/False would otherwise slip
        # through and be used as a column/well count.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ConfigParseError("Config {0} should be an int".format(field))

    config = fill_default_sheets(config)
    required_cols = set(range(config['num_columns']))
    for file_type in REQUIRED_FILES:
        file_config = config[file_type]
        sheets = None
        if isinstance(file_config, dict):
            sheets = file_config.get("sheets")
        if not isinstance(sheets, dict):
            # Neither an explicit 'sheets' mapping nor 'default_sheets'
            # was given; report it as a config error, not a KeyError.
            msg = "{0} is missing a sheets specification".format(file_type)
            raise ConfigParseError(msg)

        missing_cols = required_cols - set(sheets)
        if missing_cols:
            msg = "{0} is missing col specifications for cols: {1}".format(
                file_type,
                missing_cols
            )
            raise ConfigParseError(msg)

        # Check for missing sheets
        #
        find_missing_sheets(config, file_type)


def find_missing_sheets(config, file_type):
    """ Ensure all the requested sheets actually exist.

        Gathers every sheet name listed under config[file_type]['sheets']
        and compares them with the sheet names of the Excel workbook at
        config[file_type]['path'].

        Returns the (empty) set of missing sheets when nothing is missing.
        Raises ConfigParseError naming the sheets that are absent.
    """
    requested_sheets = set(itertools.chain.from_iterable(
        config[file_type]['sheets'].values()
    ))
    xls_file = pandas.ExcelFile(config[file_type]['path'])
    missing_sheets = requested_sheets - set(xls_file.sheet_names)
    if missing_sheets:
        msg = "Missing sheets in {0} file: {1}".format(
            file_type, missing_sheets
        )
        raise ConfigParseError(msg)

    # Parenthesised single-argument form: prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("No missing sheets for {0}".format(file_type))

    return missing_sheets


def fill_default_sheets(config):
    """ Fill in the column -> sheets mapping for files that ask for it.

        For each section named in REQUIRED_FILES whose 'default_sheets'
        value is truthy, the workbook at its 'path' is opened and the
        section's 'sheets' entry is replaced so that column i maps to a
        one-element list holding the i-th sheet name, for i in
        range(num_columns).

        Alters the config in place and also returns it.
        Raises ConfigParseError if a required file section is missing,
        if num_columns is missing or not an int when defaults are
        requested, or if the workbook has fewer sheets than num_columns.
    """
    for file_type in REQUIRED_FILES:
        file_config = config.get(file_type)
        if not isinstance(file_config, dict):
            # This function runs before check_config in read_config, so
            # report the problem as a config error rather than a KeyError.
            raise ConfigParseError(
                "Config is missing {0} field".format(file_type)
            )

        if not file_config.get('default_sheets'):
            continue

        if 'num_columns' not in config:
            raise ConfigParseError("Config is missing num_columns field")
        num_columns = config['num_columns']
        if isinstance(num_columns, bool) or not isinstance(num_columns, int):
            raise ConfigParseError("Config num_columns should be an int")

        msg = "default_sheets specified for {0}, inspecting {1}".format(
            file_type,
            file_config['path'],
        )
        logging.debug(msg)
        xls_file = pandas.ExcelFile(file_config['path'])
        sheet_names = xls_file.sheet_names
        if len(sheet_names) < num_columns:
            msg = "{0} has {1} sheets but num_columns is {2}".format(
                file_type, len(sheet_names), num_columns
            )
            raise ConfigParseError(msg)

        file_config["sheets"] = {
            i: [name] for i, name in enumerate(sheet_names[:num_columns])
        }
    return config


# def check_inversion(config):
#     """ Invert columns-sheet mapping if inversion is specified.  This means
#         that, if there are 12 columns, each mapped to a sheet in sequence,
#         we will instead map sheet 0 to column 11, sheet 1 to column 10, etc.

#         NOTE: this does not do the inversion of rows!  That is done when the
#         data is actually read-in.
#     """
#     for file_type in REQUIRED_FILES:
#         if config[file_type].get('inverted'):
#             logging.info("Inverting sheets for {0}".format(file_type))

#             num_columns = config['num_columns']
#             sheets = config[file_type]["sheets"]
#             new_sheets = {}

#             for i in range(num_columns):
#                 j = num_columns - (i + 1)
#                 new_sheets[j] = sheets[i]
#                 logging.info("\t{0} -> {1}".format(i, j))

#             config[file_type]["sheets"] = new_sheets


def process_default_channels(config):
    """ If the user specified a default channel in the ab_channels
        parameter, fill in the channels for antibodies that are in the
        flow pattern for which a channel was not specified in the
        config.  Alters the config in place.

        Reads config['genepix_file']['ab_channels'] and
        config['genepix_file']['flow_pattern'].  When a truthy 'default'
        entry is present it is removed from ab_channels after being used
        as the fallback value.  Returns None.
    """
    channels = config['genepix_file']['ab_channels']
    default_channel = channels.get('default')
    if default_channel:
        del channels['default']
        # Iterate the dict directly: dict.iterkeys() exists only on
        # Python 2, while plain iteration yields the keys on both.
        for ab in config['genepix_file']['flow_pattern']:
            if ab not in channels:
                channels[ab] = default_channel


def handle_manual_cell_counting(config):
    """ The configuration file may specify that some columns of the
        device were counted manually rather than using the Elements
        software.  Setting `manual` to True means that all the columns
        were counted manually.  Or, it can be a list of columns that
        were counted manually.

        Normalises config['elements_file']['manual'] in place so that it
        is always a list: every key of config['elements_file']['sheets']
        for True, an empty list for a missing/None/False value, and the
        given list otherwise.  Raises ConfigParseError for any other
        value.  Returns None.
    """
    manual_value = config['elements_file'].get('manual')
    if manual_value is True:
        # list(...) so the result is a real list on Python 3 as well,
        # where dict.keys() returns a view.
        manual_value = list(config['elements_file']['sheets'])
    elif manual_value in [None, False]:
        manual_value = []
    elif not isinstance(manual_value, list):
        # The offending setting is 'manual', so name it in the message.
        msg = "config['elements_file']['manual'] has bad value!"
        raise ConfigParseError(msg)
    config['elements_file']['manual'] = manual_value


def read_config(file_path):
    """ Load the YAML config at file_path, fill in defaults, validate it
        and return it.

        After parsing, the default sheet mappings and default antibody
        channels are filled in, the `manual` cell-counting setting is
        normalised, and check_config is run on the result.

        Raises ConfigParseError if the file does not contain a mapping
        (e.g. it is empty) or if validation fails.
    """
    with open(file_path) as fh:
        config = yaml.safe_load(fh)

    # An empty document loads as None, and a list or scalar document is
    # just as unusable: reject these here instead of failing further
    # down with a TypeError/AttributeError.
    if not isinstance(config, dict):
        raise ConfigParseError(
            "Config file {0} does not contain a mapping".format(file_path)
        )

    config = fill_default_sheets(config)
    process_default_channels(config)
    handle_manual_cell_counting(config)
    check_config(config)

    return config
Back to Top