PageRenderTime 49ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/lincs/io/config_stats.py

https://bitbucket.org/kljensen/kmj_lincs
Python | 96 lines | 88 code | 4 blank | 4 comment | 0 complexity | b7d17bf0ebe9751b9fc3d0e1218074f0 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. """ Code for parsing the configuration file used for
  3. running statistics on Lincs experimental treatments.
  4. """
  5. import yaml
  6. import os
  7. import pandas
  8. REQUIRED_FIELDS = (
  9. ('experiment_name', str),
  10. ('treatments', dict),
  11. ('signals', list),
  12. ('on_threshold', int),
  13. )
  14. class ConfigParseError(Exception):
  15. """docstring for ConfigParseError"""
  16. def check_config(config):
  17. """ Check to make sure all config values are present and in the correct
  18. format. Check to make sure Excel files are in order.
  19. """
  20. for field, obj_type in REQUIRED_FIELDS:
  21. if field not in config:
  22. raise ConfigParseError("Missing field in config: {0}".format(
  23. field
  24. ))
  25. if not isinstance(config[field], obj_type):
  26. msg = "Field '{0}' should be of type {1}".format(field, obj_type)
  27. raise ConfigParseError(msg)
  28. if "control" not in config["treatments"]:
  29. raise ConfigParseError("Configuration missing a control treatment!")
  30. for k, v in config["treatments"].iteritems():
  31. # Make sure each treatment has an Excel file listed
  32. #
  33. if "path" not in v or not v["path"]:
  34. msg = "Treatment {0} is missing a path to data file".format(k)
  35. raise ConfigParseError(msg)
  36. # Make sure that file exists
  37. #
  38. if not os.path.isfile(v["path"]):
  39. msg = "Treatment {0} data file {1} not found!".format(k, v["path"])
  40. raise ConfigParseError(msg)
  41. # Open the file. Make sure the required sheet exists.
  42. # Set the required sheet if it's the default.
  43. #
  44. xls_file = pandas.ExcelFile(v['path'])
  45. if "sheet" not in v:
  46. config["treatments"][k]["sheet"] = xls_file.sheet_names[0]
  47. elif v["sheet"] not in xls_file.sheet_names:
  48. msg = "Sheet {0} not found in {1}!".format(v["sheet"], v["path"])
  49. raise ConfigParseError(msg)
  50. # Ensure there is a cell_count_column
  51. #
  52. if "cell_count_column" not in v:
  53. config["treatments"][k]["cell_count_column"] = "cell count"
  54. # Make sure there is a column for each signal and the cell_count
  55. #
  56. data = xls_file.parse(v["sheet"])
  57. needed_columns = config["signals"] \
  58. + [config["treatments"][k]["cell_count_column"]]
  59. for col in needed_columns:
  60. if col not in data.columns:
  61. msg = "Column {0} is missing in sheet {1} of {2}".format(
  62. col,
  63. v["sheet"],
  64. v["path"]
  65. )
  66. raise ConfigParseError(msg)
  67. def read_config(file_path):
  68. """ Read the configuration file
  69. :param file_path: Path to the config file
  70. :type file_path: string.
  71. :returns: dictionary version of config file.
  72. """
  73. with open(file_path) as fh:
  74. config = yaml.safe_load(fh)
  75. check_config(config)
  76. return config