PageRenderTime 254ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/li6400data/__init__.py

https://bitbucket.org/takluyver/li-6400-data
Python | 109 lines | 84 code | 15 blank | 10 comment | 23 complexity | a47e09da860cef4dcdf95e6e4cce5ba7 MD5 | raw file
  1. import dateutil.parser
  2. import pandas
  3. import io
  4. from . import xmllike
  5. def _add_metadata(xml, metadata):
  6. name = xml.opentag
  7. if name not in metadata:
  8. metadata[name] = {}
  9. try:
  10. value = int(xml.text)
  11. except ValueError:
  12. try:
  13. value = float(xml.text)
  14. except ValueError:
  15. value = xml.text.strip('"\'')
  16. if xml.children:
  17. if xml.text:
  18. metadata[name]["_"] = value
  19. for child in xml.children:
  20. _add_metadata(child, metadata[name])
  21. else:
  22. metadata[name] = value
  23. class LI6400DataSet(object):
  24. """A data set from the LI-6400.
  25. To instantiate, you must pass either file (a path or file handle) or the
  26. dataset components: data, events, metadata, start.
  27. """
  28. def __init__(self, file=None, data=None, events=None, metadata=None, start=None):
  29. if file:
  30. start, metadata, data, events = self._read_file(file)
  31. if data is None:
  32. raise ValueError("file or data must be specified")
  33. self.start = start
  34. self.metadata = metadata
  35. self.data = data
  36. self.events = events
  37. def __repr__(self):
  38. return "<LI6400DataSet: {0} records, started {1}>".format(\
  39. len(self.data), self.start)
  40. @staticmethod
  41. def _read_file(file):
  42. if isinstance(file, str):
  43. file = io.open(file, encoding='iso-8859-1')
  44. next(file) # Version number - this reappears in the XML metadata
  45. start = next(file).strip().strip('"')
  46. start = start.replace("Thr ", "Thu ") # Dateutil doesn't recognise Thr
  47. start = dateutil.parser.parse(start)
  48. metadata = {}
  49. event_times, event_descr = [], []
  50. for line in file:
  51. if line.startswith('<'):
  52. _add_metadata(xmllike.parse(line)[0], metadata)
  53. elif line.startswith('"'):
  54. line = line.strip().strip('"')
  55. time_descr = line.split(None, 1)
  56. if len(time_descr) > 1:
  57. time, descr = time_descr
  58. event_times.append(dateutil.parser.parse(time).time())
  59. event_descr.append(descr)
  60. elif line.strip() == "$STARTOFDATA$":
  61. break
  62. maindata = io.StringIO()
  63. maindata.write(next(file))
  64. for line in file:
  65. if line.startswith('"OPEN '):
  66. # Restarted logging - for now, we just discard the metadata
  67. # until the next $STARTOFDATA$ line.
  68. while line.strip() != "$STARTOFDATA$":
  69. line = next(file)
  70. next(file) # Discard new column headers too
  71. continue
  72. if line.startswith('"'):
  73. line = line.strip().strip('"')
  74. time, descr = line.split(None, 1)
  75. event_times.append(dateutil.parser.parse(time).time())
  76. event_descr.append(descr)
  77. else:
  78. maindata.write(line)
  79. maindata.seek(0)
  80. data = pandas.read_table(maindata, index_col=0)
  81. # Parse times
  82. data['time'] = [dateutil.parser.parse(x).time() for x in data.HHMMSS]
  83. assert len(event_times) == len(event_descr)
  84. events = pandas.DataFrame(event_descr, index=event_times, columns=['Description'])
  85. return start, metadata, data, events
  86. def append(self, other):
  87. """Join another LI6400DataSet onto this one, returning a new data set
  88. containing the data from both."""
  89. newdata = self.data.append(other.data, ignore_index=True)
  90. newevents = self.events.append(other.events)
  91. newmetadata = self.metadata # Discard second set of metadata for now
  92. return type(self)(data=newdata, events=newevents, metadata=newmetadata,
  93. start=self.start)