/li6400data/__init__.py
Python | 109 lines | 84 code | 15 blank | 10 comment | 23 complexity | a47e09da860cef4dcdf95e6e4cce5ba7 MD5 | raw file
- import dateutil.parser
- import pandas
- import io
- from . import xmllike
def _add_metadata(xml, metadata):
    """Store one parsed node (and, recursively, its children) in *metadata*.

    The node's ``opentag`` is used as the dictionary key.  Its ``text`` is
    converted to an int if possible, otherwise to a float, otherwise it is
    kept as a string with surrounding single/double quotes stripped.

    A node without children is stored directly as that value.  A node with
    children is stored as a nested dict: each child is added under its own
    tag, and the node's own text (when non-empty) is kept under the key
    ``"_"``.
    """
    tag = xml.opentag
    metadata.setdefault(tag, {})

    # Try the numeric interpretations first, narrowest type first.
    for convert in (int, float):
        try:
            value = convert(xml.text)
        except ValueError:
            continue
        break
    else:
        # Not a number: keep the text, minus any wrapping quote characters.
        value = xml.text.strip('"\'')

    if not xml.children:
        # Leaf node: the plain value replaces the placeholder dict.
        metadata[tag] = value
        return

    section = metadata[tag]
    if xml.text:
        section["_"] = value
    for node in xml.children:
        _add_metadata(node, section)
class LI6400DataSet(object):
    """A data set from the LI-6400.

    To instantiate, you must pass either file (a path or file handle) or the
    dataset components: data, events, metadata, start.

    Attributes (as produced when a file is read):
        start: datetime parsed from the second line of the file.
        metadata: nested dict built from the header lines starting with ``<``.
        data: pandas DataFrame of the tab-separated records, indexed by the
            first column, with an added ``time`` column parsed from the
            ``HHMMSS`` column.
        events: pandas DataFrame with one ``Description`` column, indexed by
            the time of day found at the start of each quoted remark line.
    """

    def __init__(self, file=None, data=None, events=None, metadata=None, start=None):
        """Build the data set from *file*, or from ready-made components.

        Raises:
            ValueError: if neither *file* nor *data* is given.
        """
        if file:
            start, metadata, data, events = self._read_file(file)
        if data is None:
            raise ValueError("file or data must be specified")

        self.start = start
        self.metadata = metadata
        self.data = data
        self.events = events

    def __repr__(self):
        return "<LI6400DataSet: {0} records, started {1}>".format(
            len(self.data), self.start)

    @staticmethod
    def _read_file(file):
        """Read *file* and return ``(start, metadata, data, events)``.

        *file* is either a path (opened as ISO-8859-1 text and closed again
        once parsed) or an already open iterable of text lines, which is left
        open for the caller to manage.

        Raises:
            ValueError: if the input ends before the data section starts.
        """
        if isinstance(file, str):
            # We opened it, so we are responsible for closing it.
            with io.open(file, encoding='iso-8859-1') as handle:
                return LI6400DataSet._parse(handle)
        return LI6400DataSet._parse(file)

    @staticmethod
    def _parse_event(line):
        """Split a quoted remark line into ``(time_of_day, description)``.

        Returns None when nothing follows the first token, so such lines are
        skipped in the same way wherever they occur in the file.
        """
        parts = line.strip().strip('"').split(None, 1)
        if len(parts) < 2:
            return None
        stamp, descr = parts
        return dateutil.parser.parse(stamp).time(), descr

    @staticmethod
    def _parse(lines):
        """Parse an iterable of text lines into the four dataset components."""
        lines = iter(lines)
        try:
            next(lines)  # Version number - this reappears in the XML metadata
            start = next(lines).strip().strip('"')
        except StopIteration:
            raise ValueError("input is too short to be an LI-6400 data file")
        start = start.replace("Thr ", "Thu ")  # Dateutil doesn't recognise Thr
        start = dateutil.parser.parse(start)

        metadata = {}
        events = []

        # Header section: metadata and remarks, up to the data marker.
        for line in lines:
            if line.startswith('<'):
                _add_metadata(xmllike.parse(line)[0], metadata)
            elif line.startswith('"'):
                event = LI6400DataSet._parse_event(line)
                if event is not None:
                    events.append(event)
            elif line.strip() == "$STARTOFDATA$":
                break
        else:
            raise ValueError("no $STARTOFDATA$ marker found")

        column_headers = next(lines, None)
        if column_headers is None:
            raise ValueError("no column headers after $STARTOFDATA$")

        # Data section: table rows interleaved with quoted remark lines.
        rows = [column_headers]
        for line in lines:
            if line.startswith('"OPEN '):
                # Restarted logging - for now, we just discard the metadata
                # until the next $STARTOFDATA$ line.  If the input ends
                # first, we simply keep what has been read so far.
                for skipped in lines:
                    if skipped.strip() == "$STARTOFDATA$":
                        break
                next(lines, None)  # Discard new column headers too
                continue
            if line.startswith('"'):
                event = LI6400DataSet._parse_event(line)
                if event is not None:
                    events.append(event)
            else:
                rows.append(line)

        data = pandas.read_table(io.StringIO("".join(rows)), index_col=0)
        # Parse times
        data['time'] = [dateutil.parser.parse(x).time() for x in data.HHMMSS]

        event_times = [when for when, _ in events]
        event_descr = [descr for _, descr in events]
        events = pandas.DataFrame(event_descr, index=event_times,
                                  columns=['Description'])

        return start, metadata, data, events

    def append(self, other):
        """Join another LI6400DataSet onto this one, returning a new data set
        containing the data from both.

        The records are renumbered from zero; the start time and metadata of
        this data set are kept and those of *other* are discarded.
        """
        # pandas.concat replaces DataFrame.append, which pandas 2.0 removed.
        newdata = pandas.concat([self.data, other.data], ignore_index=True)
        event_frames = [frame for frame in (self.events, other.events)
                        if frame is not None]
        newevents = pandas.concat(event_frames) if event_frames else None
        newmetadata = self.metadata  # Discard second set of metadata for now
        return type(self)(data=newdata, events=newevents, metadata=newmetadata,
                          start=self.start)