/roof/data.py
Python | 192 lines | 122 code | 37 blank | 33 comment | 24 complexity | 10ac884e1fba0f11306901199af00418 MD5 | raw file
- # -*- coding: utf-8 -*-
- """
- Tools for processing data and metadata.
- """
- import re
- import pandas as pd
# Named group matching a platform code: one of the letters A, B, S or K
# followed by a digit from 1 to 4 (e.g. "A1").
platform_rexp = '(?P<platform>[ABSK][1-4])'


def get_platform(name):
    """Parse name for a platform letter & number.

    Searches ``name`` case-insensitively for the first platform code that
    starts at a word boundary.

    Returns the code upper-cased (e.g. ``'A1'``), or None when ``name``
    contains no such code.
    """
    pattern = r'\s*\b' + platform_rexp
    found = re.search(pattern, name, flags=re.IGNORECASE)
    if found is None:
        return None
    return found.group('platform').upper()
class Label(object):
    """Stores metadata about a series (column) of data.

    Attributes set by the constructor: ``port``, ``platform``, ``sensor``,
    ``variable``, ``units`` and ``comment`` (always initialised to None).

    A label's identity is its string form (see ``__str__``): equality and
    hashing are both defined on ``str(self)``, so a Label compares equal to,
    and hashes like, the plain string it renders to.
    """

    def __init__(self, port, platform, sensor=None, variable=None, units=None):
        self.port = port
        self.platform = platform
        self.sensor = sensor
        # 'raw' is the textual stand-in for "no variable"; store it as None
        # so that __str__ can render it back as 'raw'.
        if variable == 'raw':
            variable = None
        self.variable = variable
        self.units = units
        self.comment = None

    def __str__(self):
        """Return '[platform_]variable_port', lower-cased for consistency."""
        rep = '%s_%s' % (self.variable or 'raw', self.port)
        if self.platform:
            rep = self.platform + '_' + rep
        return rep.lower()  # all lowercase for consistency

    def __repr__(self):
        attrs = []
        for attrname in ('port', 'platform', 'sensor', 'variable', 'units'):
            attr = getattr(self, attrname)
            if attr is not None:
                attrs.append("%s= %s" % (attrname, attr))
        return 'Label(' + ', '.join(attrs) + ')'

    def _tie_key(self):
        """Key used to order labels that share a port.

        A missing variable sorts before any named variable; the platform
        breaks remaining ties so that unequal labels never compare as
        "neither smaller nor larger".
        """
        return (self.variable is not None,
                self.variable or '',
                self.platform or '')

    def __lt__(self, other):
        """Order labels by port, then variable, then platform.

        Anything that is not a Label is compared through its string form,
        which gives orderability with plain strings.
        """
        if isinstance(other, Label):
            if self.port != other.port:
                return self.port < other.port
            try:
                return self._tie_key() < other._tie_key()
            except TypeError:
                # Incomparable variable/platform values: treat as unordered.
                return False
        return str(self) < str(other)

    def __gt__(self, other):
        # Needed for the reflected case: ``'abc' < label`` makes Python call
        # ``label.__gt__('abc')`` because str does not know about Label.
        if isinstance(other, Label):
            return other.__lt__(self)
        return str(self) > str(other)

    def __le__(self, other):
        return not self.__gt__(other)

    def __ge__(self, other):
        return not self.__lt__(other)

    def __hash__(self):
        # Must stay consistent with __eq__, which compares string forms.
        return hash(str(self))

    def __eq__(self, other):
        return str(self) == str(other)

    def __ne__(self, other):
        # Python 2 does not derive != from ==; define it explicitly.
        return not self.__eq__(other)
def from_str(s):
    """
    Create a label based on a string representing one.

    The string is expected to start with '[platform_]variable_port', where
    variable is 'raw' or the name of a measurement of one of the sensors in
    ``roof.decagon.sensors``, and port is a single digit 1-9. Matching is
    case-insensitive.

    Info extracted will be limited (port, platform and variable only).
    Returns None on failure
    """
    from roof.decagon import sensors
    # all possible variable values
    variables = {meas.name for sr in sensors.values()
                 for meas in sr.measurements}
    # Escape the names so regex metacharacters in a measurement name are
    # matched literally; longest-first keeps the alternation deterministic.
    alternatives = sorted(variables | {'raw'}, key=len, reverse=True)
    var_rexp = ('(?P<variable>'
                + '|'.join(re.escape(v) for v in alternatives)
                + ')')
    rexp = r'(?:%s_)?%s_%s' % (platform_rexp, var_rexp, '(?P<port>[1-9])')
    m = re.match(rexp, s, re.I)
    if m is None:
        # Not a label string: honour the documented "None on failure".
        return None

    port, platform, variable = m.group('port', 'platform', 'variable')
    if platform:
        platform = get_platform(platform)

    return Label(int(port), platform, variable=variable)
-
class TimeLabel(Label):
    """Label whose string form is always 'time'."""

    def __init__(self):
        # Initialised as port 0 with no platform.
        Label.__init__(self, port=0, platform=None)

    def __repr__(self):
        return "TimeLabel()"

    def __str__(self):
        return 'time'
-
class RawLabel(Label):
    """Label created without a variable, carrying an extra ``error`` field."""

    def __init__(self, port, platform, sensor=None, error=None):
        Label.__init__(self, port, platform, sensor=sensor)
        # Free-form slot: extra info about the series, or an error.
        self.error = error
class LabeledFrame(pd.DataFrame):
    """A DataFrame containing relevant metadata.

    On construction every column is also exposed as an instance attribute
    named ``str(column)``, so a column keyed by a label object can be
    reached through the label's string form.
    """

    def __init__(self, *args, **kwargs):
        pd.DataFrame.__init__(self, *args, **kwargs)  # Python 2 compat.
        self._filename = None

        for col in self.columns:
            try:
                attr = str(col)
                # Never shadow the DataFrame API: assigning e.g. 'index' or
                # 'columns' here would go through the axis property and
                # replace the axis, and read-only properties would raise.
                if hasattr(type(self), attr):
                    continue
                object.__setattr__(self, attr, self[col])
            except UnicodeEncodeError:
                pass  # issue in Python < 3

    @property
    def file(self):
        """The stored file name (None unless ``_filename`` is set later)."""
        return self._filename

    @property
    def sensors(self):
        """Map ``str(column)`` to ``column.sensor`` for every column."""
        return {str(c): c.sensor for c in self.columns}

    def to_sqlite(self, name=None):
        """Write to a SQLite3 database.

        The frame (with its index turned into a column) is written to table
        ``name`` of a new in-memory database.

        Returns the open ``sqlite3`` connection.
        """
        import sqlite3
        from pandas.io import sql

        df = self.reset_index()
        labels = df.columns
        # One column per label, one row per metadata field.
        # NOTE(review): this table is assembled but never written anywhere;
        # the original wrote `df` twice instead, and the second write failed
        # because the table already existed. Confirm whether the label info
        # was meant to go into a second table.
        linfo = pd.DataFrame(
            columns=labels,
            index=[
                "name",
                "label_id",
                "port",
                "platform",
                "sensor",
                "variable",
                "units",
            ])

        for lbl in labels:
            if isinstance(lbl, Label):
                linfo[lbl].update(lbl.__dict__)

        c = sqlite3.connect(':memory:')
        if hasattr(df, 'to_sql'):
            # index=False: the index was already materialised as a column
            # by reset_index() above.
            df.to_sql(name, c, index=False)
        else:
            # Older pandas without DataFrame.to_sql.
            sql.write_frame(df, name=name, con=c)
        return c
def calc(calcfn):
    """Placeholder for applying a calculation function (not implemented).

    The original body consisted only of comments inside ``if``/``else``
    branches, which is a syntax error and prevented the module from being
    imported. The planned behaviour is kept below as TODO notes.

    Raises NotImplementedError on every call.
    """
    # TODO: if calcfn is a CalcFunction, perform unit-checking.
    # TODO: otherwise, see if the function takes a dictionary of inputs,
    #       or try to make it a CalcFunction.
    raise NotImplementedError('calc() is not implemented yet')
def aggregate(d, rule=pd.offsets.Day(),
              how='mean',
              shift=pd.offsets.Hour(-5)):
    """
    Returns data resampled over an interval.
    Resamples by taking mean by default.

    Parameters
    ----------
    d : frame to resample; it is not modified
    rule : time interval to aggregate over
    how : method to use for resampling
    shift : timeshift applied to the index before resampling;
            pass None to skip.

    Returns
    -------
    A new LabeledFrame holding the resampled data.
    """
    df = d
    if shift is not None:
        # shift() with a freq moves the index and leaves the data in place.
        # It replaces DataFrame.tshift, which current pandas no longer has.
        df = df.shift(1, freq=shift)
    try:
        # Older pandas: aggregation is selected through the `how` keyword.
        df = df.resample(rule, how=how)
    except TypeError:
        # Current pandas rejects `how`; aggregate the resampler instead.
        df = df.resample(rule).agg(how)
    return LabeledFrame(df)
def description(df, fun=lambda r: r):
    """Describe ``df`` group by group.

    ``df`` is grouped with ``df.groupby(fun)`` and ``describe()`` is run on
    every group.

    Returns the transposed per-group summaries concatenated in group order
    (one row per column per group), or an empty DataFrame when there are no
    groups.
    """
    summaries = [group.describe().T for _, group in df.groupby(fun)]
    if not summaries:
        # pd.concat refuses an empty list; return an empty result instead.
        return pd.DataFrame()
    return pd.concat(summaries)