# roof/term.py
# Source: https://bitbucket.org/roysc/roof
  1. import os, re
  2. from glob import glob
  3. from datetime import datetime, timedelta
  4. import pandas as pd, numpy as np
  5. import roof
  6. from roof.iotools import *
  7. from roof.decagon import *
  8. from roof.data import *
  9. from roof.calc import *
  10. from roof import quirks
  11. def files(path, ext='dzd', plat=None):
  12. from glob import glob
  13. fs = os.listdir(path)
  14. filt = lambda s: re.search('.%s$' % ext, s)
  15. fs = list(filter(filt, fs))
  16. if plat:
  17. filtplat = lambda s: re.search('^%s' % plat, s)
  18. fs = list(filter(filtplat, fs))
  19. return fs
  20. def do_sample(file):
  21. d = pd.read_csv(file,index_col=0,parse_dates=[0])
  22. Rn = d['solar radiation']
  23. et = est_ETpm(d.dailytemp, d.humidity*100, calc_pressure(21),
  24. Rn, d.avgwindspeed)
  25. return et
  26. # def packall(s, letter, plats='1234', concat=True, ext='dzd'):
  27. # "join all dataframes in glob(s)"
  28. # from glob import glob
  29. # # "[aA]*.ext" or "[wW]eather*.ext"
  30. # s += "*["+letter.lower()+letter.upper()+"]"
  31. # if letter == 'w': s += 'eather'
  32. # else: s += '['+plats+']'
  33. # s += "*."+ext
  34. # ls = glob(s)
  35. # print('\n'.join(ls))
  36. # dfs = pack(ls)
  37. # if letter == 'w':
  38. # return concat(dfs.values())
  39. # h = {}
  40. # for n in range(1,5):
  41. # ds = [d for k,d in dfs.items() if int(get_platform(k)[1]) == n]
  42. # if ds and concat:
  43. # print("Concatenating %s #%s platforms" % (len(ds), n))
  44. # h[n] = concat(ds)
  45. # return h
  46. def do_packplat(plat, join_platforms=True):
  47. ls = [f for f in glob("*.dzd") if get_platform(f) == plat]
  48. return packdata(ls, join_platforms)
  49. def do_packdata(ls, join_platforms=True):
  50. """
  51. read df's from list of filenames
  52. returns dict: platform -> list of df's
  53. if join_platforms, concat lists
  54. if there's only one platform, just return a df
  55. """
  56. dfs = {}
  57. for f in ls:
  58. plat = get_platform(f)
  59. if plat: # and plat[0].lower() == letter.lower():
  60. print("Processing",f)
  61. dfs[plat] = (dfs.get(plat) or []) + [process_file(f)]
  62. if join_platforms:
  63. dfs = {pl: do_concat(ds).sort() for (pl, ds) in dfs.items()}
  64. # if len(dfs) == 1:
  65. # [df] = dfs.values()
  66. # return df
  67. return dfs
  68. def do_concat(dfs):
  69. return LabeledFrame(pd.concat(dfs).drop_duplicates())
  70. def do_packall_write(letter, daily=False):
  71. h = packall("*201*/",letter)
  72. for k,d in h.items():
  73. file='text/'+letter.lower()+str(k)
  74. if daily: file += '_daily'
  75. file+='.csv'
  76. if daily: d = d.aggregate()
  77. print('writing',file)
  78. d.to_csv(file, na_rep='.')
  79. def do_query(dfs, dti):
  80. "Query data files at all times in DatetimeIndex"
  81. agg = LabeledFrame(index=dti)
  82. for d in dfs:
  83. # assert(d.index.is_unique)
  84. # d = d.ix[dti].sort()
  85. # agg = pd.concat([agg, d]).sort()
  86. agg = agg.join(d).drop_duplicates().sort()
  87. # agg = agg.join(d, how='inner').sort()
  88. return agg
  89. def splitby_rainevents(waterdata):
  90. """
  91. Split data the times corresponding to rain events
  92. Return: DF: [start times, end times] for all rain events
  93. """
  94. from datetime import timedelta
  95. # rainevts = { "start": [], "end": [] }
  96. # rainevts = pd.DataFrame(columns=["Start", "End"])
  97. rainevts = []
  98. # Distinct event := rain >12 hours after previous rainfall
  99. interval = timedelta(0, 0, 0, 0, 0, 12)
  100. # Empty data
  101. if len(waterdata) == 0: return rainevts
  102. t_evt = waterdata.index[0] # most recent event
  103. for t, rain in waterdata.items():
  104. if rain > 0:
  105. if (t - t_evt) > interval: # a new event
  106. # skip this for first event
  107. if len(rainevts):
  108. rainevts[-1][1] = t_evt
  109. rainevts.append([t, t])
  110. # store most recent
  111. t_evt = t
  112. # a list of [start, stop] pairs
  113. return rainevts
  114. def get_startstoplists(xlspath):
  115. xl=pd.ExcelFile(xlspath)
  116. byplat = {}
  117. for sheet in xl.sheet_names:
  118. plat = get_platform(sheet)
  119. if plat:
  120. dti = get_startstoplist(xl, sheet)
  121. if dti is not None: byplat[plat] = dti
  122. else:
  123. print("Can't tell platform from sheet name '%s'" % sheet)
  124. return byplat
def get_startstoplist(xl, sheet):
    """
    read list of __-[start|stop] times from an Excel sheet
    if sheet is None, returns dict: platform -> list
    else, returns list
    """
    try:
        df = xl.parse(sheet, header=None, parse_dates=[1])
        try:
            cs = df.columns
            # Scan columns for the first one that parses as datetimes.
            for c in cs:
                try:
                    dti = pd.DatetimeIndex(df[c])
                    # NOTE(review): pandas Index has no .remove(); if this
                    # raises, the bare except swallows it and the loop keeps
                    # scanning instead of breaking -- confirm intent.
                    cs.remove(c)
                    break
                except: pass
            # for c in cs:
            #     s = df[c]
            #     if s.apply(lambda st: re.search("(rain|runoff)", st)).any():
            # NOTE(review): dti is unbound when no column parses; the
            # resulting NameError is caught and printed just below.
            return dti
        except Exception as e:
            print(e, "at sheet", sheet)
    except StopIteration:
        # Presumably old pandas raised StopIteration on an empty sheet --
        # confirm against the pandas version this project pins.
        print("Could not parse sheet", sheet, "(possibly empty)")
  149. def query(dfdict, dtis):
  150. """
  151. given dicts of DataFrames and query indexes, w/ platforms as keys
  152. return dict of values at queries
  153. """
  154. byplat = {}
  155. for plat, dti in dtis.items():
  156. df = dfdict.get(plat)
  157. if df is not None:
  158. q = df.ix[dti]
  159. byplat[plat] = q.drop_duplicates()
  160. else:
  161. print("Data dict doesn't contain data for platform", plat)
  162. return byplat
  163. def filter_var(df, var='vwc'):
  164. filtered = df[[c for c in df.columns if c.variable == var]]
  165. return LabeledFrame(filtered)
  166. def season_of(dt):
  167. """get season from a datetime or timestamp
  168. defn:
  169. spring = march-may
  170. summer = june-aug
  171. fall = sept-nov
  172. winter = dec-feb
  173. """
  174. bounds = {
  175. (3, 5): 'spring',
  176. (6, 8): 'summer',
  177. (9, 11): 'fall',
  178. }
  179. for (start, end), sn in bounds.items():
  180. if start <= dt.month <= end: return sn
  181. return 'winter'
  182. def year_season_of(dt):
  183. "get (year, season)"
  184. # return '%s_%s' % (season(dt), dt.year)
  185. return (dt.year, season_of(dt))
  186. def sumwater(col):
  187. lbl = col.name
  188. if isinstance(lbl, str):
  189. try:
  190. lbl = Label.from_str(col.name)
  191. except: pass
  192. if isinstance(lbl, Label) and lbl.variable == 'water_vol':
  193. return col.sum()
  194. return col.mean()
def do_fixtime(df, time, freq=INPUT_FREQ):
    """
    Shift pieces of DF with time gap by timedelta (on index) and concat
    to create new DF without gaps.

    df   -- frame whose index has a gap just before *time*
    time -- timestamp where the gap's right-hand piece begins
    freq -- sampling period (defaults to the module's INPUT_FREQ)
    """
    # Split at the gap: rows strictly before *time*, and rows from *time* on.
    df0, df1 = df.ix[:time - freq], df.ix[time:]
    # Shift the left piece forward so its last index lands at time - freq
    # (old-pandas tshift, default periods=1).
    df0_shifted = df0.tshift(freq = time - freq - df0.index[-1])
    # Shift the right piece back by its leading offset so it starts at *time*.
    df1_shifted = df1.tshift(periods = -1, freq = df1.index[0] - time)
    return LabeledFrame(pd.concat([df0_shifted, df1_shifted]))
  204. # def outliers(dfs):
  205. # """Find outliers in data"""