PageRenderTime 61ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/bctc/load.py

https://bitbucket.org/kc/pybctc
Python | 221 lines | 156 code | 21 blank | 44 comment | 3 complexity | a2c5d15e52fa0f5040a7e32f644a41c9 MD5 | raw file
Possible License(s): GPL-3.0
  1. #~ pybctc is a python package that makes access to British
  2. #~ Columbia[, Canada,] Transmission Corporation (BCTC) electric data
  3. #~ easier.
  4. #~ Copyright (C) 2009, 2010 Keegan Callin
  5. #~ This program is free software: you can redistribute it and/or modify
  6. #~ it under the terms of the GNU General Public License as published by
  7. #~ the Free Software Foundation, either version 3 of the License, or
  8. #~ (at your option) any later version.
  9. #~ This program is distributed in the hope that it will be useful,
  10. #~ but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. #~ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. #~ GNU General Public License for more details.
  13. #~ You should have received a copy of the GNU General Public License
  14. #~ along with this program. If not, see
  15. #~ <http://www.gnu.org/licenses/gpl-3.0.html>.
  16. '''
  17. Tools for parsing control area load historical reports posted at
  18. <http://www.bctc.com/transmission_system/balancing_authority_load_data/historical_transmission_data.htm> (2010-02-08).
  19. '''
  20. from datetime import datetime
  21. from datetime import timedelta
  22. # 3rd party imports
  23. import xlrd
  24. import pytz
  25. # Custom libraries
  26. from bctc import BC_TZ
  27. from bctc._util import dump_to_mkstemp
  28. from bctc._util import open_workbook
  29. from bctc._util import BctcDtNormalizer
  30. from bctc._util import YearBookManager
  31. URL_2001_TO_2003 = 'http://www.bctc.com/NR/rdonlyres/837CF09D-0730-40D8-A2D6-745CD9937A27/0/BCCALoad01April2001to31Dec2003.xls'
  32. URLMAP = {
  33. 2001 : URL_2001_TO_2003,
  34. 2002 : URL_2001_TO_2003,
  35. 2003 : URL_2001_TO_2003,
  36. 2004 : 'http://www.bctc.com/NR/rdonlyres/ABD25491-A560-49F4-AC6F-64924D2DF025/0/2004controlareaload.xls',
  37. 2005 : 'http://www.bctc.com/NR/rdonlyres/4A26DAD4-E8A8-41C6-9335-77A44B0E75F1/0/2005controlareaload.xls',
  38. 2006 : 'http://www.bctc.com/NR/rdonlyres/C4BF362C-B661-4D8D-8FB5-0AF88DD7FFC3/0/2006controlareaload.xls',
  39. 2007 : 'http://www.bctc.com/NR/rdonlyres/C6E06392-7235-4F39-ADCD-D58A70D493C7/0/2007controlareaload.xls',
  40. 2008 : 'http://www.bctc.com/NR/rdonlyres/AB10E645-DC79-42BC-8620-046A245A44EE/0/2008controlareaload.xls',
  41. 2009 : 'http://www.bctc.com/NR/rdonlyres/7386D585-BE05-494F-A377-D846D2A8C486/0/jandec2009controlareaload.xls',
  42. #~ 2010 : 'http://www.bctc.com/NR/rdonlyres/0A1D85D8-C257-4268-BC4A-46B0174AB9D3/0/jan2010controlareaload.xls',
  43. 2010 : 'http://www.bctc.com/NR/rdonlyres/0A1D85D8-C257-4268-BC4A-46B0174AB9D3/0/janfeb2010controlareaload.xls', # 2010-03-02
  44. }
  45. class LoadBookManager(YearBookManager):
  46. '''A cache manager that dynamically downloads historical load data
  47. from 2001 onwards. The managed hourly report files are those at
  48. <http://www.bctc.com/transmission_system/balancing_authority_load_data/historical_transmission_data.htm> (2010-02-23).'''
  49. def __init__(self):
  50. super(type(self), self).__init__(URLMAP)
  51. class LoadPoint(object):
  52. '''Object representing load at a given point in time. The object
  53. is iterable so that it can be unpacked::
  54. >>> from datetime import datetime
  55. >>> import pytz
  56. >>> point = LoadPoint(pytz.utc.localize(datetime(2001, 1, 1)), 1000)
  57. >>> t, load = point
  58. >>> assert t == point.t
  59. >>> assert load = point.load
  60. :param t: UTC :class:`datetime.datetime`
  61. :param load: int
  62. '''
  63. def __init__(self, t, load):
  64. self._t = t
  65. self._load = load
  66. self._iterable = (self.t, self.load)
  67. @property
  68. def t(self):
  69. ''':rtype: UTC offset-aware :class:`datetime.datetime`'''
  70. return self._t
  71. @property
  72. def load(self):
  73. '''Load in MW.
  74. :rtype: int'''
  75. return self._load
  76. def __iter__(self):
  77. return iter(self._iterable)
  78. def parse_load_xlrd_book(book):
  79. '''Yields :class:`LoadPoint` objects extracted from open Excel
  80. :mod:`xlrd` *book*.'''
  81. sheet = book.sheets()[0]
  82. dt_normalizer = BctcDtNormalizer()
  83. num_extracted_rows = 0
  84. for row in xrange(0, sheet.nrows):
  85. try:
  86. cell_date, cell_hour, cell_load, cell_tz = sheet.cell(row, 0), sheet.cell(row, 1), sheet.cell(row, 2), sheet.cell(row, 3)
  87. tz = cell_tz.value
  88. except IndexError:
  89. cell_date, cell_hour, cell_load = sheet.cell(row, 0), sheet.cell(row, 1), sheet.cell(row, 2)
  90. tz = ''
  91. try:
  92. if cell_date.ctype != xlrd.XL_CELL_DATE:
  93. raise TypeError('Expected date ctype')
  94. num_extracted_rows += 1
  95. except TypeError:
  96. if num_extracted_rows == 0:
  97. continue
  98. else:
  99. raise
  100. hour = int(cell_hour.value)
  101. load = int(cell_load.value)
  102. if tz == 'PST' or load == 0:
  103. # This cell contains no data; it is a placeholder for the
  104. # missing hour when DST begins. The row looks something
  105. # like this:
  106. #
  107. # 01 Apr 01, 3, 0, PST
  108. #
  109. # At other times in the same report, the "PST" marker is
  110. # ommitted like this:
  111. #
  112. # 01 Apr 01, 3, 0
  113. #
  114. # And this leaves a zero load as the only indication that
  115. # the row is null-data.
  116. continue
  117. date_tuple = xlrd.xldate_as_tuple(cell_date.value, book.datemode)
  118. dt = dt_normalizer.normalize(date_tuple[0:3], hour)
  119. dt = dt.astimezone(pytz.utc)
  120. yield LoadPoint(dt, load)
  121. def parse_load_xls_file(f):
  122. '''Yields :class:`LoadPoint` objects extracted from Excel file *f*.
  123. File *f* may be either a file-like object or a string containing
  124. the path to an Excel file.'''
  125. book = open_workbook(f)
  126. for p in parse_load_xlrd_book(book):
  127. yield p
  128. def yield_load_points(start_dt = pytz.utc.localize(datetime(2001, 1, 1)), end_dt = pytz.utc.localize(datetime.today() + timedelta(1)), manager = LoadBookManager()):
  129. '''Yields control area :class:`LoadPoint` objects with time *t*
  130. such that *start_dt* <= *t* < *end_dt*. By default all available
  131. data is returned. A *manager* object, if provided, gives advanced
  132. users the ability to use previously cached files to save download
  133. time.
  134. :param start_dt: offset-aware :class:`datetime.datetime`; typically like
  135. ``pytz.utc.localize(datetime(2001, 1, 1))``
  136. :param end_dt: offset-aware :class:`datetime.datetime`; typically like
  137. ``pytz.utc.localize(datetime.today() + timedelta(1))``
  138. :param manager: :class:`YearBookManager` instance like :class:`LoadBookManager`\(\)
  139. Example Usage::
  140. >>> from bctc.load import LoadBookManager, yield_load_points
  141. >>> from datetime import datetime
  142. >>> import pytz
  143. >>>
  144. >>> # list of all available data points
  145. >>> points = list(yield_load_points())
  146. >>> assert len(points) > 10000
  147. >>>
  148. >>> # Create a list of all data for 2007 and use a manager
  149. >>> # object to cache downloaded data for later usage.
  150. >>> manager = LoadBookManager()
  151. >>> start_dt = pytz.utc.localize(datetime(2007, 1, 1))
  152. >>> end_dt = pytz.utc.localize(datetime(2008, 1, 1))
  153. >>> points_2007 = list(yield_load_points(start_dt, end_dt, manager = manager))
  154. >>> assert len(points_2007) > 10000
  155. >>>
  156. >>> # Create a new list of 2007 and 2008 points re-using the
  157. >>> # 2007 data already stored by *manager* to save time.
  158. >>> points_2007_and_2008 = list(yield_load_points(start_dt, end_dt, manager = manager))
  159. >>> assert len(points_2007_and_2008) > 10000
  160. '''
  161. start_dt = start_dt.astimezone(pytz.utc)
  162. end_dt = end_dt.astimezone(pytz.utc)
  163. start_year = start_dt.year
  164. end_year = start_dt.year
  165. min_year = min(manager.years)
  166. max_year = max(manager.years)
  167. if end_year < min_year:
  168. return
  169. if start_year > max_year:
  170. return
  171. if start_year < min_year:
  172. start_year = min_year
  173. if end_year > max_year:
  174. end_year = max_year
  175. for year in xrange(start_year, end_year + 1):
  176. fn = manager.filename(year)
  177. for point in parse_load_xls_file(fn):
  178. if start_dt <= point.t < end_dt:
  179. yield point