PageRenderTime 49ms CodeModel.GetById 21ms RepoModel.GetById 1ms app.codeStats 0ms

/statsmodels/tsa/base/tsa_model.py

https://github.com/danielballan/statsmodels
Python | 269 lines | 267 code | 2 blank | 0 comment | 9 complexity | 43eba361431834e2f5779e7450726ae9 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. import statsmodels.base.model as base
  2. from statsmodels.base import data
  3. import statsmodels.base.wrapper as wrap
  4. from statsmodels.tsa.base import datetools
  5. from numpy import arange, asarray
  6. from pandas import Index
  7. from pandas import datetools as pandas_datetools
  8. import datetime
  # Mapping from frequency keys to pandas offsets, re-exported from datetools;
  # used below to validate `freq` and to build prediction date ranges.
  _freq_to_pandas = datetools._freq_to_pandas

  # Docstring template shared by time-series models. The %(...)s slots are
  # filled per model. The numpydoc layout (indented parameter descriptions and
  # a blank line before the section header) is restored here so that the
  # substituted docstring renders its "Parameters" section correctly.
  _tsa_doc = """
      %(model)s

      Parameters
      ----------
      %(params)s
      dates : array-like of datetime, optional
          An array-like object of datetime objects. If a pandas object is given
          for endog or exog, it is assumed to have a DateIndex.
      freq : str, optional
          The frequency of the time-series. A Pandas offset or 'B', 'D', 'W',
          'M', 'A', or 'Q'. This is optional if dates are given.
      %(extra_params)s
      %(extra_sections)s
  """

  # Pieces substituted into the template for the base class docstring.
  _model_doc = "Timeseries model base class"
  _generic_params = base._model_params_doc
  _missing_param_doc = base._missing_param_doc
  class TimeSeriesModel(base.LikelihoodModel):
      # Likelihood model that additionally stores a dates index and a
      # frequency on ``self.data`` and resolves prediction start/end points
      # (given as strings or integers) against that index.

      __doc__ = _tsa_doc % {"model": _model_doc, "params": _generic_params,
                            "extra_params": _missing_param_doc,
                            "extra_sections": ""}

      def __init__(self, endog, exog=None, dates=None, freq=None,
                   missing='none'):
          super(TimeSeriesModel, self).__init__(endog, exog, missing=missing)
          self._init_dates(dates, freq)

      def _init_dates(self, dates, freq):
          """
          Validate `dates` and `freq` and attach them to ``self.data``.

          Parameters
          ----------
          dates : array-like of datetime or None
              When None, ``self.data.row_labels`` is used instead (which may
              itself be None).
          freq : str or None
              Frequency key. When falsy and dates are available, it is
              inferred with ``datetools._infer_freq``.

          Raises
          ------
          ValueError
              If the data is a ``PandasData`` whose first label is not a
              datetime, if frequency inference fails, or if `freq` is not
              a key of ``_freq_to_pandas``.
          """
          if dates is None:
              dates = self.data.row_labels

          if dates is not None:
              if (not isinstance(dates[0], datetime.datetime) and
                      isinstance(self.data, data.PandasData)):
                  raise ValueError("Given a pandas object and the index does "
                                   "not contain dates")
              if not freq:
                  try:
                      freq = datetools._infer_freq(dates)
                  except Exception:
                      # Any inference failure is reported uniformly; unlike a
                      # bare ``except`` this lets KeyboardInterrupt through.
                      raise ValueError("Frequency inference failed. Use "
                                       "`freq` keyword.")
              dates = Index(dates)
          self.data.dates = dates
          if freq:
              try:  # NOTE: Can drop this once we move to pandas >= 0.8.x
                  _freq_to_pandas[freq]
              except Exception:
                  raise ValueError("freq %s not understood" % freq)
          self.data.freq = freq

      def _get_exog_names(self):
          """Return the exogenous variable names stored on ``self.data``."""
          return self.data.xnames

      def _set_exog_names(self, vals):
          """Store `vals` as the exog names, wrapping a non-list in a list."""
          if not isinstance(vals, list):
              vals = [vals]
          self.data.xnames = vals

      # overwrite with writable property for (V)AR models
      exog_names = property(_get_exog_names, _set_exog_names)

      def _get_dates_loc(self, dates, date):
          """
          Return the location of `date` within the index `dates`.

          Uses ``dates.indexMap`` when the index has one (pandas 0.7.x) and
          ``dates.get_loc`` otherwise. If ``get_loc`` returns something with
          a length (a boolean array in pandas 0.8.0), it is reduced to the
          single position where it is true.
          """
          if hasattr(dates, 'indexMap'):  # 0.7.x
              date = dates.indexMap[date]
          else:
              date = dates.get_loc(date)
              try:  # pandas 0.8.0 returns a boolean array
                  len(date)
                  from numpy import where
                  date = where(date)[0].item()
              except TypeError:  # this is expected behavior
                  pass
          return date

      def _str_to_date(self, date):
          """
          Takes a string and returns a datetime object
          """
          return datetools.date_parser(date)

      def _set_predict_start_date(self, start):
          """
          Record on ``self.data.predict_start`` the date for index `start`.

          Does nothing when the model has no dates. `start` may be at most
          ``len(dates)``; that value means "the first period after the
          sample".

          Raises
          ------
          ValueError
              If `start` is greater than ``len(dates)``.
          """
          dates = self.data.dates
          if dates is None:
              return
          if start > len(dates):
              raise ValueError("Start must be <= len(endog)")
          if start == len(dates):
              # The first out-of-sample period lies exactly one step past
              # the last observed date. The step count handed to
              # _date_from_idx is a distance from dates[-1] (compare its
              # use in _get_predict_end), not the absolute index `start`.
              self.data.predict_start = datetools._date_from_idx(
                  dates[-1], 1, self.data.freq)
          elif start < len(dates):
              self.data.predict_start = dates[start]
          else:
              raise ValueError("Start must be <= len(dates)")

      def _get_predict_start(self, start):
          """
          Returns the index of the given start date. Subclasses should define
          default behavior for start = None. That isn't handled here.

          Start can be a string or an integer if self.data.dates is None.

          Raises
          ------
          ValueError
              If `start` is a string and the model has no dates, or the
              parsed date is not found in the dates index.
          """
          dates = self.data.dates
          if isinstance(start, str):
              if dates is None:
                  raise ValueError("Got a string for start and dates is None")
              dtstart = self._str_to_date(start)
              self.data.predict_start = dtstart
              try:
                  start = self._get_dates_loc(dates, dtstart)
              except KeyError:
                  raise ValueError("Start must be in dates. Got %s | %s" %
                                   (str(start), str(dtstart)))

          self._set_predict_start_date(start)
          return start

      def _get_predict_end(self, end):
          """
          See _get_predict_start for more information. Subclasses do not
          need to define anything for this.

          Returns
          -------
          end : int
              Index of the last in-sample observation to predict.
          out_of_sample : int
              Number of periods to forecast beyond the sample (0 if none).

          Raises
          ------
          ValueError
              If `end` is a string and the model has no dates, if a date
              that is not in the index is given while there is no
              frequency, or if `end` is neither a string nor an int while
              the frequency is None.
          """
          out_of_sample = 0  # will be overwritten if needed
          if end is None:  # use data for ARIMA - endog changes
              end = len(self.data.endog) - 1

          dates = self.data.dates
          freq = self.data.freq

          if isinstance(end, str):
              if dates is None:
                  raise ValueError("Got a string for end and dates is None")
              # Parse before the try block so that `dtend` is always bound
              # when the KeyError handler below runs.
              dtend = self._str_to_date(end)
              self.data.predict_end = dtend
              try:
                  end = self._get_dates_loc(dates, dtend)
              except KeyError:  # end is greater than dates[-1]...probably
                  if dtend > dates[-1]:
                      end = len(self.data.endog) - 1
                      out_of_sample = datetools._idx_from_dates(dates[-1],
                                                                dtend, freq)
                  else:
                      if freq is None:
                          raise ValueError("There is no frequency for these "
                                           "dates and date %s is not in dates "
                                           "index. Try giving a date that is in "
                                           "the dates index or use an integer."
                                           % dtend)
                      else:  # pragma: no cover
                          raise  # should never get here
              self._make_predict_dates()  # attaches self.data.predict_dates

          elif isinstance(end, int) and dates is not None:
              try:
                  self.data.predict_end = dates[end]
              except IndexError:
                  nobs = len(self.data.endog) - 1  # as an index
                  out_of_sample = end - nobs
                  end = nobs
                  if freq is not None:
                      self.data.predict_end = datetools._date_from_idx(
                          dates[-1], out_of_sample, freq)
                  elif out_of_sample <= 0:
                      # have no frequency but are in sample
                      # TODO: what error to catch here to make sure dates is
                      # on the index?
                      self.data.predict_end = self._get_dates_loc(dates, end)
                  else:
                      # No frequency and out of sample: fall back to plain
                      # integer positions for both ends of the range.
                      self.data.predict_end = end + out_of_sample
                      self.data.predict_start = self._get_dates_loc(
                          dates, self.data.predict_start)

              self._make_predict_dates()

          elif isinstance(end, int):
              nobs = len(self.data.endog) - 1  # is an index
              if end > nobs:
                  out_of_sample = end - nobs
                  end = nobs

          elif freq is None:  # should have a date with freq = None
              raise ValueError("When freq is None, you must give an integer "
                               "index for end.")

          return end, out_of_sample

      def _make_predict_dates(self):
          """
          Build the index of prediction dates and store it on
          ``self.data.predict_dates``.

          Reads ``predict_start``, ``predict_end`` and ``freq`` from
          ``self.data``. With a frequency, a date range between the two is
          generated; with no frequency and integer endpoints, an integer
          index is built; otherwise the matching slice of the existing
          dates index is used.
          """
          data = self.data
          dtstart = data.predict_start
          dtend = data.predict_end
          freq = data.freq

          if freq is not None:
              pandas_freq = _freq_to_pandas[freq]
              try:
                  from pandas import DatetimeIndex
                  dates = DatetimeIndex(start=dtstart, end=dtend,
                                        freq=pandas_freq)
              except ImportError:
                  # Older pandas without DatetimeIndex.
                  from pandas import DateRange
                  dates = DateRange(dtstart, dtend,
                                    offset=pandas_freq).values
          elif isinstance(dtstart, int) and isinstance(dtend, int):
              # No frequency, integer endpoints: inclusive integer range.
              dates = Index(range(dtstart, dtend + 1))
          else:
              # if freq is None and dtstart and dtend aren't integers, we're
              # in sample
              dates = self.data.dates
              start = self._get_dates_loc(dates, dtstart)
              end = self._get_dates_loc(dates, dtend)
              dates = dates[start:end + 1]  # is this index inclusive?
          self.data.predict_dates = dates
  class TimeSeriesModelResults(base.LikelihoodModelResults):
      """Likelihood-model results that also expose the model's data object."""

      def __init__(self, model, params, normalized_cov_params, scale=1.):
          # Attach the model's data first, then hand the remaining arguments
          # to the parent results class unchanged.
          self.data = model.data
          parent = super(TimeSeriesModelResults, self)
          parent.__init__(model, params, normalized_cov_params, scale)
  class TimeSeriesResultsWrapper(wrap.ResultsWrapper):
      """Results wrapper declaring which attributes and methods get wrapped."""

      # No attributes beyond those declared by the likelihood results wrapper.
      _attrs = {}
      _wrap_attrs = wrap.union_dicts(
          base.LikelihoodResultsWrapper._wrap_attrs,
          _attrs,
      )

      # `predict` output is wrapped using the 'dates' rule.
      _methods = {'predict': 'dates'}
      _wrap_methods = wrap.union_dicts(
          base.LikelihoodResultsWrapper._wrap_methods,
          _methods,
      )


  # Register the wrapper class against the results class it wraps.
  wrap.populate_wrapper(TimeSeriesResultsWrapper, TimeSeriesModelResults)
  if __name__ == "__main__":
      # Ad-hoc demo: build a DataFrame from the macrodata set and pass it to
      # TimeSeriesModel.
      import statsmodels.api as sm
      import pandas

      # Do not name this `data`: at module level that would rebind the `data`
      # module imported from statsmodels.base, which _init_dates relies on
      # for its `data.PandasData` check.
      macro = sm.datasets.macrodata.load()
      #make a DataFrame
      #TODO: attach a DataFrame to some of the datasets, for quicker use
      # Index labels look like "1959:1" (year:quarter).
      dates = ["%d:%d" % (int(row[0]), int(row[1]))
               for row in macro.data[['year', 'quarter']]]
      df = pandas.DataFrame(macro.data[['realgdp', 'realinv', 'realcons']],
                            index=dates)
      # NOTE(review): the index holds strings, not datetimes, so _init_dates
      # is expected to reject it with a ValueError -- confirm whether the
      # labels should be parsed into dates first.
      ex_mod = TimeSeriesModel(df)
      #ts_series = pandas.TimeSeries()