PageRenderTime 57ms CodeModel.GetById 34ms RepoModel.GetById 0ms app.codeStats 0ms

/statsmodels/tsa/vector_ar/util.py

http://github.com/statsmodels/statsmodels
Python | 332 lines | 314 code | 4 blank | 14 comment | 1 complexity | aff229bf63103b6738e77c255ccedd59 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. # -*- coding: utf-8 -*-
  2. """
  3. Miscellaneous utility code for VAR estimation
  4. """
  5. from statsmodels.compat.pandas import frequencies
  6. from statsmodels.compat.python import asbytes
  7. import numpy as np
  8. import pandas as pd
  9. from scipy import stats, linalg
  10. import statsmodels.tsa.tsatools as tsa
  11. #-------------------------------------------------------------------------------
  12. # Auxiliary functions for estimation
  13. def get_var_endog(y, lags, trend='c', has_constant='skip'):
  14. """
  15. Make predictor matrix for VAR(p) process
  16. Z := (Z_0, ..., Z_T).T (T x Kp)
  17. Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1)
  18. Ref: Lütkepohl p.70 (transposed)
  19. has_constant can be 'raise', 'add', or 'skip'. See add_constant.
  20. """
  21. nobs = len(y)
  22. # Ravel C order, need to put in descending order
  23. Z = np.array([y[t-lags : t][::-1].ravel() for t in range(lags, nobs)])
  24. # Add constant, trend, etc.
  25. trend = tsa.rename_trend(trend)
  26. if trend != 'n':
  27. Z = tsa.add_trend(Z, prepend=True, trend=trend,
  28. has_constant=has_constant)
  29. return Z
  30. def get_trendorder(trend='c'):
  31. # Handle constant, etc.
  32. if trend == 'c':
  33. trendorder = 1
  34. elif trend in ('n', 'nc'):
  35. trendorder = 0
  36. elif trend == 'ct':
  37. trendorder = 2
  38. elif trend == 'ctt':
  39. trendorder = 3
  40. else:
  41. raise ValueError(f"Unkown trend: {trend}")
  42. return trendorder
  43. def make_lag_names(names, lag_order, trendorder=1, exog=None):
  44. """
  45. Produce list of lag-variable names. Constant / trends go at the beginning
  46. Examples
  47. --------
  48. >>> make_lag_names(['foo', 'bar'], 2, 1)
  49. ['const', 'L1.foo', 'L1.bar', 'L2.foo', 'L2.bar']
  50. """
  51. lag_names = []
  52. if isinstance(names, str):
  53. names = [names]
  54. # take care of lagged endogenous names
  55. for i in range(1, lag_order + 1):
  56. for name in names:
  57. if not isinstance(name, str):
  58. name = str(name) # will need consistent unicode handling
  59. lag_names.append('L'+str(i)+'.'+name)
  60. # handle the constant name
  61. if trendorder != 0:
  62. lag_names.insert(0, 'const')
  63. if trendorder > 1:
  64. lag_names.insert(1, 'trend')
  65. if trendorder > 2:
  66. lag_names.insert(2, 'trend**2')
  67. if exog is not None:
  68. if isinstance(exog, pd.Series):
  69. exog = pd.DataFrame(exog)
  70. elif not hasattr(exog, 'ndim'):
  71. exog = np.asarray(exog)
  72. if exog.ndim == 1:
  73. exog = exog[:, None]
  74. for i in range(exog.shape[1]):
  75. if isinstance(exog, pd.DataFrame):
  76. exog_name = str(exog.columns[i])
  77. else:
  78. exog_name = "exog" + str(i)
  79. lag_names.insert(trendorder + i, exog_name)
  80. return lag_names
  81. def comp_matrix(coefs):
  82. """
  83. Return compansion matrix for the VAR(1) representation for a VAR(p) process
  84. (companion form)
  85. A = [A_1 A_2 ... A_p-1 A_p
  86. I_K 0 0 0
  87. 0 I_K ... 0 0
  88. 0 ... I_K 0]
  89. """
  90. p, k1, k2 = coefs.shape
  91. if k1 != k2:
  92. raise ValueError('coefs must be 3-d with shape (p, k, k).')
  93. kp = k1 * p
  94. result = np.zeros((kp, kp))
  95. result[:k1] = np.concatenate(coefs, axis=1)
  96. # Set I_K matrices
  97. if p > 1:
  98. result[np.arange(k1, kp), np.arange(kp-k1)] = 1
  99. return result
  100. #-------------------------------------------------------------------------------
  101. # Miscellaneous stuff
  102. def parse_lutkepohl_data(path): # pragma: no cover
  103. """
  104. Parse data files from Lütkepohl (2005) book
  105. Source for data files: www.jmulti.de
  106. """
  107. from collections import deque
  108. from datetime import datetime
  109. import re
  110. regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))
  111. with open(path, 'rb') as f:
  112. lines = deque(f)
  113. to_skip = 0
  114. while asbytes('*/') not in lines.popleft():
  115. #while '*/' not in lines.popleft():
  116. to_skip += 1
  117. while True:
  118. to_skip += 1
  119. line = lines.popleft()
  120. m = regex.match(line)
  121. if m:
  122. year, freq, start_point = m.groups()
  123. break
  124. data = (pd.read_csv(path, delimiter=r"\s+", header=to_skip+1)
  125. .to_records(index=False))
  126. n = len(data)
  127. # generate the corresponding date range (using pandas for now)
  128. start_point = int(start_point)
  129. year = int(year)
  130. offsets = {
  131. asbytes('Q'): frequencies.BQuarterEnd(),
  132. asbytes('M'): frequencies.BMonthEnd(),
  133. asbytes('A'): frequencies.BYearEnd()
  134. }
  135. # create an instance
  136. offset = offsets[freq]
  137. inc = offset * (start_point - 1)
  138. start_date = offset.rollforward(datetime(year, 1, 1)) + inc
  139. offset = offsets[freq]
  140. date_range = pd.date_range(start=start_date, freq=offset, periods=n)
  141. return data, date_range
  142. def norm_signif_level(alpha=0.05):
  143. return stats.norm.ppf(1 - alpha / 2)
  144. def acf_to_acorr(acf):
  145. diag = np.diag(acf[0])
  146. # numpy broadcasting sufficient
  147. return acf / np.sqrt(np.outer(diag, diag))
  148. def varsim(coefs, intercept, sig_u, steps=100, initvalues=None, seed=None):
  149. """
  150. Simulate VAR(p) process, given coefficients and assuming Gaussian noise
  151. Parameters
  152. ----------
  153. coefs : ndarray
  154. Coefficients for the VAR lags of endog.
  155. intercept : None or ndarray 1-D (neqs,) or (steps, neqs)
  156. This can be either the intercept for each equation or an offset.
  157. If None, then the VAR process has a zero intercept.
  158. If intercept is 1-D, then the same (endog specific) intercept is added
  159. to all observations.
  160. If intercept is 2-D, then it is treated as an offset and is added as
  161. an observation specific intercept to the autoregression. In this case,
  162. the intercept/offset should have same number of rows as steps, and the
  163. same number of columns as endogenous variables (neqs).
  164. sig_u : ndarray
  165. Covariance matrix of the residuals or innovations.
  166. If sig_u is None, then an identity matrix is used.
  167. steps : {None, int}
  168. number of observations to simulate, this includes the initial
  169. observations to start the autoregressive process.
  170. If offset is not None, then exog of the model are used if they were
  171. provided in the model
  172. seed : {None, int}
  173. If seed is not None, then it will be used with for the random
  174. variables generated by numpy.random.
  175. Returns
  176. -------
  177. endog_simulated : nd_array
  178. Endog of the simulated VAR process
  179. """
  180. rs = np.random.RandomState(seed=seed)
  181. rmvnorm = rs.multivariate_normal
  182. p, k, k = coefs.shape
  183. if sig_u is None:
  184. sig_u = np.eye(k)
  185. ugen = rmvnorm(np.zeros(len(sig_u)), sig_u, steps)
  186. result = np.zeros((steps, k))
  187. if intercept is not None:
  188. # intercept can be 2-D like an offset variable
  189. if np.ndim(intercept) > 1:
  190. if not len(intercept) == len(ugen):
  191. raise ValueError('2-D intercept needs to have length `steps`')
  192. # add intercept/offset also to intial values
  193. result += intercept
  194. result[p:] += ugen[p:]
  195. else:
  196. result[p:] = ugen[p:]
  197. # add in AR terms
  198. for t in range(p, steps):
  199. ygen = result[t]
  200. for j in range(p):
  201. ygen += np.dot(coefs[j], result[t-j-1])
  202. return result
  203. def get_index(lst, name):
  204. try:
  205. result = lst.index(name)
  206. except Exception:
  207. if not isinstance(name, int):
  208. raise
  209. result = name
  210. return result
  211. #method used repeatedly in Sims-Zha error bands
  212. def eigval_decomp(sym_array):
  213. """
  214. Returns
  215. -------
  216. W: array of eigenvectors
  217. eigva: list of eigenvalues
  218. k: largest eigenvector
  219. """
  220. #check if symmetric, do not include shock period
  221. eigva, W = linalg.eig(sym_array, left=True, right=False)
  222. k = np.argmax(eigva)
  223. return W, eigva, k
  224. def vech(A):
  225. """
  226. Simple vech operator
  227. Returns
  228. -------
  229. vechvec: vector of all elements on and below diagonal
  230. """
  231. length=A.shape[1]
  232. vechvec=[]
  233. for i in range(length):
  234. b=i
  235. while b < length:
  236. vechvec.append(A[b,i])
  237. b=b+1
  238. vechvec=np.asarray(vechvec)
  239. return vechvec
  240. def seasonal_dummies(n_seasons, len_endog, first_period=0, centered=False):
  241. """
  242. Parameters
  243. ----------
  244. n_seasons : int >= 0
  245. Number of seasons (e.g. 12 for monthly data and 4 for quarterly data).
  246. len_endog : int >= 0
  247. Total number of observations.
  248. first_period : int, default: 0
  249. Season of the first observation. As an example, suppose we have monthly
  250. data and the first observation is in March (third month of the year).
  251. In this case we pass 2 as first_period. (0 for the first season,
  252. 1 for the second, ..., n_seasons-1 for the last season).
  253. An integer greater than n_seasons-1 are treated in the same way as the
  254. integer modulo n_seasons.
  255. centered : bool, default: False
  256. If True, center (demean) the dummy variables. That is useful in order
  257. to get seasonal dummies that are orthogonal to the vector of constant
  258. dummy variables (a vector of ones).
  259. Returns
  260. -------
  261. seasonal_dummies : ndarray (len_endog x n_seasons-1)
  262. """
  263. if n_seasons == 0:
  264. return np.empty((len_endog, 0))
  265. if n_seasons > 0:
  266. season_exog = np.zeros((len_endog, n_seasons - 1))
  267. for i in range(n_seasons - 1):
  268. season_exog[(i-first_period) % n_seasons::n_seasons, i] = 1
  269. if centered:
  270. season_exog -= 1 / n_seasons
  271. return season_exog