
/tser/tser_070_voltar/util.py

https://github.com/burakbayramli/classnotes
from scipy.optimize import minimize
import pandas as pd, random
import numpy as np, datetime
import scipy.stats

FLAG_BAD_RETURN = -99999.0
CALENDAR_DAYS_IN_YEAR = 365.25
BUSINESS_DAYS_IN_YEAR = 256.0
ROOT_BDAYS_INYEAR = BUSINESS_DAYS_IN_YEAR**.5
WEEKS_IN_YEAR = CALENDAR_DAYS_IN_YEAR / 7.0
ROOT_WEEKS_IN_YEAR = WEEKS_IN_YEAR**.5
MONTHS_IN_YEAR = 12.0
ROOT_MONTHS_IN_YEAR = MONTHS_IN_YEAR**.5
ARBITRARY_START = datetime.datetime(1900, 1, 1)  # pd.datetime was removed in pandas 1.0
DEFAULT_CAPITAL = 1.0
DEFAULT_ANN_RISK_TARGET = 0.16

contract_month_codes = ['F', 'G', 'H', 'J', 'K', 'M', 'N', 'Q', 'U', 'V', 'X', 'Z']
contract_month_dict = dict(zip(contract_month_codes,
                               range(1, len(contract_month_codes) + 1)))

def shift(lst, empty):
    # Rotate the list left by one and pad the end with `empty`
    res = lst[:]
    for index in range(len(lst) - 1):
        res[index] = res[index + 1]
    res[-1] = empty
    return res

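# A minimal sketch (made-up values, not part of the original module) showing
# what shift() does: the list rotates left and the tail is padded.
def _demo_shift():
    assert shift([1, 2, 3], 0) == [2, 3, 0]
    assert shift(['a', 'b'], None) == ['b', None]
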
def stitch_prices(dfs, price_col, dates):
    # Back-adjust and splice a list of contract DataFrames into one
    # continuous price series, rolling contracts on the given dates.
    res = []
    datesr = list(reversed(dates))
    dfsr = list(reversed(dfs))
    dfsr_pair = shift(dfsr, pd.DataFrame())
    for i, v in enumerate(datesr):
        tmp1 = float(dfsr[i].loc[v, price_col])
        tmp2 = float(dfsr_pair[i].loc[v, price_col])
        dfsr_pair[i].loc[:, price_col] = dfsr_pair[i][price_col] + tmp1 - tmp2
    dates.insert(0, '1900-01-01')
    dates_end = shift(dates, '2200-01-01')
    for i, v in enumerate(dates):
        tmp = dfs[i][(dfs[i].index > dates[i]) & (dfs[i].index <= dates_end[i])]
        res.append(tmp[price_col])
    return pd.concat(res)

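# A minimal sketch (synthetic data, arbitrary dates): two overlapping contract
# frames spliced at '2010-02-01'. Note stitch_prices adjusts the earlier frame
# in place and appends to `dates`, so pass throwaway copies.
def _demo_stitch_prices():
    idx1 = pd.date_range("2010-01-01", "2010-02-10", freq="B")
    idx2 = pd.date_range("2010-01-20", "2010-03-10", freq="B")
    df1 = pd.DataFrame({"Settle": np.linspace(100., 110., len(idx1))}, index=idx1)
    df2 = pd.DataFrame({"Settle": np.linspace(99., 112., len(idx2))}, index=idx2)
    return stitch_prices([df1, df2], "Settle", ["2010-02-01"])
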
def which_contract(contract_list, cycle, offset, expday, expmon):
    # For every business day, work out the effective contract given the
    # contract cycle, the expiry day-of-month and a roll offset.
    assert len(contract_list) > 0
    # first date of first contract, last date of last contract
    start_date = contract_list[list(contract_list.keys())[0]].head(1).index[0]
    end_date = contract_list[list(contract_list.keys())[-1]].tail(1).index[0]
    delta = end_date - start_date
    dates = []
    for i in range(delta.days + 1):
        day = start_date + datetime.timedelta(days=i)
        if day.weekday() < 5:
            dates.append(day)
    df = pd.DataFrame(index=dates)

    def closest_biz(d):  # get closest business day
        diffs = np.abs((d - df.index).days)
        return df.index[np.argmin(diffs)]

    cycle_d = [contract_month_dict[x] for x in cycle]
    df['effcont'] = np.nan
    for year in np.unique(df.index.year):
        for c in cycle_d:
            v = "%d%02d" % (year, c)
            exp_d = datetime.datetime(year, c, expday)
            if expmon == "prev":
                exp_d = exp_d - datetime.timedelta(days=30)
            df.loc[closest_biz(exp_d), 'effcont'] = v
    df = df.bfill()
    df['effcont'] = df.effcont.shift(-int(offset * 2 / 3 + 3))
    return df.ffill()

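# A minimal sketch (synthetic contracts; the H/M cycle, expday=15 and
# expmon="this" are illustrative assumptions) of the effective-contract
# calendar.
def _demo_which_contract():
    c1 = pd.DataFrame(index=pd.date_range("2010-01-04", "2010-03-19", freq="B"))
    c2 = pd.DataFrame(index=pd.date_range("2010-01-04", "2010-06-18", freq="B"))
    return which_contract({"201003": c1, "201006": c2},
                          cycle="HM", offset=0, expday=15, expmon="this")
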
def create_carry(df, offset, contract_list):
    # Attach carry-contract prices: for each date, look `offset` months away
    # from the effective contract and record both price series.
    df2 = df.copy()
    df2['effcont'] = df2.effcont.astype(str)

    def offset_contract(con):
        s = pd.to_datetime(con + "15", format='%Y%m%d')
        ss = s + datetime.timedelta(days=30 * offset)
        return "%d%02d" % (int(ss.year), int(ss.month))

    df2['carrycont'] = df2.effcont.map(offset_contract)
    df2['effprice'] = df2.apply(
        lambda x: contract_list.get(x.effcont).s.get(x.name)
        if x.effcont in contract_list else np.nan, axis=1)
    df2['carryprice'] = df2.apply(
        lambda x: contract_list.get(x.carrycont).s.get(x.name)
        if x.carrycont in contract_list else np.nan, axis=1)
    return df2

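# A minimal sketch building on _demo_which_contract(). The contract objects
# are SimpleNamespace stand-ins carrying the `.s` price series attribute that
# create_carry expects; the flat prices are made up.
def _demo_create_carry():
    from types import SimpleNamespace
    df = _demo_which_contract()
    contracts = {c: SimpleNamespace(s=pd.Series(100.0 + i, index=df.index))
                 for i, c in enumerate(["201003", "201006"])}
    return create_carry(df, offset=3, contract_list=contracts)
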
def ccy_returns(price, forecast):
    # Currency P&L from holding a volatility-scaled position driven by the
    # forecast (a forecast of 10 corresponds to the average position size).
    daily_risk_capital = DEFAULT_CAPITAL * DEFAULT_ANN_RISK_TARGET / ROOT_BDAYS_INYEAR
    daily_returns_volatility = robust_vol_calc(price.diff())
    multiplier = daily_risk_capital * 1.0 * 1.0 / 10.0
    numerator = forecast * multiplier
    positions = numerator.ffill() / daily_returns_volatility.ffill()
    cum_trades = positions.shift(1).ffill()
    price_returns = price.diff()
    instr_ccy_returns = cum_trades.shift(1) * price_returns
    instr_ccy_returns = instr_ccy_returns.cumsum().ffill().reindex(price.index).diff()
    return instr_ccy_returns

def skew(price, forecast):
    # Skew of percentage returns
    base_capital = DEFAULT_CAPITAL
    pct = 100.0 * ccy_returns(price, forecast) / base_capital
    return scipy.stats.skew(pct[pd.isnull(pct) == False])

def sharpe(price, forecast):
    # Annualised Sharpe ratio, plus the t-stat and p-value of the returns
    instr_ccy_returns = ccy_returns(price, forecast)
    tval, pval = scipy.stats.ttest_1samp(instr_ccy_returns.dropna(), 0)
    mean_return = instr_ccy_returns.mean() * BUSINESS_DAYS_IN_YEAR
    vol = instr_ccy_returns.std() * ROOT_BDAYS_INYEAR
    return mean_return / vol, tval, pval

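# A minimal sketch (random-walk price, arbitrary seed) wiring ccy_returns,
# skew and sharpe together, using the ewma forecast defined just below.
def _demo_sharpe():
    np.random.seed(0)
    idx = pd.date_range("2010-01-01", periods=500, freq="B")
    price = pd.Series(100.0 + np.cumsum(np.random.randn(500)), index=idx)
    forecast = ewma(price, slow=64, fast=16)
    return sharpe(price, forecast), skew(price, forecast)
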
def ewma(price, slow, fast):
    # Volatility-normalised EWMA crossover: fast average minus slow average,
    # scaled by the daily price-change volatility
    fast_ewma = price.ewm(span=fast).mean()
    slow_ewma = price.ewm(span=slow).mean()
    raw_ewmac = fast_ewma - slow_ewma
    vol = robust_vol_calc(price.diff())
    return raw_ewmac / vol

def bollinger(df, col, lev):
    # Mean reversion on Bollinger bands: short above the upper band, long
    # below the lower band, holding until the opposite signal appears
    signals = pd.DataFrame(index=df.index)
    middle = df[col].rolling(40, min_periods=1).mean()
    std = df[col].rolling(40, min_periods=1).std()
    df['middle'] = middle
    df['top'] = middle + 2 * std
    df['bottom'] = middle - 2 * std
    signals['signal'] = np.where(df[col] > df['top'], -1.0, np.nan)
    signals['signal'] = np.where(df[col] < df['bottom'], 1.0, signals['signal'])
    signals['signal'] = signals['signal'].ffill()
    df['ret'] = df[col].pct_change() * signals['signal'].shift(1)
    ret = df.ret.dropna() * lev
    return ret

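# A minimal sketch (synthetic series; lev=1.0 is an assumption) of the
# Bollinger rule. Note bollinger() mutates df, adding the band columns.
def _demo_bollinger():
    np.random.seed(1)
    idx = pd.date_range("2010-01-01", periods=300, freq="B")
    df = pd.DataFrame({"Settle": 50.0 + np.cumsum(np.random.randn(300))}, index=idx)
    return bollinger(df, "Settle", lev=1.0)
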
def crossover(df, col, lev):
    # Long when the 40-day moving average is above the 100-day average
    signals = pd.DataFrame(index=df.index)
    short_ma = df[col].rolling(40, min_periods=1).mean()
    long_ma = df[col].rolling(100, min_periods=1).mean()
    signals['signal'] = np.where(short_ma > long_ma, 1, 0)
    df['signal'] = signals['signal'].shift(1)
    df['ret'] = df[col].pct_change() * df['signal']
    ret = df.ret.dropna() * lev
    return ret

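# The same synthetic setup, run through the 40/100-day MA crossover rule.
def _demo_crossover():
    np.random.seed(2)
    idx = pd.date_range("2010-01-01", periods=300, freq="B")
    df = pd.DataFrame({"Settle": 50.0 + np.cumsum(np.random.randn(300))}, index=idx)
    return crossover(df, "Settle", lev=1.0)
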
def carry(daily_ann_roll, vol, diff_in_years, smooth_days=90):
    # Carry forecast: annualised roll divided by annualised volatility,
    # exponentially smoothed with centre-of-mass smooth_days
    ann_stdev = vol * ROOT_BDAYS_INYEAR
    raw_carry = daily_ann_roll / ann_stdev
    smooth_carry = raw_carry.ewm(com=smooth_days).mean() / diff_in_years
    return smooth_carry.ffill()

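# A minimal sketch with made-up constant roll and vol series; with vol 0.5
# the annualised stdev is 8.0, so raw carry is 0.2/8 = 0.025, and dividing by
# diff_in_years = 0.25 (contracts three months apart) gives a forecast of 0.1.
def _demo_carry():
    idx = pd.date_range("2010-01-01", periods=500, freq="B")
    vol = pd.Series(0.5, index=idx)
    daily_ann_roll = pd.Series(0.2, index=idx)
    return carry(daily_ann_roll, vol, diff_in_years=0.25)
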
def estimate_forecast_scalar(x, window=250000, min_periods=500):
    # Scalar that rescales the average absolute forecast to 10. The `window`
    # and `min_periods` arguments are kept for interface compatibility but
    # unused: the whole-sample average is taken.
    target_abs_forecast = 10.
    x = x.abs().iloc[:, 0]
    avg_abs_value = x.mean()
    return target_abs_forecast / avg_abs_value

def vol_equaliser(mean_list, stdev_list):
    # Rescale each asset's mean and stdev so that all stdevs equal the average
    if np.all(np.isnan(stdev_list)):
        return (([np.nan] * len(mean_list), [np.nan] * len(stdev_list)))
    avg_stdev = np.nanmean(stdev_list)
    norm_factor = [asset_stdev / avg_stdev for asset_stdev in stdev_list]
    norm_means = [mean_list[i] / norm_factor[i] for (i, notUsed) in enumerate(mean_list)]
    norm_stdev = [stdev_list[i] / norm_factor[i] for (i, notUsed) in enumerate(stdev_list)]
    return (norm_means, norm_stdev)

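# A quick worked example (made-up numbers): with stdevs 0.1 and 0.3 the
# average is 0.2, so asset one is scaled up by 2x and asset two down by 1.5x,
# leaving both stdevs at 0.2:
#   vol_equaliser([0.05, 0.10], [0.1, 0.3])
#   -> ([0.10, 0.0666...], [0.2, 0.2])
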
def apply_with_min_periods(xcol, my_func=np.nanmean, min_periods=0):
    # Apply my_func to a column only if it has enough non-nan observations
    not_nan = sum([not np.isnan(xelement) for xelement in xcol])
    if not_nan >= min_periods:
        return my_func(xcol)
    else:
        return np.nan

def vol_estimator(x, using_exponent=True, min_periods=20, ew_lookback=250):
    vol = x.apply(apply_with_min_periods, axis=0,
                  min_periods=min_periods, my_func=np.nanstd)
    stdev_list = list(vol)
    return stdev_list

def mean_estimator(x, using_exponent=True, min_periods=20, ew_lookback=500):
    means = x.apply(apply_with_min_periods, axis=0,
                    min_periods=min_periods, my_func=np.nanmean)
    mean_list = list(means)
    return mean_list

def str2Bool(x):
    if type(x) is bool:
        return x
    return x.lower() in ("t", "true")

def correlation_single_period(data_for_estimate,
                              using_exponent=True, min_periods=20,
                              ew_lookback=250, floor_at_zero=True):
    ## These may come from config as str
    using_exponent = str2Bool(using_exponent)
    if using_exponent:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on the same timestamp
        ## This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))
        ## Usual use for IDM, FDM calculation when the whole data set is used
        corrmat = data_for_estimate.ewm(
            span=int(ew_lookback * dlenadj), min_periods=min_periods).corr()
        ## ewm().corr() stacks one matrix per row; only want the final one
        corrmat = corrmat.values[-data_for_estimate.shape[1]:]
    else:
        ## Use normal correlation
        ## Usual use for bootstrapping when we only have a sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values
    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0
    return corrmat

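# A minimal sketch (random returns, arbitrary seed): the final
# exponentially-weighted correlation matrix of three assets.
def _demo_correlation():
    np.random.seed(3)
    idx = pd.date_range("2010-01-01", periods=300, freq="B")
    rets = pd.DataFrame(np.random.randn(300, 3) * 0.01, index=idx, columns=list("ABC"))
    return correlation_single_period(rets)
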
def fix_mus(mean_list):
    # Replace nans with a big negative flag so the optimiser avoids them
    def _fixit(x):
        if np.isnan(x):
            return FLAG_BAD_RETURN
        else:
            return x
    mean_list = [_fixit(x) for x in mean_list]
    return mean_list

def fix_sigma(sigma):
    # Replace nans in the covariance matrix with zeros
    def _fixit(x):
        if np.isnan(x):
            return 0.0
        else:
            return x
    sigma = [[_fixit(x) for x in sigma_row] for sigma_row in sigma]
    sigma = np.array(sigma)
    return sigma

def addem(weights):
    ## Used for constraints: weights must sum to 1.0
    return 1.0 - sum(weights)

def neg_SR(weights, sigma, mus):
    ## Returns minus the Sharpe Ratio (as we're minimising)
    estreturn = float(np.dot(weights, mus))
    std_dev = variance(weights, sigma)**.5
    return -estreturn / std_dev

def variance(weights, sigma):
    ## Returns the variance (NOT standard deviation) given weights and sigma
    weights = np.array(weights)
    return float(np.dot(weights, np.dot(sigma, weights)))

def un_fix_weights(mean_list, weights):
    ## Restore nan weights for assets whose mean was flagged as bad
    def _unfixit(xmean, xweight):
        if xmean == FLAG_BAD_RETURN:
            return np.nan
        else:
            return xweight
    fixed_weights = [_unfixit(xmean, xweight)
                     for (xmean, xweight) in zip(mean_list, weights)]
    return fixed_weights

def optimise(sigma, mean_list):
    ## will replace nans with big negatives
    mean_list = fix_mus(mean_list)
    ## replaces nans with zeros
    sigma = fix_sigma(sigma)
    mus = np.array(mean_list, ndmin=2).transpose()
    number_assets = sigma.shape[1]
    start_weights = [1.0 / number_assets] * number_assets
    ## Constraints - positive weights, adding up to 1.0
    bounds = [(0.0, 1.0)] * number_assets
    cdict = [{'type': 'eq', 'fun': addem}]
    ans = minimize(neg_SR, start_weights, (sigma, mus), method='SLSQP',
                   bounds=bounds, constraints=cdict, tol=0.00001)
    ## anything that had a nan will now have a zero weight
    weights = ans['x']
    ## put back the nans
    weights = un_fix_weights(mean_list, weights)
    return weights

def sigma_from_corr_and_std(stdev_list, corrmatrix):
    # Covariance matrix: sigma_ij = s_i * c_ij * s_j
    stdev = np.array(stdev_list, ndmin=2).transpose()
    sigma = stdev * corrmatrix * stdev.transpose()
    return sigma

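# A minimal sketch (made-up means/vols, correlation 0.5): the optimiser
# should tilt towards the higher-Sharpe first asset, weights summing to one.
def _demo_optimise():
    corrmatrix = np.array([[1.0, 0.5], [0.5, 1.0]])
    sigma = sigma_from_corr_and_std([0.2, 0.2], corrmatrix)
    return optimise(sigma, [0.1, 0.05])
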
def markosolver(period_subset_data):
    # Single-period Markowitz: estimate means, correlations and vols,
    # equalise the vols, then maximise the Sharpe ratio
    mean_list = mean_estimator(period_subset_data)
    corrmatrix = correlation_single_period(period_subset_data)
    stdev_list = vol_estimator(period_subset_data)
    (mean_list, stdev_list) = vol_equaliser(mean_list, stdev_list)
    sigma = sigma_from_corr_and_std(stdev_list, corrmatrix)
    unclean_weights = optimise(sigma, mean_list)
    weights = unclean_weights
    diag = dict(raw=(mean_list, stdev_list), sigma=sigma, mean_list=mean_list,
                unclean=unclean_weights, weights=weights)
    return (weights, diag)

def bootstrap_portfolio(subset_data, monte_runs=100, bootstrap_length=50):
    all_results = [bs_one_time(subset_data, bootstrap_length)
                   for unused_index in range(monte_runs)]
    ### We can take an average here, only because our weights always add
    ### up to 1. If that isn't true then you will need some kind
    ### of renormalisation
    weightlist = np.array([x[0] for x in all_results], ndmin=2)
    diaglist = [x[1] for x in all_results]
    theweights_mean = list(np.mean(weightlist, axis=0))
    diag = dict(bootstraps=diaglist)
    return (theweights_mean, diag)

def bs_one_time(subset_data, bootstrap_length):
    ## choose the data: sample rows with replacement
    bs_idx = [int(random.uniform(0, 1) * len(subset_data))
              for notUsed in range(bootstrap_length)]
    returns = subset_data.iloc[bs_idx, :]
    (weights, diag) = markosolver(returns)
    return (weights, diag)

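# A minimal sketch (synthetic return panel; small run counts keep it fast):
# average bootstrapped weights for three assets with a small positive drift.
def _demo_bootstrap_portfolio():
    np.random.seed(4)
    idx = pd.date_range("2010-01-01", periods=250, freq="B")
    rets = pd.DataFrame(np.random.randn(250, 3) * 0.01 + 0.0002,
                        index=idx, columns=["A", "B", "C"])
    return bootstrap_portfolio(rets, monte_runs=10, bootstrap_length=50)
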
def robust_vol_calc(x, days=35, min_periods=10, vol_abs_min=0.0000000001,
                    vol_floor=True, floor_min_quant=0.05,
                    floor_min_periods=100, floor_days=500):
    """
    Robust exponential volatility calculation, assuming a daily series of prices.

    We apply an absolute minimum level of vol (vol_abs_min),
    and a vol floor based on the lowest vol over recent history.

    :param x: data
    :type x: Tx1 pd.Series

    :param days: Number of days in lookback (*default* 35)
    :type days: int

    :param min_periods: The minimum number of observations (*default* 10)
    :type min_periods: int

    :param vol_abs_min: The size of the absolute minimum (*default* 0.0000000001); 0.0 = not used
    :type vol_abs_min: float

    :param vol_floor: Apply a floor to volatility (*default* True)
    :type vol_floor: bool

    :param floor_min_quant: The quantile to use for the volatility floor (e.g. 0.05 means we use 5% vol) (*default* 0.05)
    :type floor_min_quant: float

    :param floor_days: The lookback for calculating the volatility floor, in days (*default* 500)
    :type floor_days: int

    :param floor_min_periods: Minimum observations for the floor - until reached the floor is zero (*default* 100)
    :type floor_min_periods: int

    :returns: pd.Series -- volatility measure
    """
    # Standard deviation will be nan for the first 10 non-nan values
    vol = x.ewm(span=days, min_periods=min_periods).std()
    vol[vol < vol_abs_min] = vol_abs_min
    if vol_floor:
        # Find the rolling 5% quantile point to set as a minimum
        vol_min = vol.rolling(floor_days,
                              min_periods=floor_min_periods).quantile(floor_min_quant)
        # Set this to zero for the first value then propagate forward; ensures
        # we always have a value
        vol_min.iloc[0] = 0.0
        vol_min = vol_min.ffill()
        # apply the vol floor
        vol_with_min = pd.concat([vol, vol_min], axis=1)
        vol_floored = vol_with_min.max(axis=1, skipna=False)
    else:
        vol_floored = vol
    return vol_floored

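# A minimal sketch (random walk, arbitrary seed): daily price-difference vol
# with the absolute minimum and the rolling 5% floor applied.
def _demo_robust_vol_calc():
    np.random.seed(5)
    idx = pd.date_range("2010-01-01", periods=1000, freq="B")
    price = pd.Series(100.0 + np.cumsum(np.random.randn(1000)), index=idx)
    return robust_vol_calc(price.diff())
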
def ewmac(price, Lfast, Lslow):
    # EWMA crossover on business-day resampled prices, scaled by volatility
    price = price.resample("1B").last()
    fast_ewma = price.ewm(span=Lfast).mean()
    slow_ewma = price.ewm(span=Lslow).mean()
    raw_ewmac = fast_ewma - slow_ewma
    return raw_ewmac / robust_vol_calc(price.diff())

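# A minimal sketch (same synthetic style; the 16/64 spans are illustrative):
# an EWMA crossover forecast on a random-walk price.
def _demo_ewmac():
    np.random.seed(6)
    idx = pd.date_range("2010-01-01", periods=500, freq="B")
    price = pd.Series(100.0 + np.cumsum(np.random.randn(500)), index=idx)
    return ewmac(price, Lfast=16, Lslow=64)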