
/R/data.r

https://github.com/systematicinvestor/SIT
###############################################################################
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
###############################################################################
# Collection of routines to work with data
# Copyright (C) 2011 Michael Kapler
#
# For more information please visit my blog at www.SystematicInvestor.wordpress.com
# or drop me a line at TheSystematicInvestor at gmail
###############################################################################
find.tokens <- function
(
    txt,           # source text
    marker,        # key-phrase(s) to find
    pos = 1,       # position to start searching at
    pos.start = T
)
{
    # find location of data
    marker = spl(marker)

    for(i in 1:len(marker)) {
        if( pos < 2 )
            pos1 = regexpr(marker[i], txt)
        else
            pos1 = regexpr(marker[i], substr(txt, pos, nchar(txt)))

        if( pos1 < 0 )
            return(pos1)
        else {
            if( pos < 2 ) pos = pos1
            else pos = pos1 + pos - 1
        }

        if( !pos.start ) pos = pos + attr(pos1, 'match.length')
    }

    return(pos)
}
extract.token <- function
(
    txt,            # source text
    smarker,        # start key-phrase(s) to find
    emarker,        # end key-phrase(s) to find
    pos = 1,        # position to start searching at
    keep.marker = F
)
{
    pos1 = 1
    if (nchar(smarker) > 0)
        pos1 = find.tokens(txt, smarker, pos, pos.start = keep.marker)
    if( pos1 < 0 ) return("")

    pos2 = nchar(txt)
    if (nchar(emarker) > 0)
        pos2 = find.tokens(txt, emarker, pos1, pos.start = !keep.marker) - 1
    if( pos2 < 0 ) return("")

    return(substr(txt, pos1, pos2))
}
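###############################################################################
# Illustrative usage sketch (not part of the original source): extract the
# text between a start and an end marker.
#
# extract.token('Name: IBM; Price: 100', 'Name:', ';')  # returns " IBM"
###############################################################################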
remove.tags <- function
(
    temp # source text
)
{
    # remove all formatting
    temp = gsub(pattern = '<.*?>', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '\r', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '\n', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '\t', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '&nbsp;', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '&amp;', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '&raquo;', replacement = '', temp, perl = TRUE)
    temp = gsub(pattern = '&#37;', replacement = '%', temp, perl = TRUE)

    return(temp)
}
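###############################################################################
# Illustrative usage sketch (not part of the original source): strip HTML
# tags and entities from a cell of scraped text.
#
# remove.tags('<td>IBM&nbsp;123</td>')  # returns "IBM123"
###############################################################################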
###############################################################################
# extract.table.from.webpage
#' @export
###############################################################################
extract.table.from.webpage <- function
(
    txt,         # source text of webpage
    marker,      # key-phrase(s) located in the table to extract
    hasHeader=T  # flag if table has a header
)
{
    tryCatch({
        # find location of data
        marker = spl(marker)
        pos1 = 1
        for(i in 1:len(marker)) {
            pos1 = regexpr(marker[i], substr(txt, pos1, nchar(txt))) + pos1
        }

        # find start/end of table
        pos0 = tail(gregexpr('<table', substr(txt, 1, pos1))[[1]], 1)
        pos2 = head(gregexpr('</table', substr(txt, pos1, nchar(txt)))[[1]], 1)
        temp = substr(txt, pos0, pos1 + pos2 - 2)

        # remove all formatting
        temp = gsub(pattern = '<br>', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '</tr>', replacement = ';row;', temp, perl = TRUE)
        temp = gsub(pattern = '</td>', replacement = ';col;', temp, perl = TRUE)
        temp = gsub(pattern = '</th>', replacement = ';col;', temp, perl = TRUE)
        temp = gsub(pattern = '<.*?>', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '\r', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '\n', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '\t', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '&nbsp;', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '&amp;', replacement = '', temp, perl = TRUE)
        temp = gsub(pattern = '&raquo;', replacement = '', temp, perl = TRUE)

        # parse into matrix
        temp = lapply( strsplit(temp, ';row;'), strsplit, ';col;')
        n = max( sapply(temp[[1]], function(x) len(x)) )
        temp = t( sapply(temp[[1]], function(x) x[1:n]) )

        if(hasHeader) {
            colnames(temp) = temp[(hasHeader + 0), ]
            temp = temp[-c(1:(hasHeader + 0)), , drop=F]
        }
    }, error = function(ex) {
        temp <<- txt
    }, finally = {
        return(temp)
    })
}
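###############################################################################
# Illustrative usage sketch (not part of the original source): parse a
# minimal inline HTML table; the marker can be any phrase inside the table.
#
# txt = '<table><tr><th>Symbol</th><th>Price</th></tr><tr><td>IBM</td><td>100</td></tr></table>'
# extract.table.from.webpage(txt, 'Symbol', hasHeader = T)
# #      Symbol Price
# #      "IBM"  "100"
###############################################################################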
###############################################################################
# Test for extract.table.from.webpage function
###############################################################################
extract.table.from.webpage.test <- function()
{
    load.packages('quantmod')

    Symbol = 'IBM'

    # download Key Statistics from yahoo
    url = paste('http://finance.yahoo.com/q/ks?s=', Symbol, sep = '')
    txt = join(readLines(url))

    # extract Valuation Measures table from this page
    temp = extract.table.from.webpage(txt, 'Market Cap', hasHeader = F)
    temp = rbind(c('', Symbol), temp) # add header row

    # download IBM price history from Yahoo
    data = getSymbols(Symbol, from = '1980-01-01', auto.assign = FALSE)

    # prepare IBM data for 2010:2011 and compute 50-day moving average
    y = data['2010::2011']
    sma50 = SMA(Cl(y), 50)

    png(filename = 'plot1.png', width = 500, height = 500, units = 'px', pointsize = 12, bg = 'white')

    # plot candles, volume, and the table
    layout(c(1,1,2,3,3))
    plota(y, type = 'candle', main = Symbol, plotX = F)
    plota.lines(sma50, col='blue')
    plota.legend(c(Symbol,'SMA 50'), 'green,blue', list(y,sma50))

    y = plota.scale.volume(y)
    plota(y, type = 'volume')

    plot.table(temp)

    dev.off()
}
###############################################################################
# Pricing Zero Coupon Bond (i.e. yield to price)
# http://thinkanddone.com/finance/valuation-of-zero-coupon-bonds.html
#' @export
###############################################################################
PricingZeroCouponBond <- function
(
    yield,
    timetomaturity,
    parvalue = 100
)
{
    parvalue / ( 1 + yield ) ^ timetomaturity
}
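###############################################################################
# Worked example (not part of the original source): a 10-year zero-coupon
# bond with a 5% yield and $100 par is priced at 100 / 1.05^10.
#
# PricingZeroCouponBond(0.05, 10)  # 61.39
###############################################################################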
###############################################################################
# Convert Historical TBills rates to Total Returns
# http://timelyportfolio.blogspot.com/2011/04/historical-sources-of-bond-returns_17.html
# http://timelyportfolio.blogspot.ca/2012/11/cashopportunity-lost-or-opportunity.html
#' @export
###############################################################################
processTBill <- function
(
    yields,
    timetomaturity = 1/4,
    frequency = 365
)
{
    yield = coredata(yields) / 100

    # price return
    pr = sapply( yield, function(x) PricingZeroCouponBond(x, timetomaturity) )
    pr = ROC(pr, type='discrete')
    pr[1] = 0

    # interest return
    ir = (1 + mlag(yield, nlag=1))^(1 / frequency) - 1
    #ir = mlag(yield, nlag=1) / frequency
    ir[1] = 0

    # total return
    tr = pr + ir

    #out = as.xts( cbind(pr, ir, tr), index(yields) )
    # colnames(out) = spl('PR,IR,TR')

    close.price = cumprod(1 + pr)
    adjusted.price = cumprod(1 + tr)

    out = as.xts( cbind(close.price, adjusted.price), index(yields) )
    colnames(out) = spl('Close,Adjusted')

    return(out)
}
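###############################################################################
# Illustrative sketch (not part of the original source): the total return of
# a T-bill series is approximated as the price return from revaluing a
# zero-coupon bond at each new yield, plus the interest return from accruing
# at the prior yield. Sample input below is made up.
#
# yields = make.xts(c(5, 5.1, 4.9), as.Date('2020-01-01') + 0:2)  # in percent
# processTBill(yields, timetomaturity = 1/4, frequency = 365)
###############################################################################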
processTBill.test <- function()
{
    #*****************************************************************
    # Get 1-year T-bill
    #******************************************************************
    quantmod::getSymbols("GS1", src = "FRED")
    ir = (1 + mlag(GS1) / 100) ^ (1/12) - 1
    ir[1] = 0

    out = processTBill(GS1, timetomaturity = 1, frequency = 12)

    plota(cumprod(1 + ir), type='l', log = 'y')
    plota.lines(Ad(out), type='l', col='red')

    #*****************************************************************
    # Get 3-year T-bill
    #******************************************************************
    SHY = getSymbols('SHY', src='yahoo', auto.assign = FALSE)

    tbill.m = quantmod::getSymbols('GS3', src='FRED', auto.assign = FALSE)
    tbill.d = quantmod::getSymbols('DGS3', src='FRED', auto.assign = FALSE)
    timetomaturity = 3

    compute.raw.annual.factor(tbill.d)
    compute.raw.annual.factor(tbill.m)

    # compute returns
    tbill.m = processTBill(tbill.m, timetomaturity = timetomaturity, frequency = 12)
    #index(tbill.m) = as.Date(paste('1/', format(index(tbill.m), '%m/%Y'), sep=''), '%d/%m/%Y')

    tbill.d[] = ifna.prev(tbill.d)
    tbill.d = processTBill(tbill.d, timetomaturity = timetomaturity, frequency = 261)

    # scale to start at 1
    dates = '2003::'
    tbill.m = tbill.m[dates,2]
    tbill.m = tbill.m / as.double(tbill.m[1])
    tbill.d = tbill.d[dates,2]
    tbill.d = tbill.d / as.double(tbill.d[1])
    SHY = Ad(SHY[dates,])
    SHY = SHY / as.double(SHY[1])

    # plot
    plota(tbill.d, type='l')
    plota.lines(tbill.m, type='s', col='blue')
    plota.lines(SHY, type='l', col='red')
    plota.legend('Daily 3YR T-Bills,Monthly 3YR T-Bills,SHY', 'black,blue,red')
}
###############################################################################
# Load CRB Commodities Index
# http://www.jefferies.com/cositemgr.pl/html/ProductsServices/SalesTrading/Commodities/ReutersJefferiesCRB/IndexData/index.shtml
###############################################################################
# ... parameters for read.xls function
# i.e. CRB = get.CRB(perl = 'c:/perl/bin/perl.exe')
#
# This URL no longer works; for an updated example please see
# bt.extend.DBC.update.test in bt.test.r
###############################################################################
get.CRB <- function(...)
{
    load.packages('gtools,gdata')

    #http://www.jefferies.com/html/ProductsServices/SalesTrading/Commodities/scripts/genExcel.pl?Index=RJCRB_Excess&StartDate=19940103&EndDate=20111202
    url = paste('http://www.jefferies.com/html/ProductsServices/SalesTrading/Commodities/scripts/genExcel.pl?Index=RJCRB_Total&StartDate=19940101&EndDate=', format(Sys.Date(), '%Y%m%d'), sep='')
    temp = read.xls(url, ...)

    temp = as.matrix(temp[-c(1:7),])

    out = repmat(as.double(temp[,2]), 1, 6)
    colnames(out) = spl('Open,High,Low,Close,Volume,Adjusted')
    out[, 'Volume'] = 0

    #out = make.xts( out, as.Date(temp[,1], '%m/%d/%y'))
    out = make.xts( out, as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%m/%d/%y'))
    indexClass(out) = 'Date'

    return(out)
}
get.CRB.test <- function()
{
    #*****************************************************************
    # Load historical data
    #******************************************************************
    CRB = get.CRB()

    load.packages('quantmod')
    # http://etfdb.com/
    tickers = spl('GSG,DBC')
    getSymbols(tickers, src = 'yahoo', from = '1970-01-01')

    #*****************************************************************
    # Compare different indexes
    #******************************************************************
    out = na.omit(merge(Cl(CRB), Cl(GSG), Cl(DBC)))
    colnames(out) = spl('CRB,GSG,DBC')
    temp = out / t(repmat(as.vector(out[1,]), 1, nrow(out)))

    layout(1:2)
    plota(temp, ylim=range(temp))
    plota.lines(temp[,1], col=1)
    plota.lines(temp[,2], col=2)
    plota.lines(temp[,3], col=3)
    plota.legend(colnames(temp), 1:3)

    temp = cor(temp / mlag(temp) - 1, use='complete.obs', method='pearson')
    temp[] = plota.format(100 * temp, 0, '', '%')
    plot.table(temp)

    layout(1:3)
    plota.matplot(CRB[,c('Close','Adjusted')])
    plota.matplot(DBC[,c('DBC.Close','DBC.Adjusted')])
    plota.matplot(GSG[,c('GSG.Close','GSG.Adjusted')])

    layout(1)
    comm = extend.data(DBC, CRB, scale=T)
    plota(comm, type='l', col=1)
    plota.lines(CRB*0.078, type='l', lwd=5, col=col.add.alpha(2,150))
    plota.lines(DBC, type='l', lwd=5, col=col.add.alpha(3,150))
    plota.lines(comm, type='l', col=1)
    plota.legend('comm,CRB,DBC', 1:3, list(comm,CRB,DBC))
}
###############################################################################
# Get Dow Jones Components
# http://finance.yahoo.com/q/cp?s=^DJI+Components
#' @export
###############################################################################
dow.jones.components <- function()
{
    url = 'http://finance.yahoo.com/q/cp?s=^DJI+Components'
    txt = join(readLines(url))

    # extract table from this page
    temp = extract.table.from.webpage(txt, 'Volume', hasHeader = T)
    tickers = temp[, 'Symbol']

    return(tickers)
}
###############################################################################
# Get NASDAQ 100 Components
# http://www.nasdaq.com/markets/indices/nasdaq-100.aspx
#' @export
###############################################################################
nasdaq.100.components <- function()
{
    url = 'http://www.nasdaq.com/markets/indices/nasdaq-100.aspx'
    txt = join(readLines(url))

    # extract table from this page
    temp = extract.table.from.webpage(txt, 'Symbol', hasHeader = 2)
    tickers = temp[, 'Symbol']

    return(tickers)
}
###############################################################################
# Get Sector SPDR Components
# http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportCsv?symbol=XLE
# tickers = spl('XLY,XLP,XLE,XLF,XLV,XLI,XLB,XLK,XLU')
# tickers.desc = spl('ConsumerCyclicals,ConsumerStaples,Energy,Financials,HealthCare,Industrials,Materials,Technology,Utilities')
#' @export
###############################################################################
sector.spdr.components <- function(sector.etf = 'XLE')
{
    url = paste('http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportCsv?symbol=', sector.etf, sep='')

    # extract table from this page
    temp = read.csv(url, skip=1, header=TRUE, stringsAsFactors=F)
    tickers = temp[, 'Symbol']

    return(tickers)
}
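###############################################################################
# Illustrative usage sketch (not part of the original source): pull the
# holdings of the Energy Select Sector SPDR; the CSV export is assumed to
# contain a 'Symbol' column as above.
#
# energy.tickers = sector.spdr.components('XLE')
###############################################################################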
###############################################################################
# S&P 500 Components
# http://en.wikipedia.org/wiki/List_of_S%26P_500_companies
#' @export
###############################################################################
sp500.components <- function()
{
    url = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    txt = join(readLines(url))

    # extract table from this page
    temp = extract.table.from.webpage(txt, 'Ticker', hasHeader = T)
    tickers = temp[, 'Ticker symbol']
    sector = temp[, 'GICS Sector']

    return(list(tickers=tickers, sector=sector))
}

# List of sites that keep S&P 500 Components
# http://www.s-p-500.com/stocks-a-b/
# http://www.forexpros.com/indices/us-spx-500-components
# http://marketvolume.com/indexes_exchanges/sp500_components.asp
# http://en.wikipedia.org/wiki/List_of_S%26P_500_companies
# http://en.wikipedia.org/wiki/Dow_Jones_Index
###############################################################################
# S&P 100 Components
# http://www.barchart.com/stocks/sp100.php
#' @export
###############################################################################
sp100.components <- function()
{
    url = 'http://www.barchart.com/stocks/sp100.php'
    txt = join(readLines(url))

    # extract table from this page
    temp = extract.table.from.webpage(txt, 'Components', hasHeader = T)

    i.start = grep('Name', temp[,2])
    tickers = trim(temp[-c(1:i.start), 1])

    return(tickers)
}
###############################################################################
# iShares FTSE 100 (ISF)
# http://uk.ishares.com/en/rc/products/ISF/all-holdings/
# http://www.londonstockexchange.com/exchange/prices-and-markets/stocks/indices/constituents-indices.html?index=UKX
# Yahoo ticker for UK stocks ABF.L
#' @export
###############################################################################
ftse100.components <- function()
{
    # get holdings from uk.ishares.com
    url = 'http://uk.ishares.com/en/rc/products/ISF/all-holdings/'
    txt = join(readLines(url))

    # extract table from this page
    txt = gsub('&#37;', '%', txt)
    temp = extract.table.from.webpage(txt, 'Security', hasHeader = T)
    temp = trim(temp)
    colnames(temp) = temp[1,]
    temp = temp[-1,]
    holdings = temp

    # get ISIN-to-ticker map from www.londonstockexchange.com
    page.label = ''
    ticker2ISIN = c()
    for(i in 1:100) {
        cat(i, '\n')

        # download
        url = paste('http://www.londonstockexchange.com/exchange/prices-and-markets/stocks/indices/constituents-indices.html?index=UKX&page=', i, sep='')
        txt = join(readLines(url))

        # get page label
        pos = regexpr('Page [0-9]+ of [0-9]+', txt, ignore.case = T)
        page.label.new = substr(txt, pos, pos + attr(pos, 'match.length') - 1)
        if(page.label == page.label.new) break
        page.label = page.label.new

        # extract table
        temp.table = extract.table.from.webpage(txt, 'Price', hasHeader = T)
        colnames(temp.table)[1] = 'ticker'

        # extract links
        temp = gsub(pattern = '<a', replacement = '<td>', txt, perl = TRUE)
        temp = gsub(pattern = '</a>', replacement = '</td>', temp, perl = TRUE)

        temp = extract.table.from.webpage(temp, 'Price', hasHeader = T)
        pos = regexpr('fourWayKey=', temp[,2])
        ISIN = as.vector(sapply(1:nrow(temp), function(j)
            substr(temp[j,2], pos[j] + attr(pos, 'match.length')[j], pos[j] + attr(pos, 'match.length')[j] + 12 - 1)
        ))

        ticker2ISIN = rbind(ticker2ISIN, cbind(temp.table[, spl('ticker,Name,Price'), drop=F], ISIN))
    }

    # match holdings to tickers by ISIN
    ISIN = intersect(holdings[,'ISIN'], ticker2ISIN[,'ISIN'])
    holdings = cbind(holdings[match(ISIN, holdings[,'ISIN']), ],
        ticker2ISIN[match(ISIN, ticker2ISIN[,'ISIN']), spl('ticker,Name,Price')])

    return(apply(holdings, 2, list))
}
###############################################################################
# Get iShares ETF holdings
# http://us.ishares.com/product_info/fund/holdings/DVY.htm
# us.ishares.components(date='2008-02-01')
#' @export
###############################################################################
us.ishares.components <- function(Symbol = 'DVY', date = NULL, debug = F)
{
    url = paste('http://us.ishares.com/product_info/fund/holdings/', Symbol, '.htm?periodCd=d', sep='')
    if( !is.null(date) )
        url = paste('http://us.ishares.com/product_info/fund/holdings/', Symbol, '.htm?asofDt=', date.end(date), '&periodCd=m', sep='')
    txt = join(readLines(url))

    # extract date from this page
    temp = remove.tags(extract.token(txt, 'Holdings Detail', 'Holdings subject to change'))
    date = as.Date(spl(trim(temp), ' ')[3], '%m/%d/%Y')

    # extract table from this page
    temp = extract.table.from.webpage(txt, 'Symbol', hasHeader = T)

    colnames(temp) = trim(colnames(temp))
    temp = trim(temp)

    tickers = temp[, 'Symbol']
    keep.index = nchar(tickers) > 1
    weights = as.double(temp[keep.index, '% Net Assets']) / 100
    tickers = tickers[keep.index]

    out = list(tickers = tickers, weights = weights, date = date)
    if(debug) out$txt = txt
    out
}
###############################################################################
# Get Google search results:
# https://gist.github.com/Daapii/7281439
# --- explanation of the parameters in the query ---
#
# ie = input encoding
# oe = output encoding
# q = query (our search term)
# num = number of search results displayed at a time
# gws_rd=cr = redirects you to your country's version of google (required if you're not in the US)
# url encode our query
# query = "https://encrypted.google.com/search?ie=utf-8&oe=utf-8&q={0}&num=100&gws_rd=cr".format(query)
# google.search("r project")
#' @export
###############################################################################
google.search <- function
(
    query
)
{
    url = paste("http://google.com/search?ie=utf-8&oe=utf-8&q=", URLencode(query), "&num=10&gws_rd=cr", sep='')
    txt = join(readLines(url))

    tokens = spl(txt, '<li class="g">')
    if(len(tokens) < 2) return(NULL)

    records = matrix('', nrow=len(tokens)-1, nc=2)
    colnames(records) = c('label','url')
    for(i in 2:len(tokens)) {
        token = tokens[i]
        token = extract.token(token, '<a href=', '</a>', keep.marker = T)
        url = extract.token(token, 'url\\?q=', '&amp;sa=U&amp;')
        label = remove.tags(token)
        records[i-1,] = c(label, url)
    }

    return(records)
}
###############################################################################
# Get the latest prices from Google Finance:
# http://digitalpbk.com/stock/google-finance-get-stock-quote-realtime
# http://finance.google.com/finance/info?client=ig&q=MSFT,AAPL,NYSE:RY
#' @export
###############################################################################
#getQuote.google(spl('MSFT,AAPL,IBM'))
getQuote.google <- function(tickers) {
    url = paste('http://finance.google.com/finance/info?client=ig&q=', join(tickers, ','), sep='')
    txt = join(readLines(url))
    temp = gsub(':', ',', txt)
    temp = scan(text = temp, what='', sep=',', quiet=T)
    temp = matrix(trim(temp), nr=len(temp)/len(tickers), byrow=F)

    index = match(spl('t,l,lt'), tolower(temp[,1])) + 1
    names(index) = spl('ticker,last,date')

    last = as.double(temp[index['last'],])
    date = strptime(temp[index['date'],], format=' %b %d, %H,%M')

    out = data.frame(last, date)
    rownames(out) = temp[index['ticker'],]
    out
}
# an xml alternative
# http://www.jarloo.com/google-stock-api/
# http://www.google.com/ig/api?stock=AAPL&stock=GOOG
#getQuote.google.xml(spl('MSFT,AAPL,NYSE:RY'))
#' @export
getQuote.google.xml <- function(tickers) {
    url = paste('http://www.google.com/ig/api?', paste('stock=', tickers, '&', sep='', collapse=''), sep='')
    txt = join(readLines(url))

    temp = txt
    temp = gsub('<finance.*?>', '', temp, perl = TRUE)
    temp = gsub('</finance>', '', temp, perl = TRUE)
    temp = gsub('<xml.*?>', '', temp, perl = TRUE)
    temp = gsub('</xml.*?>', '', temp, perl = TRUE)
    temp = gsub('<\\?xml.*?>', '', temp, perl = TRUE)
    temp = gsub('data=', '', temp, perl = TRUE)
    temp = gsub('/><', ' ', temp)
    temp = gsub('>', '', temp)
    temp = gsub('<', '', temp)
    temp = scan(text = temp, what='', sep=' ', quiet=T)
    temp = matrix(trim(temp), nr=len(temp)/len(tickers), byrow=F)

    cnames = spl('trade_date_utc,trade_time_utc,symbol,last,high,low,volume,open,avg_volume,market_cap,y_close')
    index = match(cnames, tolower(temp[,1])) + 1
    names(index) = cnames

    date = strptime(paste(temp[index['trade_date_utc'],], temp[index['trade_time_utc'],]), format='%Y%m%d %H%M%S', tz='UTC')
    date = as.POSIXct(date, tz = Sys.getenv('TZ'))

    out = data.frame(t(temp[index[-c(1:3)],]))
    colnames(out) = cnames[-c(1:3)]
    rownames(out) = temp[index['symbol'],]
    out
}
###############################################################################
# extend GLD and SLV historical prices with data from KITCO
# http://wikiposit.org/w?filter=Finance/Commodities/
# http://www.hardassetsinvestor.com/interviews/2091-golds-paper-price.html
#' @export
###############################################################################
extend.GLD <- function(GLD) {
    #extend.data(GLD, KITCO.data('Gold.PM') / 10)
    # data$GLD = extend.GLD(data$GLD)
    extend.data(GLD, bundes.bank.data.gold(), scale=T)
}

#' @export
extend.SLV <- function(SLV) {
    extend.data(SLV, KITCO.data('Silver'))
}

#' @export
KITCO.data <- function
(
    symbol = spl('Gold.AM,Gold.PM,Silver,Platinum.AM,Platinum.PM,Palladium.AM,Palladium.PM')
)
{
    url = 'http://wikiposit.org/w?action=dl&dltypes=comma%20separated&sp=daily&uid=KITCO'
    temp = read.csv(url, skip=4, header=TRUE, stringsAsFactors=F)

    #hist = make.xts(as.double(temp[,symbol]), as.Date(temp[,1], '%d-%b-%Y'))
    hist = make.xts(as.double(temp[,symbol]), as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%d-%b-%Y'))
    indexClass(hist) = 'Date'
    colnames(hist) = 'Close'

    return( hist[!is.na(hist)] )
}
# gold = extend.GLD(data$GLD)
# comm = extend.data(data$DBC, get.CRB(), scale=T)
#' @export
extend.data <- function
(
    current,
    hist,
    scale = F
)
{
    colnames(current) = sapply(colnames(current), function(x) last(spl(x, '\\.')))
    colnames(hist) = sapply(colnames(hist), function(x) last(spl(x, '\\.')))

    # find Close in hist
    close.index = find.names('Close', colnames(hist))$Close
    if(is.na(close.index)) close.index = 1
    adjusted.index = find.names('Adjusted', colnames(hist))$Adjusted
    if(is.na(adjusted.index)) adjusted.index = close.index

    if(scale) {
        # find first common observation in current and hist series
        common = merge(Cl(current), hist[,close.index], join='inner')

        scale = as.numeric(common[1,1]) / as.numeric(common[1,2])

        if( close.index == adjusted.index )
            hist = hist * scale
        else {
            hist[,-adjusted.index] = hist[,-adjusted.index] * scale

            common = merge(Ad(current), hist[,adjusted.index], join='inner')
            scale = as.numeric(common[1,1]) / as.numeric(common[1,2])
            hist[,adjusted.index] = hist[,adjusted.index] * scale
        }
    }

    # subset history before current
    hist = hist[format(index(current[1]) - 1, '::%Y:%m:%d'), , drop=F]

    if( ncol(hist) != ncol(current) )
        hist = make.xts( rep.col(hist[,close.index], ncol(current)), index(hist))
    else
        hist = hist[, colnames(current)]
    colnames(hist) = colnames(current)

    rbind( hist, current )
}
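###############################################################################
# Illustrative sketch of the scaling logic above (not part of the original
# source): with scale = T the history is multiplied by current / hist at the
# first common date, so the spliced series has no jump at the splice point.
# For example, GLD trades at roughly 1/10 of the gold fixing, so the fixing
# history is scaled by about 0.1 before being prepended.
#
# gold = extend.data(data$GLD, bundes.bank.data.gold(), scale = T)
###############################################################################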
###############################################################################
# Bundesbank - long history of gold prices
# http://www.bundesbank.de/Navigation/EN/Statistics/Time_series_databases/Macro_economic_time_series/its_list_node.html?listId=www_s331_b01015_3
# http://wikiposit.org/w?filter=Finance/Commodities/
#' @export
###############################################################################
bundes.bank.data <- function(symbol) {
    url = paste('http://www.bundesbank.de/cae/servlet/CsvDownload?tsId=', symbol, '&its_csvFormat=en&mode=its', sep='')
    temp = read.csv(url, skip=5, header=F, stringsAsFactors=F)

    #hist = make.xts(as.double(temp[,2]), as.Date(temp[,1], '%Y-%m-%d'))
    hist = make.xts(as.double(temp[,2]), as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%Y-%m-%d'))
    indexClass(hist) = 'Date'
    colnames(hist) = 'Close'

    return( hist[!is.na(hist)] )
}

#' @export
bundes.bank.data.gold <- function() {
    bundes.bank.data('BBEX3.D.XAU.USD.EA.AC.C05')
}
###############################################################################
# Pacific Exchange Rate Service - FX rates
# Daily data is available for a maximum of 4 years per request
# http://fx.sauder.ubc.ca/data.html
# http://fx.sauder.ubc.ca/cgi/fxdata?b=USD&c=AUD&c=GBP&c=CAD&c=NOK&c=EUR&c=JPY&c=NZD&c=SEK&c=CHF&rd=&fd=1&fm=1&fy=2011&ld=31&lm=12&ly=2012&y=daily&q=volume&f=csv&o=
#
# Example
# base.cur = 'USD'
# target.curs = 'AUD,CAD,EUR'
# fx.data = rbind(fx.sauder.data(2000, 2003, base.cur, target.curs),
#     fx.sauder.data(2004, 2007, base.cur, target.curs),
#     fx.sauder.data(2008, 2011, base.cur, target.curs),
#     fx.sauder.data(2012, 2012, base.cur, target.curs))
#' @export
###############################################################################
fx.sauder.data <- function(start.year, end.year, base.cur, target.curs) {
    url = paste('http://fx.sauder.ubc.ca/cgi/fxdata?b=', base.cur, join(paste('&c=', spl(target.curs), sep='')), '&rd=&fd=1&fm=1&fy=', start.year, '&ld=31&lm=12&ly=', end.year, '&y=daily&q=volume&f=csv&o=', sep='')
    temp = read.csv(url, skip=1, header=T, stringsAsFactors=F)

    #hist = make.xts(as.matrix(temp[,-c(1:3)]), as.Date(temp[,2], '%Y/%m/%d'))
    hist = make.xts(as.matrix(temp[,-c(1:3)]), as.POSIXct(temp[,2], tz = Sys.getenv('TZ'), format='%Y/%m/%d'))
    indexClass(hist) = 'Date'
    colnames(hist) = gsub(paste('.', base.cur, sep=''), '', colnames(hist))

    return( hist[!is.na(hist[,1]),] )
}
###############################################################################
# Download historical prices from Pi Trading - Free Market Data
# http://pitrading.com/free_market_data.htm
#' @export
###############################################################################
getSymbols.PI <- function
(
    Symbols,
    env = .GlobalEnv,
    auto.assign = TRUE,
    download = TRUE
)
{
    # setup temp folder
    temp.folder = paste(getwd(), 'temp', sep='/')
    dir.create(temp.folder, F)

    # read all Symbols
    for (i in 1:len(Symbols)) {
        if(download) {
            # http://pitrading.com/free_eod_data/SPX.zip
            url = paste('http://pitrading.com/free_eod_data/', Symbols[i], '.zip', sep='')
            filename = paste(temp.folder, '/', Symbols[i], '.zip', sep='')
            download.file(url, filename, mode = 'wb')

            # unpack
            unzip(filename, exdir=temp.folder)
        }

        filename = paste(temp.folder, '/', Symbols[i], '.txt', sep='')

        temp = read.delim(filename, header=TRUE, sep=',')
        #out = make.xts(temp[,-1], as.Date(temp[,1],'%m/%d/%Y'))
        out = make.xts(temp[,-1], as.POSIXct(temp[,1], tz = Sys.getenv('TZ'), format='%m/%d/%Y'))
        indexClass(out) = 'Date'
        out$Adjusted = out$Close

        cat(i, 'out of', len(Symbols), 'Reading', Symbols[i], '\n', sep='\t')

        if (auto.assign) {
            assign(paste(gsub('\\^', '', Symbols[i]), sep='_'), out, env)
        }
    }

    if (!auto.assign) {
        return(out)
    } else {
        return(env)
    }
}
###############################################################################
# Download FX quotes: end of day and hourly
# http://www.fxhistoricaldata.com/EURUSD/
#' @export
###############################################################################
getSymbols.fxhistoricaldata <- function
(
    Symbols,
    type = spl('hour,day'),
    env = .GlobalEnv,
    auto.assign = TRUE,
    download = FALSE
)
{
    type = type[1]

    # setup temp folder
    temp.folder = paste(getwd(), 'temp', sep='/')
    dir.create(temp.folder, F)

    # read all Symbols
    for (i in 1:len(Symbols)) {
        if(download) {
            # http://www.fxhistoricaldata.com/download/EURUSD?t=hour
            url = paste('http://www.fxhistoricaldata.com/download/', Symbols[i], '?t=', type, sep='')
            filename = paste(temp.folder, '/', Symbols[i], '_', type, '.zip', sep='')
            download.file(url, filename, mode = 'wb')

            # unpack
            unzip(filename, exdir=temp.folder)
        }

        filename = paste(temp.folder, '/', Symbols[i], '_', type, '.csv', sep='')

        temp = read.delim(filename, header=TRUE, sep=',')
        colnames(temp) = gsub('[X\\.|\\.]', '', colnames(temp))

        out = make.xts(temp[, spl('OPEN,LOW,HIGH,CLOSE')],
            strptime(paste(temp$DATE, temp$TIME), format='%Y%m%d %H:%M:%S'))

        cat(i, 'out of', len(Symbols), 'Reading', Symbols[i], '\n', sep='\t')

        if (auto.assign) {
            assign(paste(gsub('\\^', '', Symbols[i]), type, sep='_'), out, env)
        }
    }

    if (!auto.assign) {
        return(out)
    } else {
        return(env)
    }
}
###############################################################################
# Download historical data for G10
# The PowerShares DB G10 Currency Harvest Fund
# http://www.invescopowershares.com/products/overview.aspx?ticker=DBV
#
# The G10 currency universe from which the Index selects currently includes
# U.S. dollars,
# euros,
# Japanese yen,
# Canadian dollars,
# Swiss francs,
# British pounds,
# Australian dollars,
# New Zealand dollars,
# Norwegian krone and
# Swedish krona
#' @export
###############################################################################
get.G10 <- function
(
    type = spl('currency')
)
{
    if( type[1] != 'currency') {
        cat('Warning:', type[1], 'is not yet implemented in the get.G10 function\n')
        return()
    }

    # FRED acronyms for daily FX rates
    map = '
        FX      FX.NAME
        DEXUSAL U.S./Australia
        DEXUSUK U.S./U.K.
        DEXCAUS Canada/U.S.
        DEXNOUS Norway/U.S.
        DEXUSEU U.S./Euro
        DEXJPUS Japan/U.S.
        DEXUSNZ U.S./NewZealand
        DEXSDUS Sweden/U.S.
        DEXSZUS Switzerland/U.S.
    '
    map = matrix(scan(text = map, what='', quiet=T), nc=2, byrow=T)
    colnames(map) = map[1,]
    map = data.frame(map[-1,], stringsAsFactors=F)

    # convert all quotes to be vs U.S.
    convert.index = grep('DEXUS', map$FX, value=T)

    #*****************************************************************
    # Load historical data
    #******************************************************************
    load.packages('quantmod')

    # load fx from fred
    data.fx <- new.env()
    quantmod::getSymbols(map$FX, src = 'FRED', from = '1970-01-01', env = data.fx, auto.assign = T)
    for(i in convert.index) data.fx[[i]] = 1 / data.fx[[i]]

    # extract fx where all currencies are available
    bt.prep(data.fx, align='remove.na')
    fx = bt.apply(data.fx, '[')

    return(fx)
}
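###############################################################################
# Illustrative usage sketch (not part of the original source). Note that all
# DEXUS* series (quoted as USD per unit of foreign currency) are inverted
# above, so every returned series is expressed as foreign currency per USD.
#
# fx = get.G10()
# plota.matplot(fx)
###############################################################################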
###############################################################################
# getSymbols interface to Trading Blox free futures and forex data
# http://www.tradingblox.com/tradingblox/free-historical-data.htm
# http://www.tradingblox.com/Data/DataOnly.zip
# Date, Open, High, Low, Close, Volume (zero for forex cash markets),
# Open Interest (futures only), Delivery Month ( YYYYMM futures only),
# Unadjusted Close (zero for forex cash markets)
#' @export
###############################################################################
getSymbols.TB <- function(
    env = .GlobalEnv,
    auto.assign = TRUE,
    download = FALSE,
    type = c('Both', 'Futures', 'Forex'),
    rm.index = 'PB', # remove Pork Bellies because they are no longer traded
    clean = FALSE
)
{
    # download zip archive
    if(download) {
        download.file('http://www.tradingblox.com/Data/DataOnly.zip', 'DataOnly.zip')
    }

    # setup temp folder
    temp.folder = paste(getwd(), 'temp', sep='/')
    dir.create(temp.folder, F)

    ##*****************************************************************
    ## Unzip
    ##******************************************************************
    temp.folder = paste(getwd(), '/', 'temp', sep='')

    # clean temp
    if(clean) shell('del /F /S /Q temp\\*.*', wait = TRUE)

    # unpack
    files = unzip('DataOnly.zip', exdir=temp.folder)

    # read definitions, based on the Financial Instrument Model Infrastructure for R package from http://r-forge.r-project.org/R/?group_id=316
    def1 = try(read.csv('http://www.tradingblox.com/tradingblox/CSIUA/FuturesInfo.txt', skip=1, header=FALSE, stringsAsFactors=F), TRUE)
    if(inherits(def1, 'try-error')) def1 = read.csv('FuturesInfo.txt', skip=1, header=FALSE, stringsAsFactors=F)
    def1 = def1[-match(rm.index, def1[,1]),]
    def1[,3] = 'Futures'

    def2 = try(read.csv('http://www.tradingblox.com/tradingblox/CSIUA/ForexInfo.txt', skip=1, header=FALSE, stringsAsFactors=F), TRUE)
    if(inherits(def2, 'try-error')) def2 = read.csv('ForexInfo.txt', skip=1, header=FALSE, stringsAsFactors=F)
    def2[,3] = 'Forex'

    def = rbind(def1[,1:4], def2[,1:4])
    if(type[1] == 'Futures') def = def1[,1:4]
    if(type[1] == 'Forex') def = def2[,1:4]

    # read all files
    for( i in 1:nrow(def) ) {
        symbol = def[i,1]

        filename = paste(temp.folder, '/', def[i,3], '/', def[i,4], sep='')
        if(file.exists(filename)) {
            fr <- read.csv(filename, header = FALSE)
            fr <- make.xts(fr[,-1], as.Date(as.character(fr[,1]), '%Y%m%d'))
            colnames(fr) <- spl('Open,High,Low,Close,Volume,OpenInterest,DeliveryMonth,Unadjusted')[1:ncol(fr)]
            fr$Adjusted = fr$Close
            if (auto.assign) assign(symbol, fr, env)
            cat(i, 'out of', nrow(def), 'Reading', symbol, format(index.xts(fr)[1],'%Y%m%d'), format(index.xts(fr)[nrow(fr)],'%Y%m%d'), '\n', sep='\t')
        } else {
            cat('\t\t\t Missing data for ', symbol, '\n')
        }
    }

    #*****************************************************************
    # Add symbolnames, symbol.descriptions, and symbol.groups
    #******************************************************************
    index = match(ls(env)[ na.omit(match(def[,1], ls(env))) ], def[,1])

    temp = def[index,1]
    names(temp) = def[index,1]
    env$symbolnames = temp

    temp = def[index,2]
    names(temp) = def[index,1]
    env$symbol.descriptions = temp

    temp = def[index,3]
    names(temp) = def[index,1]
    env$symbol.groups = temp

    #*****************************************************************
    # Process symbol descriptions to be more readable
    #******************************************************************
    names = trim(gsub(pattern = '\\(.*?\\)', replacement = '', env$symbol.descriptions, perl = TRUE))
    names = trim(gsub('-NYMEX', '', names, ignore.case = T))
    names = trim(gsub('-COMEX', '', names, ignore.case = T))
    names = trim(gsub('-CBT', '', names, ignore.case = T))
    names = trim(gsub('-CME-', '', names, ignore.case = T))
    names = trim(gsub('-CME', '', names, ignore.case = T))
    names = trim(gsub('-NYCE', '', names, ignore.case = T))
    names = trim(gsub('-Globex', '', names, ignore.case = T))
    names = trim(gsub('-FINEX', '', names, ignore.case = T))
    names = trim(gsub('-CSCE', '', names, ignore.case = T))
    names = trim(gsub(' w/Prj A', '', names, ignore.case = T))

    env$symbol.descriptions.print = names

    #*****************************************************************
    # Custom adjustments
    #******************************************************************
    data = env

    # fix DX time series - fixed by the Trading Blox
    # if(!is.null(data$DX)) data$DX['::2007:04:04', 'Unadjusted'] = coredata(data$DX['::2007:04:04']$Unadjusted * 10)

    #*****************************************************************
    # To compute returns and backtest, recreate each futures series:
    #
    #                      unadjusted-futures[t-1] + (back-adjusted-futures[t] - back-adjusted-futures[t-1])
    # futures-return[t] = ----------------------------------------------------------------------------------- - 1
    #                                               unadjusted-futures[t-1]
    #******************************************************************
    for(i in data$symbolnames[data$symbol.groups != 'Forex']) {
        # adjust spot for roll overs
        spot = as.vector(data[[i]]$Unadjusted)
        dspot = spot - mlag(spot)
        futures = as.vector(data[[i]]$Adjusted)
        dfutures = futures - mlag(futures)
        index = which(round(dspot - dfutures, 4) != 0 )

        spot.adjust.roll = spot
        spot.adjust.roll[(index-1)] = spot.adjust.roll[index] - dfutures[index]

        # compute returns
        reta = (mlag(spot.adjust.roll) + futures - mlag(futures)) / mlag(spot.adjust.roll)
        reta[1] = 1

        n = len(spot)
        new.series = cumprod(reta)
        data[[i]]$Close = spot[n] * new.series / new.series[n]
        data[[i]]$Adjusted = data[[i]]$Close
    }

    #*****************************************************************
    # Done
    #******************************************************************
    if (!auto.assign) {
        return(fr)
    } else {
        return(env)
    }
}
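###############################################################################
# Illustrative usage sketch (not part of the original source): load the
# Trading Blox futures universe into a fresh environment; download = TRUE
# fetches DataOnly.zip into the working directory first.
#
# data = new.env()
# getSymbols.TB(env = data, auto.assign = TRUE, download = TRUE, type = 'Futures')
# data$symbol.descriptions.print
###############################################################################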
###############################################################################
# Kenneth R. French - Data Library
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
###############################################################################
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors.zip
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_weekly.zip
# http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_daily.zip
#
# data2 = get.fama.french.data('F-F_Research_Data_Factors', periodicity = 'weeks', download = F, clean = F)
# data3 = get.fama.french.data('6_Portfolios_2x3', periodicity = 'days', download = F, clean = F)
#' @export
###############################################################################
get.fama.french.data <- function(
    name = c('F-F_Research_Data_Factors', 'F-F_Research_Data_Factors'),
    periodicity = c('days','weeks','months'),
    download = FALSE,
    clean = FALSE
)
{
    # map periodicity
    map = c('_daily', '_weekly', '')
    names(map) = c('days','weeks','months')

    # url
    period = ifna(map[periodicity[1]], periodicity[1])
    filename.zip = paste(name[1], period, '.zip', sep='')
    filename.txt = paste(name[1], period, '.txt', sep='')
    url = paste('http://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/', filename.zip, sep='')

    # download zip archive
    if(download) {
        download.file(url, filename.zip)
    }

    # setup temp folder
    temp.folder = paste(getwd(), 'temp', sep='/')
    dir.create(temp.folder, F)

    ##*****************************************************************
    ## Unzip
    ##******************************************************************
    temp.folder = paste(getwd(), '/', 'temp', sep='')

    # clean temp
    if(clean) shell('del /F /S /Q temp\\*.*', wait = TRUE)

    # unpack
    files = unzip(filename.zip, exdir=temp.folder)

    # read data
    filename = paste(temp.folder, '/', filename.txt, sep='')
    out = readLines(filename)
    index = which(nchar(out) == 0)

    data.index = grep('^[ 0-9\\.\\+-]+$', out)
    temp.index = which(diff(data.index) > 1)
    data.index = matrix(data.index[sort(c(1, temp.index, temp.index+1, len(data.index)))], nc=2, byrow=T)

    # extract sections
    data = list()
    for(i in 1:nrow(data.index)) {
        start.index = index[which( index > data.index[i,1] ) - 1][1] + 1
        if(is.na(start.index)) start.index = index[len(index)] + 1
        end.index = data.index[i,1] - 1
        n.index = end.index - start.index + 1

        # column names
        name = 'data'
        colnames = scan(text = out[start.index], what='', quiet=T)
        if(n.index == 2) {
            name = trim(out[start.index])
            colnames = scan(text = out[end.index], what='', quiet=T)
        } else if(n.index > 2) {
            name = trim(out[start.index])
            colnames0 = scan(text = out[(end.index-1)], what='', quiet=T)
            colnames1 = scan(text = out[end.index], what='', quiet=T)
            colnames = paste(rep(colnames0, each = len(colnames1) / len(colnames0)), colnames1, sep='.')
        }
        colnames = gsub('-', '.', colnames)
        #out[start.index:end.index]

        # re-read data
        temp = matrix(scan(filename, what = double(), quiet=T,
            skip = (data.index[i,1]-1),
            nlines = (data.index[i,2] - data.index[i,1]+1))
            , nc=len(colnames)+1, byrow=T)

        date.format = '%Y%m%d'
        date.format.add = ''
        date.format.n = nchar(paste(temp[1,1]))
        if( date.format.n == 6 ) {
            date.format.add = '01'
        } else if( date.format.n == 4 ) {
            date.format.add = '0101'
        }

        data[[name]] = make.xts(temp[,-1], as.Date(paste(temp[,1], date.format.add, sep=''), date.format))
        colnames(data[[name]]) = colnames
    }

    return( data )
}
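###############################################################################
# Illustrative usage sketch (not part of the original source): the function
# returns a list of xts objects, one per section of the source text file;
# the unnamed first section is stored under 'data'. Column names below are
# assumptions based on the monthly factors file.
#
# factors = get.fama.french.data('F-F_Research_Data_Factors', periodicity = 'months', download = TRUE)
# head(factors$data)  # Mkt.RF, SMB, HML, RF (in percent)
###############################################################################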
###############################################################################
# Load FOMC dates
# http://www.federalreserve.gov/monetarypolicy/fomccalendars.htm
# http://www.federalreserve.gov/monetarypolicy/fomchistorical2008.htm
# http://quant.stackexchange.com/questions/141/what-data-sources-are-available-online
#' @export
###############################################################################
get.FOMC.dates <- function
(
    download = TRUE,
    fomc.filename = 'FOMC.Rdata'
)
{
    if(!download && file.exists(fomc.filename)) {
        load(file=fomc.filename)
        return(FOMC)
    }

    # download data
    url = 'http://www.federalreserve.gov/monetarypolicy/fomccalendars.htm'
    txt = join(readLines(url))

    # extract data from page
    data = c()
    for(year in 2009:(1 + date.year(Sys.Date()))) {
        temp = extract.table.from.webpage(txt, paste(year, 'FOMC Meetings'))
        if(nrow(temp) == 0) next

        temp = tolower(trim(temp[,1:2]))
        temp = temp[nchar(temp[,1]) > 0,]
        month = temp[,1]
        day = gsub('\\(', '', gsub('\\)', '', temp[,2]))
        day = trim(day)

        status = rep('', len(day))
        index = grep('\\*', day)
        if(any(index)) status[index] = '*'
        index = grep(' ', day)
        if(any(index)) for(j in index) status[j] = spl(day[j], ' ')[2]
        day = gsub('\\*', '', sapply(day, function(x) spl(x, ' ')[1]))

        temp = apply(cbind(day, month, status), 1, function(x) paste(year, spl(x[2],'/'), spl(x[1],'-'), '|', x[3]) )
        data = cbind(data, trim(sapply(unlist(temp), spl, '\\|')))
    }
    recent.days = as.Date(data[1,], '%Y %B %d')
    status = as.vector(data[2,])

    # extract data from page
    data = c()
    for(year in 1936:2008) {
        cat(year, '\n')
        url = paste0('http://www.federalreserve.gov/monetarypolicy/fomchistorical', year, '.htm')
        txt = join(readLines(url))

        tokens = spl(txt, '<div id="historical">')
        days = c()
        for(token in tokens[-1])
            days = c(days, colnames(extract.table.from.webpage(token, 'table'))[1])

        data = rbind(data, cbind(year, days))
    }

    day = tolower(data[,2])
    day = gsub(',', '-', gsub('and', '', gsub('conference call', '', gsub('meeting', '', day))))

    # remove last token
    day = unlist(lapply(day, function(x) join(rev(rev(spl(x, ' '))[-1]), ' ')))

    temp = unlist(apply(cbind(day, data[,1]), 1, function(x) paste(trim(spl(x[1], '-')), x[2]) ))
    temp = sapply(lapply(temp, spl, ' '), function(x) iif(len(x) == 3, x, c(NA, x)))
    temp[1,] = ifna.prev(temp[1,])
    days = as.Date(apply(temp, 2, join, ' '), '%B %d %Y ')

    FOMC = list(day = c(days, recent.days), status = c(rep('', len(days)), status))
    save(FOMC, file=fomc.filename)
    FOMC
}
###############################################################################
# Download historical intraday prices from Google Finance
# http://www.mathworks.com/matlabcentral/fileexchange/32745-get-intraday-stock-price
# http://www.mathworks.com/matlabcentral/fileexchange/36115-volume-weighted-average-price-from-intra-daily-data
# http://www.codeproject.com/KB/IP/google_finance_downloader.aspx
# http://www.marketcalls.in/database/google-realtime-intraday-backfill-data.h
# getSymbol.intraday.google('GOOG','NASDAQ')
# getSymbol.intraday.google('.DJI','INDEXDJX')
#' @export
###############################################################################
getSymbol.intraday.google <- function
(
    Symbol,
    Exchange,
    interval = 60, # 60 seconds
    period = '1d'
)
{
    # request intraday prices from Google Finance
    url = paste('http://www.google.com/finance/getprices?q=', Symbol,
        '&x=', Exchange,
        '&i=', interval,
        '&p=', period,
        '&f=', 'd,o,h,l,c,v', sep='')

    load.packages('data.table')
    out = fread(url, stringsAsFactors=F)

    if(ncol(out) < 5) {
        cat('Error getting data from', url, '\n')
        return(NULL)
    }

    setnames(out, spl('Date,Open,High,Low,Close,Volume'))

    # date logic
    date = out$Date
    date.index = substr(out$Date, 1, 1) == 'a'
    date = as.double(gsub('a', '', date))
    temp = NA * date
    temp[date.index] = date[date.index]
    temp = ifna.prev(temp)
    date = temp + date * interval
    date[date.index] = temp[date.index]
    class(date) = c("POSIXt", "POSIXct")

    date = date - (as.double(format(date[1], '%H')) - 9) * 60 * 60

    make.xts(out[, eval(expression(list( Open,High,Low,Close,Volume )))], date)
}
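###############################################################################
# Note on the date logic above (not part of the original source): Google
# encodes the first bar of each day as an absolute Unix timestamp prefixed
# with 'a' (e.g. 'a1388692800'); subsequent rows hold the offset in intervals
# from that base. For example, with interval = 60:
#   'a1388692800' -> 1388692800
#   '5'           -> 1388692800 + 5 * 60 = 1388693100
#
# out = getSymbol.intraday.google('GOOG', 'NASDAQ', interval = 60, period = '1d')
###############################################################################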
###############################################################################
# Remove extreme data points
#' @export
###############################################################################
data.clean <- function
(
    data,
    min.ratio = 2.5,
    min.obs = 3*252
)
{
    # remove all series that have less than the minimum number of observations
    index = names(which(sapply(data, function(x) count(x$Close)) < min.obs))
    if (len(index) > 0) {
        cat('Removing', index, 'because they have less than', min.obs, 'observations', '\n')
        rm(list=index, envir=data)
    }

    for(ticker in ls(data)) {
        ticker.data = data[[ticker]]
        ticker.data = ticker.data[ticker.data$Close > 0 & ticker.data$Adjusted > 0]

        nperiods = nrow(ticker.data)
        price = ticker.data$Adjusted

        # scale down the series following an abnormal up move
        ratio = as.vector((price) / mlag(price))
        index = which(ratio > min.ratio)
        if(len(index) > 0)
            for(i in index) {
                cat('Abnormal price found for', ticker, format(index(ticker.data)[i], '%d-%b-%Y'), 'Ratio :', round(ratio[i],1), '\n')
                for(name in spl('Open,Close,High,Low,Adjusted'))
                    ticker.data[i:nperiods, name] = ticker.data[i:nperiods, name] / ratio[i]
            }

        # scale up the series following an abnormal down move
        ratio = as.vector(mlag(price) / (price))
        index = which(ratio > min.ratio)
        if(len(index) > 0)
            for(i in index) {
                cat('Abnormal price found for', ticker, format(index(ticker.data)[i], '%d-%b-%Y'), 'Inverse Ratio :', round(ratio[i],1), '\n')
                for(name in spl('Open,Close,High,Low,Adjusted'))
                    ticker.data[i:nperiods, name] = ticker.data[i:nperiods, name] * ratio[i]
            }

        data[[ticker]] = ticker.data
    }
}
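###############################################################################
# Illustrative usage sketch (not part of the original source): data is
# expected to be an environment of OHLC xts series with plain 'Close' /
# 'Adjusted' column names; series with fewer than min.obs observations are
# dropped and obvious one-bar price spikes are rescaled in place.
#
# data = new.env()
# quantmod::getSymbols(spl('IBM,MSFT'), src = 'yahoo', env = data, auto.assign = TRUE)
# for(i in ls(data)) colnames(data[[i]]) = gsub(paste(i, '.', sep=''), '', colnames(data[[i]]), fixed = TRUE)
# data.clean(data, min.ratio = 2.5, min.obs = 3*252)
###############################################################################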