/readAllSymbols.py
Python | 76 lines | 57 code | 8 blank | 11 comment | 2 complexity | 2167b2bd750439c78ce41d3c08b0b90c MD5 | raw file
- import os
- import sys
- import datetime as dt
- import pandas
- import numpy as np
def load_data_frame_from_hdf5(fullname):
    """Return the object stored under the "data_frame" key of an HDF5 file.

    Args:
        fullname: Path of the HDF5 file. It is opened read-only.

    Returns:
        Whatever pandas has stored under the key "data_frame" in that file.

    Raises:
        Any exception raised by pandas while opening the store or looking up
        the key is propagated unchanged; the store is closed first.
    """
    store = pandas.HDFStore(fullname, mode='r')
    try:
        return store["data_frame"]
    finally:
        # Close on every path so a failed lookup does not leak the open
        # file handle.
        store.close()
# Put TZ=America/New_York into the process environment.
os.putenv("TZ", "America/New_York")

# Command-line argument: a text file holding just one column of symbols.
symbol_list_file = sys.argv[1]

# Label window sliced out of every symbol's frame.
start = dt.datetime(2009, 1, 2, 9, 30)
end = dt.datetime(2012, 4, 13, 15, 59)

# Create a list of tickers (needed again later to name the columns).
# strip() also drops a trailing '\r', and blank lines are skipped so they
# cannot turn into an empty ticker / bogus file name.
with open(symbol_list_file, 'r') as symbol_list:
    tickers = [line.strip() for line in symbol_list if line.strip()]

# Columns pulled out of each per-symbol frame.
# NOTE(review): presumably close/open/high/low price and volume, going by
# the output file names used below -- confirm against the data schema.
_FIELDS = ("cp", "op", "hp", "lp", "vol")


def _window(series):
    """Return `series` restricted to start..end as a one-column DataFrame."""
    # NOTE(review): `.ix` (and `.save` further down) are legacy pandas APIs
    # that newer pandas releases no longer provide; they are kept so the
    # script still runs on the pandas version it was written for.
    return pandas.DataFrame(series.ix[start:end])


# One wide frame per field: rows are timestamps, one column per ticker.
frames = {}
for position, symbol in enumerate(tickers):
    hdf5FileName = "/backtest_data/daily-1min/tbFrame1min/" + symbol + ".tb.frame.1min.hdf5"
    print(hdf5FileName)
    data = load_data_frame_from_hdf5(hdf5FileName)

    for field in _FIELDS:
        piece = _window(data[field])
        if position == 0:
            # The first symbol seeds the frame. Every field gets its own
            # frame here: the old `vol=cp=...` chained assignment replaced
            # the first symbol's closing prices with its volume.
            frames[field] = piece
        else:
            merged = pandas.merge(frames[field], piece,
                                  left_index=True, right_index=True,
                                  how='outer')
            # Renumber the columns after every merge so the incoming
            # column's name never collides with one already present.
            merged.columns = np.arange(0, position + 1)
            frames[field] = merged

# Rename the columns by tickers.
for field in _FIELDS:
    frames[field].columns = tickers

cp = frames["cp"]
op = frames["op"]
hp = frames["hp"]
lp = frames["lp"]
vol = frames["vol"]

path = "/backtest_data/1second/teams_data/team3/data/"
# Only the high and low frames are written at the moment; the other saves
# are intentionally left disabled.
#cp.save(path+"close_px_all_from_09.bin")
#op.save(path +"open_px_all_from_09.bin")
hp.save(path + "high_px_all_from_09.bin")
lp.save(path + "low_px_all_from_09.bin")
#vol.save(path+"volume_all_from_09.bin")

# History: a disabled workaround used to reload AAPL's closing prices here
# because "the first symbol closing prices get screwed up". The cause was the
# chained assignment `vol=cp=...` when seeding the frames, which is fixed
# above, so the workaround is no longer needed.