/app/lib/leech/sucker.py
Python | 104 lines | 77 code | 11 blank | 16 comment | 10 complexity | f9418614c20bb641c60e77401a25695b MD5 | raw file
- """
- GOALS
- 1. Subscribe to Redis datastreams
- 2. Update an HDF5 datastore with timestamp, keys as columns, number of elements, sum of elements
- 3. Aux functions given key
- a. Fetch second by second volume, sum price, average price
- b. Fetch min and max
- c. Fetch total volume
- 4. Aux functions general
- a. Fetch list of keys
- b. Fetch top 10 keys by total volume
- c. Fetch top 10 keys by last price
- redis port: 6379
- """
- import numpy as np
- import sys
- import ticker_config
- import pandas
- from pandas.io.pytables import HDFStore
- import redis
- import threading
- import datetime
- import time
- from app.utils import venues
class TickerStore(threading.Thread):
    """Daemon thread that subscribes to a Redis pub/sub clear-price feed and
    aggregates it, one row per (publisher_id, domain, geo_country, second),
    into a pandas DataFrame persisted in an HDF5 store.

    NOTE(review): written for Python 2 (print statement) and a legacy pandas
    (DataFrame.append, .ix, in-place Series.sort, Series.item(0)); this will
    not run unmodified on Python 3 / modern pandas.
    """

    # Column names that, together with the per-second timestamp, form the
    # composite row key used as the DataFrame index.
    KEYS = ['publisher_id', 'domain', 'geo_country']

    def __init__(self):
        super(TickerStore, self).__init__()
        # venues() comes from app.utils; presumably a list of dicts carrying
        # 'publisher_id', 'url' and 'geo_country' (see get_metrics_for_all_keys).
        self.venues = venues()
        self.datf = HDFStore(ticker_config.STORE)
        try:
            # Warm-start from previously persisted data when available.
            self.store = self.datf['data']
        except:
            # No 'data' key yet -> start with an empty frame.
            # NOTE(review): bare except also swallows real HDF5 errors.
            self.store = pandas.DataFrame({}, columns=['timestamp','publisher_id','domain','geo_country','clear_price','volume'], index={})
        self.redis = redis.Redis(host=ticker_config.REDIS_HOSTNAME, port=ticker_config.REDIS_PORT)
        self.pubsub = self.redis.pubsub()
        # Daemon thread: exits automatically when the main thread exits.
        self.daemon = True

    def run(self):
        """Consume feed messages forever, folding each clear-price event into
        the per-second aggregate row for its key."""
        self.pubsub.subscribe(ticker_config.FEEDNAME)
        for mess in self.pubsub.listen():
            # Payloads are CSV: "publisher_id,domain,geo_country,clear_price".
            # NOTE(review): the subscribe confirmation message has an integer
            # 'data' field, so .split() would raise on it — TODO confirm this
            # is handled upstream or never reaches this loop.
            (pub_id, domain, geo, clear) = mess['data'].split(",")
            try:
                pub_id = int(pub_id)
                clear=float(clear)
            except:
                # Drop events whose numeric fields do not parse.
                continue
            print [pub_id, domain, geo, clear]
            # Truncate "now" to whole seconds; aggregation bucket is 1 second.
            cur_min = int(time.mktime(datetime.datetime.now().timetuple()))
            new = pandas.DataFrame([{'timestamp': cur_min, 'publisher_id': pub_id, 'domain': domain, 'geo_country': geo, 'clear_price': clear, 'volume': 1}],
                                   index=[(pub_id, domain, geo, cur_min)])
            try:
                # First event for this key/second: append the row and persist
                # the whole frame back to the HDF5 store.
                self.store = self.store.append(new)
                self.datf['data'] = self.store
            except:
                # Same key/second seen again: bump volume and add the price
                # into the existing row (clear_price is therefore a SUM).
                # NOTE(review): this path relies on append raising for a
                # duplicate index label, which is pandas-version dependent —
                # verify the except branch actually fires in production.
                row = self.store.ix[[(pub_id, domain, geo, cur_min)]]
                self.store.ix[[(pub_id, domain, geo, cur_min)], 'volume'] = row['volume'].item(0) + 1
                self.store.ix[[(pub_id, domain, geo, cur_min)], 'clear_price'] = row['clear_price'].item(0) + clear

    def subset_by_key( self, data, key ):
        """Return the rows of *data* whose (publisher_id, domain, geo_country)
        columns match the 3-tuple *key*."""
        return data[ (data['publisher_id'] == key[0]) & (data['domain'] == key[1]) & (data['geo_country'] == key[2]) ]

    def get_metrics_for_key( self, key ):
        """Return a dict with 'min_price', 'max_price', 'last_price' and
        'volume' for *key*; all values stay None when the key has no rows."""
        subs = self.subset_by_key( self.store, key )
        result = dict.fromkeys(['min_price', 'max_price', 'last_price', 'volume'])
        if len(subs):
            # clear_price holds a per-second SUM, so divide by volume to get
            # the per-second average price before taking min/max.
            min_p = round(np.min(subs.clear_price/subs['volume'].apply(np.float)), 3)
            max_p = round(np.max(subs.clear_price/subs['volume'].apply(np.float)), 3)
            # Most recent second's average price (last row of the subset).
            last_p =round((subs.clear_price.ix[-1])/float(subs.volume.ix[-1]), 3)
            sum_v = int(np.sum(subs.volume))
            result.update(min_price=min_p, max_price=max_p, last_price=last_p, volume=sum_v)
        return result

    def get_metrics_for_all_keys(self):
        """Return {'publisher_id,url,geo_country': metrics-dict} covering every
        known venue (venues with no data map to the all-None metrics dict)."""
        metrics = {}
        for v in self.venues:
            result = self.get_metrics_for_key((int(v['publisher_id']), v['url'], v['geo_country']))
            key = ','.join((v['publisher_id'], v['url'], v['geo_country']))
            metrics[key] = result
        return metrics

    def get_top10_byVolume(self):
        """Return the 10 (publisher_id, domain, geo_country) groups with the
        largest total volume, as a descending pandas Series."""
        grps = self.store.groupby(self.KEYS)
        vols = grps['volume'].agg(sum)
        # Legacy in-place ascending sort, then reversed-slice for descending.
        vols.sort()
        return vols[::-1][0:10]#.to_dict()

    def __del__(self):
        # Best-effort close of the HDF5 file handle on garbage collection.
        self.datf.close()
# Start the background subscriber as soon as this module is imported, so
# importers get a live, already-populating ticker_store.
ticker_store = TickerStore()
ticker_store.start()

if __name__ == '__main__':
    # Run standalone: park the main thread in a sleep loop so the daemon
    # subscriber keeps working; Ctrl-C tears the whole process down.
    try:
        while 1:
            time.sleep(1)
    except KeyboardInterrupt:
        sys.exit()