/app/lib/leech/sucker.py
Python | 104 lines | 77 code | 11 blank | 16 comment | 10 complexity | f9418614c20bb641c60e77401a25695b MD5 | raw file
- """
- GOALS
- 1. Subscribe to Redis datastreams
- 2. Update an HDF5 datastore with timestamp, keys as columns, number of elements, sum of elements
- 3. Aux functions given key
- a. Fetch second by second volume, sum price, average price
- b. Fetch min and max
- c. Fetch total volume
- 4. Aux functions general
- a. Fetch list of keys
- b. Fetch top 10 keys by total volume
- c. Fetch top 10 keys by last price
- redis port: 6379
- """
- import numpy as np
- import sys
- import ticker_config
- import pandas
- from pandas.io.pytables import HDFStore
- import redis
- import threading
- import datetime
- import time
- from app.utils import venues
class TickerStore(threading.Thread):
    """Daemon thread that subscribes to a Redis pub/sub clear-price feed and
    aggregates it, one row per (publisher_id, domain, geo_country, second),
    into a pandas DataFrame persisted in an HDF5 store.

    NOTE(review): written for Python 2 (print statement) and a legacy pandas
    (DataFrame.append, .ix, in-place Series.sort, Series.item(0)); this will
    not run unmodified on Python 3 / modern pandas.
    """

    # Column names that, together with the per-second timestamp, form the
    # composite row key used as the DataFrame index.
    KEYS = ['publisher_id', 'domain', 'geo_country']

    def __init__(self):
        super(TickerStore, self).__init__()
        # venues() comes from app.utils; presumably a list of dicts carrying
        # 'publisher_id', 'url' and 'geo_country' (see get_metrics_for_all_keys).
        self.venues = venues()
        self.datf = HDFStore(ticker_config.STORE)
        try:
            # Warm-start from previously persisted data when available.
            self.store = self.datf['data']
        except:
            # No 'data' key yet -> start with an empty frame.
            # NOTE(review): bare except also swallows real HDF5 errors.
            self.store = pandas.DataFrame({}, columns=['timestamp','publisher_id','domain','geo_country','clear_price','volume'], index={})
        self.redis = redis.Redis(host=ticker_config.REDIS_HOSTNAME, port=ticker_config.REDIS_PORT)
        self.pubsub = self.redis.pubsub()
        # Daemon thread: exits automatically when the main thread exits.
        self.daemon = True

    def run(self):
        """Consume feed messages forever, folding each clear-price event into
        the per-second aggregate row for its key."""
        self.pubsub.subscribe(ticker_config.FEEDNAME)
        for mess in self.pubsub.listen():
            # Payloads are CSV: "publisher_id,domain,geo_country,clear_price".
            # NOTE(review): the subscribe confirmation message has an integer
            # 'data' field, so .split() would raise on it — TODO confirm this
            # is handled upstream or never reaches this loop.
            (pub_id, domain, geo, clear) = mess['data'].split(",")
            try:
                pub_id = int(pub_id)
                clear=float(clear)
            except:
                # Drop events whose numeric fields do not parse.
                continue
            print [pub_id, domain, geo, clear]
            # Truncate "now" to whole seconds; aggregation bucket is 1 second.
            cur_min = int(time.mktime(datetime.datetime.now().timetuple()))
            new = pandas.DataFrame([{'timestamp': cur_min, 'publisher_id': pub_id, 'domain': domain, 'geo_country': geo, 'clear_price': clear, 'volume': 1}],
                                   index=[(pub_id, domain, geo, cur_min)])
            try:
                # First event for this key/second: append the row and persist
                # the whole frame back to the HDF5 store.
                self.store = self.store.append(new)
                self.datf['data'] = self.store
            except:
                # Same key/second seen again: bump volume and add the price
                # into the existing row (clear_price is therefore a SUM).
                # NOTE(review): this path relies on append raising for a
                # duplicate index label, which is pandas-version dependent —
                # verify the except branch actually fires in production.
                row = self.store.ix[[(pub_id, domain, geo, cur_min)]]
                self.store.ix[[(pub_id, domain, geo, cur_min)], 'volume'] = row['volume'].item(0) + 1
                self.store.ix[[(pub_id, domain, geo, cur_min)], 'clear_price'] = row['clear_price'].item(0) + clear

    def subset_by_key( self, data, key ):
        """Return the rows of *data* whose (publisher_id, domain, geo_country)
        columns match the 3-tuple *key*."""
        return data[ (data['publisher_id'] == key[0]) & (data['domain'] == key[1]) & (data['geo_country'] == key[2]) ]

    def get_metrics_for_key( self, key ):
        """Return a dict with 'min_price', 'max_price', 'last_price' and
        'volume' for *key*; all values stay None when the key has no rows."""
        subs = self.subset_by_key( self.store, key )
        result = dict.fromkeys(['min_price', 'max_price', 'last_price', 'volume'])
        if len(subs):
            # clear_price holds a per-second SUM, so divide by volume to get
            # the per-second average price before taking min/max.
            min_p = round(np.min(subs.clear_price/subs['volume'].apply(np.float)), 3)
            max_p = round(np.max(subs.clear_price/subs['volume'].apply(np.float)), 3)
            # Most recent second's average price (last row of the subset).
            last_p =round((subs.clear_price.ix[-1])/float(subs.volume.ix[-1]), 3)
            sum_v = int(np.sum(subs.volume))
            result.update(min_price=min_p, max_price=max_p, last_price=last_p, volume=sum_v)
        return result

    def get_metrics_for_all_keys(self):
        """Return {'publisher_id,url,geo_country': metrics-dict} covering every
        known venue (venues with no data map to the all-None metrics dict)."""
        metrics = {}
        for v in self.venues:
            result = self.get_metrics_for_key((int(v['publisher_id']), v['url'], v['geo_country']))
            key = ','.join((v['publisher_id'], v['url'], v['geo_country']))
            metrics[key] = result
        return metrics

    def get_top10_byVolume(self):
        """Return the 10 (publisher_id, domain, geo_country) groups with the
        largest total volume, as a descending pandas Series."""
        grps = self.store.groupby(self.KEYS)
        vols = grps['volume'].agg(sum)
        # Legacy in-place ascending sort, then reversed-slice for descending.
        vols.sort()
        return vols[::-1][0:10]#.to_dict()

    def __del__(self):
        # Best-effort close of the HDF5 file handle on garbage collection.
        self.datf.close()
# Start the background subscriber as soon as this module is imported, so
# importers get a live, already-populating ticker_store.
ticker_store = TickerStore()
ticker_store.start()

if __name__ == '__main__':
    # Run standalone: park the main thread in a sleep loop so the daemon
    # subscriber keeps working; Ctrl-C tears the whole process down.
    try:
        while 1:
            time.sleep(1)
    except KeyboardInterrupt:
        sys.exit()