
/app/lib/leech/sucker.py

https://bitbucket.org/ajish/priceticker
  1. """
  2. GOALS
  3. 1. Subscribe to Redis datastreams
  4. 2. Update a hdf5 datastore with timestamp, keys as columns, number of elements, sum of elements
  5. 3. Aux functions given key
  6. a. Fetch second by second volume, sum price, average price
  7. b. Fetch min and max
  8. c. Fetch total volume
  9. 4. Aux functions general
  10. a. Fetch list of keys
  11. b. Fetch top 10 keys by total volume
  12. c. Fetch top 10 keys by last price
  13. redis port: 6379
  14. """
import sys
import datetime
import threading
import time

import numpy as np
import pandas
from pandas.io.pytables import HDFStore
import redis

import ticker_config
from app.utils import venues

class TickerStore(threading.Thread):
    """Background thread that folds the clear-price feed into an HDF5-backed DataFrame."""

    KEYS = ['publisher_id', 'domain', 'geo_country']

    def __init__(self):
        super(TickerStore, self).__init__()
        self.venues = venues()
        self.datf = HDFStore(ticker_config.STORE)
        try:
            self.store = self.datf['data']
        except KeyError:
            # First run: no 'data' node in the store yet, so start from an empty frame.
            self.store = pandas.DataFrame(
                {},
                columns=['timestamp', 'publisher_id', 'domain', 'geo_country', 'clear_price', 'volume'],
                index=[])
        self.redis = redis.Redis(host=ticker_config.REDIS_HOSTNAME, port=ticker_config.REDIS_PORT)
        self.pubsub = self.redis.pubsub()
        self.daemon = True
    def run(self):
        self.pubsub.subscribe(ticker_config.FEEDNAME)
        for mess in self.pubsub.listen():
            if mess['type'] != 'message':
                # Skip subscribe confirmations, whose 'data' field is not a string.
                continue
            # Feed messages are CSV: "publisher_id,domain,geo_country,clear_price".
            (pub_id, domain, geo, clear) = mess['data'].split(",")
            try:
                pub_id = int(pub_id)
                clear = float(clear)
            except ValueError:
                # Malformed message; drop it.
                continue
            print [pub_id, domain, geo, clear]
            cur_min = int(time.mktime(datetime.datetime.now().timetuple()))
            new = pandas.DataFrame(
                [{'timestamp': cur_min, 'publisher_id': pub_id, 'domain': domain,
                  'geo_country': geo, 'clear_price': clear, 'volume': 1}],
                index=[(pub_id, domain, geo, cur_min)])
            try:
                # verify_integrity makes append raise if this (key, second) row already exists.
                self.store = self.store.append(new, verify_integrity=True)
                self.datf['data'] = self.store
            except ValueError:
                # Same key within the same second: accumulate volume and summed price in place.
                row = self.store.ix[[(pub_id, domain, geo, cur_min)]]
                self.store.ix[[(pub_id, domain, geo, cur_min)], 'volume'] = row['volume'].values[0] + 1
                self.store.ix[[(pub_id, domain, geo, cur_min)], 'clear_price'] = row['clear_price'].values[0] + clear
                self.datf['data'] = self.store
    def subset_by_key(self, data, key):
        return data[(data['publisher_id'] == key[0]) & (data['domain'] == key[1]) & (data['geo_country'] == key[2])]

    def get_metrics_for_key(self, key):
        subs = self.subset_by_key(self.store, key)
        result = dict.fromkeys(['min_price', 'max_price', 'last_price', 'volume'])
        if len(subs):
            # clear_price holds the per-second price sum, so divide by volume for the average.
            min_p = round(np.min(subs.clear_price / subs['volume'].apply(np.float)), 3)
            max_p = round(np.max(subs.clear_price / subs['volume'].apply(np.float)), 3)
            last_p = round((subs.clear_price.ix[-1]) / float(subs.volume.ix[-1]), 3)
            sum_v = int(np.sum(subs.volume))
            result.update(min_price=min_p, max_price=max_p, last_price=last_p, volume=sum_v)
        return result
    def get_metrics_for_all_keys(self):
        metrics = {}
        for v in self.venues:
            result = self.get_metrics_for_key((int(v['publisher_id']), v['url'], v['geo_country']))
            key = ','.join((v['publisher_id'], v['url'], v['geo_country']))
            metrics[key] = result
        return metrics
    def get_top10_byVolume(self):
        grps = self.store.groupby(self.KEYS)
        vols = grps['volume'].agg(sum)
        vols.sort()  # in-place ascending sort, reversed below for the top 10
        return vols[::-1][0:10]  # .to_dict()

    def __del__(self):
        self.datf.close()

# Spawn a new thread to subscribe to the clear-price stream.
ticker_store = TickerStore()
ticker_store.start()

if __name__ == '__main__':
    try:
        # Keep the main thread alive; the daemon ticker thread does the work.
        while True:
            time.sleep(1)
    except KeyboardInterrupt:
        sys.exit()
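
To exercise the store end to end, something has to publish onto the FEEDNAME channel. A minimal publisher sketch, assuming the same ticker_config module is importable; the sample rows are invented and only need to match the "publisher_id,domain,geo_country,clear_price" format that run() splits on:

# feed_test.py - minimal publisher sketch for the clear-price stream.
# Assumes the same ticker_config module; the sample rows below are invented.
import time

import redis

import ticker_config

r = redis.Redis(host=ticker_config.REDIS_HOSTNAME, port=ticker_config.REDIS_PORT)

# Each message matches the "publisher_id,domain,geo_country,clear_price"
# format that TickerStore.run() splits on.
samples = [
    "101,example.com,US,0.42",
    "101,example.com,US,0.38",
    "202,example.org,GB,1.05",
]

for row in samples:
    r.publish(ticker_config.FEEDNAME, row)
    time.sleep(0.1)  # spread the publishes out a little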
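
Once messages are flowing, the aux functions can be read back from the running instance. A short query sketch, assuming the module is importable as app.lib.leech.sucker (importing it spawns the daemon thread) and using a hypothetical key tuple:

# query_test.py - sketch of reading metrics back out of the running store.
import time

from app.lib.leech.sucker import ticker_store  # importing spawns the daemon thread

time.sleep(2)  # give the thread a moment to consume a few messages

# Keys are (publisher_id, domain, geo_country) tuples; this one is hypothetical.
print(ticker_store.get_metrics_for_key((101, 'example.com', 'US')))
# -> {'min_price': ..., 'max_price': ..., 'last_price': ..., 'volume': ...}

print(ticker_store.get_top10_byVolume())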