/plistaContestPy/plistaContestBackend/real/packages/models/test/TestDimensionListModel.py

https://bitbucket.org/cwinkelmann/plista-contest-python · Python · 335 lines · 197 code · 94 blank · 44 comment · 16 complexity · 70a159fd503473d689f7aa663dbe6b3f MD5 · raw file

  1. '''
  2. Created on 31.01.2012
  3. @author: christian.winkelmann@plista.com
  4. '''
  5. import unittest
  6. import cql
  7. from random import uniform
  8. from plistaContestPy.plistaContestBackend.config import config_global
  9. from plistaContestPy.plistaContestBackend.config import config_local
  10. from plistaContestPy.plistaContestBackend.real.models.DimensionListModel import DimensionListModel
  11. from contest.migrations.setup_keyspaces import Setup_Keyspaces
  12. from contest.migrations._007_dimensionLists import dimensionListsMigration
  13. class TestDimensionListModel(unittest.TestCase):
  14. def setUp(self):
  15. print "setting up database"
  16. config_global.cassandra_default_keyspace = 'unitTest'
  17. sK = Setup_Keyspaces()
  18. dM = dimensionListsMigration()
  19. try:
  20. self.dbconn = cql.connect(config_local.cassandra_host, config_local.cassandra_port)
  21. self.cursor = self.dbconn.cursor()
  22. except:
  23. print "not able to create a database connection"
  24. self.cursor.execute("USE " + config_global.cassandra_default_keyspace)
  25. print "setting up database done"
  26. def save_test_data(self, timestamp_start = 4000, time_stamp_range = 5, id_range = 2):
  27. """ save some sample data """
  28. for i in xrange(time_stamp_range):
  29. curr_timestamp = timestamp_start + i
  30. for user_id in xrange(id_range):
  31. self.dL.save(user_id, curr_timestamp)
  32. def testSaveDimensionListRangeBySeconds(self):
  33. """ test if the stream of information is written as wished
  34. """
  35. dimension = 'user_ids' # save user ids
  36. self.dL = DimensionListModel(dimension, 'cassandra') # create the model
  37. timestamp_start = 4000
  38. time_stamp_range = 50
  39. id_range = 98
  40. self.save_test_data(timestamp_start, time_stamp_range, id_range)
  41. id_stats = self.dL.getByTime(timestamp_start + 1, timestamp_start + 2)
  42. self.assertEqual(id_stats[u'user_ids_by_seconds_4001'][0], 0, "the entries are not equal")
  43. self.assertEqual(len(id_stats[u'user_ids_by_seconds_4001']), id_range, "the list has the wrong length")
  44. desired_length = 5
  45. id_stats = self.dL.getByTime(timestamp_start, timestamp_start + desired_length)
  46. self.assertEqual(len(id_stats), desired_length, "the requested list has not {} entries".format(desired_length))
  47. def testGetDimensionListRangeByMinutes(self):
  48. """ test if the stream of information is written as wished
  49. """
  50. dimension = 'user_ids'
  51. self.dL = DimensionListModel(dimension, 'cassandra')
  52. self.dL.save(dimension_id=1, timestamp=59)
  53. self.dL.save(dimension_id=2, timestamp=59)
  54. self.dL.save(dimension_id=3, timestamp=59)
  55. self.dL.save(dimension_id=1, timestamp=61)
  56. self.dL.save(dimension_id=4, timestamp=3000)
  57. id_stats = self.dL.getByTime(0, 1, binSize='minutes')
  58. self.assertEquals(3, len(id_stats[u'user_ids_by_minutes_0']), "wrong length")
  59. id_stats = self.dL.getByTime(1, 2, binSize='minutes')
  60. self.assertEquals(1, len(id_stats[u'user_ids_by_minutes_1']), "wrong length")
  61. id_stats = self.dL.getByTime(0, 2, binSize='minutes')
  62. self.assertEquals(1, len(id_stats[u'user_ids_by_minutes_1']), "wrong length")
  63. self.assertEquals(3, len(id_stats[u'user_ids_by_minutes_0']), "wrong length")
  64. def testGetDimensionListRangeByHours(self):
  65. """ test if the stream of information is written as wished
  66. """
  67. dimension = 'user_ids'
  68. self.dL = DimensionListModel(dimension, 'cassandra')
  69. self.dL.save(dimension_id=1, timestamp=59)
  70. self.dL.save(dimension_id=2, timestamp=59)
  71. self.dL.save(dimension_id=3, timestamp=59)
  72. self.dL.save(dimension_id=1, timestamp=61)
  73. self.dL.save(dimension_id=4, timestamp=3000)
  74. #### hours ####
  75. id_stats = self.dL.getByTime(0, 1, binSize='hours')
  76. self.assertEquals(4, len(id_stats[u'user_ids_by_hours_0']), "wrong length")
  77. self.dL.save(dimension_id=4, timestamp=4000)
  78. id_stats = self.dL.getByTime(1, 2, binSize='hours', renew=True)
  79. #print mylist
  80. self.assertEquals(1, len(id_stats[u'user_ids_by_hours_1']), "wrong length")
  81. self.dL.save(dimension_id=3, timestamp=4000)
  82. id_stats = self.dL.getByTime(1, 2, binSize='hours', renew=True)
  83. self.assertEquals(2, len(id_stats[u'user_ids_by_hours_1']), "wrong length")
  84. def testGetDimensionListRangeByDay(self):
  85. """ test if the stream of information is written as wished
  86. """
  87. dimension = 'user_ids'
  88. self.dL = DimensionListModel(dimension, 'cassandra')
  89. #self.save_test_data(timestamp_start, time_stamp_range, id_range)
  90. self.dL.save(dimension_id=1, timestamp=59)
  91. self.dL.save(dimension_id=2, timestamp=59)
  92. self.dL.save(dimension_id=3, timestamp=59)
  93. self.dL.save(dimension_id=1, timestamp=61)
  94. #self.dL.save(dimension_id=1, timestamp=3000)
  95. #self.dL.save(dimension_id=2, timestamp=3000)
  96. #self.dL.save(dimension_id=3, timestamp=3000)
  97. self.dL.save(dimension_id=4, timestamp=3000)
  98. ####### days
  99. #id_stats = self.dL.getByTime(0, 2, binSize = 'days')
  100. #print id_stats
  101. #self.assertEquals(4, len(id_stats[u'user_ids_by_days_0']), "wrong length")
  102. #print id_stats
  103. #self.assertEquals(1, len(id_stats), "wrong length")
  104. # save second based data for hour one
  105. timestamp_start = 86401
  106. timestamp_start = 90000
  107. time_stamp_range = 5
  108. id_range = 10
  109. self.save_test_data(timestamp_start, time_stamp_range, id_range)
  110. id_stats = self.dL.getByTime(0, 2, binSize = 'days')
  111. self.assertEquals(2, len(id_stats), "wrong length")
  112. # self.assertEquals(id_range, len(id_stats[u'user_ids_by_days_1']), "wrong length")
  113. id_stats = self.dL.getByTime(1, 2, binSize = 'days')
  114. self.assertEquals(1, len(id_stats), "wrong length")
  115. print id_stats
  116. #self.assertEquals(id_range, len(id_stats[u'user_ids_by_days_1']), "wrong length")
  117. def testSetComputedIds(self):
  118. dimension = 'user_ids'
  119. dL = DimensionListModel(dimension, mode = 'cassandra')
  120. rangeStart = 0
  121. rangeEnd = 10
  122. binSize = 'minutes'
  123. dL.setComputedIds(dimension, rangeStart, rangeEnd, binSize)
  124. r = dL.getComputedIds(dimension, rangeStart, rangeEnd, binSize)
  125. print r
  126. self.assertEqual(rangeEnd, len(r) )
  127. rangeEnd = 5
  128. binSize = 'hours'
  129. r = dL.getComputedIds(dimension, rangeStart, rangeEnd, binSize)
  130. """ nothing is yet computed for hours """
  131. self.assertEqual(0, len(r) )
  132. dL.setComputedIds(dimension, rangeStart, rangeEnd, binSize)
  133. r = dL.getComputedIds(dimension, rangeStart, rangeEnd, binSize)
  134. print r
  135. self.assertEqual(rangeEnd, len(r) )
  136. def test_Binify_Minutes(self):
  137. """ this function has a stupid name, but will get data from one dimensionList an will aggregate it
  138. """
  139. self.dL = DimensionListModel('user_ids', mode='cassandra')
  140. binSize = 'minutes'
  141. start_seconds = 58
  142. end_seconds = 61
  143. for i in xrange(start_seconds, end_seconds):
  144. for user_id in xrange(int(uniform(1, 6))):
  145. curr_timestamp = i
  146. self.dL.save(user_id, curr_timestamp)
  147. list = self.dL.getByTime(45, 75, 'seconds')
  148. #self.dL.getByTime(timestampStart, timestampEnd, binSize)
  149. #print list
  150. """ save the data """
  151. binified = self.dL.binify(binSize, 0, 1)
  152. print binified
  153. self.assertIn(u'user_ids_by_seconds_58', binified)
  154. self.assertIn(u'user_ids_by_seconds_59', binified)
  155. self.assertNotIn(u'user_ids_by_seconds_60', binified)
  156. binified = self.dL.binify(binSize, 1, 2)
  157. self.assertNotIn(u'user_ids_by_seconds_58', binified)
  158. self.assertNotIn(u'user_ids_by_seconds_59', binified)
  159. self.assertIn(u'user_ids_by_seconds_60', binified)
  160. print binified
  161. def test_Binify_Hours(self):
  162. """ this function has a stupid name, but will get data from one dimensionList an will aggregate it
  163. """
  164. self.dL = DimensionListModel('user_ids', mode='cassandra')
  165. binSize = 'hours'
  166. start_seconds = 58
  167. end_seconds = 61
  168. for i in xrange(start_seconds, end_seconds):
  169. for user_id in xrange(int(uniform(1, 6))):
  170. curr_timestamp = i
  171. self.dL.save(user_id, curr_timestamp)
  172. start_seconds = 3600
  173. end_seconds = start_seconds + 1
  174. for i in xrange(start_seconds, end_seconds):
  175. for user_id in xrange(int(uniform(1, 6))):
  176. curr_timestamp = i
  177. self.dL.save(user_id, curr_timestamp)
  178. binified = self.dL.binify(binSize, 1, 2)
  179. self.assertIn(u'user_ids_by_seconds_3600', binified)
  180. binified = self.dL.binify(binSize, 0, 1)
  181. print binified
  182. self.assertIn(u'user_ids_by_seconds_58', binified)
  183. self.assertIn(u'user_ids_by_seconds_59', binified)
  184. self.assertIn(u'user_ids_by_seconds_60', binified)
  185. self.assertNotIn(u'user_ids_by_seconds_3600', binified)
  186. def test_Binify_Days(self):
  187. """ this function has a stupid name, but will get data from one dimensionList an will aggregate it
  188. """
  189. self.dL = DimensionListModel('user_ids', mode='cassandra')
  190. binSize = 'days'
  191. start_seconds = 58
  192. end_seconds = 61
  193. for i in xrange(start_seconds, end_seconds):
  194. for user_id in xrange(int(uniform(1, 6))):
  195. curr_timestamp = i
  196. self.dL.save(user_id, curr_timestamp)
  197. start_seconds = 86400
  198. end_seconds = start_seconds + 1
  199. for i in xrange(start_seconds, end_seconds):
  200. for user_id in xrange(int(uniform(1, 6))):
  201. curr_timestamp = i
  202. self.dL.save(user_id, curr_timestamp)
  203. """ save the data """
  204. binified = self.dL.binify(binSize, 0, 1)
  205. print binified
  206. self.assertIn(u'user_ids_by_seconds_58', binified)
  207. self.assertIn(u'user_ids_by_seconds_59', binified)
  208. self.assertIn(u'user_ids_by_seconds_60', binified)
  209. self.assertNotIn(u'user_ids_by_seconds_86400', binified)
  210. binified = self.dL.binify(binSize, 1, 2)
  211. self.assertIn(u'user_ids_by_seconds_86400', binified)
  212. binified = self.dL.binify(binSize, 0, 2)
  213. print binified
  214. self.assertIn(u'user_ids_by_seconds_58', binified)
  215. self.assertIn(u'user_ids_by_seconds_59', binified)
  216. self.assertIn(u'user_ids_by_seconds_60', binified)
  217. self.assertIn(u'user_ids_by_seconds_86400', binified)
  218. def _test_Binify_Performance(self):
  219. """ this function has a stupid name, but will get data from one dimensionList an will aggregate it
  220. """
  221. self.dL = DimensionListModel('user_ids', mode='cassandra')
  222. binSize = 'days'
  223. start_seconds = 58
  224. end_seconds = 90000
  225. for i in xrange(start_seconds, end_seconds):
  226. for user_id in xrange(int(uniform(1, 6))):
  227. curr_timestamp = i
  228. self.dL.save(user_id, curr_timestamp)
  229. """ save the data """
  230. binified = self.dL.binify(binSize, 0, 1)
  231. print binified
  232. self.assertIn(u'user_ids_by_seconds_58', binified)
  233. self.assertIn(u'user_ids_by_seconds_59', binified)
  234. self.assertIn(u'user_ids_by_seconds_60', binified)
  235. self.assertNotIn(u'user_ids_by_seconds_86400', binified)
  236. binified = self.dL.binify(binSize, 1, 2)
  237. self.assertIn(u'user_ids_by_seconds_86400', binified)
  238. if __name__ == "__main__":
  239. #import sys;sys.argv = ['', 'Test.testName']
  240. unittest.main()