TestDimensionListModel.py

/plistaContestPy/plistaContestBackend/real/packages/models/test/TestDimensionListModel.py

https://bitbucket.org/cwinkelmann/plista-contest-python · Python · 335 lines · 197 code · 94 blank · 44 comment · 16 complexity · 70a159fd503473d689f7aa663dbe6b3f MD5 · raw file

'''
Created on 31.01.2012

@author: christian.winkelmann@plista.com
'''
import unittest
import cql
from random import uniform

from plistaContestPy.plistaContestBackend.config import config_global
from plistaContestPy.plistaContestBackend.config import config_local
from plistaContestPy.plistaContestBackend.real.models.DimensionListModel import DimensionListModel
from contest.migrations.setup_keyspaces import Setup_Keyspaces
from contest.migrations._007_dimensionLists import dimensionListsMigration



class TestDimensionListModel(unittest.TestCase):
    def setUp(self):
        """Select the ``unitTest`` keyspace and open a CQL cursor for the test.

        Sets ``config_global.cassandra_default_keyspace`` to ``'unitTest'``,
        instantiates ``Setup_Keyspaces`` and ``dimensionListsMigration``,
        connects to the host/port given in ``config_local`` and issues a
        ``USE`` statement for the test keyspace.  The connection is closed
        automatically after the test, whether it passes or fails.

        Raises:
            Exception: whatever ``cql.connect`` or ``cursor()`` raised.  The
                failure is reported and re-raised; it used to be swallowed,
                after which the test crashed with an unrelated
                ``AttributeError`` on ``self.cursor``.
        """
        print("setting up database")
        config_global.cassandra_default_keyspace = 'unitTest'

        # Only the constructors are used; presumably they create the keyspace
        # and the dimension-list column families -- TODO confirm.
        keyspace_setup = Setup_Keyspaces()
        dimension_migration = dimensionListsMigration()

        try:
            self.dbconn = cql.connect(config_local.cassandra_host, config_local.cassandra_port)
            # Register the close right away so a failure in cursor() below
            # does not leak the connection.
            self.addCleanup(self.dbconn.close)
            self.cursor = self.dbconn.cursor()
        except Exception:
            print("not able to create a database connection")
            raise

        self.cursor.execute("USE " + config_global.cassandra_default_keyspace)

        print("setting up database done")



    def save_test_data(self, timestamp_start = 4000, time_stamp_range = 5, id_range = 2):
        """Write sample ids through ``self.dL`` for a run of consecutive timestamps.

        For each of the ``time_stamp_range`` timestamps beginning at
        ``timestamp_start``, the ids ``0 .. id_range - 1`` are saved in
        ascending order.  ``self.dL`` must have been set by the calling test.
        """
        timestamp_end = timestamp_start + time_stamp_range
        for second in xrange(timestamp_start, timestamp_end):
            for sample_id in xrange(id_range):
                self.dL.save(sample_id, second)



    def testSaveDimensionListRangeBySeconds(self):
        """Save ids for 50 consecutive seconds and read them back per second.

        Checks the content and length of the bin for second 4001 and that a
        five-second window yields five entries.
        """
        self.dL = DimensionListModel('user_ids', 'cassandra')

        first_second = 4000
        seconds_written = 50
        ids_per_second = 98
        self.save_test_data(first_second, seconds_written, ids_per_second)

        # One-second window: the bin's first entry is id 0 and it holds one
        # entry per saved id.
        single_second = self.dL.getByTime(first_second + 1, first_second + 2)
        bucket = single_second[u'user_ids_by_seconds_4001']
        self.assertEqual(bucket[0], 0, "the entries are not equal")
        self.assertEqual(len(bucket), ids_per_second, "the list has the wrong length")

        # Five-second window: one result entry per second.
        desired_length = 5
        window = self.dL.getByTime(first_second, first_second + desired_length)
        self.assertEqual(
            len(window),
            desired_length,
            "the requested list has not {} entries".format(desired_length),
        )





    def testGetDimensionListRangeByMinutes(self):
        """Check the per-minute bins returned by ``getByTime(binSize='minutes')``.

        Ids 1, 2 and 3 are saved at second 59, id 1 at second 61 and id 4 at
        second 3000.  The test expects three entries in
        ``user_ids_by_minutes_0`` and one in ``user_ids_by_minutes_1``, both
        when each minute is queried alone and when the range 0-2 is queried.
        """
        self.dL = DimensionListModel('user_ids', 'cassandra')

        samples = ((1, 59), (2, 59), (3, 59), (1, 61), (4, 3000))
        for dimension_id, timestamp in samples:
            self.dL.save(dimension_id=dimension_id, timestamp=timestamp)

        # assertEqual replaces the deprecated assertEquals alias.
        id_stats = self.dL.getByTime(0, 1, binSize='minutes')
        self.assertEqual(3, len(id_stats[u'user_ids_by_minutes_0']), "wrong length")

        id_stats = self.dL.getByTime(1, 2, binSize='minutes')
        self.assertEqual(1, len(id_stats[u'user_ids_by_minutes_1']), "wrong length")

        # Querying both minutes at once must give the same per-minute bins.
        id_stats = self.dL.getByTime(0, 2, binSize='minutes')
        self.assertEqual(1, len(id_stats[u'user_ids_by_minutes_1']), "wrong length")
        self.assertEqual(3, len(id_stats[u'user_ids_by_minutes_0']), "wrong length")


    def testGetDimensionListRangeByHours(self):
        """Check the per-hour bins returned by ``getByTime(binSize='hours')``.

        Ids 1, 2 and 3 are saved at second 59, id 1 at second 61 and id 4 at
        second 3000; the test expects four entries in ``user_ids_by_hours_0``.
        Ids are then added at second 4000 one at a time, and
        ``user_ids_by_hours_1`` is expected to grow from one to two entries
        when queried with ``renew=True``.
        """
        self.dL = DimensionListModel('user_ids', 'cassandra')

        samples = ((1, 59), (2, 59), (3, 59), (1, 61), (4, 3000))
        for dimension_id, timestamp in samples:
            self.dL.save(dimension_id=dimension_id, timestamp=timestamp)

        # assertEqual replaces the deprecated assertEquals alias.
        id_stats = self.dL.getByTime(0, 1, binSize='hours')
        self.assertEqual(4, len(id_stats[u'user_ids_by_hours_0']), "wrong length")

        # renew=True is passed on every query that follows a new save;
        # presumably it forces the bin to be recomputed -- TODO confirm.
        self.dL.save(dimension_id=4, timestamp=4000)
        id_stats = self.dL.getByTime(1, 2, binSize='hours', renew=True)
        self.assertEqual(1, len(id_stats[u'user_ids_by_hours_1']), "wrong length")

        self.dL.save(dimension_id=3, timestamp=4000)
        id_stats = self.dL.getByTime(1, 2, binSize='hours', renew=True)
        self.assertEqual(2, len(id_stats[u'user_ids_by_hours_1']), "wrong length")


    def testGetDimensionListRangeByDay(self):
        """Check how many bins ``getByTime(binSize='days')`` returns.

        Ids are saved at seconds 59, 61 and 3000, and ids 0-9 at each of the
        seconds 90000-90004 (past the 86400 s mark).  The range 0-2 is
        expected to yield two entries and the range 1-2 one entry.
        """
        self.dL = DimensionListModel('user_ids', 'cassandra')

        samples = ((1, 59), (2, 59), (3, 59), (1, 61), (4, 3000))
        for dimension_id, timestamp in samples:
            self.dL.save(dimension_id=dimension_id, timestamp=timestamp)

        # Second-resolution data for the second day.  The original assigned
        # 86401 first and immediately overwrote it with 90000; only 90000 was
        # ever used.
        timestamp_start = 90000
        time_stamp_range = 5
        id_range = 10
        self.save_test_data(timestamp_start, time_stamp_range, id_range)

        # assertEqual replaces the deprecated assertEquals alias.
        id_stats = self.dL.getByTime(0, 2, binSize='days')
        self.assertEqual(2, len(id_stats), "wrong length")

        id_stats = self.dL.getByTime(1, 2, binSize='days')
        self.assertEqual(1, len(id_stats), "wrong length")
        print(id_stats)

        # NOTE(review): checks on the length of u'user_ids_by_days_0' (4) and
        # u'user_ids_by_days_1' (id_range) were commented out in the original;
        # only the number of returned bins is asserted here.




    def testSetComputedIds(self):
        """Mark bin ranges as computed and check ``getComputedIds`` reports them.

        Uses ten minute bins, then five hour bins; the hour range is expected
        to be empty until ``setComputedIds`` has been called for it.
        """
        dimension = 'user_ids'
        model = DimensionListModel(dimension, mode = 'cassandra')
        first_bin = 0

        minute_bins = 10
        model.setComputedIds(dimension, first_bin, minute_bins, 'minutes')
        computed = model.getComputedIds(dimension, first_bin, minute_bins, 'minutes')
        print(computed)
        self.assertEqual(minute_bins, len(computed))

        # nothing is yet computed for hours
        hour_bins = 5
        computed = model.getComputedIds(dimension, first_bin, hour_bins, 'hours')
        self.assertEqual(0, len(computed))

        model.setComputedIds(dimension, first_bin, hour_bins, 'hours')
        computed = model.getComputedIds(dimension, first_bin, hour_bins, 'hours')
        print(computed)
        self.assertEqual(hour_bins, len(computed))




    def test_Binify_Minutes(self):
        """Check which second-level keys ``binify`` returns for minute bins.

        A random number (at least one) of user ids is saved for each of the
        seconds 58, 59 and 60.  ``binify('minutes', 0, 1)`` is expected to
        contain the keys for seconds 58 and 59 but not 60, and
        ``binify('minutes', 1, 2)`` the key for second 60 only.
        """
        self.dL = DimensionListModel('user_ids', mode='cassandra')
        binSize = 'minutes'

        start_seconds = 58
        end_seconds = 61
        for curr_timestamp in xrange(start_seconds, end_seconds):
            for user_id in xrange(int(uniform(1, 6))):
                self.dL.save(user_id, curr_timestamp)

        # The result was previously bound to a local named ``list``, which
        # shadowed the builtin and was never read.  The call itself is kept in
        # case reading the seconds range has side effects binify relies on --
        # TODO confirm whether it can be dropped.
        self.dL.getByTime(45, 75, 'seconds')

        binified = self.dL.binify(binSize, 0, 1)
        print(binified)
        self.assertIn(u'user_ids_by_seconds_58', binified)
        self.assertIn(u'user_ids_by_seconds_59', binified)
        self.assertNotIn(u'user_ids_by_seconds_60', binified)

        binified = self.dL.binify(binSize, 1, 2)
        self.assertNotIn(u'user_ids_by_seconds_58', binified)
        self.assertNotIn(u'user_ids_by_seconds_59', binified)
        self.assertIn(u'user_ids_by_seconds_60', binified)
        print(binified)

    def test_Binify_Hours(self):
        """Check which second-level keys ``binify`` returns for hour bins.

        Random user ids are saved for seconds 58-60 and for second 3600.  Hour
        bin 1-2 must contain the key for second 3600; hour bin 0-1 must contain
        the keys for seconds 58-60 and must not contain the one for 3600.
        """
        self.dL = DimensionListModel('user_ids', mode='cassandra')

        def store_random_users(first_second, stop_second):
            # Saves ids 0 .. k-1 for every second in [first_second, stop_second),
            # with k drawn anew per second as int(uniform(1, 6)).
            for second in xrange(first_second, stop_second):
                user_count = int(uniform(1, 6))
                for user_id in xrange(user_count):
                    self.dL.save(user_id, second)

        store_random_users(58, 61)
        store_random_users(3600, 3600 + 1)

        second_hour = self.dL.binify('hours', 1, 2)
        self.assertIn(u'user_ids_by_seconds_3600', second_hour)

        first_hour = self.dL.binify('hours', 0, 1)
        print(first_hour)
        expected_keys = (
            u'user_ids_by_seconds_58',
            u'user_ids_by_seconds_59',
            u'user_ids_by_seconds_60',
        )
        for key in expected_keys:
            self.assertIn(key, first_hour)
        self.assertNotIn(u'user_ids_by_seconds_3600', first_hour)



    def test_Binify_Days(self):
        """Check which second-level keys ``binify`` returns for day bins.

        Random user ids are saved for seconds 58-60 and for second 86400.  Day
        bin 0-1 must contain the keys for seconds 58-60 but not 86400, day bin
        1-2 must contain the key for 86400, and the range 0-2 must contain all
        four keys.
        """
        self.dL = DimensionListModel('user_ids', mode='cassandra')

        def store_random_users(first_second, stop_second):
            # Saves ids 0 .. k-1 for every second in [first_second, stop_second),
            # with k drawn anew per second as int(uniform(1, 6)).
            for second in xrange(first_second, stop_second):
                user_count = int(uniform(1, 6))
                for user_id in xrange(user_count):
                    self.dL.save(user_id, second)

        store_random_users(58, 61)
        store_random_users(86400, 86400 + 1)

        first_minute_keys = (
            u'user_ids_by_seconds_58',
            u'user_ids_by_seconds_59',
            u'user_ids_by_seconds_60',
        )

        first_day = self.dL.binify('days', 0, 1)
        print(first_day)
        for key in first_minute_keys:
            self.assertIn(key, first_day)
        self.assertNotIn(u'user_ids_by_seconds_86400', first_day)

        second_day = self.dL.binify('days', 1, 2)
        self.assertIn(u'user_ids_by_seconds_86400', second_day)

        both_days = self.dL.binify('days', 0, 2)
        print(both_days)
        for key in first_minute_keys:
            self.assertIn(key, both_days)
        self.assertIn(u'user_ids_by_seconds_86400', both_days)


    def _test_Binify_Performance(self):
        """Large-volume variant of the day-bin ``binify`` check.

        The leading underscore keeps unittest from collecting this method.  It
        saves random user ids for every second from 58 up to (excluding) 90000
        and then makes the same assertions on day bins 0-1 and 1-2 as the
        regular day test.
        """
        self.dL = DimensionListModel('user_ids', mode='cassandra')

        first_second = 58
        stop_second = 90000
        for second in xrange(first_second, stop_second):
            user_count = int(uniform(1, 6))
            for user_id in xrange(user_count):
                self.dL.save(user_id, second)

        first_day = self.dL.binify('days', 0, 1)
        print(first_day)
        expected_keys = (
            u'user_ids_by_seconds_58',
            u'user_ids_by_seconds_59',
            u'user_ids_by_seconds_60',
        )
        for key in expected_keys:
            self.assertIn(key, first_day)
        self.assertNotIn(u'user_ids_by_seconds_86400', first_day)

        second_day = self.dL.binify('days', 1, 2)
        self.assertIn(u'user_ids_by_seconds_86400', second_day)




# Run all tests in this module when the file is executed as a script.
if __name__ == "__main__":
    # To run a single test instead, set sys.argv before calling main(),
    # e.g.: import sys; sys.argv = ['', 'Test.testName']
    unittest.main()
Tech Fingerprint

Alerts (3)

'def' Ensure functions have docstrings for documentation
19 174
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
30