/miscellaneous/old_code/encodingtest.py

https://github.com/wehriam/awspider
Python | 169 lines | 106 code | 30 blank | 33 comment | 10 complexity | 3feeedb6aeace595869adb4849ac8aff MD5 | raw file
  1. from twisted.internet.defer import Deferred, DeferredList
  2. from twisted.trial import unittest
  3. from awspider import AWSpider
  4. from awspider.aws import AmazonS3, AmazonSDB
  5. import yaml
  6. import hashlib
  7. import os
  8. import glob
  9. import urllib
  10. import simplejson
  11. import chardet
  12. import difflib
  13. def load_data( filename ):
  14. s = open( filename ).read()
  15. try:
  16. encoding = chardet.detect( s )['encoding']
  17. s = unicode(s, encoding )
  18. return s
  19. except:
  20. pass
  21. try:
  22. s = unicode(s)
  23. return s
  24. except:
  25. pass
  26. return u"Doh"
  27. class EncodingTestCase(unittest.TestCase):
  28. def setUp(self):
  29. self.filenames = glob.glob( os.path.join(os.path.dirname(__file__), 'data', '**', '*.xml') )
  30. config_path = os.path.abspath( os.path.join( os.path.dirname(__file__), "config.yaml" ) )
  31. if not os.path.isfile( config_path ):
  32. self.raiseConfigException( config_path )
  33. config = yaml.load( open( config_path, 'r').read() )
  34. if not "aws_access_key_id" in config or "aws_secret_access_key" not in config:
  35. self.raiseConfigException( config_path )
  36. self.uuid = hashlib.sha256( config["aws_access_key_id"] + config["aws_secret_access_key"] + self.__class__.__name__ ).hexdigest()
  37. self.aws_access_key_id = config["aws_access_key_id"]
  38. self.aws_secret_access_key = config["aws_secret_access_key"]
  39. self.aws_s3_cache_bucket = "%s_cache" % self.uuid
  40. self.aws_s3_storage_bucket = "%s_storage" % self.uuid
  41. self.aws_sdb_reservation_domain = "%s_reservation" % self.uuid
  42. #self.aws_sdb_coordination_domain = "%s_coordination" % self.uuid
  43. self.spider = AWSpider(
  44. aws_access_key_id = self.aws_access_key_id,
  45. aws_secret_access_key = self.aws_secret_access_key,
  46. aws_s3_cache_bucket = self.aws_s3_cache_bucket,
  47. aws_s3_storage_bucket = self.aws_s3_storage_bucket,
  48. aws_sdb_reservation_domain = self.aws_sdb_reservation_domain,
  49. #aws_sdb_coordination_domain = self.aws_sdb_coordination_domain,
  50. port = 5000,
  51. log_level="debug" )
  52. self.s3 = AmazonS3( config["aws_access_key_id"], config["aws_secret_access_key"])
  53. self.spider.expose( load_data )
  54. self.spider.expose( load_data, interval=60*60*24, name="load_data_stored" )
  55. return self.spider.start()
  56. def tearDown(self):
  57. deferreds = []
  58. deferreds.append(self.spider.shutdown())
  59. d = DeferredList(deferreds)
  60. d.addCallback(self._tearDownCallback)
  61. return d
  62. def _tearDownCallback(self, data):
  63. self.s3 = AmazonS3(self.aws_access_key_id, self.aws_secret_access_key)
  64. self.sdb = AmazonSDB(self.aws_access_key_id, self.aws_secret_access_key)
  65. deferreds = []
  66. deferreds.append(self.spider.pg.clearCache())
  67. deferreds.append(self.spider.clearStorage())
  68. deferreds.append(self.sdb.deleteDomain(self.aws_sdb_reservation_domain))
  69. #deferreds.append(self.sdb.deleteDomain(self.aws_sdb_coordination_domain))
  70. d = DeferredList(deferreds)
  71. d.addCallback( self._tearDownCallback2 )
  72. return d
  73. def _tearDownCallback2( self, data ):
  74. deferreds = []
  75. deferreds.append(self.s3.deleteBucket(self.aws_s3_cache_bucket))
  76. deferreds.append(self.s3.deleteBucket(self.aws_s3_storage_bucket))
  77. d = DeferredList(deferreds)
  78. return d
  79. # def testImmediateReturn(self):
  80. # deferreds = []
  81. # for filename in self.filenames[0:5]:
  82. # d = self.spider.rq.getPage("http://127.0.0.1:5000/function/load_data", method="POST", postdata={"filename":filename})
  83. # d.addCallback( self._processRequestCallback, filename )
  84. # deferreds.append(d)
  85. # d = DeferredList(deferreds, consumeErrors=True )
  86. # d.addCallback(self._testImmediateReturnCallback)
  87. # d.addErrback( self._testImmediateReturnErrback )
  88. # return d
  89. #
  90. # def _processRequestCallback(self, data, filename):
  91. # processed_data = simplejson.loads( data["response"] )
  92. # original_data = load_data( filename )
  93. # if processed_data == original_data:
  94. # #print "%s passed comparison test." % filename
  95. # return True
  96. # print "%s failed comparison test." % filename
  97. # return False
  98. #
  99. # def _testImmediateReturnCallback(self, data):
  100. # for row in data:
  101. # if row[0] == False:
  102. # raise row[1]
  103. # else:
  104. # self.failUnlessEqual( row[1], True )
  105. #
  106. # def _testImmediateReturnErrback(self, error):
  107. # return error
  108. def testStoredReturn(self):
  109. deferreds = []
  110. for filename in self.filenames:
  111. d = self.spider.rq.getPage("http://127.0.0.1:5000/function/load_data_stored", method="POST", postdata={"filename":filename})
  112. d.addCallback( self._processStoredRequestCallback, filename )
  113. deferreds.append(d)
  114. d = DeferredList(deferreds, consumeErrors=True )
  115. d.addCallback(self._testStoredReturnCallback)
  116. d.addErrback( self._testStoredReturnErrback )
  117. return d
  118. def _processStoredRequestCallback(self, data, filename):
  119. processed_data = simplejson.loads( data["response"] )
  120. uuid = processed_data.keys()[0]
  121. d = self.spider.rq.getPage("http://127.0.0.1:5000/data/get?uuid=%s" % uuid)
  122. d.addCallback( self._processStoredRequestCallback2, filename )
  123. return d
  124. def _processStoredRequestCallback2(self, data, filename):
  125. processed_data = simplejson.loads( data["response"] )
  126. original_data = load_data( filename )
  127. if processed_data == original_data:
  128. #print "%s passed comparison test." % filename
  129. return True
  130. print "%s failed comparison test." % filename
  131. return False
  132. def _testStoredReturnCallback(self, data):
  133. for row in data:
  134. if row[0] == False:
  135. raise row[1]
  136. else:
  137. self.failUnlessEqual( row[1], True )
  138. def _testStoredReturnErrback(self, error):
  139. return error