PageRenderTime 52ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/boto-2.5.2/tests/integration/s3/test_resumable_downloads.py

#
Python | 415 lines | 310 code | 19 blank | 86 comment | 10 complexity | 93f18db2b734d12d797a718dc8d21461 MD5 | raw file
  1. #!/usr/bin/env python
  2. #
  3. # Copyright 2010 Google Inc.
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a
  6. # copy of this software and associated documentation files (the
  7. # "Software"), to deal in the Software without restriction, including
  8. # without limitation the rights to use, copy, modify, merge, publish, dis-
  9. # tribute, sublicense, and/or sell copies of the Software, and to permit
  10. # persons to whom the Software is furnished to do so, subject to the fol-
  11. # lowing conditions:
  12. #
  13. # The above copyright notice and this permission notice shall be included
  14. # in all copies or substantial portions of the Software.
  15. #
  16. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17. # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
  18. # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
  19. # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  20. # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  22. # IN THE SOFTWARE.
  23. """
  24. Tests of resumable downloads.
  25. """
  26. import errno
  27. import getopt
  28. import os
  29. import random
  30. import re
  31. import shutil
  32. import socket
  33. import StringIO
  34. import sys
  35. import tempfile
  36. import time
  37. import unittest
  38. import boto
  39. from boto import storage_uri
  40. from boto.s3.resumable_download_handler import get_cur_file_size
  41. from boto.s3.resumable_download_handler import ResumableDownloadHandler
  42. from boto.exception import ResumableTransferDisposition
  43. from boto.exception import ResumableDownloadException
  44. from boto.exception import StorageResponseError
  45. from cb_test_harnass import CallbackTestHarnass
  46. # We don't use the OAuth2 authentication plugin directly; importing it here
  47. # ensures that it's loaded and available by default.
  48. try:
  49. from oauth2_plugin import oauth2_plugin
  50. except ImportError:
  51. # Do nothing - if user doesn't have OAuth2 configured it doesn't matter;
  52. # and if they do, the tests will fail (as they should in that case).
  53. pass
  54. class ResumableDownloadTests(unittest.TestCase):
  55. """
  56. Resumable download test suite.
  57. """
  58. gs = True
  def get_suite_description(self):
      """
      Returns the human-readable name of this test suite.
      """
      description = 'Resumable download test suite'
      return description
  61. @staticmethod
  62. def resilient_close(key):
  63. try:
  64. key.close()
  65. except StorageResponseError, e:
  66. pass
  def build_input_object(self, obj_name, size):
      """
      Creates an object named obj_name under the source bucket URI whose
      contents are size random decimal digits.

      Returns a (string_data, key) tuple: the generated contents and the
      key they were written to.
      """
      digits = [str(random.randint(0, 9)) for _ in range(size)]
      string_data = ''.join(digits)
      object_uri = self.src_bucket_uri.clone_replace_name(obj_name)
      key = object_uri.new_key(validate=False)
      contents_fp = StringIO.StringIO(string_data)
      key.set_contents_from_file(contents_fp)
      return (string_data, key)
  def setUp(self):
      """
      Builds the fixtures for one test: a fresh bucket, three source
      objects (empty, small and larger), a temporary directory, and an
      open destination file for the download.
      """
      # Create the test bucket; its name combines the short host name,
      # the process id and the current time.
      short_hostname = socket.gethostname().split('.')[0]
      uri_base_str = 'gs://res-download-test-%s-%s-%s' % (
          short_hostname, os.getpid(), int(time.time()))
      bucket_uri_str = '%s-dst' % uri_base_str
      self.src_bucket_uri = storage_uri(bucket_uri_str)
      self.src_bucket_uri.create_bucket()

      # Create test source objects.
      self.empty_src_key_size = 0
      empty_obj = self.build_input_object('empty', self.empty_src_key_size)
      self.empty_src_key_as_string, self.empty_src_key = empty_obj

      self.small_src_key_size = 2 * 1024  # 2 KB.
      small_obj = self.build_input_object('small', self.small_src_key_size)
      self.small_src_key_as_string, self.small_src_key = small_obj

      self.larger_src_key_size = 500 * 1024  # 500 KB.
      larger_obj = self.build_input_object('larger',
                                           self.larger_src_key_size)
      self.larger_src_key_as_string, self.larger_src_key = larger_obj

      # Use a designated tmpdir prefix to make it easy to find the end of
      # the tmp path.
      self.tmpdir_prefix = 'tmp_resumable_download_test'

      # Create temp dir and the file names used inside it.
      self.tmp_dir = tempfile.mkdtemp(prefix=self.tmpdir_prefix)
      self.dst_file_name = '%s%sdst_file' % (self.tmp_dir, os.sep)
      self.tracker_file_name = '%s%stracker' % (self.tmp_dir, os.sep)

      # Create file-like object for destination of each download test.
      self.dst_fp = open(self.dst_file_name, 'w')

      # Marker that tearDown checks before attempting any cleanup.
      self.created_test_data = True
  def tearDown(self):
      """
      Releases everything created by setUp: closes the destination file,
      closes and deletes the test objects, deletes the test bucket, and
      removes the temporary directory.

      Keys are closed before deletion in case they were read incompletely
      (which would leave partial buffers of data for subsequent tests to
      trip over). Does nothing if setUp did not run to completion.
      """
      if not hasattr(self, 'created_test_data'):
          return
      try:
          # Close the destination file opened by setUp so its handle is not
          # leaked and its directory can be removed below.
          self.dst_fp.close()

          test_keys = (self.empty_src_key, self.small_src_key,
                       self.larger_src_key)
          # Close test objects.
          for key in test_keys:
              self.resilient_close(key)
          # Delete test objects.
          for key in test_keys:
              key.delete()

          # Retry (for up to 2 minutes) until the bucket gets deleted (it
          # may not be the first time round, due to eventual consistency of
          # bucket delete operations).
          for _ in range(60):
              try:
                  self.src_bucket_uri.delete_bucket()
                  break
              except StorageResponseError:
                  print('Test bucket (%s) not yet deleted, still trying' % (
                      self.src_bucket_uri.uri))
                  time.sleep(2)
      finally:
          # Always remove the temp dir, even if remote cleanup failed. No
          # replacement directory is created: nothing uses it after
          # tearDown, so creating one would leave a stray directory behind
          # for every test.
          shutil.rmtree(self.tmp_dir)
  def test_non_resumable_download(self):
      """
      Tests a plain download (no resumable download handler) of the small
      object
      """
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp)
      # The destination file must have the size of the source object.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      # The object's contents must match the data generated in setUp.
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
  def test_download_without_persistent_tracker(self):
      """
      Tests one resumable download whose handler is given no tracker file
      """
      handler = ResumableDownloadHandler()
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp,
                                   res_download_handler=handler)
      # The destination file must have the size of the source object.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      # The object's contents must match the data generated in setUp.
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
  def test_failed_download_with_persistent_tracker(self):
      """
      Tests that failed resumable download leaves a correct tracker file
      """
      harnass = CallbackTestHarnass()
      res_download_handler = ResumableDownloadHandler(
          tracker_file_name=self.tracker_file_name, num_retries=0)
      try:
          self.small_src_key.get_contents_to_file(
              self.dst_fp, cb=harnass.call,
              res_download_handler=res_download_handler)
          self.fail('Did not get expected ResumableDownloadException')
      except ResumableDownloadException as e:
          # We'll get a ResumableDownloadException at this point because
          # of CallbackTestHarnass (above). Check that the tracker file was
          # created correctly.
          self.assertEqual(e.disposition,
                           ResumableTransferDisposition.ABORT_CUR_PROCESS)
          self.assertTrue(os.path.exists(self.tracker_file_name))
          f = open(self.tracker_file_name)
          try:
              etag_line = f.readline()
          finally:
              # Close the tracker file even if reading it fails.
              f.close()
          # The first line of the tracker file must look like an etag.
          m = re.search(ResumableDownloadHandler.ETAG_REGEX, etag_line)
          self.assertTrue(m)
  def test_retryable_exception_recovery(self):
      """
      Tests that a download recovers from a retryable exception
      """
      # Use the first entry of RETRYABLE_EXCEPTIONS as the exception handed
      # to the test harnass.
      retryable = ResumableDownloadHandler.RETRYABLE_EXCEPTIONS[0]
      harnass = CallbackTestHarnass(exception=retryable)
      handler = ResumableDownloadHandler(num_retries=1)
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp, cb=harnass.call,
                                   res_download_handler=handler)
      # Ensure downloaded object has correct content.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
  def test_broken_pipe_recovery(self):
      """
      Tests that a download recovers from a Broken Pipe error (which
      interacts with an httplib bug)
      """
      broken_pipe = IOError(errno.EPIPE, "Broken pipe")
      harnass = CallbackTestHarnass(exception=broken_pipe)
      handler = ResumableDownloadHandler(num_retries=1)
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp, cb=harnass.call,
                                   res_download_handler=handler)
      # Ensure downloaded object has correct content.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
  def test_non_retryable_exception_handling(self):
      """
      Tests resumable download that fails with a non-retryable exception
      """
      harnass = CallbackTestHarnass(
          exception=OSError(errno.EACCES, 'Permission denied'))
      res_download_handler = ResumableDownloadHandler(num_retries=1)
      try:
          self.small_src_key.get_contents_to_file(
              self.dst_fp, cb=harnass.call,
              res_download_handler=res_download_handler)
          self.fail('Did not get expected OSError')
      except OSError as e:
          # Ensure the error was re-raised. Compare against the same
          # symbolic errno the exception was built with rather than a
          # hard-coded number.
          self.assertEqual(e.errno, errno.EACCES)
  def test_failed_and_restarted_download_with_persistent_tracker(self):
      """
      Tests a resumable download, run with a tracker file and one retry,
      that fails once and then completes
      """
      harnass = CallbackTestHarnass()
      handler = ResumableDownloadHandler(
          tracker_file_name=self.tracker_file_name, num_retries=1)
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp, cb=harnass.call,
                                   res_download_handler=handler)
      # Ensure downloaded object has correct content.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
      # Ensure tracker file deleted.
      tracker_exists = os.path.exists(self.tracker_file_name)
      self.assertFalse(tracker_exists)
  def test_multiple_in_process_failures_then_succeed(self):
      """
      Tests resumable download that fails twice in one process, then completes
      """
      # Inject two failures through the callback harnass; without it the
      # download would simply succeed and the retry path described above
      # would never be exercised.
      harnass = CallbackTestHarnass(num_times_to_fail=2)
      # Allow more retries than injected failures so the download can
      # still complete within this one handler.
      res_download_handler = ResumableDownloadHandler(num_retries=3)
      self.small_src_key.get_contents_to_file(
          self.dst_fp, cb=harnass.call,
          res_download_handler=res_download_handler)
      # Ensure downloaded object has correct content.
      self.assertEqual(self.small_src_key_size,
                       get_cur_file_size(self.dst_fp))
      self.assertEqual(self.small_src_key_as_string,
                       self.small_src_key.get_contents_as_string())
  def test_multiple_in_process_failures_then_succeed_with_tracker_file(self):
      """
      Tests resumable download that fails completely in one process,
      then when restarted completes, using a tracker file
      """
      # Set up test harnass that causes more failures than a single
      # ResumableDownloadHandler instance will handle, writing enough data
      # before the first failure that some of it survives that process run.
      # Floor division keeps the byte threshold an integer.
      harnass = CallbackTestHarnass(
          fail_after_n_bytes=self.larger_src_key_size // 2,
          num_times_to_fail=2)
      res_download_handler = ResumableDownloadHandler(
          tracker_file_name=self.tracker_file_name, num_retries=0)
      try:
          self.larger_src_key.get_contents_to_file(
              self.dst_fp, cb=harnass.call,
              res_download_handler=res_download_handler)
          self.fail('Did not get expected ResumableDownloadException')
      except ResumableDownloadException as e:
          self.assertEqual(e.disposition,
                           ResumableTransferDisposition.ABORT_CUR_PROCESS)
          # Ensure a tracker file survived.
          self.assertTrue(os.path.exists(self.tracker_file_name))
      # Try it one more time; this time should succeed.
      self.larger_src_key.get_contents_to_file(
          self.dst_fp, cb=harnass.call,
          res_download_handler=res_download_handler)
      self.assertEqual(self.larger_src_key_size,
                       get_cur_file_size(self.dst_fp))
      self.assertEqual(self.larger_src_key_as_string,
                       self.larger_src_key.get_contents_as_string())
      self.assertFalse(os.path.exists(self.tracker_file_name))
      # Ensure some of the file was downloaded both before and after
      # failure. Asserted separately so a failure shows which half of the
      # transfer was missing.
      self.assertTrue(
          len(harnass.transferred_seq_before_first_failure) > 1)
      self.assertTrue(
          len(harnass.transferred_seq_after_first_failure) > 1)
  def test_download_with_inital_partial_download_before_failure(self):
      """
      Tests a resumable download that transfers part of the larger object
      before failing, then is retried and completes
      """
      # Have the harnass fail the download after half of the object's
      # bytes, so something has been downloaded before the retry.
      failure_threshold = self.larger_src_key_size/2
      harnass = CallbackTestHarnass(fail_after_n_bytes=failure_threshold)
      handler = ResumableDownloadHandler(num_retries=1)
      src_key = self.larger_src_key
      src_key.get_contents_to_file(self.dst_fp, cb=harnass.call,
                                   res_download_handler=handler)
      # Ensure downloaded object has correct content.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.larger_src_key_size, downloaded_size)
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.larger_src_key_as_string, remote_contents)
      # Ensure some of the file was downloaded both before and after failure.
      progressed_on_both_sides = (
          len(harnass.transferred_seq_before_first_failure) > 1 and
          len(harnass.transferred_seq_after_first_failure) > 1)
      self.assertTrue(progressed_on_both_sides)
  def test_zero_length_object_download(self):
      """
      Tests a resumable download of the empty object (exercises boundary
      conditions).
      """
      handler = ResumableDownloadHandler()
      self.empty_src_key.get_contents_to_file(
          self.dst_fp, res_download_handler=handler)
      # Nothing should have been written to the destination file.
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(0, downloaded_size)
  def test_download_with_invalid_tracker_etag(self):
      """
      Tests a resumable download whose tracker file holds an invalid etag
      """
      # Write a tracker file whose content is not a valid etag.
      invalid_etag_tracker_file_name = (
          '%s%sinvalid_etag_tracker' % (self.tmp_dir, os.sep))
      tracker_fp = open(invalid_etag_tracker_file_name, 'w')
      tracker_fp.write('3.14159\n')
      tracker_fp.close()
      handler = ResumableDownloadHandler(
          tracker_file_name=invalid_etag_tracker_file_name)
      # An error should be printed about the invalid tracker, but then the
      # download should run successfully.
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp,
                                   res_download_handler=handler)
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
  def test_download_with_inconsistent_etag_in_tracker(self):
      """
      Tests a resumable download whose tracker file holds an etag that
      differs from the source object's etag
      """
      inconsistent_etag_tracker_file_name = (
          '%s%sinconsistent_etag_tracker' % (self.tmp_dir, os.sep))
      tracker_fp = open(inconsistent_etag_tracker_file_name, 'w')
      # Take the real etag (surrounding quotes stripped) and reverse its
      # characters to get the value written to the tracker file.
      good_etag = self.small_src_key.etag.strip('"\'')
      reversed_etag = ''.join(reversed(good_etag))
      tracker_fp.write('%s\n' % reversed_etag)
      tracker_fp.close()
      handler = ResumableDownloadHandler(
          tracker_file_name=inconsistent_etag_tracker_file_name)
      # An error should be printed about the expired tracker, but then the
      # download should run successfully.
      src_key = self.small_src_key
      src_key.get_contents_to_file(self.dst_fp,
                                   res_download_handler=handler)
      downloaded_size = get_cur_file_size(self.dst_fp)
      self.assertEqual(self.small_src_key_size, downloaded_size)
      remote_contents = src_key.get_contents_as_string()
      self.assertEqual(self.small_src_key_as_string, remote_contents)
  def test_download_with_unwritable_tracker_file(self):
      """
      Tests resumable download with an unwritable tracker file
      """
      # Make dir where tracker_file lives temporarily unwritable.
      save_mod = os.stat(self.tmp_dir).st_mode
      try:
          os.chmod(self.tmp_dir, 0)
          # Only construction of the handler is exercised, so the instance
          # is not kept.
          # NOTE(review): if the constructor does not raise, none of the
          # assertions below run and the test passes vacuously - confirm
          # whether a download attempt should also be made here.
          ResumableDownloadHandler(
              tracker_file_name=self.tracker_file_name)
      except ResumableDownloadException as e:
          self.assertEqual(e.disposition, ResumableTransferDisposition.ABORT)
          self.assertNotEqual(
              e.message.find('Couldn\'t write URI tracker file'), -1)
      finally:
          # Restore original protection of dir where tracker_file lives.
          os.chmod(self.tmp_dir, save_mod)