PageRenderTime 50ms CodeModel.GetById 11ms app.highlight 34ms RepoModel.GetById 1ms app.codeStats 1ms

/boto-2.5.2/tests/integration/s3/test_resumable_downloads.py

#
Python | 415 lines | 310 code | 19 blank | 86 comment | 6 complexity | 93f18db2b734d12d797a718dc8d21461 MD5 | raw file
  1#!/usr/bin/env python
  2#
  3# Copyright 2010 Google Inc.
  4#
  5# Permission is hereby granted, free of charge, to any person obtaining a
  6# copy of this software and associated documentation files (the
  7# "Software"), to deal in the Software without restriction, including
  8# without limitation the rights to use, copy, modify, merge, publish, dis-
  9# tribute, sublicense, and/or sell copies of the Software, and to permit
 10# persons to whom the Software is furnished to do so, subject to the fol-
 11# lowing conditions:
 12#
 13# The above copyright notice and this permission notice shall be included
 14# in all copies or substantial portions of the Software.
 15#
 16# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 17# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
 18# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
 19# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 20# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 22# IN THE SOFTWARE.
 23
 24"""
 25Tests of resumable downloads.
 26"""
 27
 28import errno
 29import getopt
 30import os
 31import random
 32import re
 33import shutil
 34import socket
 35import StringIO
 36import sys
 37import tempfile
 38import time
 39import unittest
 40
 41import boto
 42from boto import storage_uri
 43from boto.s3.resumable_download_handler import get_cur_file_size
 44from boto.s3.resumable_download_handler import ResumableDownloadHandler
 45from boto.exception import ResumableTransferDisposition
 46from boto.exception import ResumableDownloadException
 47from boto.exception import StorageResponseError
 48from cb_test_harnass import CallbackTestHarnass
 49
 50# We don't use the OAuth2 authentication plugin directly; importing it here
 51# ensures that it's loaded and available by default.
 52try:
 53  from oauth2_plugin import oauth2_plugin
 54except ImportError:
 55  # Do nothing - if user doesn't have OAuth2 configured it doesn't matter;
 56  # and if they do, the tests will fail (as they should in that case).
 57  pass
 58
 59
 60class ResumableDownloadTests(unittest.TestCase):
 61    """
 62    Resumable download test suite.
 63    """
 64    gs = True
 65
 66    def get_suite_description(self):
 67        return 'Resumable download test suite'
 68
 69    @staticmethod
 70    def resilient_close(key):
 71        try:
 72            key.close()
 73        except StorageResponseError, e:
 74            pass
 75
 76    def build_input_object(self, obj_name, size):
 77        buf = []
 78        for i in range(size):
 79            buf.append(str(random.randint(0, 9)))
 80        string_data = ''.join(buf)
 81        uri = self.src_bucket_uri.clone_replace_name(obj_name)
 82        key = uri.new_key(validate=False)
 83        key.set_contents_from_file(StringIO.StringIO(string_data))
 84        return (string_data, key)
 85
 86    def setUp(self):
 87        """
 88        Initializes for each test.
 89        """
 90        # Create the test bucket.
 91        hostname = socket.gethostname().split('.')[0]
 92        uri_base_str = 'gs://res-download-test-%s-%s-%s' % (
 93            hostname, os.getpid(), int(time.time()))
 94        self.src_bucket_uri = storage_uri('%s-dst' % uri_base_str)
 95        self.src_bucket_uri.create_bucket()
 96
 97        # Create test source objects.
 98        self.empty_src_key_size = 0
 99        (self.empty_src_key_as_string, self.empty_src_key) = (
100            self.build_input_object('empty', self.empty_src_key_size))
101        self.small_src_key_size = 2 * 1024  # 2 KB.
102        (self.small_src_key_as_string, self.small_src_key) = (
103            self.build_input_object('small', self.small_src_key_size))
104        self.larger_src_key_size = 500 * 1024  # 500 KB.
105        (self.larger_src_key_as_string, self.larger_src_key) = (
106            self.build_input_object('larger', self.larger_src_key_size))
107
108        # Use a designated tmpdir prefix to make it easy to find the end of
109        # the tmp path.
110        self.tmpdir_prefix = 'tmp_resumable_download_test'
111
112        # Create temp dir and name for download file.
113        self.tmp_dir = tempfile.mkdtemp(prefix=self.tmpdir_prefix)
114        self.dst_file_name = '%s%sdst_file' % (self.tmp_dir, os.sep)
115
116        self.tracker_file_name = '%s%stracker' % (self.tmp_dir, os.sep)
117
118        # Create file-like object for detination of each download test.
119        self.dst_fp = open(self.dst_file_name, 'w')
120        self.created_test_data = True
121
122    def tearDown(self):
123        """
124        Deletes test objects and bucket and tmp dir created by set_up_class,
125        and closes any keys in case they were read incompletely (which would
126        leave partial buffers of data for subsequent tests to trip over).
127        """
128        if not hasattr(self, 'created_test_data'):
129            return
130        # Recursively delete dst dir and then re-create it, so in effect we
131        # remove all dirs and files under that directory.
132        shutil.rmtree(self.tmp_dir)
133        os.mkdir(self.tmp_dir)
134
135        # Close test objects.
136        self.resilient_close(self.empty_src_key)
137        self.resilient_close(self.small_src_key)
138        self.resilient_close(self.larger_src_key)
139
140        # Delete test objects.
141        self.empty_src_key.delete()
142        self.small_src_key.delete()
143        self.larger_src_key.delete()
144
145        # Retry (for up to 2 minutes) the bucket gets deleted (it may not
146        # the first time round, due to eventual consistency of bucket delete
147        # operations).
148        for i in range(60):
149            try:
150                self.src_bucket_uri.delete_bucket()
151                break
152            except StorageResponseError:
153                print 'Test bucket (%s) not yet deleted, still trying' % (
154                    self.src_bucket_uri.uri)
155                time.sleep(2)
156        shutil.rmtree(self.tmp_dir)
157        self.tmp_dir = tempfile.mkdtemp(prefix=self.tmpdir_prefix)
158
159    def test_non_resumable_download(self):
160        """
161        Tests that non-resumable downloads work
162        """
163        self.small_src_key.get_contents_to_file(self.dst_fp)
164        self.assertEqual(self.small_src_key_size,
165                         get_cur_file_size(self.dst_fp))
166        self.assertEqual(self.small_src_key_as_string,
167                         self.small_src_key.get_contents_as_string())
168
169    def test_download_without_persistent_tracker(self):
170        """
171        Tests a single resumable download, with no tracker persistence
172        """
173        res_download_handler = ResumableDownloadHandler()
174        self.small_src_key.get_contents_to_file(
175            self.dst_fp, res_download_handler=res_download_handler)
176        self.assertEqual(self.small_src_key_size,
177                         get_cur_file_size(self.dst_fp))
178        self.assertEqual(self.small_src_key_as_string,
179                         self.small_src_key.get_contents_as_string())
180
181    def test_failed_download_with_persistent_tracker(self):
182        """
183        Tests that failed resumable download leaves a correct tracker file
184        """
185        harnass = CallbackTestHarnass()
186        res_download_handler = ResumableDownloadHandler(
187            tracker_file_name=self.tracker_file_name, num_retries=0)
188        try:
189            self.small_src_key.get_contents_to_file(
190                self.dst_fp, cb=harnass.call,
191                res_download_handler=res_download_handler)
192            self.fail('Did not get expected ResumableDownloadException')
193        except ResumableDownloadException, e:
194            # We'll get a ResumableDownloadException at this point because
195            # of CallbackTestHarnass (above). Check that the tracker file was
196            # created correctly.
197            self.assertEqual(e.disposition,
198                             ResumableTransferDisposition.ABORT_CUR_PROCESS)
199            self.assertTrue(os.path.exists(self.tracker_file_name))
200            f = open(self.tracker_file_name)
201            etag_line = f.readline()
202            m = re.search(ResumableDownloadHandler.ETAG_REGEX, etag_line)
203            f.close()
204            self.assertTrue(m)
205
206    def test_retryable_exception_recovery(self):
207        """
208        Tests handling of a retryable exception
209        """
210        # Test one of the RETRYABLE_EXCEPTIONS.
211        exception = ResumableDownloadHandler.RETRYABLE_EXCEPTIONS[0]
212        harnass = CallbackTestHarnass(exception=exception)
213        res_download_handler = ResumableDownloadHandler(num_retries=1)
214        self.small_src_key.get_contents_to_file(
215            self.dst_fp, cb=harnass.call,
216            res_download_handler=res_download_handler)
217        # Ensure downloaded object has correct content.
218        self.assertEqual(self.small_src_key_size,
219                         get_cur_file_size(self.dst_fp))
220        self.assertEqual(self.small_src_key_as_string,
221                         self.small_src_key.get_contents_as_string())
222
223    def test_broken_pipe_recovery(self):
224        """
225        Tests handling of a Broken Pipe (which interacts with an httplib bug)
226        """
227        exception = IOError(errno.EPIPE, "Broken pipe")
228        harnass = CallbackTestHarnass(exception=exception)
229        res_download_handler = ResumableDownloadHandler(num_retries=1)
230        self.small_src_key.get_contents_to_file(
231            self.dst_fp, cb=harnass.call,
232            res_download_handler=res_download_handler)
233        # Ensure downloaded object has correct content.
234        self.assertEqual(self.small_src_key_size,
235                         get_cur_file_size(self.dst_fp))
236        self.assertEqual(self.small_src_key_as_string,
237                         self.small_src_key.get_contents_as_string())
238
239    def test_non_retryable_exception_handling(self):
240        """
241        Tests resumable download that fails with a non-retryable exception
242        """
243        harnass = CallbackTestHarnass(
244            exception=OSError(errno.EACCES, 'Permission denied'))
245        res_download_handler = ResumableDownloadHandler(num_retries=1)
246        try:
247            self.small_src_key.get_contents_to_file(
248                self.dst_fp, cb=harnass.call,
249                res_download_handler=res_download_handler)
250            self.fail('Did not get expected OSError')
251        except OSError, e:
252            # Ensure the error was re-raised.
253            self.assertEqual(e.errno, 13)
254
255    def test_failed_and_restarted_download_with_persistent_tracker(self):
256        """
257        Tests resumable download that fails once and then completes,
258        with tracker file
259        """
260        harnass = CallbackTestHarnass()
261        res_download_handler = ResumableDownloadHandler(
262            tracker_file_name=self.tracker_file_name, num_retries=1)
263        self.small_src_key.get_contents_to_file(
264            self.dst_fp, cb=harnass.call,
265            res_download_handler=res_download_handler)
266        # Ensure downloaded object has correct content.
267        self.assertEqual(self.small_src_key_size,
268                         get_cur_file_size(self.dst_fp))
269        self.assertEqual(self.small_src_key_as_string,
270                         self.small_src_key.get_contents_as_string())
271        # Ensure tracker file deleted.
272        self.assertFalse(os.path.exists(self.tracker_file_name))
273
274    def test_multiple_in_process_failures_then_succeed(self):
275        """
276        Tests resumable download that fails twice in one process, then completes
277        """
278        res_download_handler = ResumableDownloadHandler(num_retries=3)
279        self.small_src_key.get_contents_to_file(
280            self.dst_fp, res_download_handler=res_download_handler)
281        # Ensure downloaded object has correct content.
282        self.assertEqual(self.small_src_key_size,
283                         get_cur_file_size(self.dst_fp))
284        self.assertEqual(self.small_src_key_as_string,
285                         self.small_src_key.get_contents_as_string())
286
287    def test_multiple_in_process_failures_then_succeed_with_tracker_file(self):
288        """
289        Tests resumable download that fails completely in one process,
290        then when restarted completes, using a tracker file
291        """
292        # Set up test harnass that causes more failures than a single
293        # ResumableDownloadHandler instance will handle, writing enough data
294        # before the first failure that some of it survives that process run.
295        harnass = CallbackTestHarnass(
296            fail_after_n_bytes=self.larger_src_key_size/2, num_times_to_fail=2)
297        res_download_handler = ResumableDownloadHandler(
298            tracker_file_name=self.tracker_file_name, num_retries=0)
299        try:
300            self.larger_src_key.get_contents_to_file(
301                self.dst_fp, cb=harnass.call,
302                res_download_handler=res_download_handler)
303            self.fail('Did not get expected ResumableDownloadException')
304        except ResumableDownloadException, e:
305            self.assertEqual(e.disposition,
306                             ResumableTransferDisposition.ABORT_CUR_PROCESS)
307            # Ensure a tracker file survived.
308            self.assertTrue(os.path.exists(self.tracker_file_name))
309        # Try it one more time; this time should succeed.
310        self.larger_src_key.get_contents_to_file(
311            self.dst_fp, cb=harnass.call,
312            res_download_handler=res_download_handler)
313        self.assertEqual(self.larger_src_key_size,
314                         get_cur_file_size(self.dst_fp))
315        self.assertEqual(self.larger_src_key_as_string,
316                         self.larger_src_key.get_contents_as_string())
317        self.assertFalse(os.path.exists(self.tracker_file_name))
318        # Ensure some of the file was downloaded both before and after failure.
319        self.assertTrue(
320            len(harnass.transferred_seq_before_first_failure) > 1 and
321            len(harnass.transferred_seq_after_first_failure) > 1)
322
323    def test_download_with_inital_partial_download_before_failure(self):
324        """
325        Tests resumable download that successfully downloads some content
326        before it fails, then restarts and completes
327        """
328        # Set up harnass to fail download after several hundred KB so download
329        # server will have saved something before we retry.
330        harnass = CallbackTestHarnass(
331            fail_after_n_bytes=self.larger_src_key_size/2)
332        res_download_handler = ResumableDownloadHandler(num_retries=1)
333        self.larger_src_key.get_contents_to_file(
334            self.dst_fp, cb=harnass.call,
335            res_download_handler=res_download_handler)
336        # Ensure downloaded object has correct content.
337        self.assertEqual(self.larger_src_key_size,
338                         get_cur_file_size(self.dst_fp))
339        self.assertEqual(self.larger_src_key_as_string,
340                         self.larger_src_key.get_contents_as_string())
341        # Ensure some of the file was downloaded both before and after failure.
342        self.assertTrue(
343            len(harnass.transferred_seq_before_first_failure) > 1 and
344            len(harnass.transferred_seq_after_first_failure) > 1)
345
346    def test_zero_length_object_download(self):
347        """
348        Tests downloading a zero-length object (exercises boundary conditions).
349        """
350        res_download_handler = ResumableDownloadHandler()
351        self.empty_src_key.get_contents_to_file(
352            self.dst_fp, res_download_handler=res_download_handler)
353        self.assertEqual(0, get_cur_file_size(self.dst_fp))
354
355    def test_download_with_invalid_tracker_etag(self):
356        """
357        Tests resumable download with a tracker file containing an invalid etag
358        """
359        invalid_etag_tracker_file_name = (
360            '%s%sinvalid_etag_tracker' % (self.tmp_dir, os.sep))
361        f = open(invalid_etag_tracker_file_name, 'w')
362        f.write('3.14159\n')
363        f.close()
364        res_download_handler = ResumableDownloadHandler(
365            tracker_file_name=invalid_etag_tracker_file_name)
366        # An error should be printed about the invalid tracker, but then it
367        # should run the update successfully.
368        self.small_src_key.get_contents_to_file(
369            self.dst_fp, res_download_handler=res_download_handler)
370        self.assertEqual(self.small_src_key_size,
371                         get_cur_file_size(self.dst_fp))
372        self.assertEqual(self.small_src_key_as_string,
373                         self.small_src_key.get_contents_as_string())
374
375    def test_download_with_inconsistent_etag_in_tracker(self):
376        """
377        Tests resumable download with an inconsistent etag in tracker file
378        """
379        inconsistent_etag_tracker_file_name = (
380            '%s%sinconsistent_etag_tracker' % (self.tmp_dir, os.sep))
381        f = open(inconsistent_etag_tracker_file_name, 'w')
382        good_etag = self.small_src_key.etag.strip('"\'')
383        new_val_as_list = []
384        for c in reversed(good_etag):
385            new_val_as_list.append(c)
386        f.write('%s\n' % ''.join(new_val_as_list))
387        f.close()
388        res_download_handler = ResumableDownloadHandler(
389            tracker_file_name=inconsistent_etag_tracker_file_name)
390        # An error should be printed about the expired tracker, but then it
391        # should run the update successfully.
392        self.small_src_key.get_contents_to_file(
393            self.dst_fp, res_download_handler=res_download_handler)
394        self.assertEqual(self.small_src_key_size,
395                         get_cur_file_size(self.dst_fp))
396        self.assertEqual(self.small_src_key_as_string,
397                         self.small_src_key.get_contents_as_string())
398
399    def test_download_with_unwritable_tracker_file(self):
400        """
401        Tests resumable download with an unwritable tracker file
402        """
403        # Make dir where tracker_file lives temporarily unwritable.
404        save_mod = os.stat(self.tmp_dir).st_mode
405        try:
406            os.chmod(self.tmp_dir, 0)
407            res_download_handler = ResumableDownloadHandler(
408                tracker_file_name=self.tracker_file_name)
409        except ResumableDownloadException, e:
410            self.assertEqual(e.disposition, ResumableTransferDisposition.ABORT)
411            self.assertNotEqual(
412                e.message.find('Couldn\'t write URI tracker file'), -1)
413        finally:
414            # Restore original protection of dir where tracker_file lives.
415            os.chmod(self.tmp_dir, save_mod)