PageRenderTime 1020ms CodeModel.GetById 172ms app.highlight 638ms RepoModel.GetById 197ms app.codeStats 0ms

/Lib/test/test_tarfile.py

http://unladen-swallow.googlecode.com/
Python | 1233 lines | 1209 code | 22 blank | 2 comment | 14 complexity | 384d3a9eb5a50528660b845851fe86fe MD5 | raw file
   1# -*- coding: iso-8859-15 -*-
   2
   3import sys
   4import os
   5import shutil
   6import tempfile
   7import StringIO
   8from hashlib import md5
   9import errno
  10
  11import unittest
  12import tarfile
  13
  14from test import test_support
  15
  16# Check for our compression modules.
  17try:
  18    import gzip
  19    gzip.GzipFile
  20except (ImportError, AttributeError):
  21    gzip = None
  22try:
  23    import bz2
  24except ImportError:
  25    bz2 = None
  26
  27def md5sum(data):
  28    return md5(data).hexdigest()
  29
  30def path(path):
  31    return test_support.findfile(path)
  32
  33TEMPDIR = os.path.join(tempfile.gettempdir(), "test_tarfile_tmp")
  34tarname = path("testtar.tar")
  35gzipname = os.path.join(TEMPDIR, "testtar.tar.gz")
  36bz2name = os.path.join(TEMPDIR, "testtar.tar.bz2")
  37tmpname = os.path.join(TEMPDIR, "tmp.tar")
  38
  39md5_regtype = "65f477c818ad9e15f7feab0c6d37742f"
  40md5_sparse = "a54fbc4ca4f4399a90e1b27164012fc6"
  41
  42
  43class ReadTest(unittest.TestCase):
  44
  45    tarname = tarname
  46    mode = "r:"
  47
  48    def setUp(self):
  49        self.tar = tarfile.open(self.tarname, mode=self.mode, encoding="iso8859-1")
  50
  51    def tearDown(self):
  52        self.tar.close()
  53
  54
  55class UstarReadTest(ReadTest):
  56
  57    def test_fileobj_regular_file(self):
  58        tarinfo = self.tar.getmember("ustar/regtype")
  59        fobj = self.tar.extractfile(tarinfo)
  60        data = fobj.read()
  61        self.assert_((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
  62                "regular file extraction failed")
  63
  64    def test_fileobj_readlines(self):
  65        self.tar.extract("ustar/regtype", TEMPDIR)
  66        tarinfo = self.tar.getmember("ustar/regtype")
  67        fobj1 = open(os.path.join(TEMPDIR, "ustar/regtype"), "rU")
  68        fobj2 = self.tar.extractfile(tarinfo)
  69
  70        lines1 = fobj1.readlines()
  71        lines2 = fobj2.readlines()
  72        self.assert_(lines1 == lines2,
  73                "fileobj.readlines() failed")
  74        self.assert_(len(lines2) == 114,
  75                "fileobj.readlines() failed")
  76        self.assert_(lines2[83] == \
  77                "I will gladly admit that Python is not the fastest running scripting language.\n",
  78                "fileobj.readlines() failed")
  79
  80    def test_fileobj_iter(self):
  81        self.tar.extract("ustar/regtype", TEMPDIR)
  82        tarinfo = self.tar.getmember("ustar/regtype")
  83        fobj1 = open(os.path.join(TEMPDIR, "ustar/regtype"), "rU")
  84        fobj2 = self.tar.extractfile(tarinfo)
  85        lines1 = fobj1.readlines()
  86        lines2 = [line for line in fobj2]
  87        self.assert_(lines1 == lines2,
  88                     "fileobj.__iter__() failed")
  89
  90    def test_fileobj_seek(self):
  91        self.tar.extract("ustar/regtype", TEMPDIR)
  92        fobj = open(os.path.join(TEMPDIR, "ustar/regtype"), "rb")
  93        data = fobj.read()
  94        fobj.close()
  95
  96        tarinfo = self.tar.getmember("ustar/regtype")
  97        fobj = self.tar.extractfile(tarinfo)
  98
  99        text = fobj.read()
 100        fobj.seek(0)
 101        self.assert_(0 == fobj.tell(),
 102                     "seek() to file's start failed")
 103        fobj.seek(2048, 0)
 104        self.assert_(2048 == fobj.tell(),
 105                     "seek() to absolute position failed")
 106        fobj.seek(-1024, 1)
 107        self.assert_(1024 == fobj.tell(),
 108                     "seek() to negative relative position failed")
 109        fobj.seek(1024, 1)
 110        self.assert_(2048 == fobj.tell(),
 111                     "seek() to positive relative position failed")
 112        s = fobj.read(10)
 113        self.assert_(s == data[2048:2058],
 114                     "read() after seek failed")
 115        fobj.seek(0, 2)
 116        self.assert_(tarinfo.size == fobj.tell(),
 117                     "seek() to file's end failed")
 118        self.assert_(fobj.read() == "",
 119                     "read() at file's end did not return empty string")
 120        fobj.seek(-tarinfo.size, 2)
 121        self.assert_(0 == fobj.tell(),
 122                     "relative seek() to file's start failed")
 123        fobj.seek(512)
 124        s1 = fobj.readlines()
 125        fobj.seek(512)
 126        s2 = fobj.readlines()
 127        self.assert_(s1 == s2,
 128                     "readlines() after seek failed")
 129        fobj.seek(0)
 130        self.assert_(len(fobj.readline()) == fobj.tell(),
 131                     "tell() after readline() failed")
 132        fobj.seek(512)
 133        self.assert_(len(fobj.readline()) + 512 == fobj.tell(),
 134                     "tell() after seek() and readline() failed")
 135        fobj.seek(0)
 136        line = fobj.readline()
 137        self.assert_(fobj.read() == data[len(line):],
 138                     "read() after readline() failed")
 139        fobj.close()
 140
 141
 142class MiscReadTest(ReadTest):
 143
 144    def test_no_name_argument(self):
 145        fobj = open(self.tarname, "rb")
 146        tar = tarfile.open(fileobj=fobj, mode=self.mode)
 147        self.assertEqual(tar.name, os.path.abspath(fobj.name))
 148
 149    def test_no_name_attribute(self):
 150        data = open(self.tarname, "rb").read()
 151        fobj = StringIO.StringIO(data)
 152        self.assertRaises(AttributeError, getattr, fobj, "name")
 153        tar = tarfile.open(fileobj=fobj, mode=self.mode)
 154        self.assertEqual(tar.name, None)
 155
 156    def test_empty_name_attribute(self):
 157        data = open(self.tarname, "rb").read()
 158        fobj = StringIO.StringIO(data)
 159        fobj.name = ""
 160        tar = tarfile.open(fileobj=fobj, mode=self.mode)
 161        self.assertEqual(tar.name, None)
 162
 163    def test_fileobj_with_offset(self):
 164        # Skip the first member and store values from the second member
 165        # of the testtar.
 166        tar = tarfile.open(self.tarname, mode=self.mode)
 167        tar.next()
 168        t = tar.next()
 169        name = t.name
 170        offset = t.offset
 171        data = tar.extractfile(t).read()
 172        tar.close()
 173
 174        # Open the testtar and seek to the offset of the second member.
 175        if self.mode.endswith(":gz"):
 176            _open = gzip.GzipFile
 177        elif self.mode.endswith(":bz2"):
 178            _open = bz2.BZ2File
 179        else:
 180            _open = open
 181        fobj = _open(self.tarname, "rb")
 182        fobj.seek(offset)
 183
 184        # Test if the tarfile starts with the second member.
 185        tar = tar.open(self.tarname, mode="r:", fileobj=fobj)
 186        t = tar.next()
 187        self.assertEqual(t.name, name)
 188        # Read to the end of fileobj and test if seeking back to the
 189        # beginning works.
 190        tar.getmembers()
 191        self.assertEqual(tar.extractfile(t).read(), data,
 192                "seek back did not work")
 193        tar.close()
 194
 195    def test_fail_comp(self):
 196        # For Gzip and Bz2 Tests: fail with a ReadError on an uncompressed file.
 197        if self.mode == "r:":
 198            return
 199        self.assertRaises(tarfile.ReadError, tarfile.open, tarname, self.mode)
 200        fobj = open(tarname, "rb")
 201        self.assertRaises(tarfile.ReadError, tarfile.open, fileobj=fobj, mode=self.mode)
 202
 203    def test_v7_dirtype(self):
 204        # Test old style dirtype member (bug #1336623):
 205        # Old V7 tars create directory members using an AREGTYPE
 206        # header with a "/" appended to the filename field.
 207        tarinfo = self.tar.getmember("misc/dirtype-old-v7")
 208        self.assert_(tarinfo.type == tarfile.DIRTYPE,
 209                "v7 dirtype failed")
 210
 211    def test_xstar_type(self):
 212        # The xstar format stores extra atime and ctime fields inside the
 213        # space reserved for the prefix field. The prefix field must be
 214        # ignored in this case, otherwise it will mess up the name.
 215        try:
 216            self.tar.getmember("misc/regtype-xstar")
 217        except KeyError:
 218            self.fail("failed to find misc/regtype-xstar (mangled prefix?)")
 219
 220    def test_check_members(self):
 221        for tarinfo in self.tar:
 222            self.assert_(int(tarinfo.mtime) == 07606136617,
 223                    "wrong mtime for %s" % tarinfo.name)
 224            if not tarinfo.name.startswith("ustar/"):
 225                continue
 226            self.assert_(tarinfo.uname == "tarfile",
 227                    "wrong uname for %s" % tarinfo.name)
 228
 229    def test_find_members(self):
 230        self.assert_(self.tar.getmembers()[-1].name == "misc/eof",
 231                "could not find all members")
 232
 233    def test_extract_hardlink(self):
 234        # Test hardlink extraction (e.g. bug #857297).
 235        tar = tarfile.open(tarname, errorlevel=1, encoding="iso8859-1")
 236
 237        tar.extract("ustar/regtype", TEMPDIR)
 238        try:
 239            tar.extract("ustar/lnktype", TEMPDIR)
 240        except EnvironmentError, e:
 241            if e.errno == errno.ENOENT:
 242                self.fail("hardlink not extracted properly")
 243
 244        data = open(os.path.join(TEMPDIR, "ustar/lnktype"), "rb").read()
 245        self.assertEqual(md5sum(data), md5_regtype)
 246
 247        try:
 248            tar.extract("ustar/symtype", TEMPDIR)
 249        except EnvironmentError, e:
 250            if e.errno == errno.ENOENT:
 251                self.fail("symlink not extracted properly")
 252
 253        data = open(os.path.join(TEMPDIR, "ustar/symtype"), "rb").read()
 254        self.assertEqual(md5sum(data), md5_regtype)
 255
 256    def test_extractall(self):
 257        # Test if extractall() correctly restores directory permissions
 258        # and times (see issue1735).
 259        tar = tarfile.open(tarname, encoding="iso8859-1")
 260        directories = [t for t in tar if t.isdir()]
 261        tar.extractall(TEMPDIR, directories)
 262        for tarinfo in directories:
 263            path = os.path.join(TEMPDIR, tarinfo.name)
 264            if sys.platform != "win32":
 265                # Win32 has no support for fine grained permissions.
 266                self.assertEqual(tarinfo.mode & 0777, os.stat(path).st_mode & 0777)
 267            self.assertEqual(tarinfo.mtime, os.path.getmtime(path))
 268        tar.close()
 269
 270
 271class StreamReadTest(ReadTest):
 272
 273    mode="r|"
 274
 275    def test_fileobj_regular_file(self):
 276        tarinfo = self.tar.next() # get "regtype" (can't use getmember)
 277        fobj = self.tar.extractfile(tarinfo)
 278        data = fobj.read()
 279        self.assert_((len(data), md5sum(data)) == (tarinfo.size, md5_regtype),
 280                "regular file extraction failed")
 281
 282    def test_provoke_stream_error(self):
 283        tarinfos = self.tar.getmembers()
 284        f = self.tar.extractfile(tarinfos[0]) # read the first member
 285        self.assertRaises(tarfile.StreamError, f.read)
 286
 287    def test_compare_members(self):
 288        tar1 = tarfile.open(tarname, encoding="iso8859-1")
 289        tar2 = self.tar
 290
 291        while True:
 292            t1 = tar1.next()
 293            t2 = tar2.next()
 294            if t1 is None:
 295                break
 296            self.assert_(t2 is not None, "stream.next() failed.")
 297
 298            if t2.islnk() or t2.issym():
 299                self.assertRaises(tarfile.StreamError, tar2.extractfile, t2)
 300                continue
 301
 302            v1 = tar1.extractfile(t1)
 303            v2 = tar2.extractfile(t2)
 304            if v1 is None:
 305                continue
 306            self.assert_(v2 is not None, "stream.extractfile() failed")
 307            self.assert_(v1.read() == v2.read(), "stream extraction failed")
 308
 309        tar1.close()
 310
 311
 312class DetectReadTest(unittest.TestCase):
 313
 314    def _testfunc_file(self, name, mode):
 315        try:
 316            tarfile.open(name, mode)
 317        except tarfile.ReadError:
 318            self.fail()
 319
 320    def _testfunc_fileobj(self, name, mode):
 321        try:
 322            tarfile.open(name, mode, fileobj=open(name, "rb"))
 323        except tarfile.ReadError:
 324            self.fail()
 325
 326    def _test_modes(self, testfunc):
 327        testfunc(tarname, "r")
 328        testfunc(tarname, "r:")
 329        testfunc(tarname, "r:*")
 330        testfunc(tarname, "r|")
 331        testfunc(tarname, "r|*")
 332
 333        if gzip:
 334            self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r:gz")
 335            self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r|gz")
 336            self.assertRaises(tarfile.ReadError, tarfile.open, gzipname, mode="r:")
 337            self.assertRaises(tarfile.ReadError, tarfile.open, gzipname, mode="r|")
 338
 339            testfunc(gzipname, "r")
 340            testfunc(gzipname, "r:*")
 341            testfunc(gzipname, "r:gz")
 342            testfunc(gzipname, "r|*")
 343            testfunc(gzipname, "r|gz")
 344
 345        if bz2:
 346            self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r:bz2")
 347            self.assertRaises(tarfile.ReadError, tarfile.open, tarname, mode="r|bz2")
 348            self.assertRaises(tarfile.ReadError, tarfile.open, bz2name, mode="r:")
 349            self.assertRaises(tarfile.ReadError, tarfile.open, bz2name, mode="r|")
 350
 351            testfunc(bz2name, "r")
 352            testfunc(bz2name, "r:*")
 353            testfunc(bz2name, "r:bz2")
 354            testfunc(bz2name, "r|*")
 355            testfunc(bz2name, "r|bz2")
 356
 357    def test_detect_file(self):
 358        self._test_modes(self._testfunc_file)
 359
 360    def test_detect_fileobj(self):
 361        self._test_modes(self._testfunc_fileobj)
 362
 363
 364class MemberReadTest(ReadTest):
 365
 366    def _test_member(self, tarinfo, chksum=None, **kwargs):
 367        if chksum is not None:
 368            self.assert_(md5sum(self.tar.extractfile(tarinfo).read()) == chksum,
 369                    "wrong md5sum for %s" % tarinfo.name)
 370
 371        kwargs["mtime"] = 07606136617
 372        kwargs["uid"] = 1000
 373        kwargs["gid"] = 100
 374        if "old-v7" not in tarinfo.name:
 375            # V7 tar can't handle alphabetic owners.
 376            kwargs["uname"] = "tarfile"
 377            kwargs["gname"] = "tarfile"
 378        for k, v in kwargs.iteritems():
 379            self.assert_(getattr(tarinfo, k) == v,
 380                    "wrong value in %s field of %s" % (k, tarinfo.name))
 381
 382    def test_find_regtype(self):
 383        tarinfo = self.tar.getmember("ustar/regtype")
 384        self._test_member(tarinfo, size=7011, chksum=md5_regtype)
 385
 386    def test_find_conttype(self):
 387        tarinfo = self.tar.getmember("ustar/conttype")
 388        self._test_member(tarinfo, size=7011, chksum=md5_regtype)
 389
 390    def test_find_dirtype(self):
 391        tarinfo = self.tar.getmember("ustar/dirtype")
 392        self._test_member(tarinfo, size=0)
 393
 394    def test_find_dirtype_with_size(self):
 395        tarinfo = self.tar.getmember("ustar/dirtype-with-size")
 396        self._test_member(tarinfo, size=255)
 397
 398    def test_find_lnktype(self):
 399        tarinfo = self.tar.getmember("ustar/lnktype")
 400        self._test_member(tarinfo, size=0, linkname="ustar/regtype")
 401
 402    def test_find_symtype(self):
 403        tarinfo = self.tar.getmember("ustar/symtype")
 404        self._test_member(tarinfo, size=0, linkname="regtype")
 405
 406    def test_find_blktype(self):
 407        tarinfo = self.tar.getmember("ustar/blktype")
 408        self._test_member(tarinfo, size=0, devmajor=3, devminor=0)
 409
 410    def test_find_chrtype(self):
 411        tarinfo = self.tar.getmember("ustar/chrtype")
 412        self._test_member(tarinfo, size=0, devmajor=1, devminor=3)
 413
 414    def test_find_fifotype(self):
 415        tarinfo = self.tar.getmember("ustar/fifotype")
 416        self._test_member(tarinfo, size=0)
 417
 418    def test_find_sparse(self):
 419        tarinfo = self.tar.getmember("ustar/sparse")
 420        self._test_member(tarinfo, size=86016, chksum=md5_sparse)
 421
 422    def test_find_umlauts(self):
 423        tarinfo = self.tar.getmember("ustar/umlauts-ÄÖÜäöüß")
 424        self._test_member(tarinfo, size=7011, chksum=md5_regtype)
 425
 426    def test_find_ustar_longname(self):
 427        name = "ustar/" + "12345/" * 39 + "1234567/longname"
 428        self.assert_(name in self.tar.getnames())
 429
 430    def test_find_regtype_oldv7(self):
 431        tarinfo = self.tar.getmember("misc/regtype-old-v7")
 432        self._test_member(tarinfo, size=7011, chksum=md5_regtype)
 433
 434    def test_find_pax_umlauts(self):
 435        self.tar = tarfile.open(self.tarname, mode=self.mode, encoding="iso8859-1")
 436        tarinfo = self.tar.getmember("pax/umlauts-ÄÖÜäöüß")
 437        self._test_member(tarinfo, size=7011, chksum=md5_regtype)
 438
 439
 440class LongnameTest(ReadTest):
 441
 442    def test_read_longname(self):
 443        # Test reading of longname (bug #1471427).
 444        longname = self.subdir + "/" + "123/" * 125 + "longname"
 445        try:
 446            tarinfo = self.tar.getmember(longname)
 447        except KeyError:
 448            self.fail("longname not found")
 449        self.assert_(tarinfo.type != tarfile.DIRTYPE, "read longname as dirtype")
 450
 451    def test_read_longlink(self):
 452        longname = self.subdir + "/" + "123/" * 125 + "longname"
 453        longlink = self.subdir + "/" + "123/" * 125 + "longlink"
 454        try:
 455            tarinfo = self.tar.getmember(longlink)
 456        except KeyError:
 457            self.fail("longlink not found")
 458        self.assert_(tarinfo.linkname == longname, "linkname wrong")
 459
 460    def test_truncated_longname(self):
 461        longname = self.subdir + "/" + "123/" * 125 + "longname"
 462        tarinfo = self.tar.getmember(longname)
 463        offset = tarinfo.offset
 464        self.tar.fileobj.seek(offset)
 465        fobj = StringIO.StringIO(self.tar.fileobj.read(3 * 512))
 466        self.assertRaises(tarfile.ReadError, tarfile.open, name="foo.tar", fileobj=fobj)
 467
 468    def test_header_offset(self):
 469        # Test if the start offset of the TarInfo object includes
 470        # the preceding extended header.
 471        longname = self.subdir + "/" + "123/" * 125 + "longname"
 472        offset = self.tar.getmember(longname).offset
 473        fobj = open(tarname)
 474        fobj.seek(offset)
 475        tarinfo = tarfile.TarInfo.frombuf(fobj.read(512))
 476        self.assertEqual(tarinfo.type, self.longnametype)
 477
 478
 479class GNUReadTest(LongnameTest):
 480
 481    subdir = "gnu"
 482    longnametype = tarfile.GNUTYPE_LONGNAME
 483
 484    def test_sparse_file(self):
 485        tarinfo1 = self.tar.getmember("ustar/sparse")
 486        fobj1 = self.tar.extractfile(tarinfo1)
 487        tarinfo2 = self.tar.getmember("gnu/sparse")
 488        fobj2 = self.tar.extractfile(tarinfo2)
 489        self.assert_(fobj1.read() == fobj2.read(),
 490                "sparse file extraction failed")
 491
 492
 493class PaxReadTest(LongnameTest):
 494
 495    subdir = "pax"
 496    longnametype = tarfile.XHDTYPE
 497
 498    def test_pax_global_headers(self):
 499        tar = tarfile.open(tarname, encoding="iso8859-1")
 500
 501        tarinfo = tar.getmember("pax/regtype1")
 502        self.assertEqual(tarinfo.uname, "foo")
 503        self.assertEqual(tarinfo.gname, "bar")
 504        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
 505
 506        tarinfo = tar.getmember("pax/regtype2")
 507        self.assertEqual(tarinfo.uname, "")
 508        self.assertEqual(tarinfo.gname, "bar")
 509        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
 510
 511        tarinfo = tar.getmember("pax/regtype3")
 512        self.assertEqual(tarinfo.uname, "tarfile")
 513        self.assertEqual(tarinfo.gname, "tarfile")
 514        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
 515
 516    def test_pax_number_fields(self):
 517        # All following number fields are read from the pax header.
 518        tar = tarfile.open(tarname, encoding="iso8859-1")
 519        tarinfo = tar.getmember("pax/regtype4")
 520        self.assertEqual(tarinfo.size, 7011)
 521        self.assertEqual(tarinfo.uid, 123)
 522        self.assertEqual(tarinfo.gid, 123)
 523        self.assertEqual(tarinfo.mtime, 1041808783.0)
 524        self.assertEqual(type(tarinfo.mtime), float)
 525        self.assertEqual(float(tarinfo.pax_headers["atime"]), 1041808783.0)
 526        self.assertEqual(float(tarinfo.pax_headers["ctime"]), 1041808783.0)
 527
 528
 529class WriteTestBase(unittest.TestCase):
 530    # Put all write tests in here that are supposed to be tested
 531    # in all possible mode combinations.
 532
 533    def test_fileobj_no_close(self):
 534        fobj = StringIO.StringIO()
 535        tar = tarfile.open(fileobj=fobj, mode=self.mode)
 536        tar.addfile(tarfile.TarInfo("foo"))
 537        tar.close()
 538        self.assert_(fobj.closed is False, "external fileobjs must never closed")
 539
 540
 541class WriteTest(WriteTestBase):
 542
 543    mode = "w:"
 544
 545    def test_100_char_name(self):
 546        # The name field in a tar header stores strings of at most 100 chars.
 547        # If a string is shorter than 100 chars it has to be padded with '\0',
 548        # which implies that a string of exactly 100 chars is stored without
 549        # a trailing '\0'.
 550        name = "0123456789" * 10
 551        tar = tarfile.open(tmpname, self.mode)
 552        t = tarfile.TarInfo(name)
 553        tar.addfile(t)
 554        tar.close()
 555
 556        tar = tarfile.open(tmpname)
 557        self.assert_(tar.getnames()[0] == name,
 558                "failed to store 100 char filename")
 559        tar.close()
 560
 561    def test_tar_size(self):
 562        # Test for bug #1013882.
 563        tar = tarfile.open(tmpname, self.mode)
 564        path = os.path.join(TEMPDIR, "file")
 565        fobj = open(path, "wb")
 566        fobj.write("aaa")
 567        fobj.close()
 568        tar.add(path)
 569        tar.close()
 570        self.assert_(os.path.getsize(tmpname) > 0,
 571                "tarfile is empty")
 572
 573    # The test_*_size tests test for bug #1167128.
 574    def test_file_size(self):
 575        tar = tarfile.open(tmpname, self.mode)
 576
 577        path = os.path.join(TEMPDIR, "file")
 578        fobj = open(path, "wb")
 579        fobj.close()
 580        tarinfo = tar.gettarinfo(path)
 581        self.assertEqual(tarinfo.size, 0)
 582
 583        fobj = open(path, "wb")
 584        fobj.write("aaa")
 585        fobj.close()
 586        tarinfo = tar.gettarinfo(path)
 587        self.assertEqual(tarinfo.size, 3)
 588
 589        tar.close()
 590
 591    def test_directory_size(self):
 592        path = os.path.join(TEMPDIR, "directory")
 593        os.mkdir(path)
 594        try:
 595            tar = tarfile.open(tmpname, self.mode)
 596            tarinfo = tar.gettarinfo(path)
 597            self.assertEqual(tarinfo.size, 0)
 598        finally:
 599            os.rmdir(path)
 600
 601    def test_link_size(self):
 602        if hasattr(os, "link"):
 603            link = os.path.join(TEMPDIR, "link")
 604            target = os.path.join(TEMPDIR, "link_target")
 605            open(target, "wb").close()
 606            os.link(target, link)
 607            try:
 608                tar = tarfile.open(tmpname, self.mode)
 609                tarinfo = tar.gettarinfo(link)
 610                self.assertEqual(tarinfo.size, 0)
 611            finally:
 612                os.remove(target)
 613                os.remove(link)
 614
 615    def test_symlink_size(self):
 616        if hasattr(os, "symlink"):
 617            path = os.path.join(TEMPDIR, "symlink")
 618            os.symlink("link_target", path)
 619            try:
 620                tar = tarfile.open(tmpname, self.mode)
 621                tarinfo = tar.gettarinfo(path)
 622                self.assertEqual(tarinfo.size, 0)
 623            finally:
 624                os.remove(path)
 625
 626    def test_add_self(self):
 627        # Test for #1257255.
 628        dstname = os.path.abspath(tmpname)
 629
 630        tar = tarfile.open(tmpname, self.mode)
 631        self.assert_(tar.name == dstname, "archive name must be absolute")
 632
 633        tar.add(dstname)
 634        self.assert_(tar.getnames() == [], "added the archive to itself")
 635
 636        cwd = os.getcwd()
 637        os.chdir(TEMPDIR)
 638        tar.add(dstname)
 639        os.chdir(cwd)
 640        self.assert_(tar.getnames() == [], "added the archive to itself")
 641
 642    def test_exclude(self):
 643        tempdir = os.path.join(TEMPDIR, "exclude")
 644        os.mkdir(tempdir)
 645        try:
 646            for name in ("foo", "bar", "baz"):
 647                name = os.path.join(tempdir, name)
 648                open(name, "wb").close()
 649
 650            def exclude(name):
 651                return os.path.isfile(name)
 652
 653            tar = tarfile.open(tmpname, self.mode, encoding="iso8859-1")
 654            tar.add(tempdir, arcname="empty_dir", exclude=exclude)
 655            tar.close()
 656
 657            tar = tarfile.open(tmpname, "r")
 658            self.assertEqual(len(tar.getmembers()), 1)
 659            self.assertEqual(tar.getnames()[0], "empty_dir")
 660        finally:
 661            shutil.rmtree(tempdir)
 662
 663
 664class StreamWriteTest(WriteTestBase):
 665
 666    mode = "w|"
 667
 668    def test_stream_padding(self):
 669        # Test for bug #1543303.
 670        tar = tarfile.open(tmpname, self.mode)
 671        tar.close()
 672
 673        if self.mode.endswith("gz"):
 674            fobj = gzip.GzipFile(tmpname)
 675            data = fobj.read()
 676            fobj.close()
 677        elif self.mode.endswith("bz2"):
 678            dec = bz2.BZ2Decompressor()
 679            data = open(tmpname, "rb").read()
 680            data = dec.decompress(data)
 681            self.assert_(len(dec.unused_data) == 0,
 682                    "found trailing data")
 683        else:
 684            fobj = open(tmpname, "rb")
 685            data = fobj.read()
 686            fobj.close()
 687
 688        self.assert_(data.count("\0") == tarfile.RECORDSIZE,
 689                         "incorrect zero padding")
 690
 691
 692class GNUWriteTest(unittest.TestCase):
 693    # This testcase checks for correct creation of GNU Longname
 694    # and Longlink extended headers (cp. bug #812325).
 695
 696    def _length(self, s):
 697        blocks, remainder = divmod(len(s) + 1, 512)
 698        if remainder:
 699            blocks += 1
 700        return blocks * 512
 701
 702    def _calc_size(self, name, link=None):
 703        # Initial tar header
 704        count = 512
 705
 706        if len(name) > tarfile.LENGTH_NAME:
 707            # GNU longname extended header + longname
 708            count += 512
 709            count += self._length(name)
 710        if link is not None and len(link) > tarfile.LENGTH_LINK:
 711            # GNU longlink extended header + longlink
 712            count += 512
 713            count += self._length(link)
 714        return count
 715
 716    def _test(self, name, link=None):
 717        tarinfo = tarfile.TarInfo(name)
 718        if link:
 719            tarinfo.linkname = link
 720            tarinfo.type = tarfile.LNKTYPE
 721
 722        tar = tarfile.open(tmpname, "w")
 723        tar.format = tarfile.GNU_FORMAT
 724        tar.addfile(tarinfo)
 725
 726        v1 = self._calc_size(name, link)
 727        v2 = tar.offset
 728        self.assert_(v1 == v2, "GNU longname/longlink creation failed")
 729
 730        tar.close()
 731
 732        tar = tarfile.open(tmpname)
 733        member = tar.next()
 734        self.failIf(member is None, "unable to read longname member")
 735        self.assert_(tarinfo.name == member.name and \
 736                     tarinfo.linkname == member.linkname, \
 737                     "unable to read longname member")
 738
 739    def test_longname_1023(self):
 740        self._test(("longnam/" * 127) + "longnam")
 741
 742    def test_longname_1024(self):
 743        self._test(("longnam/" * 127) + "longname")
 744
 745    def test_longname_1025(self):
 746        self._test(("longnam/" * 127) + "longname_")
 747
 748    def test_longlink_1023(self):
 749        self._test("name", ("longlnk/" * 127) + "longlnk")
 750
 751    def test_longlink_1024(self):
 752        self._test("name", ("longlnk/" * 127) + "longlink")
 753
 754    def test_longlink_1025(self):
 755        self._test("name", ("longlnk/" * 127) + "longlink_")
 756
 757    def test_longnamelink_1023(self):
 758        self._test(("longnam/" * 127) + "longnam",
 759                   ("longlnk/" * 127) + "longlnk")
 760
 761    def test_longnamelink_1024(self):
 762        self._test(("longnam/" * 127) + "longname",
 763                   ("longlnk/" * 127) + "longlink")
 764
 765    def test_longnamelink_1025(self):
 766        self._test(("longnam/" * 127) + "longname_",
 767                   ("longlnk/" * 127) + "longlink_")
 768
 769
 770class HardlinkTest(unittest.TestCase):
 771    # Test the creation of LNKTYPE (hardlink) members in an archive.
 772
 773    def setUp(self):
 774        self.foo = os.path.join(TEMPDIR, "foo")
 775        self.bar = os.path.join(TEMPDIR, "bar")
 776
 777        fobj = open(self.foo, "wb")
 778        fobj.write("foo")
 779        fobj.close()
 780
 781        os.link(self.foo, self.bar)
 782
 783        self.tar = tarfile.open(tmpname, "w")
 784        self.tar.add(self.foo)
 785
 786    def tearDown(self):
 787        self.tar.close()
 788        os.remove(self.foo)
 789        os.remove(self.bar)
 790
 791    def test_add_twice(self):
 792        # The same name will be added as a REGTYPE every
 793        # time regardless of st_nlink.
 794        tarinfo = self.tar.gettarinfo(self.foo)
 795        self.assert_(tarinfo.type == tarfile.REGTYPE,
 796                "add file as regular failed")
 797
 798    def test_add_hardlink(self):
 799        tarinfo = self.tar.gettarinfo(self.bar)
 800        self.assert_(tarinfo.type == tarfile.LNKTYPE,
 801                "add file as hardlink failed")
 802
 803    def test_dereference_hardlink(self):
 804        self.tar.dereference = True
 805        tarinfo = self.tar.gettarinfo(self.bar)
 806        self.assert_(tarinfo.type == tarfile.REGTYPE,
 807                "dereferencing hardlink failed")
 808
 809
 810class PaxWriteTest(GNUWriteTest):
 811
 812    def _test(self, name, link=None):
 813        # See GNUWriteTest.
 814        tarinfo = tarfile.TarInfo(name)
 815        if link:
 816            tarinfo.linkname = link
 817            tarinfo.type = tarfile.LNKTYPE
 818
 819        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
 820        tar.addfile(tarinfo)
 821        tar.close()
 822
 823        tar = tarfile.open(tmpname)
 824        if link:
 825            l = tar.getmembers()[0].linkname
 826            self.assert_(link == l, "PAX longlink creation failed")
 827        else:
 828            n = tar.getmembers()[0].name
 829            self.assert_(name == n, "PAX longname creation failed")
 830
 831    def test_pax_global_header(self):
 832        pax_headers = {
 833                u"foo": u"bar",
 834                u"uid": u"0",
 835                u"mtime": u"1.23",
 836                u"test": u"äöü",
 837                u"äöü": u"test"}
 838
 839        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
 840                pax_headers=pax_headers)
 841        tar.addfile(tarfile.TarInfo("test"))
 842        tar.close()
 843
 844        # Test if the global header was written correctly.
 845        tar = tarfile.open(tmpname, encoding="iso8859-1")
 846        self.assertEqual(tar.pax_headers, pax_headers)
 847        self.assertEqual(tar.getmembers()[0].pax_headers, pax_headers)
 848
 849        # Test if all the fields are unicode.
 850        for key, val in tar.pax_headers.iteritems():
 851            self.assert_(type(key) is unicode)
 852            self.assert_(type(val) is unicode)
 853            if key in tarfile.PAX_NUMBER_FIELDS:
 854                try:
 855                    tarfile.PAX_NUMBER_FIELDS[key](val)
 856                except (TypeError, ValueError):
 857                    self.fail("unable to convert pax header field")
 858
 859    def test_pax_extended_header(self):
 860        # The fields from the pax header have priority over the
 861        # TarInfo.
 862        pax_headers = {u"path": u"foo", u"uid": u"123"}
 863
 864        tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
 865        t = tarfile.TarInfo()
 866        t.name = u"äöü"     # non-ASCII
 867        t.uid = 8**8        # too large
 868        t.pax_headers = pax_headers
 869        tar.addfile(t)
 870        tar.close()
 871
 872        tar = tarfile.open(tmpname, encoding="iso8859-1")
 873        t = tar.getmembers()[0]
 874        self.assertEqual(t.pax_headers, pax_headers)
 875        self.assertEqual(t.name, "foo")
 876        self.assertEqual(t.uid, 123)
 877
 878
 879class UstarUnicodeTest(unittest.TestCase):
 880    # All *UnicodeTests FIXME
 881
 882    format = tarfile.USTAR_FORMAT
 883
 884    def test_iso8859_1_filename(self):
 885        self._test_unicode_filename("iso8859-1")
 886
 887    def test_utf7_filename(self):
 888        self._test_unicode_filename("utf7")
 889
 890    def test_utf8_filename(self):
 891        self._test_unicode_filename("utf8")
 892
 893    def _test_unicode_filename(self, encoding):
 894        tar = tarfile.open(tmpname, "w", format=self.format, encoding=encoding, errors="strict")
 895        name = u"äöü"
 896        tar.addfile(tarfile.TarInfo(name))
 897        tar.close()
 898
 899        tar = tarfile.open(tmpname, encoding=encoding)
 900        self.assert_(type(tar.getnames()[0]) is not unicode)
 901        self.assertEqual(tar.getmembers()[0].name, name.encode(encoding))
 902        tar.close()
 903
 904    def test_unicode_filename_error(self):
 905        tar = tarfile.open(tmpname, "w", format=self.format, encoding="ascii", errors="strict")
 906        tarinfo = tarfile.TarInfo()
 907
 908        tarinfo.name = "äöü"
 909        if self.format == tarfile.PAX_FORMAT:
 910            self.assertRaises(UnicodeError, tar.addfile, tarinfo)
 911        else:
 912            tar.addfile(tarinfo)
 913
 914        tarinfo.name = u"äöü"
 915        self.assertRaises(UnicodeError, tar.addfile, tarinfo)
 916
 917        tarinfo.name = "foo"
 918        tarinfo.uname = u"äöü"
 919        self.assertRaises(UnicodeError, tar.addfile, tarinfo)
 920
 921    def test_unicode_argument(self):
 922        tar = tarfile.open(tarname, "r", encoding="iso8859-1", errors="strict")
 923        for t in tar:
 924            self.assert_(type(t.name) is str)
 925            self.assert_(type(t.linkname) is str)
 926            self.assert_(type(t.uname) is str)
 927            self.assert_(type(t.gname) is str)
 928        tar.close()
 929
 930    def test_uname_unicode(self):
 931        for name in (u"äöü", "äöü"):
 932            t = tarfile.TarInfo("foo")
 933            t.uname = name
 934            t.gname = name
 935
 936            fobj = StringIO.StringIO()
 937            tar = tarfile.open("foo.tar", mode="w", fileobj=fobj, format=self.format, encoding="iso8859-1")
 938            tar.addfile(t)
 939            tar.close()
 940            fobj.seek(0)
 941
 942            tar = tarfile.open("foo.tar", fileobj=fobj, encoding="iso8859-1")
 943            t = tar.getmember("foo")
 944            self.assertEqual(t.uname, "äöü")
 945            self.assertEqual(t.gname, "äöü")
 946
 947
class GNUUnicodeTest(UstarUnicodeTest):
    # Runs the tests inherited from UstarUnicodeTest with GNU_FORMAT
    # archives instead of USTAR_FORMAT ones.

    format = tarfile.GNU_FORMAT
 951
 952
 953class PaxUnicodeTest(UstarUnicodeTest):
 954
 955    format = tarfile.PAX_FORMAT
 956
 957    def _create_unicode_name(self, name):
 958        tar = tarfile.open(tmpname, "w", format=self.format)
 959        t = tarfile.TarInfo()
 960        t.pax_headers["path"] = name
 961        tar.addfile(t)
 962        tar.close()
 963
 964    def test_error_handlers(self):
 965        # Test if the unicode error handlers work correctly for characters
 966        # that cannot be expressed in a given encoding.
 967        self._create_unicode_name(u"äöü")
 968
 969        for handler, name in (("utf-8", u"äöü".encode("utf8")),
 970                    ("replace", "???"), ("ignore", "")):
 971            tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
 972                    errors=handler)
 973            self.assertEqual(tar.getnames()[0], name)
 974
 975        self.assertRaises(UnicodeError, tarfile.open, tmpname,
 976                encoding="ascii", errors="strict")
 977
 978    def test_error_handler_utf8(self):
 979        # Create a pathname that has one component representable using
 980        # iso8859-1 and the other only in iso8859-15.
 981        self._create_unicode_name(u"äöü/¤")
 982
 983        tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
 984                errors="utf-8")
 985        self.assertEqual(tar.getnames()[0], "äöü/" + u"¤".encode("utf8"))
 986
 987
 988class AppendTest(unittest.TestCase):
 989    # Test append mode (cp. patch #1652681).
 990
 991    def setUp(self):
 992        self.tarname = tmpname
 993        if os.path.exists(self.tarname):
 994            os.remove(self.tarname)
 995
 996    def _add_testfile(self, fileobj=None):
 997        tar = tarfile.open(self.tarname, "a", fileobj=fileobj)
 998        tar.addfile(tarfile.TarInfo("bar"))
 999        tar.close()
1000
1001    def _create_testtar(self, mode="w:"):
1002        src = tarfile.open(tarname, encoding="iso8859-1")
1003        t = src.getmember("ustar/regtype")
1004        t.name = "foo"
1005        f = src.extractfile(t)
1006        tar = tarfile.open(self.tarname, mode)
1007        tar.addfile(t, f)
1008        tar.close()
1009
1010    def _test(self, names=["bar"], fileobj=None):
1011        tar = tarfile.open(self.tarname, fileobj=fileobj)
1012        self.assertEqual(tar.getnames(), names)
1013
1014    def test_non_existing(self):
1015        self._add_testfile()
1016        self._test()
1017
1018    def test_empty(self):
1019        open(self.tarname, "w").close()
1020        self._add_testfile()
1021        self._test()
1022
1023    def test_empty_fileobj(self):
1024        fobj = StringIO.StringIO()
1025        self._add_testfile(fobj)
1026        fobj.seek(0)
1027        self._test(fileobj=fobj)
1028
1029    def test_fileobj(self):
1030        self._create_testtar()
1031        data = open(self.tarname).read()
1032        fobj = StringIO.StringIO(data)
1033        self._add_testfile(fobj)
1034        fobj.seek(0)
1035        self._test(names=["foo", "bar"], fileobj=fobj)
1036
1037    def test_existing(self):
1038        self._create_testtar()
1039        self._add_testfile()
1040        self._test(names=["foo", "bar"])
1041
1042    def test_append_gz(self):
1043        if gzip is None:
1044            return
1045        self._create_testtar("w:gz")
1046        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
1047
1048    def test_append_bz2(self):
1049        if bz2 is None:
1050            return
1051        self._create_testtar("w:bz2")
1052        self.assertRaises(tarfile.ReadError, tarfile.open, tmpname, "a")
1053
1054
class LimitsTest(unittest.TestCase):
    # Checks which names, link names and uids TarInfo.tobuf() accepts for
    # each archive format and which ones make it raise ValueError.
    # The uid limits are written as powers (8 ** 7, 256 ** 7) instead of
    # octal literals; the values are the same and the expressions are
    # valid on both Python 2 and Python 3.

    def test_ustar_limits(self):
        # 100 char name
        tarinfo = tarfile.TarInfo("0123456789" * 10)
        tarinfo.tobuf(tarfile.USTAR_FORMAT)

        # 101 char name that cannot be stored
        tarinfo = tarfile.TarInfo("0123456789" * 10 + "0")
        self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)

        # 256 char name with a slash at pos 156
        tarinfo = tarfile.TarInfo("123/" * 62 + "longname")
        tarinfo.tobuf(tarfile.USTAR_FORMAT)

        # 256 char name that cannot be stored
        tarinfo = tarfile.TarInfo("1234567/" * 31 + "longname")
        self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)

        # 512 char name
        tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
        self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)

        # 512 char linkname
        tarinfo = tarfile.TarInfo("longlink")
        tarinfo.linkname = "123/" * 126 + "longname"
        self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)

        # uid that needs 8 octal digits (8 ** 7 == 010000000 in octal)
        tarinfo = tarfile.TarInfo("name")
        tarinfo.uid = 8 ** 7
        self.assertRaises(ValueError, tarinfo.tobuf, tarfile.USTAR_FORMAT)

    def test_gnu_limits(self):
        # 512 char name and linkname are accepted by the GNU format.
        tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
        tarinfo.tobuf(tarfile.GNU_FORMAT)

        tarinfo = tarfile.TarInfo("longlink")
        tarinfo.linkname = "123/" * 126 + "longname"
        tarinfo.tobuf(tarfile.GNU_FORMAT)

        # uid >= 256 ** 7
        tarinfo = tarfile.TarInfo("name")
        tarinfo.uid = 256 ** 7
        self.assertRaises(ValueError, tarinfo.tobuf, tarfile.GNU_FORMAT)

    def test_pax_limits(self):
        # The pax format accepts all of the values used above.
        tarinfo = tarfile.TarInfo("123/" * 126 + "longname")
        tarinfo.tobuf(tarfile.PAX_FORMAT)

        tarinfo = tarfile.TarInfo("longlink")
        tarinfo.linkname = "123/" * 126 + "longname"
        tarinfo.tobuf(tarfile.PAX_FORMAT)

        # uid of 256 ** 7, the value rejected in test_gnu_limits
        tarinfo = tarfile.TarInfo("name")
        tarinfo.uid = 256 ** 7
        tarinfo.tobuf(tarfile.PAX_FORMAT)
1112
1113
class GzipMiscReadTest(MiscReadTest):
    # MiscReadTest against the gzip-compressed archive.
    mode = "r:gz"
    tarname = gzipname


class GzipUstarReadTest(UstarReadTest):
    # UstarReadTest against the gzip-compressed archive.
    mode = "r:gz"
    tarname = gzipname


class GzipStreamReadTest(StreamReadTest):
    # StreamReadTest against the gzip-compressed archive, stream mode.
    mode = "r|gz"
    tarname = gzipname


class GzipWriteTest(WriteTest):
    # WriteTest with gzip compression.
    mode = "w:gz"


class GzipStreamWriteTest(StreamWriteTest):
    # StreamWriteTest with gzip compression, stream mode.
    mode = "w|gz"
1127
1128
class Bz2MiscReadTest(MiscReadTest):
    # MiscReadTest against the bzip2-compressed archive.
    mode = "r:bz2"
    tarname = bz2name


class Bz2UstarReadTest(UstarReadTest):
    # UstarReadTest against the bzip2-compressed archive.
    mode = "r:bz2"
    tarname = bz2name


class Bz2StreamReadTest(StreamReadTest):
    # StreamReadTest against the bzip2-compressed archive, stream mode.
    mode = "r|bz2"
    tarname = bz2name


class Bz2WriteTest(WriteTest):
    # WriteTest with bzip2 compression.
    mode = "w:bz2"


class Bz2StreamWriteTest(StreamWriteTest):
    # StreamWriteTest with bzip2 compression, stream mode.
    mode = "w|bz2"
1142
class Bz2PartialReadTest(unittest.TestCase):
    # Issue5068: The _BZ2Proxy.read() method loops forever
    # on an empty or partial bzipped file.

    def _test_partial_input(self, mode):
        # Open every prefix of a bz2-compressed tar header with *mode*,
        # through a file object that raises if it is read again after it
        # has already been read at end of data.
        class EofGuardStringIO(StringIO.StringIO):
            hit_eof = False

            def read(self, n):
                if self.hit_eof:
                    raise AssertionError("infinite loop detected in tarfile.open()")
                # Note, before reading, whether the buffer is exhausted;
                # a further read() after such a call is treated as a loop.
                exhausted = self.pos == self.len
                self.hit_eof = exhausted
                return StringIO.StringIO.read(self, n)

        compressed = bz2.compress(tarfile.TarInfo("foo").tobuf())
        for cut in range(len(compressed) + 1):
            truncated = EofGuardStringIO(compressed[:cut])
            tarfile.open(fileobj=truncated, mode=mode)

    def test_partial_input(self):
        # Transparent compression detection.
        self._test_partial_input("r")

    def test_partial_input_bz2(self):
        # Explicit bz2 mode.
        self._test_partial_input("r:bz2")
1165
1166
def test_main():
    # Create TEMPDIR and the compressed copies of the test archive, run
    # every test class usable on this platform, and remove TEMPDIR again.
    # The fixture creation happens inside the try block so that TEMPDIR
    # is removed even if reading or compressing the archive fails.
    if not os.path.exists(TEMPDIR):
        os.mkdir(TEMPDIR)

    try:
        tests = [
            UstarReadTest,
            MiscReadTest,
            StreamReadTest,
            DetectReadTest,
            MemberReadTest,
            GNUReadTest,
            PaxReadTest,
            WriteTest,
            StreamWriteTest,
            GNUWriteTest,
            PaxWriteTest,
            UstarUnicodeTest,
            GNUUnicodeTest,
            PaxUnicodeTest,
            AppendTest,
            LimitsTest,
        ]

        # HardlinkTest calls os.link(), which not every platform provides.
        if hasattr(os, "link"):
            tests.append(HardlinkTest)

        fobj = open(tarname, "rb")
        try:
            data = fobj.read()
        finally:
            fobj.close()

        if gzip:
            # Create testtar.tar.gz and add gzip-specific tests.
            tar = gzip.open(gzipname, "wb")
            try:
                tar.write(data)
            finally:
                tar.close()

            tests += [
                GzipMiscReadTest,
                GzipUstarReadTest,
                GzipStreamReadTest,
                GzipWriteTest,
                GzipStreamWriteTest,
            ]

        if bz2:
            # Create testtar.tar.bz2 and add bz2-specific tests.
            tar = bz2.BZ2File(bz2name, "wb")
            try:
                tar.write(data)
            finally:
                tar.close()

            tests += [
                Bz2MiscReadTest,
                Bz2UstarReadTest,
                Bz2StreamReadTest,
                Bz2WriteTest,
                Bz2StreamWriteTest,
                Bz2PartialReadTest,
            ]

        test_support.run_unittest(*tests)
    finally:
        if os.path.exists(TEMPDIR):
            shutil.rmtree(TEMPDIR)
1231
# Run the whole test suite when this file is executed as a script.
if __name__ == "__main__":
    test_main()