PageRenderTime 122ms CodeModel.GetById 25ms app.highlight 82ms RepoModel.GetById 1ms app.codeStats 1ms

/Lib/tarfile.py

http://unladen-swallow.googlecode.com/
Python | 2530 lines | 2419 code | 29 blank | 82 comment | 31 complexity | 0fe24e5b7d3de8de04402c6cfc8ef97d MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1#!/usr/bin/env python
   2# -*- coding: iso-8859-1 -*-
   3#-------------------------------------------------------------------
   4# tarfile.py
   5#-------------------------------------------------------------------
   6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
   7# All rights reserved.
   8#
   9# Permission  is  hereby granted,  free  of charge,  to  any person
  10# obtaining a  copy of  this software  and associated documentation
  11# files  (the  "Software"),  to   deal  in  the  Software   without
  12# restriction,  including  without limitation  the  rights to  use,
  13# copy, modify, merge, publish, distribute, sublicense, and/or sell
  14# copies  of  the  Software,  and to  permit  persons  to  whom the
  15# Software  is  furnished  to  do  so,  subject  to  the  following
  16# conditions:
  17#
  18# The above copyright  notice and this  permission notice shall  be
  19# included in all copies or substantial portions of the Software.
  20#
  21# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,
  22# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES
  23# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND
  24# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT
  25# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,
  26# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING
  27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  28# OTHER DEALINGS IN THE SOFTWARE.
  29#
  30"""Read from and write to tar format archives.
  31"""
  32
  33__version__ = "$Revision: 73770 $"
  34# $Source$
  35
  36version     = "0.9.0"
  37__author__  = "Lars Gustäbel (lars@gustaebel.de)"
  38__date__    = "$Date: 2009-07-02 17:37:21 +0200 (Thu, 02 Jul 2009) $"
  39__cvsid__   = "$Id: tarfile.py 73770 2009-07-02 15:37:21Z jesus.cea $"
  40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
  41
  42#---------
  43# Imports
  44#---------
  45import sys
  46import os
  47import shutil
  48import stat
  49import errno
  50import time
  51import struct
  52import copy
  53import re
  54import operator
  55
  56if sys.platform == 'mac':
  57    # This module needs work for MacOS9, especially in the area of pathname
  58    # handling. In many places it is assumed a simple substitution of / by the
  59    # local os.path.sep is good enough to convert pathnames, but this does not
  60    # work with the mac rooted:path:name versus :nonrooted:path:name syntax
  61    raise ImportError, "tarfile does not work for platform==mac"
  62
  63try:
  64    import grp, pwd
  65except ImportError:
  66    grp = pwd = None
  67
  68# from tarfile import *
  69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
  70
#---------------------------------------------------------
# tar constants
#---------------------------------------------------------
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Type flags stored in the header's typeflag byte.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive format selectors used throughout the module.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT

#---------------------------------------------------------
# tarfile constants
#---------------------------------------------------------
# File types that tarfile supports:
SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
                   SYMTYPE, DIRTYPE, FIFOTYPE,
                   CONTTYPE, CHRTYPE, BLKTYPE,
                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
                   GNUTYPE_SPARSE)

# File types that will be treated as a regular file.
REGULAR_TYPES = (REGTYPE, AREGTYPE,
                 CONTTYPE, GNUTYPE_SPARSE)

# File types that are part of the GNU tar format.
GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
             GNUTYPE_SPARSE)

# Fields from a pax header that override a TarInfo attribute.
PAX_FIELDS = ("path", "linkpath", "size", "mtime",
              "uid", "gid", "uname", "gname")

# Fields in a pax header that are numbers, all other fields
# are treated as strings.
PAX_NUMBER_FIELDS = {
    "atime": float,
    "ctime": float,
    "mtime": float,
    "uid": int,
    "gid": int,
    "size": int
}

#---------------------------------------------------------
# Bits used in the mode field, values in octal.
#---------------------------------------------------------
S_IFLNK = 0120000        # symbolic link
S_IFREG = 0100000        # regular file
S_IFBLK = 0060000        # block device
S_IFDIR = 0040000        # directory
S_IFCHR = 0020000        # character device
S_IFIFO = 0010000        # fifo

TSUID   = 04000          # set UID on execution
TSGID   = 02000          # set GID on execution
TSVTX   = 01000          # reserved (sticky bit)

TUREAD  = 0400           # read by owner
TUWRITE = 0200           # write by owner
TUEXEC  = 0100           # execute/search by owner
TGREAD  = 0040           # read by group
TGWRITE = 0020           # write by group
TGEXEC  = 0010           # execute/search by group
TOREAD  = 0004           # read by other
TOWRITE = 0002           # write by other
TOEXEC  = 0001           # execute/search by other

#---------------------------------------------------------
# initialization
#---------------------------------------------------------
# Encoding used for member names; fall back to the interpreter's
# default encoding when the platform reports no filesystem encoding.
ENCODING = sys.getfilesystemencoding()
if ENCODING is None:
    ENCODING = sys.getdefaultencoding()
 170
 171#---------------------------------------------------------
 172# Some useful functions
 173#---------------------------------------------------------
 174
 175def stn(s, length):
 176    """Convert a python string to a null-terminated string buffer.
 177    """
 178    return s[:length] + (length - len(s)) * NUL
 179
 180def nts(s):
 181    """Convert a null-terminated string field to a python string.
 182    """
 183    # Use the string up to the first null char.
 184    p = s.find("\0")
 185    if p == -1:
 186        return s
 187    return s[:p]
 188
def nti(s):
    """Convert a number field to a python number.
    """
    # There are two possible encodings for a number field, see
    # itn() below.
    if s[0] != chr(0200):
        # POSIX encoding: octal digits terminated by a NUL; an empty
        # (all-NUL) field counts as zero.
        try:
            n = int(nts(s) or "0", 8)
        except ValueError:
            raise HeaderError("invalid header")
    else:
        # GNU base-256 encoding: a 0200 marker byte followed by the
        # remaining bytes as a big-endian binary number.
        n = 0L
        for i in xrange(len(s) - 1):
            n <<= 8
            n += ord(s[i + 1])
    return n
 205
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of `digits` bytes.
    """
    # POSIX 1003.1-1988 requires numbers to be encoded as a string of
    # octal digits followed by a null-byte, this allows values up to
    # (8**(digits-1))-1. GNU tar allows storing numbers greater than
    # that if necessary. A leading 0200 byte indicates this particular
    # encoding, the following digits-1 bytes are a big-endian
    # representation. This allows values up to (256**(digits-1))-1.
    if 0 <= n < 8 ** (digits - 1):
        # Fits in plain octal: zero-padded digits plus a NUL terminator.
        s = "%0*o" % (digits - 1, n) + NUL
    else:
        # Base-256 is a GNU extension and is itself bounded.
        if format != GNU_FORMAT or n >= 256 ** (digits - 1):
            raise ValueError("overflow in number field")

        if n < 0:
            # XXX We mimic GNU tar's behaviour with negative numbers,
            # this could raise OverflowError.
            n = struct.unpack("L", struct.pack("l", n))[0]

        # Emit digits-1 bytes big-endian, preceded by the 0200 marker.
        s = ""
        for i in xrange(digits - 1):
            s = chr(n & 0377) + s
            n >>= 8
        s = chr(0200) + s
    return s
 232
def uts(s, encoding, errors):
    """Convert a unicode object to a string.

       errors is a codec error-handler name; the special value
       "utf-8" selects the fallback behaviour implemented below.
    """
    if errors == "utf-8":
        # An extra error handler similar to the -o invalid=UTF-8 option
        # in POSIX.1-2001. Replace untranslatable characters with their
        # UTF-8 representation.
        try:
            return s.encode(encoding, "strict")
        except UnicodeEncodeError:
            # Re-encode character by character so that only the
            # offending characters are stored as UTF-8.
            x = []
            for c in s:
                try:
                    x.append(c.encode(encoding, "strict"))
                except UnicodeEncodeError:
                    x.append(c.encode("utf8"))
            return "".join(x)
    else:
        return s.encode(encoding, errors)
 252
 253def calc_chksums(buf):
 254    """Calculate the checksum for a member's header by summing up all
 255       characters except for the chksum field which is treated as if
 256       it was filled with spaces. According to the GNU tar sources,
 257       some tars (Sun and NeXT) calculate chksum with signed char,
 258       which will be different if there are chars in the buffer with
 259       the high bit set. So we calculate two checksums, unsigned and
 260       signed.
 261    """
 262    unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
 263    signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
 264    return unsigned_chksum, signed_chksum
 265
 266def copyfileobj(src, dst, length=None):
 267    """Copy length bytes from fileobj src to fileobj dst.
 268       If length is None, copy the entire content.
 269    """
 270    if length == 0:
 271        return
 272    if length is None:
 273        shutil.copyfileobj(src, dst)
 274        return
 275
 276    BUFSIZE = 16 * 1024
 277    blocks, remainder = divmod(length, BUFSIZE)
 278    for b in xrange(blocks):
 279        buf = src.read(BUFSIZE)
 280        if len(buf) < BUFSIZE:
 281            raise IOError("end of file reached")
 282        dst.write(buf)
 283
 284    if remainder != 0:
 285        buf = src.read(remainder)
 286        if len(buf) < remainder:
 287            raise IOError("end of file reached")
 288        dst.write(buf)
 289    return
 290
# Lookup table for filemode(): the first group maps file-type bits to
# the leading type character; the remaining nine groups each map one
# permission position (rwxrwxrwx), with combined entries handling the
# setuid/setgid/sticky variants ("s"/"S", "t"/"T").
filemode_table = (
    ((S_IFLNK,      "l"),
     (S_IFREG,      "-"),
     (S_IFBLK,      "b"),
     (S_IFDIR,      "d"),
     (S_IFCHR,      "c"),
     (S_IFIFO,      "p")),

    ((TUREAD,       "r"),),
    ((TUWRITE,      "w"),),
    ((TUEXEC|TSUID, "s"),
     (TSUID,        "S"),
     (TUEXEC,       "x")),

    ((TGREAD,       "r"),),
    ((TGWRITE,      "w"),),
    ((TGEXEC|TSGID, "s"),
     (TSGID,        "S"),
     (TGEXEC,       "x")),

    ((TOREAD,       "r"),),
    ((TOWRITE,      "w"),),
    ((TOEXEC|TSVTX, "t"),
     (TSVTX,        "T"),
     (TOEXEC,       "x"))
)
 317
 318def filemode(mode):
 319    """Convert a file's mode to a string of the form
 320       -rwxrwxrwx.
 321       Used by TarFile.list()
 322    """
 323    perm = []
 324    for table in filemode_table:
 325        for bit, char in table:
 326            if mode & bit == bit:
 327                perm.append(char)
 328                break
 329        else:
 330            perm.append("-")
 331    return "".join(perm)
 332
# Archive pathnames always use "/" as separator; on platforms with a
# different native separator, normpath() also converts the normalized
# native path into archive notation.
if os.sep != "/":
    normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
else:
    normpath = os.path.normpath
 337
 338class TarError(Exception):
 339    """Base exception."""
 340    pass
 341class ExtractError(TarError):
 342    """General exception for extract errors."""
 343    pass
 344class ReadError(TarError):
 345    """Exception for unreadble tar archives."""
 346    pass
 347class CompressionError(TarError):
 348    """Exception for unavailable compression methods."""
 349    pass
 350class StreamError(TarError):
 351    """Exception for unsupported operations on stream-like TarFiles."""
 352    pass
 353class HeaderError(TarError):
 354    """Exception for invalid headers."""
 355    pass
 356
 357#---------------------------
 358# internal stream interface
 359#---------------------------
 360class _LowLevelFile:
 361    """Low-level file object. Supports reading and writing.
 362       It is used instead of a regular file object for streaming
 363       access.
 364    """
 365
 366    def __init__(self, name, mode):
 367        mode = {
 368            "r": os.O_RDONLY,
 369            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
 370        }[mode]
 371        if hasattr(os, "O_BINARY"):
 372            mode |= os.O_BINARY
 373        self.fd = os.open(name, mode)
 374
 375    def close(self):
 376        os.close(self.fd)
 377
 378    def read(self, size):
 379        return os.read(self.fd, size)
 380
 381    def write(self, s):
 382        os.write(self.fd, s)
 383
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

           mode is "r" or "w"; comptype is "tar", "gz", "bz2" or "*"
           (transparent detection). If fileobj is None, the file
           `name` is opened via _LowLevelFile.
        """
        self._extfileobj = True
        if fileobj is None:
            # We opened the file ourselves, so close() must close it.
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name     = name or ""
        self.mode     = mode
        self.comptype = comptype
        self.fileobj  = fileobj
        self.bufsize  = bufsize
        self.buf      = ""      # raw (compressed) data buffer
        self.pos      = 0L      # position in the uncompressed stream
        self.closed   = False

        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("") & 0xffffffffL
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""      # decompressed data buffer
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()

    def __del__(self):
        # hasattr() guards against a half-constructed instance whose
        # __init__ raised before self.closed was assigned.
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        # Raw deflate stream (negative wbits suppresses the zlib
        # header); the gzip header/trailer are written by hand.
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                            -self.zlib.MAX_WBITS,
                                            self.zlib.DEF_MEM_LEVEL,
                                            0)
        timestamp = struct.pack("<L", long(time.time()))
        # gzip magic, deflate method, FNAME flag, mtime, XFL, OS byte.
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        # FNAME field: the original file name, NUL-terminated.
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        # Flush in fixed-size blocks; any remainder stays buffered
        # until the next write or close().
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            # Flush whatever the compressor still holds back.
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""      # decompressed data buffer

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)      # skip mtime (4 bytes), XFL and OS byte

        if flag & 4:
            # FEXTRA: little-endian length followed by that many bytes.
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            self.read(xlen)
        if flag & 8:
            # FNAME: NUL-terminated original file name.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            # FCOMMENT: NUL-terminated comment.
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            # FHCRC: 16-bit header CRC.
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            # Forward seeking is emulated by reading and discarding.
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in xrange(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.
        """
        if self.comptype == "tar":
            # Uncompressed: read straight from the raw block buffer.
            return self.__read(size)

        # Decompress raw blocks until enough data is available.
        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except IOError:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]    # keep the surplus for the next call
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]     # keep the surplus for the next call
        return t[:size]
# class _Stream
 610
 611class _StreamProxy(object):
 612    """Small proxy class that enables transparent compression
 613       detection for the Stream interface (mode 'r|*').
 614    """
 615
 616    def __init__(self, fileobj):
 617        self.fileobj = fileobj
 618        self.buf = self.fileobj.read(BLOCKSIZE)
 619
 620    def read(self, size):
 621        self.read = self.fileobj.read
 622        return self.buf
 623
 624    def getcomptype(self):
 625        if self.buf.startswith("\037\213\010"):
 626            return "gz"
 627        if self.buf.startswith("BZh91"):
 628            return "bz2"
 629        return "tar"
 630
 631    def close(self):
 632        self.fileobj.close()
 633# class StreamProxy
 634
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    blocksize = 16 * 1024   # raw bytes consumed per decompression step

    def __init__(self, fileobj, mode):
        self.fileobj = fileobj
        self.mode = mode
        # Mirror the underlying file's name attribute if it has one.
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """(Re)initialize compressor/decompressor state; called from
           __init__ and again from seek() when rewinding.
        """
        import bz2
        self.pos = 0
        if self.mode == "r":
            self.bz2obj = bz2.BZ2Decompressor()
            self.fileobj.seek(0)
            self.buf = ""   # decompressed but not yet returned data
        else:
            self.bz2obj = bz2.BZ2Compressor()

    def read(self, size):
        # Accumulate decompressed data until at least `size` bytes are
        # available or the underlying file is exhausted.
        b = [self.buf]
        x = len(self.buf)
        while x < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            try:
                data = self.bz2obj.decompress(raw)
            except EOFError:
                # Decompressor reports data past the end of the bz2
                # stream; stop reading.
                break
            b.append(data)
            x += len(data)
        self.buf = "".join(b)

        buf = self.buf[:size]
        self.buf = self.buf[size:]
        self.pos += len(buf)
        return buf

    def seek(self, pos):
        # Backward seeks restart decompression from the very start of
        # the stream; forward seeks read and discard.
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        return self.pos

    def write(self, data):
        self.pos += len(data)
        raw = self.bz2obj.compress(data)
        self.fileobj.write(raw)

    def close(self):
        if self.mode == "w":
            # Flush whatever the compressor still holds back.
            raw = self.bz2obj.flush()
            self.fileobj.write(raw)
# class _BZ2Proxy
 699
 700#------------------------
 701# Extraction file object
 702#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        self.fileobj = fileobj      # the underlying (shared) file object
        self.offset = offset        # start of the member's data in fileobj
        self.size = size            # logical size of the member's data
        self.sparse = sparse        # optional sparse-section map
        self.position = 0           # current logical read position

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read data from the file.
        """
        if size is None:
            size = self.size - self.position
        else:
            # Never read past the end of the embedded file.
            size = min(size, self.size - self.position)

        if self.sparse is None:
            return self.readnormal(size)
        else:
            return self.readsparse(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        self.fileobj.seek(self.offset + self.position)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        # Collect data section by section until `size` bytes have been
        # produced or the sparse map is exhausted.
        data = []
        while size > 0:
            buf = self.readsparsesection(size)
            if not buf:
                break
            size -= len(buf)
            data.append(buf)
        return "".join(data)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        # NOTE(review): self.sparse appears to be a section map whose
        # find() returns objects with offset/size (and realpos for data
        # sections, see _data defined elsewhere in this file) -- confirm.
        section = self.sparse.find(self.position)

        if section is None:
            return ""

        # Clip the read to the end of the current section.
        size = min(size, section.offset + section.size - self.position)

        if isinstance(section, _data):
            # Data section: read the real bytes from the archive.
            realpos = section.realpos + self.position - section.offset
            self.fileobj.seek(self.offset + realpos)
            self.position += size
            return self.fileobj.read(size)
        else:
            # Hole: synthesize NUL bytes without touching the file.
            self.position += size
            return NUL * size
#class _FileInFile
 777
 778
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    blocksize = 1024    # chunk size used by readline() when scanning

    def __init__(self, tarfile, tarinfo):
        # All data access is delegated to a _FileInFile window over the
        # archive's file object.
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   getattr(tarinfo, "sparse", None))
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0   # logical file position
        self.buffer = ""    # read-ahead buffer filled by readline()

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        buf = ""
        if self.buffer:
            # Serve buffered read-ahead first so read() and readline()
            # can be mixed freely.
            if size is None:
                buf = self.buffer
                self.buffer = ""
            else:
                buf = self.buffer[:size]
                self.buffer = self.buffer[size:]

        if size is None:
            buf += self.fileobj.read()
        else:
            buf += self.fileobj.read(size - len(buf))

        self.position += len(buf)
        return buf

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if "\n" in self.buffer:
            pos = self.buffer.find("\n") + 1
        else:
            # Fill the buffer blockwise until a newline or EOF turns up.
            buffers = [self.buffer]
            while True:
                buf = self.fileobj.read(self.blocksize)
                buffers.append(buf)
                if not buf or "\n" in buf:
                    self.buffer = "".join(buffers)
                    pos = self.buffer.find("\n") + 1
                    if pos == 0:
                        # no newline found.
                        pos = len(self.buffer)
                    break

        if size != -1:
            pos = min(size, pos)

        buf = self.buffer[:pos]
        self.buffer = self.buffer[pos:]
        self.position += len(buf)
        return buf

    def readlines(self):
        """Return a list with all remaining lines.
        """
        result = []
        while True:
            line = self.readline()
            if not line: break
            result.append(line)
        return result

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # The resulting position is always clamped to [0, self.size].
        if whence == os.SEEK_SET:
            self.position = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            if pos < 0:
                self.position = max(self.position + pos, 0)
            else:
                self.position = min(self.position + pos, self.size)
        elif whence == os.SEEK_END:
            self.position = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        # Buffered read-ahead is invalid after a seek.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        while True:
            line = self.readline()
            if not line:
                break
            yield line
#class ExFileObject
 906
 907#------------------
 908# Exported Classes
 909#------------------
class TarInfo(object):
    """Informational class which holds the details about an
       archive member given by a tar header block.
       TarInfo objects are returned by TarFile.getmember(),
       TarFile.getmembers() and TarFile.gettarinfo() and are
       usually created internally.
    """

    def __init__(self, name=""):
        """Construct a TarInfo object. name is the optional name
           of the member.
        """
        self.name = name        # member name
        self.mode = 0644        # file permissions
        self.uid = 0            # user id
        self.gid = 0            # group id
        self.size = 0           # file size
        self.mtime = 0          # modification time
        self.chksum = 0         # header checksum
        self.type = REGTYPE     # member type
        self.linkname = ""      # link name
        self.uname = "root"     # user name
        self.gname = "root"     # group name
        self.devmajor = 0       # device major number
        self.devminor = 0       # device minor number

        self.offset = 0         # the tar header starts here
        self.offset_data = 0    # the file's data starts here

        self.pax_headers = {}   # pax header information

    # In pax headers the "name" and "linkname" field are called
    # "path" and "linkpath".
    def _getpath(self):
        return self.name
    def _setpath(self, name):
        self.name = name
    path = property(_getpath, _setpath)

    def _getlinkpath(self):
        return self.linkname
    def _setlinkpath(self, linkname):
        self.linkname = linkname
    linkpath = property(_getlinkpath, _setlinkpath)

    def __repr__(self):
        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))

    def get_info(self, encoding, errors):
        """Return the TarInfo's attributes as a dictionary.

           encoding/errors are used to encode any unicode string
           fields into 8-bit strings suitable for a header block.
        """
        info = {
            "name":     normpath(self.name),
            "mode":     self.mode & 07777,
            "uid":      self.uid,
            "gid":      self.gid,
            "size":     self.size,
            "mtime":    self.mtime,
            "chksum":   self.chksum,
            "type":     self.type,
            "linkname": normpath(self.linkname) if self.linkname else "",
            "uname":    self.uname,
            "gname":    self.gname,
            "devmajor": self.devmajor,
            "devminor": self.devminor
        }

        # Directories are stored with a trailing slash in the archive.
        if info["type"] == DIRTYPE and not info["name"].endswith("/"):
            info["name"] += "/"

        # Encode unicode string fields into 8-bit strings.
        for key in ("name", "linkname", "uname", "gname"):
            if type(info[key]) is unicode:
                info[key] = info[key].encode(encoding, errors)

        return info

    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
        """Return a tar header as a string of 512 byte blocks.

           Raises ValueError if format is not one of the supported
           *_FORMAT constants.
        """
        info = self.get_info(encoding, errors)

        if format == USTAR_FORMAT:
            return self.create_ustar_header(info)
        elif format == GNU_FORMAT:
            return self.create_gnu_header(info)
        elif format == PAX_FORMAT:
            return self.create_pax_header(info, encoding, errors)
        else:
            raise ValueError("invalid format")

    def create_ustar_header(self, info):
        """Return the object as a ustar header block.

           Raises ValueError if a field cannot be represented within
           the fixed-size ustar fields.
        """
        info["magic"] = POSIX_MAGIC

        if len(info["linkname"]) > LENGTH_LINK:
            raise ValueError("linkname is too long")

        # ustar can store long names split into a prefix/name pair.
        if len(info["name"]) > LENGTH_NAME:
            info["prefix"], info["name"] = self._posix_split_name(info["name"])

        return self._create_header(info, USTAR_FORMAT)

    def create_gnu_header(self, info):
        """Return the object as a GNU header block sequence.

           Overlong names/linknames are emitted as extra
           GNUTYPE_LONGNAME/GNUTYPE_LONGLINK members preceding the
           real header.
        """
        info["magic"] = GNU_MAGIC

        buf = ""
        if len(info["linkname"]) > LENGTH_LINK:
            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)

        if len(info["name"]) > LENGTH_NAME:
            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)

        return buf + self._create_header(info, GNU_FORMAT)

    def create_pax_header(self, info, encoding, errors):
        """Return the object as a ustar header block. If it cannot be
           represented this way, prepend a pax extended header sequence
           with supplement information.
        """
        info["magic"] = POSIX_MAGIC
        pax_headers = self.pax_headers.copy()

        # Test string fields for values that exceed the field length or cannot
        # be represented in ASCII encoding.
        for name, hname, length in (
                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
                ("uname", "uname", 32), ("gname", "gname", 32)):

            if hname in pax_headers:
                # The pax header has priority.
                continue

            val = info[name].decode(encoding, errors)

            # Try to encode the string as ASCII.
            try:
                val.encode("ascii")
            except UnicodeEncodeError:
                pax_headers[hname] = val
                continue

            if len(info[name]) > length:
                pax_headers[hname] = val

        # Test number fields for values that exceed the field limit or values
        # that like to be stored as float.
        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
            if name in pax_headers:
                # The pax header has priority. Avoid overflow.
                info[name] = 0
                continue

            val = info[name]
            # Octal fields hold digits-1 characters plus a terminator,
            # hence the 8 ** (digits - 1) limit.
            if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
                pax_headers[name] = unicode(val)
                info[name] = 0

        # Create a pax extended header if necessary.
        if pax_headers:
            buf = self._create_pax_generic_header(pax_headers)
        else:
            buf = ""

        return buf + self._create_header(info, USTAR_FORMAT)

    @classmethod
    def create_pax_global_header(cls, pax_headers):
        """Return the object as a pax global header block sequence.
        """
        return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)

    def _posix_split_name(self, name):
        """Split a name longer than 100 chars into a prefix
           and a name part.

           Raises ValueError if no slash is available to split on or
           the remaining name part is still too long.
        """
        # Take one char beyond LENGTH_PREFIX so a '/' sitting exactly
        # on the boundary is kept inside the slice, then trim back to
        # the last slash.
        prefix = name[:LENGTH_PREFIX + 1]
        while prefix and prefix[-1] != "/":
            prefix = prefix[:-1]

        name = name[len(prefix):]
        prefix = prefix[:-1]

        if not prefix or len(name) > LENGTH_NAME:
            raise ValueError("name is too long")
        return prefix, name

    @staticmethod
    def _create_header(info, format):
        """Return a header block. info is a dictionary with file
           information, format must be one of the *_FORMAT constants.
        """
        parts = [
            stn(info.get("name", ""), 100),
            itn(info.get("mode", 0) & 07777, 8, format),
            itn(info.get("uid", 0), 8, format),
            itn(info.get("gid", 0), 8, format),
            itn(info.get("size", 0), 12, format),
            itn(info.get("mtime", 0), 12, format),
            "        ", # checksum field
            info.get("type", REGTYPE),
            stn(info.get("linkname", ""), 100),
            stn(info.get("magic", POSIX_MAGIC), 8),
            stn(info.get("uname", "root"), 32),
            stn(info.get("gname", "root"), 32),
            itn(info.get("devmajor", 0), 8, format),
            itn(info.get("devminor", 0), 8, format),
            stn(info.get("prefix", ""), 155)
        ]

        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
        # The checksum field occupies bytes 148-155 and was packed as
        # eight spaces above; compute the checksum over the block and
        # patch it in (BLOCKSIZE - 148 == 364, BLOCKSIZE - 155 == 357).
        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
        return buf

    @staticmethod
    def _create_payload(payload):
        """Return the string payload filled with zero bytes
           up to the next 512 byte border.
        """
        blocks, remainder = divmod(len(payload), BLOCKSIZE)
        if remainder > 0:
            payload += (BLOCKSIZE - remainder) * NUL
        return payload

    @classmethod
    def _create_gnu_long_header(cls, name, type):
        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
           for name.
        """
        # The stored name is NUL-terminated.
        name += NUL

        info = {}
        info["name"] = "././@LongLink"
        info["type"] = type
        info["size"] = len(name)
        info["magic"] = GNU_MAGIC

        # create extended header + name blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(name)

    @classmethod
    def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
        """Return a POSIX.1-2001 extended or global header sequence
           that contains a list of keyword, value pairs. The values
           must be unicode objects.
        """
        records = []
        for keyword, value in pax_headers.iteritems():
            keyword = keyword.encode("utf8")
            value = value.encode("utf8")
            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'
            # The record length prefix counts its own decimal digits,
            # so iterate until the length value stabilizes.
            n = p = 0
            while True:
                n = l + len(str(p))
                if n == p:
                    break
                p = n
            records.append("%d %s=%s\n" % (p, keyword, value))
        records = "".join(records)

        # We use a hardcoded "././@PaxHeader" name like star does
        # instead of the one that POSIX recommends.
        info = {}
        info["name"] = "././@PaxHeader"
        info["type"] = type
        info["size"] = len(records)
        info["magic"] = POSIX_MAGIC

        # Create pax header + record blocks.
        return cls._create_header(info, USTAR_FORMAT) + \
                cls._create_payload(records)

    @classmethod
    def frombuf(cls, buf):
        """Construct a TarInfo object from a 512 byte string buffer.

           Raises HeaderError if the buffer is truncated, all-zero, or
           fails the checksum test.
        """
        if len(buf) != BLOCKSIZE:
            raise HeaderError("truncated header")
        if buf.count(NUL) == BLOCKSIZE:
            raise HeaderError("empty header")

        chksum = nti(buf[148:156])
        # calc_chksums() returns both signed and unsigned variants;
        # either one is accepted.
        if chksum not in calc_chksums(buf):
            raise HeaderError("bad checksum")

        obj = cls()
        obj.buf = buf
        obj.name = nts(buf[0:100])
        obj.mode = nti(buf[100:108])
        obj.uid = nti(buf[108:116])
        obj.gid = nti(buf[116:124])
        obj.size = nti(buf[124:136])
        obj.mtime = nti(buf[136:148])
        obj.chksum = chksum
        obj.type = buf[156:157]
        obj.linkname = nts(buf[157:257])
        # Bytes 257:265 hold the magic/version field, which is not
        # stored on the object.
        obj.uname = nts(buf[265:297])
        obj.gname = nts(buf[297:329])
        obj.devmajor = nti(buf[329:337])
        obj.devminor = nti(buf[337:345])
        prefix = nts(buf[345:500])

        # Old V7 tar format represents a directory as a regular
        # file with a trailing slash.
        if obj.type == AREGTYPE and obj.name.endswith("/"):
            obj.type = DIRTYPE

        # Remove redundant slashes from directories.
        if obj.isdir():
            obj.name = obj.name.rstrip("/")

        # Reconstruct a ustar longname.
        if prefix and obj.type not in GNU_TYPES:
            obj.name = prefix + "/" + obj.name
        return obj

    @classmethod
    def fromtarfile(cls, tarfile):
        """Return the next TarInfo object from TarFile object
           tarfile.

           Returns None at end of archive.
        """
        buf = tarfile.fileobj.read(BLOCKSIZE)
        if not buf:
            return
        obj = cls.frombuf(buf)
        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
        return obj._proc_member(tarfile)

    #--------------------------------------------------------------------------
    # The following are methods that are called depending on the type of a
    # member. The entry point is _proc_member() which can be overridden in a
    # subclass to add custom _proc_*() methods. A _proc_*() method MUST
    # implement the following
    # operations:
    # 1. Set self.offset_data to the position where the data blocks begin,
    #    if there is data that follows.
    # 2. Set tarfile.offset to the position where the next member's header will
    #    begin.
    # 3. Return self or another valid TarInfo object.
    def _proc_member(self, tarfile):
        """Choose the right processing method depending on
           the type and call it.
        """
        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
            return self._proc_gnulong(tarfile)
        elif self.type == GNUTYPE_SPARSE:
            return self._proc_sparse(tarfile)
        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
            return self._proc_pax(tarfile)
        else:
            return self._proc_builtin(tarfile)

    def _proc_builtin(self, tarfile):
        """Process a builtin type or an unknown type which
           will be treated as a regular file.
        """
        self.offset_data = tarfile.fileobj.tell()
        offset = self.offset_data
        if self.isreg() or self.type not in SUPPORTED_TYPES:
            # Skip the following data blocks.
            offset += self._block(self.size)
        tarfile.offset = offset

        # Patch the TarInfo object with saved global
        # header information.
        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

        return self

    def _proc_gnulong(self, tarfile):
        """Process the blocks that hold a GNU longname
           or longlink member.
        """
        buf = tarfile.fileobj.read(self._block(self.size))

        # Fetch the next header and process it.
        next = self.fromtarfile(tarfile)
        if next is None:
            raise HeaderError("missing subsequent header")

        # Patch the TarInfo object from the next header with
        # the longname information.
        next.offset = self.offset
        if self.type == GNUTYPE_LONGNAME:
            next.name = nts(buf)
        elif self.type == GNUTYPE_LONGLINK:
            next.linkname = nts(buf)

        return next

    def _proc_sparse(self, tarfile):
        """Process a GNU sparse header plus extra headers.
        """
        buf = self.buf
        sp = _ringbuffer()
        # The sparse map starts at byte 386 of the header; each entry
        # is a 12-digit offset followed by a 12-digit byte count.
        pos = 386
        lastpos = 0L
        realpos = 0L
        # There are 4 possible sparse structs in the
        # first header.
        for i in xrange(4):
            try:
                offset = nti(buf[pos:pos + 12])
                numbytes = nti(buf[pos + 12:pos + 24])
            except ValueError:
                break
            if offset > lastpos:
                sp.append(_hole(lastpos, offset - lastpos))
            sp.append(_data(offset, numbytes, realpos))
            realpos += numbytes
            lastpos = offset + numbytes
            pos += 24

        # Byte 482 flags additional sparse headers; bytes 483:495 hold
        # the real (unsparsed) file size.
        isextended = ord(buf[482])
        origsize = nti(buf[483:495])

        # If the isextended flag is given,
        # there are extra headers to process.
        while isextended == 1:
            buf = tarfile.fileobj.read(BLOCKSIZE)
            pos = 0
            # Each extension block holds up to 21 sparse structs.
            for i in xrange(21):
                try:
                    offset = nti(buf[pos:pos + 12])
                    numbytes = nti(buf[pos + 12:pos + 24])
                except ValueError:
                    break
                if offset > lastpos:
                    sp.append(_hole(lastpos, offset - lastpos))
                sp.append(_data(offset, numbytes, realpos))
                realpos += numbytes
                lastpos = offset + numbytes
                pos += 24
            isextended = ord(buf[504])

        if lastpos < origsize:
            sp.append(_hole(lastpos, origsize - lastpos))

        self.sparse = sp

        self.offset_data = tarfile.fileobj.tell()
        tarfile.offset = self.offset_data + self._block(self.size)
        self.size = origsize

        return self

    def _proc_pax(self, tarfile):
        """Process an extended or global header as described in
           POSIX.1-2001.
        """
        # Read the header information.
        buf = tarfile.fileobj.read(self._block(self.size))

        # A pax header stores supplemental information for either
        # the following file (extended) or all following files
        # (global).
        if self.type == XGLTYPE:
            pax_headers = tarfile.pax_headers
        else:
            pax_headers = tarfile.pax_headers.copy()

        # Parse pax header information. A record looks like that:
        # "%d %s=%s\n" % (length, keyword, value). length is the size
        # of the complete record including the length field itself and
        # the newline. keyword and value are both UTF-8 encoded strings.
        regex = re.compile(r"(\d+) ([^=]+)=", re.U)
        pos = 0
        while True:
            match = regex.match(buf, pos)
            if not match:
                break

            length, keyword = match.groups()
            length = int(length)
            value = buf[match.end(2) + 1:match.start(1) + length - 1]

            keyword = keyword.decode("utf8")
            value = value.decode("utf8")

            pax_headers[keyword] = value
            pos += length

        # Fetch the next header.
        next = self.fromtarfile(tarfile)

        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
            if next is None:
                raise HeaderError("missing subsequent header")

            # Patch the TarInfo object with the extended header info.
            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
            next.offset = self.offset

            if "size" in pax_headers:
                # If the extended header replaces the size field,
                # we need to recalculate the offset where the next
                # header starts.
                offset = next.offset_data
                if next.isreg() or next.type not in SUPPORTED_TYPES:
                    offset += next._block(next.size)
                tarfile.offset = offset

        return next

    def _apply_pax_info(self, pax_headers, encoding, errors):
        """Replace fields with supplemental information from a previous
           pax extended or global header.
        """
        for keyword, value in pax_headers.iteritems():
            if keyword not in PAX_FIELDS:
                continue

            if keyword == "path":
                value = value.rstrip("/")

            if keyword in PAX_NUMBER_FIELDS:
                try:
                    value = PAX_NUMBER_FIELDS[keyword](value)
                except ValueError:
                    value = 0
            else:
                value = uts(value, encoding, errors)

            setattr(self, keyword, value)

        self.pax_headers = pax_headers.copy()

    def _block(self, count):
        """Round up a byte count by BLOCKSIZE and return it,
           e.g. _block(834) => 1024.
        """
        blocks, remainder = divmod(count, BLOCKSIZE)
        if remainder:
            blocks += 1
        return blocks * BLOCKSIZE

    # Convenience predicates for the member's type field.
    def isreg(self):
        return self.type in REGULAR_TYPES
    def isfile(self):
        return self.isreg()
    def isdir(self):
        return self.type == DIRTYPE
    def issym(self):
        return self.type == SYMTYPE
    def islnk(self):
        return self.type == LNKTYPE
    def ischr(self):
        return self.type == CHRTYPE
    def isblk(self):
        return self.type == BLKTYPE
    def isfifo(self):
        return self.type == FIFOTYPE
    def issparse(self):
        return self.type == GNUTYPE_SPARSE
    def isdev(self):
        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
# class TarInfo
1471
1472class TarFile(object):
1473    """The TarFile Class provides an interface to tar archives.
1474    """
1475
1476    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)
1477
1478    dereference = False         # If true, add content of linked file to the
1479                                # tar file, else the link.
1480
1481    ignore_zeros = False        # If true, skips empty or invalid blocks and
1482                                # continues processing.
1483
1484    errorlevel = 0              # If 0, fatal errors only appear in debug
1485                                # messages (if debug >= 0). If > 0, errors
1486                                # are passed to the caller as exceptions.
1487
1488    format = DEFAULT_FORMAT     # The format to use when creating an archive.
1489
1490    encoding = ENCODING         # Encoding for 8-bit character strings.
1491
1492    errors = None               # Error handler for unicode conversion.
1493
1494    tarinfo = TarInfo           # The default TarInfo class to use.
1495
1496    fileobject = ExFileObject   # The default ExFileObject class to use.
1497
1498    def __init__(self, name=None, mode="r", fileobj=None, format=None,
1499            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1500            errors=None, pax_headers=None, debug=None, errorlevel=None):
1501        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1502           read from an existing archive, 'a' to append data to an existing
1503           file or 'w' to create a new file overwriting an existing one. `mode'
1504           defaults to 'r'.
1505           If `fileobj' is given, it is used for reading or writing data. If it
1506           can be determined, `mode' is overridden by `fileobj's mode.
1507           `fileobj' is not closed, when TarFile is closed.
1508        """
1509        if len(mode) > 1 or mode not in "raw":
1510            raise ValueError("mode must be 'r', 'a' or 'w'")
1511        self.mode = mode
1512        self._mode = {"r": "rb", "a": "r+b",

Large files files are truncated, but you can click here to view the full file