
/Lib/pickle.py

http://unladen-swallow.googlecode.com/
   1"""Create portable serialized representations of Python objects.
   2
   3See module cPickle for a (much) faster implementation.
   4See module copy_reg for a mechanism for registering custom picklers.
   5See module pickletools source for extensive comments.
   6
   7Classes:
   8
   9    Pickler
  10    Unpickler
  11
  12Functions:
  13
  14    dump(object, file)
  15    dumps(object) -> string
  16    load(file) -> object
  17    loads(string) -> object
  18
  19Misc variables:
  20
  21    __version__
  22    format_version
  23    compatible_formats
  24
  25"""
  26
  27__version__ = "$Revision: 65524 $"       # Code version
  28
  29from types import *
  30from copy_reg import dispatch_table
  31from copy_reg import _extension_registry, _inverted_registry, _extension_cache
  32import marshal
  33import sys
  34import struct
  35import re
  36
  37__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
  38           "Unpickler", "dump", "dumps", "load", "loads"]
  39
  40# These are purely informational; no code uses these.
  41format_version = "2.0"                  # File format version we write
  42compatible_formats = ["1.0",            # Original protocol 0
  43                      "1.1",            # Protocol 0 with INST added
  44                      "1.2",            # Original protocol 1
  45                      "1.3",            # Protocol 1 with BINFLOAT added
  46                      "2.0",            # Protocol 2
  47                      ]                 # Old format versions we can read
  48
  49# Keep in synch with cPickle.  This is the highest protocol number we
  50# know how to read.
  51HIGHEST_PROTOCOL = 2
  52
  53# Why use struct.pack() for pickling but marshal.loads() for
  54# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
  55# marshal.loads() is twice as fast as struct.unpack()!
  56mloads = marshal.loads
  57
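# Sketch (not part of the original module): the binary load_* methods below
# rebuild a 4-byte int by prefixing marshal's 'i' type code to the raw bytes
# that struct.pack("<i", ...) wrote, so the two calls act as inverses here.
def _mloads_demo():
    assert mloads('i' + struct.pack("<i", 255)) == 255
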
  58class PickleError(Exception):
  59    """A common base class for the other pickling exceptions."""
  60    pass
  61
  62class PicklingError(PickleError):
  63    """This exception is raised when an unpicklable object is passed to the
  64    dump() method.
  65
  66    """
  67    pass
  68
  69class UnpicklingError(PickleError):
  70    """This exception is raised when there is a problem unpickling an object,
  71    such as a security violation.
  72
  73    Note that other exceptions may also be raised during unpickling, including
  74    (but not necessarily limited to) AttributeError, EOFError, ImportError,
  75    and IndexError.
  76
  77    """
  78    pass
  79
  80# An instance of _Stop is raised by Unpickler.load_stop() in response to
  81# the STOP opcode, passing the object that is the result of unpickling.
  82class _Stop(Exception):
  83    def __init__(self, value):
  84        self.value = value
  85
  86# Jython has PyStringMap; it's a dict subclass with string keys
  87try:
  88    from org.python.core import PyStringMap
  89except ImportError:
  90    PyStringMap = None
  91
  92# UnicodeType may or may not be exported (normally imported from types)
  93try:
  94    UnicodeType
  95except NameError:
  96    UnicodeType = None
  97
  98# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
  99# here is in kind-of alphabetical order of 1-character pickle code.
 100# pickletools groups them by purpose.
 101
 102MARK            = '('   # push special markobject on stack
 103STOP            = '.'   # every pickle ends with STOP
 104POP             = '0'   # discard topmost stack item
 105POP_MARK        = '1'   # discard stack top through topmost markobject
 106DUP             = '2'   # duplicate top stack item
 107FLOAT           = 'F'   # push float object; decimal string argument
 108INT             = 'I'   # push integer or bool; decimal string argument
 109BININT          = 'J'   # push four-byte signed int
 110BININT1         = 'K'   # push 1-byte unsigned int
 111LONG            = 'L'   # push long; decimal string argument
 112BININT2         = 'M'   # push 2-byte unsigned int
 113NONE            = 'N'   # push None
 114PERSID          = 'P'   # push persistent object; id is taken from string arg
 115BINPERSID       = 'Q'   #  "       "         "  ;  "  "   "     "  stack
 116REDUCE          = 'R'   # apply callable to argtuple, both on stack
 117STRING          = 'S'   # push string; NL-terminated string argument
 118BINSTRING       = 'T'   # push string; counted binary string argument
 119SHORT_BINSTRING = 'U'   #  "     "   ;    "      "       "      " < 256 bytes
 120UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped'd argument
 121BINUNICODE      = 'X'   #   "     "       "  ; counted UTF-8 string argument
 122APPEND          = 'a'   # append stack top to list below it
 123BUILD           = 'b'   # call __setstate__ or __dict__.update()
 124GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
 125DICT            = 'd'   # build a dict from stack items
 126EMPTY_DICT      = '}'   # push empty dict
 127APPENDS         = 'e'   # extend list on stack by topmost stack slice
 128GET             = 'g'   # push item from memo on stack; index is string arg
 129BINGET          = 'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
 130INST            = 'i'   # build & push class instance
 131LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
 132LIST            = 'l'   # build list from topmost stack items
 133EMPTY_LIST      = ']'   # push empty list
 134OBJ             = 'o'   # build & push class instance
 135PUT             = 'p'   # store stack top in memo; index is string arg
 136BINPUT          = 'q'   #   "     "    "   "   " ;   "    " 1-byte arg
 137LONG_BINPUT     = 'r'   #   "     "    "   "   " ;   "    " 4-byte arg
 138SETITEM         = 's'   # add key+value pair to dict
 139TUPLE           = 't'   # build tuple from topmost stack items
 140EMPTY_TUPLE     = ')'   # push empty tuple
 141SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
 142BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding
 143
 144TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
 145FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py
 146
 147# Protocol 2
 148
 149PROTO           = '\x80'  # identify pickle protocol
 150NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
 151EXT1            = '\x82'  # push object from extension registry; 1-byte index
 152EXT2            = '\x83'  # ditto, but 2-byte index
 153EXT4            = '\x84'  # ditto, but 4-byte index
 154TUPLE1          = '\x85'  # build 1-tuple from stack top
 155TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
 156TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
 157NEWTRUE         = '\x88'  # push True
 158NEWFALSE        = '\x89'  # push False
 159LONG1           = '\x8a'  # push long from < 256 bytes
 160LONG4           = '\x8b'  # push really big long
 161
 162_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
 163
 164
 165__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
 166del x
 167
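# Illustrative helper (not part of the original pickle.py): disassemble the
# stream produced for an object so the one-character codes listed above can
# be seen in context.  Relies only on the standard pickletools module and on
# the module-level dumps() defined near the end of this file.
def _show_opcodes(obj, protocol=2):
    import pickletools
    pickletools.dis(dumps(obj, protocol))
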
 168
 169# Pickling machinery
 170
 171class Pickler:
 172
 173    def __init__(self, file, protocol=None):
 174        """This takes a file-like object for writing a pickle data stream.
 175
 176        The optional protocol argument tells the pickler to use the
 177        given protocol; supported protocols are 0, 1, 2.  The default
 178        protocol is 0, to be backwards compatible.  (Protocol 0 is the
 179        only protocol that can be written to a file opened in text
 180        mode and read back successfully.  When using a protocol higher
 181        than 0, make sure the file is opened in binary mode, both when
 182        pickling and unpickling.)
 183
 184        Protocol 1 is more efficient than protocol 0; protocol 2 is
 185        more efficient than protocol 1.
 186
 187        Specifying a negative protocol version selects the highest
 188        protocol version supported.  The higher the protocol used, the
 189        more recent the version of Python needed to read the pickle
 190        produced.
 191
 192        The file parameter must have a write() method that accepts a single
 193        string argument.  It can thus be an open file object, a StringIO
 194        object, or any other custom object that meets this interface.
 195
 196        """
 197        if protocol is None:
 198            protocol = 0
 199        if protocol < 0:
 200            protocol = HIGHEST_PROTOCOL
 201        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
 202            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
 203        self.write = file.write
 204        self.memo = {}
 205        self.proto = int(protocol)
 206        self.bin = protocol >= 1
 207        self.fast = 0
 208
 209    def clear_memo(self):
 210        """Clears the pickler's "memo".
 211
 212        The memo is the data structure that remembers which objects the
 213        pickler has already seen, so that shared or recursive objects are
 214        pickled by reference and not by value.  This method is useful when
 215        re-using picklers.
 216
 217        """
 218        self.memo.clear()
 219
 220    def dump(self, obj):
 221        """Write a pickled representation of obj to the open file."""
 222        if self.proto >= 2:
 223            self.write(PROTO + chr(self.proto))
 224        self.save(obj)
 225        self.write(STOP)
 226
 227    def memoize(self, obj):
 228        """Store an object in the memo."""
 229
 230        # The Pickler memo is a dictionary mapping object ids to 2-tuples
 231        # that contain the Unpickler memo key and the object being memoized.
 232        # The memo key is written to the pickle and will become
 233        # the key in the Unpickler's memo.  The object is stored in the
 234        # Pickler memo so that transient objects are kept alive during
 235        # pickling.
 236
 237        # The use of the Unpickler memo length as the memo key is just a
 238        # convention.  The only requirement is that the memo values be unique.
 239        # But there appears no advantage to any other scheme, and this
 240        # scheme allows the Unpickler memo to be implemented as a plain (but
 241        # growable) array, indexed by memo key.
 242        if self.fast:
 243            return
 244        assert id(obj) not in self.memo
 245        memo_len = len(self.memo)
 246        self.write(self.put(memo_len))
 247        self.memo[id(obj)] = memo_len, obj
 248
 249    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
 250    def put(self, i, pack=struct.pack):
 251        if self.bin:
 252            if i < 256:
 253                return BINPUT + chr(i)
 254            else:
 255                return LONG_BINPUT + pack("<i", i)
 256
 257        return PUT + repr(i) + '\n'
 258
 259    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
 260    def get(self, i, pack=struct.pack):
 261        if self.bin:
 262            if i < 256:
 263                return BINGET + chr(i)
 264            else:
 265                return LONG_BINGET + pack("<i", i)
 266
 267        return GET + repr(i) + '\n'
 268
 269    def save(self, obj):
 270        # Check for persistent id (defined by a subclass)
 271        pid = self.persistent_id(obj)
 272        if pid:
 273            self.save_pers(pid)
 274            return
 275
 276        # Check the memo
 277        x = self.memo.get(id(obj))
 278        if x:
 279            self.write(self.get(x[0]))
 280            return
 281
 282        # Check the type dispatch table
 283        t = type(obj)
 284        f = self.dispatch.get(t)
 285        if f:
 286            f(self, obj) # Call unbound method with explicit self
 287            return
 288
 289        # Check for a class with a custom metaclass; treat as regular class
 290        try:
 291            issc = issubclass(t, TypeType)
 292        except TypeError: # t is not a class (old Boost; see SF #502085)
 293            issc = 0
 294        if issc:
 295            self.save_global(obj)
 296            return
 297
 298        # Check copy_reg.dispatch_table
 299        reduce = dispatch_table.get(t)
 300        if reduce:
 301            rv = reduce(obj)
 302        else:
 303            # Check for a __reduce_ex__ method, fall back to __reduce__
 304            reduce = getattr(obj, "__reduce_ex__", None)
 305            if reduce:
 306                rv = reduce(self.proto)
 307            else:
 308                reduce = getattr(obj, "__reduce__", None)
 309                if reduce:
 310                    rv = reduce()
 311                else:
 312                    raise PicklingError("Can't pickle %r object: %r" %
 313                                        (t.__name__, obj))
 314
 315        # Check for string returned by reduce(), meaning "save as global"
 316        if type(rv) is StringType:
 317            self.save_global(obj, rv)
 318            return
 319
 320        # Assert that reduce() returned a tuple
 321        if type(rv) is not TupleType:
 322            raise PicklingError("%s must return string or tuple" % reduce)
 323
 324        # Assert that it returned an appropriately sized tuple
 325        l = len(rv)
 326        if not (2 <= l <= 5):
 327            raise PicklingError("Tuple returned by %s must have "
 328                                "two to five elements" % reduce)
 329
 330        # Save the reduce() output and finally memoize the object
 331        self.save_reduce(obj=obj, *rv)
 332
 333    def persistent_id(self, obj):
 334        # This exists so a subclass can override it
 335        return None
 336
 337    def save_pers(self, pid):
 338        # Save a persistent id reference
 339        if self.bin:
 340            self.save(pid)
 341            self.write(BINPERSID)
 342        else:
 343            self.write(PERSID + str(pid) + '\n')
 344
 345    def save_reduce(self, func, args, state=None,
 346                    listitems=None, dictitems=None, obj=None):
 347        # This API is called by some subclasses
 348
 349        # Assert that args is a tuple or None
 350        if not isinstance(args, TupleType):
 351            raise PicklingError("args from reduce() should be a tuple")
 352
 353        # Assert that func is callable
 354        if not hasattr(func, '__call__'):
 355            raise PicklingError("func from reduce should be callable")
 356
 357        save = self.save
 358        write = self.write
 359
 360        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
 361        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
 362            # A __reduce__ implementation can direct protocol 2 to
 363            # use the more efficient NEWOBJ opcode, while still
 364            # allowing protocol 0 and 1 to work normally.  For this to
 365            # work, the function returned by __reduce__ should be
 366            # called __newobj__, and its first argument should be a
 367            # new-style class.  The implementation for __newobj__
 368            # should be as follows, although pickle has no way to
 369            # verify this:
 370            #
 371            # def __newobj__(cls, *args):
 372            #     return cls.__new__(cls, *args)
 373            #
 374            # Protocols 0 and 1 will pickle a reference to __newobj__,
 375            # while protocol 2 (and above) will pickle a reference to
 376            # cls, the remaining args tuple, and the NEWOBJ code,
 377            # which calls cls.__new__(cls, *args) at unpickling time
 378            # (see load_newobj below).  If __reduce__ returns a
 379            # three-tuple, the state from the third tuple item will be
 380            # pickled regardless of the protocol, calling __setstate__
 381            # at unpickling time (see load_build below).
 382            #
 383            # Note that no standard __newobj__ implementation exists;
 384            # you have to provide your own.  This is to enforce
 385            # compatibility with Python 2.2 (pickles written using
 386            # protocol 0 or 1 in Python 2.3 should be unpicklable by
 387            # Python 2.2).
 388            cls = args[0]
 389            if not hasattr(cls, "__new__"):
 390                raise PicklingError(
 391                    "args[0] from __newobj__ args has no __new__")
 392            if obj is not None and cls is not obj.__class__:
 393                raise PicklingError(
 394                    "args[0] from __newobj__ args has the wrong class")
 395            args = args[1:]
 396            save(cls)
 397            save(args)
 398            write(NEWOBJ)
 399        else:
 400            save(func)
 401            save(args)
 402            write(REDUCE)
 403
 404        if obj is not None:
 405            self.memoize(obj)
 406
 407        # More new special cases (that work with older protocols as
 408        # well): when __reduce__ returns a tuple with 4 or 5 items,
 409        # the 4th and 5th item should be iterators that provide list
 410        # items and dict items (as (key, value) tuples), or None.
 411
 412        if listitems is not None:
 413            self._batch_appends(listitems)
 414
 415        if dictitems is not None:
 416            self._batch_setitems(dictitems)
 417
 418        if state is not None:
 419            save(state)
 420            write(BUILD)
 421
 422    # Methods below this point are dispatched through the dispatch table
 423
 424    dispatch = {}
 425
 426    def save_none(self, obj):
 427        self.write(NONE)
 428    dispatch[NoneType] = save_none
 429
 430    def save_bool(self, obj):
 431        if self.proto >= 2:
 432            self.write(obj and NEWTRUE or NEWFALSE)
 433        else:
 434            self.write(obj and TRUE or FALSE)
 435    dispatch[bool] = save_bool
 436
 437    def save_int(self, obj, pack=struct.pack):
 438        if self.bin:
 439            # If the int is small enough to fit in a signed 4-byte 2's-comp
 440            # format, we can store it more efficiently than the general
 441            # case.
 442            # First one- and two-byte unsigned ints:
 443            if obj >= 0:
 444                if obj <= 0xff:
 445                    self.write(BININT1 + chr(obj))
 446                    return
 447                if obj <= 0xffff:
 448                    self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
 449                    return
 450            # Next check for 4-byte signed ints:
 451            high_bits = obj >> 31  # note that Python shift sign-extends
 452            if high_bits == 0 or high_bits == -1:
 453                # All high bits are copies of bit 2**31, so the value
 454                # fits in a 4-byte signed int.
 455                self.write(BININT + pack("<i", obj))
 456                return
 457        # Text pickle, or int too big to fit in signed 4-byte format.
 458        self.write(INT + repr(obj) + '\n')
 459    dispatch[IntType] = save_int
 460
 461    def save_long(self, obj, pack=struct.pack):
 462        if self.proto >= 2:
 463            bytes = encode_long(obj)
 464            n = len(bytes)
 465            if n < 256:
 466                self.write(LONG1 + chr(n) + bytes)
 467            else:
 468                self.write(LONG4 + pack("<i", n) + bytes)
 469            return
 470        self.write(LONG + repr(obj) + '\n')
 471    dispatch[LongType] = save_long
 472
 473    def save_float(self, obj, pack=struct.pack):
 474        if self.bin:
 475            self.write(BINFLOAT + pack('>d', obj))
 476        else:
 477            self.write(FLOAT + repr(obj) + '\n')
 478    dispatch[FloatType] = save_float
 479
 480    def save_string(self, obj, pack=struct.pack):
 481        if self.bin:
 482            n = len(obj)
 483            if n < 256:
 484                self.write(SHORT_BINSTRING + chr(n) + obj)
 485            else:
 486                self.write(BINSTRING + pack("<i", n) + obj)
 487        else:
 488            self.write(STRING + repr(obj) + '\n')
 489        self.memoize(obj)
 490    dispatch[StringType] = save_string
 491
 492    def save_unicode(self, obj, pack=struct.pack):
 493        if self.bin:
 494            encoding = obj.encode('utf-8')
 495            n = len(encoding)
 496            self.write(BINUNICODE + pack("<i", n) + encoding)
 497        else:
 498            obj = obj.replace("\\", "\\u005c")
 499            obj = obj.replace("\n", "\\u000a")
 500            self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
 501        self.memoize(obj)
 502    dispatch[UnicodeType] = save_unicode
 503
 504    if StringType == UnicodeType:
 505        # This is true for Jython
 506        def save_string(self, obj, pack=struct.pack):
 507            unicode = obj.isunicode()
 508
 509            if self.bin:
 510                if unicode:
 511                    obj = obj.encode("utf-8")
 512                l = len(obj)
 513                if l < 256 and not unicode:
 514                    self.write(SHORT_BINSTRING + chr(l) + obj)
 515                else:
 516                    s = pack("<i", l)
 517                    if unicode:
 518                        self.write(BINUNICODE + s + obj)
 519                    else:
 520                        self.write(BINSTRING + s + obj)
 521            else:
 522                if unicode:
 523                    obj = obj.replace("\\", "\\u005c")
 524                    obj = obj.replace("\n", "\\u000a")
 525                    obj = obj.encode('raw-unicode-escape')
 526                    self.write(UNICODE + obj + '\n')
 527                else:
 528                    self.write(STRING + repr(obj) + '\n')
 529            self.memoize(obj)
 530        dispatch[StringType] = save_string
 531
 532    def save_tuple(self, obj):
 533        write = self.write
 534        proto = self.proto
 535
 536        n = len(obj)
 537        if n == 0:
 538            if proto:
 539                write(EMPTY_TUPLE)
 540            else:
 541                write(MARK + TUPLE)
 542            return
 543
 544        save = self.save
 545        memo = self.memo
 546        if n <= 3 and proto >= 2:
 547            for element in obj:
 548                save(element)
 549            # Subtle.  Same as in the big comment below.
 550            if id(obj) in memo:
 551                get = self.get(memo[id(obj)][0])
 552                write(POP * n + get)
 553            else:
 554                write(_tuplesize2code[n])
 555                self.memoize(obj)
 556            return
 557
 558        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
 559        # has more than 3 elements.
 560        write(MARK)
 561        for element in obj:
 562            save(element)
 563
 564        if id(obj) in memo:
 565            # Subtle.  d was not in memo when we entered save_tuple(), so
 566            # the process of saving the tuple's elements must have saved
 567            # the tuple itself:  the tuple is recursive.  The proper action
 568            # now is to throw away everything we put on the stack, and
 569            # simply GET the tuple (it's already constructed).  This check
 570            # could have been done in the "for element" loop instead, but
 571            # recursive tuples are a rare thing.
 572            get = self.get(memo[id(obj)][0])
 573            if proto:
 574                write(POP_MARK + get)
 575            else:   # proto 0 -- POP_MARK not available
 576                write(POP * (n+1) + get)
 577            return
 578
 579        # No recursion.
 580        self.write(TUPLE)
 581        self.memoize(obj)
 582
 583    dispatch[TupleType] = save_tuple
 584
 585    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
 586    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
 587    # to remove it.
 588    def save_empty_tuple(self, obj):
 589        self.write(EMPTY_TUPLE)
 590
 591    def save_list(self, obj):
 592        write = self.write
 593
 594        if self.bin:
 595            write(EMPTY_LIST)
 596        else:   # proto 0 -- can't use EMPTY_LIST
 597            write(MARK + LIST)
 598
 599        self.memoize(obj)
 600        self._batch_appends(iter(obj))
 601
 602    dispatch[ListType] = save_list
 603
 604    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
 605    # out of synch, though.
 606    _BATCHSIZE = 1000
 607
 608    def _batch_appends(self, items):
 609        # Helper to batch up APPENDS sequences
 610        save = self.save
 611        write = self.write
 612
 613        if not self.bin:
 614            for x in items:
 615                save(x)
 616                write(APPEND)
 617            return
 618
 619        r = xrange(self._BATCHSIZE)
 620        while items is not None:
 621            tmp = []
 622            for i in r:
 623                try:
 624                    x = items.next()
 625                    tmp.append(x)
 626                except StopIteration:
 627                    items = None
 628                    break
 629            n = len(tmp)
 630            if n > 1:
 631                write(MARK)
 632                for x in tmp:
 633                    save(x)
 634                write(APPENDS)
 635            elif n:
 636                save(tmp[0])
 637                write(APPEND)
 638            # else tmp is empty, and we're done
 639
 640    def save_dict(self, obj):
 641        write = self.write
 642
 643        if self.bin:
 644            write(EMPTY_DICT)
 645        else:   # proto 0 -- can't use EMPTY_DICT
 646            write(MARK + DICT)
 647
 648        self.memoize(obj)
 649        self._batch_setitems(obj.iteritems())
 650
 651    dispatch[DictionaryType] = save_dict
  652    if PyStringMap is not None:
 653        dispatch[PyStringMap] = save_dict
 654
 655    def _batch_setitems(self, items):
 656        # Helper to batch up SETITEMS sequences; proto >= 1 only
 657        save = self.save
 658        write = self.write
 659
 660        if not self.bin:
 661            for k, v in items:
 662                save(k)
 663                save(v)
 664                write(SETITEM)
 665            return
 666
 667        r = xrange(self._BATCHSIZE)
 668        while items is not None:
 669            tmp = []
 670            for i in r:
 671                try:
 672                    tmp.append(items.next())
 673                except StopIteration:
 674                    items = None
 675                    break
 676            n = len(tmp)
 677            if n > 1:
 678                write(MARK)
 679                for k, v in tmp:
 680                    save(k)
 681                    save(v)
 682                write(SETITEMS)
 683            elif n:
 684                k, v = tmp[0]
 685                save(k)
 686                save(v)
 687                write(SETITEM)
 688            # else tmp is empty, and we're done
 689
 690    def save_inst(self, obj):
 691        cls = obj.__class__
 692
 693        memo  = self.memo
 694        write = self.write
 695        save  = self.save
 696
 697        if hasattr(obj, '__getinitargs__'):
 698            args = obj.__getinitargs__()
 699            len(args) # XXX Assert it's a sequence
 700            _keep_alive(args, memo)
 701        else:
 702            args = ()
 703
 704        write(MARK)
 705
 706        if self.bin:
 707            save(cls)
 708            for arg in args:
 709                save(arg)
 710            write(OBJ)
 711        else:
 712            for arg in args:
 713                save(arg)
 714            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')
 715
 716        self.memoize(obj)
 717
 718        try:
 719            getstate = obj.__getstate__
 720        except AttributeError:
 721            stuff = obj.__dict__
 722        else:
 723            stuff = getstate()
 724            _keep_alive(stuff, memo)
 725        save(stuff)
 726        write(BUILD)
 727
 728    dispatch[InstanceType] = save_inst
 729
 730    def save_global(self, obj, name=None, pack=struct.pack):
 731        write = self.write
 732        memo = self.memo
 733
 734        if name is None:
 735            name = obj.__name__
 736
 737        module = getattr(obj, "__module__", None)
 738        if module is None:
 739            module = whichmodule(obj, name)
 740
 741        try:
 742            __import__(module)
 743            mod = sys.modules[module]
 744            klass = getattr(mod, name)
 745        except (ImportError, KeyError, AttributeError):
 746            raise PicklingError(
 747                "Can't pickle %r: it's not found as %s.%s" %
 748                (obj, module, name))
 749        else:
 750            if klass is not obj:
 751                raise PicklingError(
 752                    "Can't pickle %r: it's not the same object as %s.%s" %
 753                    (obj, module, name))
 754
 755        if self.proto >= 2:
 756            code = _extension_registry.get((module, name))
 757            if code:
 758                assert code > 0
 759                if code <= 0xff:
 760                    write(EXT1 + chr(code))
 761                elif code <= 0xffff:
 762                    write("%c%c%c" % (EXT2, code&0xff, code>>8))
 763                else:
 764                    write(EXT4 + pack("<i", code))
 765                return
 766
 767        write(GLOBAL + module + '\n' + name + '\n')
 768        self.memoize(obj)
 769
 770    dispatch[ClassType] = save_global
 771    dispatch[FunctionType] = save_global
 772    dispatch[BuiltinFunctionType] = save_global
 773    dispatch[TypeType] = save_global
 774
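# Illustrative sketch, not part of the original module: a class using the
# reduce protocol that save() and save_reduce() above implement.  __reduce__
# returns a callable plus an argument tuple, and may add a third item that
# becomes the BUILD state applied through __setstate__ (or __dict__.update)
# at load time.  The class and attribute names below are hypothetical.
class _ReduceExample(object):
    def __init__(self, name, cache=None):
        self.name = name
        self.cache = cache or {}          # transient; rebuilt, not pickled

    def __reduce__(self):
        # REDUCE calls _ReduceExample(self.name); BUILD then hands the state
        # dict to load_build(), which calls __setstate__ below.
        return (_ReduceExample, (self.name,), {'restored': True})

    def __setstate__(self, state):
        self.__dict__.update(state)

# The protocol-2 NEWOBJ path described in save_reduce()'s comment expects a
# reconstructor literally named __newobj__; a conforming definition (copied
# from that comment, shown here only as a sketch) is:
def __newobj__(cls, *args):
    return cls.__new__(cls, *args)
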
 775# Pickling helpers
 776
 777def _keep_alive(x, memo):
 778    """Keeps a reference to the object x in the memo.
 779
 780    Because we remember objects by their id, we have
 781    to assure that possibly temporary objects are kept
 782    alive by referencing them.
 783    We store a reference at the id of the memo, which should
 784    normally not be used unless someone tries to deepcopy
 785    the memo itself...
 786    """
 787    try:
 788        memo[id(memo)].append(x)
 789    except KeyError:
 790        # aha, this is the first one :-)
 791        memo[id(memo)]=[x]
 792
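# Sketch (not part of the original module): the memo documented in
# Pickler.memoize() is what lets shared and self-referential objects be
# pickled by reference.  Relies on dumps()/loads() defined near the end of
# this file.
def _memo_demo():
    shared = [1, 2]
    outer = [shared, shared]
    outer.append(outer)                   # self-reference
    copy = loads(dumps(outer, 2))
    assert copy[0] is copy[1]             # sharing survived the round trip
    assert copy[2] is copy                # so did the recursion
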
 793
 794# A cache for whichmodule(), mapping a function object to the name of
 795# the module in which the function was found.
 796
 797classmap = {} # called classmap for backwards compatibility
 798
 799def whichmodule(func, funcname):
 800    """Figure out the module in which a function occurs.
 801
 802    Search sys.modules for the module.
 803    Cache in classmap.
 804    Return a module name.
 805    If the function cannot be found, return "__main__".
 806    """
 807    # Python functions should always get an __module__ from their globals.
 808    mod = getattr(func, "__module__", None)
 809    if mod is not None:
 810        return mod
 811    if func in classmap:
 812        return classmap[func]
 813
 814    for name, module in sys.modules.items():
 815        if module is None:
 816            continue # skip dummy package entries
 817        if name != '__main__' and getattr(module, funcname, None) is func:
 818            break
 819    else:
 820        name = '__main__'
 821    classmap[func] = name
 822    return name
 823
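# Sketch (not part of the original module): for ordinary functions the
# __module__ attribute short-circuits the sys.modules scan described above.
def _whichmodule_demo():
    assert whichmodule(whichmodule, 'whichmodule') == __name__
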
 824
 825# Unpickling machinery
 826
 827class Unpickler:
 828
 829    def __init__(self, file):
 830        """This takes a file-like object for reading a pickle data stream.
 831
 832        The protocol version of the pickle is detected automatically, so no
 833        proto argument is needed.
 834
  835        The file-like object must have two methods: a read() method that
  836        takes an integer argument, and a readline() method that requires no
  837        arguments.  Both methods should return a string.  The file-like
  838        object can thus be a file object opened for reading, a StringIO
  839        object, or any other custom object that meets this interface.
 840        """
 841        self.readline = file.readline
 842        self.read = file.read
 843        self.memo = {}
 844
 845    def load(self):
 846        """Read a pickled object representation from the open file.
 847
 848        Return the reconstituted object hierarchy specified in the file.
 849        """
 850        self.mark = object() # any new unique object
 851        self.stack = []
 852        self.append = self.stack.append
 853        read = self.read
 854        dispatch = self.dispatch
 855        try:
 856            while 1:
 857                key = read(1)
 858                dispatch[key](self)
 859        except _Stop, stopinst:
 860            return stopinst.value
 861
 862    # Return largest index k such that self.stack[k] is self.mark.
 863    # If the stack doesn't contain a mark, eventually raises IndexError.
 864    # This could be sped by maintaining another stack, of indices at which
 865    # the mark appears.  For that matter, the latter stack would suffice,
 866    # and we wouldn't need to push mark objects on self.stack at all.
 867    # Doing so is probably a good thing, though, since if the pickle is
 868    # corrupt (or hostile) we may get a clue from finding self.mark embedded
 869    # in unpickled objects.
 870    def marker(self):
 871        stack = self.stack
 872        mark = self.mark
 873        k = len(stack)-1
 874        while stack[k] is not mark: k = k-1
 875        return k
 876
 877    dispatch = {}
 878
 879    def load_eof(self):
 880        raise EOFError
 881    dispatch[''] = load_eof
 882
 883    def load_proto(self):
 884        proto = ord(self.read(1))
 885        if not 0 <= proto <= 2:
 886            raise ValueError, "unsupported pickle protocol: %d" % proto
 887    dispatch[PROTO] = load_proto
 888
 889    def load_persid(self):
 890        pid = self.readline()[:-1]
 891        self.append(self.persistent_load(pid))
 892    dispatch[PERSID] = load_persid
 893
 894    def load_binpersid(self):
 895        pid = self.stack.pop()
 896        self.append(self.persistent_load(pid))
 897    dispatch[BINPERSID] = load_binpersid
 898
 899    def load_none(self):
 900        self.append(None)
 901    dispatch[NONE] = load_none
 902
 903    def load_false(self):
 904        self.append(False)
 905    dispatch[NEWFALSE] = load_false
 906
 907    def load_true(self):
 908        self.append(True)
 909    dispatch[NEWTRUE] = load_true
 910
 911    def load_int(self):
 912        data = self.readline()
 913        if data == FALSE[1:]:
 914            val = False
 915        elif data == TRUE[1:]:
 916            val = True
 917        else:
 918            try:
 919                val = int(data)
 920            except ValueError:
 921                val = long(data)
 922        self.append(val)
 923    dispatch[INT] = load_int
 924
 925    def load_binint(self):
 926        self.append(mloads('i' + self.read(4)))
 927    dispatch[BININT] = load_binint
 928
 929    def load_binint1(self):
 930        self.append(ord(self.read(1)))
 931    dispatch[BININT1] = load_binint1
 932
 933    def load_binint2(self):
 934        self.append(mloads('i' + self.read(2) + '\000\000'))
 935    dispatch[BININT2] = load_binint2
 936
 937    def load_long(self):
 938        self.append(long(self.readline()[:-1], 0))
 939    dispatch[LONG] = load_long
 940
 941    def load_long1(self):
 942        n = ord(self.read(1))
 943        bytes = self.read(n)
 944        self.append(decode_long(bytes))
 945    dispatch[LONG1] = load_long1
 946
 947    def load_long4(self):
 948        n = mloads('i' + self.read(4))
 949        bytes = self.read(n)
 950        self.append(decode_long(bytes))
 951    dispatch[LONG4] = load_long4
 952
 953    def load_float(self):
 954        self.append(float(self.readline()[:-1]))
 955    dispatch[FLOAT] = load_float
 956
 957    def load_binfloat(self, unpack=struct.unpack):
 958        self.append(unpack('>d', self.read(8))[0])
 959    dispatch[BINFLOAT] = load_binfloat
 960
 961    def load_string(self):
 962        rep = self.readline()[:-1]
 963        for q in "\"'": # double or single quote
 964            if rep.startswith(q):
 965                if not rep.endswith(q):
 966                    raise ValueError, "insecure string pickle"
 967                rep = rep[len(q):-len(q)]
 968                break
 969        else:
 970            raise ValueError, "insecure string pickle"
 971        self.append(rep.decode("string-escape"))
 972    dispatch[STRING] = load_string
 973
 974    def load_binstring(self):
 975        len = mloads('i' + self.read(4))
 976        self.append(self.read(len))
 977    dispatch[BINSTRING] = load_binstring
 978
 979    def load_unicode(self):
 980        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
 981    dispatch[UNICODE] = load_unicode
 982
 983    def load_binunicode(self):
 984        len = mloads('i' + self.read(4))
 985        self.append(unicode(self.read(len),'utf-8'))
 986    dispatch[BINUNICODE] = load_binunicode
 987
 988    def load_short_binstring(self):
 989        len = ord(self.read(1))
 990        self.append(self.read(len))
 991    dispatch[SHORT_BINSTRING] = load_short_binstring
 992
 993    def load_tuple(self):
 994        k = self.marker()
 995        self.stack[k:] = [tuple(self.stack[k+1:])]
 996    dispatch[TUPLE] = load_tuple
 997
 998    def load_empty_tuple(self):
 999        self.stack.append(())
1000    dispatch[EMPTY_TUPLE] = load_empty_tuple
1001
1002    def load_tuple1(self):
1003        self.stack[-1] = (self.stack[-1],)
1004    dispatch[TUPLE1] = load_tuple1
1005
1006    def load_tuple2(self):
1007        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1008    dispatch[TUPLE2] = load_tuple2
1009
1010    def load_tuple3(self):
1011        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1012    dispatch[TUPLE3] = load_tuple3
1013
1014    def load_empty_list(self):
1015        self.stack.append([])
1016    dispatch[EMPTY_LIST] = load_empty_list
1017
1018    def load_empty_dictionary(self):
1019        self.stack.append({})
1020    dispatch[EMPTY_DICT] = load_empty_dictionary
1021
1022    def load_list(self):
1023        k = self.marker()
1024        self.stack[k:] = [self.stack[k+1:]]
1025    dispatch[LIST] = load_list
1026
1027    def load_dict(self):
1028        k = self.marker()
1029        d = {}
1030        items = self.stack[k+1:]
1031        for i in range(0, len(items), 2):
1032            key = items[i]
1033            value = items[i+1]
1034            d[key] = value
1035        self.stack[k:] = [d]
1036    dispatch[DICT] = load_dict
1037
 1038    # INST and OBJ differ only in how they get a class object.  Not only
 1039    # is it sensible to do the rest in a common routine; when the two
 1040    # routines were separate they diverged and grew different bugs.
1041    # klass is the class to instantiate, and k points to the topmost mark
1042    # object, following which are the arguments for klass.__init__.
1043    def _instantiate(self, klass, k):
1044        args = tuple(self.stack[k+1:])
1045        del self.stack[k:]
1046        instantiated = 0
1047        if (not args and
1048                type(klass) is ClassType and
1049                not hasattr(klass, "__getinitargs__")):
1050            try:
1051                value = _EmptyClass()
1052                value.__class__ = klass
1053                instantiated = 1
1054            except RuntimeError:
1055                # In restricted execution, assignment to inst.__class__ is
1056                # prohibited
1057                pass
1058        if not instantiated:
1059            try:
1060                value = klass(*args)
1061            except TypeError, err:
1062                raise TypeError, "in constructor for %s: %s" % (
1063                    klass.__name__, str(err)), sys.exc_info()[2]
1064        self.append(value)
1065
1066    def load_inst(self):
1067        module = self.readline()[:-1]
1068        name = self.readline()[:-1]
1069        klass = self.find_class(module, name)
1070        self._instantiate(klass, self.marker())
1071    dispatch[INST] = load_inst
1072
1073    def load_obj(self):
1074        # Stack is ... markobject classobject arg1 arg2 ...
1075        k = self.marker()
1076        klass = self.stack.pop(k+1)
1077        self._instantiate(klass, k)
1078    dispatch[OBJ] = load_obj
1079
1080    def load_newobj(self):
1081        args = self.stack.pop()
1082        cls = self.stack[-1]
1083        obj = cls.__new__(cls, *args)
1084        self.stack[-1] = obj
1085    dispatch[NEWOBJ] = load_newobj
1086
1087    def load_global(self):
1088        module = self.readline()[:-1]
1089        name = self.readline()[:-1]
1090        klass = self.find_class(module, name)
1091        self.append(klass)
1092    dispatch[GLOBAL] = load_global
1093
1094    def load_ext1(self):
1095        code = ord(self.read(1))
1096        self.get_extension(code)
1097    dispatch[EXT1] = load_ext1
1098
1099    def load_ext2(self):
1100        code = mloads('i' + self.read(2) + '\000\000')
1101        self.get_extension(code)
1102    dispatch[EXT2] = load_ext2
1103
1104    def load_ext4(self):
1105        code = mloads('i' + self.read(4))
1106        self.get_extension(code)
1107    dispatch[EXT4] = load_ext4
1108
1109    def get_extension(self, code):
1110        nil = []
1111        obj = _extension_cache.get(code, nil)
1112        if obj is not nil:
1113            self.append(obj)
1114            return
1115        key = _inverted_registry.get(code)
1116        if not key:
1117            raise ValueError("unregistered extension code %d" % code)
1118        obj = self.find_class(*key)
1119        _extension_cache[code] = obj
1120        self.append(obj)
1121
1122    def find_class(self, module, name):
1123        # Subclasses may override this
1124        __import__(module)
1125        mod = sys.modules[module]
1126        klass = getattr(mod, name)
1127        return klass
1128
1129    def load_reduce(self):
1130        stack = self.stack
1131        args = stack.pop()
1132        func = stack[-1]
1133        value = func(*args)
1134        stack[-1] = value
1135    dispatch[REDUCE] = load_reduce
1136
1137    def load_pop(self):
1138        del self.stack[-1]
1139    dispatch[POP] = load_pop
1140
1141    def load_pop_mark(self):
1142        k = self.marker()
1143        del self.stack[k:]
1144    dispatch[POP_MARK] = load_pop_mark
1145
1146    def load_dup(self):
1147        self.append(self.stack[-1])
1148    dispatch[DUP] = load_dup
1149
1150    def load_get(self):
1151        self.append(self.memo[self.readline()[:-1]])
1152    dispatch[GET] = load_get
1153
1154    def load_binget(self):
1155        i = ord(self.read(1))
1156        self.append(self.memo[repr(i)])
1157    dispatch[BINGET] = load_binget
1158
1159    def load_long_binget(self):
1160        i = mloads('i' + self.read(4))
1161        self.append(self.memo[repr(i)])
1162    dispatch[LONG_BINGET] = load_long_binget
1163
1164    def load_put(self):
1165        self.memo[self.readline()[:-1]] = self.stack[-1]
1166    dispatch[PUT] = load_put
1167
1168    def load_binput(self):
1169        i = ord(self.read(1))
1170        self.memo[repr(i)] = self.stack[-1]
1171    dispatch[BINPUT] = load_binput
1172
1173    def load_long_binput(self):
1174        i = mloads('i' + self.read(4))
1175        self.memo[repr(i)] = self.stack[-1]
1176    dispatch[LONG_BINPUT] = load_long_binput
1177
1178    def load_append(self):
1179        stack = self.stack
1180        value = stack.pop()
1181        list = stack[-1]
1182        list.append(value)
1183    dispatch[APPEND] = load_append
1184
1185    def load_appends(self):
1186        stack = self.stack
1187        mark = self.marker()
1188        list = stack[mark - 1]
1189        list.extend(stack[mark + 1:])
1190        del stack[mark:]
1191    dispatch[APPENDS] = load_appends
1192
1193    def load_setitem(self):
1194        stack = self.stack
1195        value = stack.pop()
1196        key = stack.pop()
1197        dict = stack[-1]
1198        dict[key] = value
1199    dispatch[SETITEM] = load_setitem
1200
1201    def load_setitems(self):
1202        stack = self.stack
1203        mark = self.marker()
1204        dict = stack[mark - 1]
1205        for i in range(mark + 1, len(stack), 2):
1206            dict[stack[i]] = stack[i + 1]
1207
1208        del stack[mark:]
1209    dispatch[SETITEMS] = load_setitems
1210
1211    def load_build(self):
1212        stack = self.stack
1213        state = stack.pop()
1214        inst = stack[-1]
1215        setstate = getattr(inst, "__setstate__", None)
1216        if setstate:
1217            setstate(state)
1218            return
1219        slotstate = None
1220        if isinstance(state, tuple) and len(state) == 2:
1221            state, slotstate = state
1222        if state:
1223            try:
1224                inst.__dict__.update(state)
1225            except RuntimeError:
1226                # XXX In restricted execution, the instance's __dict__
1227                # is not accessible.  Use the old way of unpickling
1228                # the instance variables.  This is a semantic
1229                # difference when unpickling in restricted
1230                # vs. unrestricted modes.
1231                # Note, however, that cPickle has never tried to do the
1232                # .update() business, and always uses
1233                #     PyObject_SetItem(inst.__dict__, key, value) in a
1234                # loop over state.items().
1235                for k, v in state.items():
1236                    setattr(inst, k, v)
1237        if slotstate:
1238            for k, v in slotstate.items():
1239                setattr(inst, k, v)
1240    dispatch[BUILD] = load_build
1241
1242    def load_mark(self):
1243        self.append(self.mark)
1244    dispatch[MARK] = load_mark
1245
1246    def load_stop(self):
1247        value = self.stack.pop()
1248        raise _Stop(value)
1249    dispatch[STOP] = load_stop
1250
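# Sketch (not part of the original module): find_class() above is marked as
# the subclass override point, and restricting it is one way to address the
# "security violation" case mentioned in UnpicklingError's docstring.  The
# whitelist below is purely illustrative.
class _RestrictedUnpickler(Unpickler):
    _allowed = frozenset([('__builtin__', 'complex'),
                          ('collections', 'deque')])

    def find_class(self, module, name):
        if (module, name) not in self._allowed:
            raise UnpicklingError("global %s.%s is not allowed" %
                                  (module, name))
        return Unpickler.find_class(self, module, name)
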
1251# Helper class for load_inst/load_obj
1252
1253class _EmptyClass:
1254    pass
1255
1256# Encode/decode longs in linear time.
1257
1258import binascii as _binascii
1259
1260def encode_long(x):
1261    r"""Encode a long to a two's complement little-endian binary string.
1262    Note that 0L is a special case, returning an empty string, to save a
1263    byte in the LONG1 pickling context.
1264
1265    >>> encode_long(0L)
1266    ''
1267    >>> encode_long(255L)
1268    '\xff\x00'
1269    >>> encode_long(32767L)
1270    '\xff\x7f'
1271    >>> encode_long(-256L)
1272    '\x00\xff'
1273    >>> encode_long(-32768L)
1274    '\x00\x80'
1275    >>> encode_long(-128L)
1276    '\x80'
1277    >>> encode_long(127L)
1278    '\x7f'
1279    >>>
1280    """
1281
1282    if x == 0:
1283        return ''
1284    if x > 0:
1285        ashex = hex(x)
1286        assert ashex.startswith("0x")
1287        njunkchars = 2 + ashex.endswith('L')
1288        nibbles = len(ashex) - njunkchars
1289        if nibbles & 1:
1290            # need an even # of nibbles for unhexlify
1291            ashex = "0x0" + ashex[2:]
1292        elif int(ashex[2], 16) >= 8:
1293            # "looks negative", so need a byte of sign bits
1294            ashex = "0x00" + ashex[2:]
1295    else:
1296        # Build the 256's-complement:  (1L << nbytes) + x.  The trick is
1297        # to find the number of bytes in linear time (although that should
1298        # really be a constant-time task).
1299        ashex = hex(-x)
1300        assert ashex.startswith("0x")
1301        njunkchars = 2 + ashex.endswith('L')
1302        nibbles = len(ashex) - njunkchars
1303        if nibbles & 1:
1304            # Extend to a full byte.
1305            nibbles += 1
1306        nbits = nibbles * 4
1307        x += 1L << nbits
1308        assert x > 0
1309        ashex = hex(x)
1310        njunkchars = 2 + ashex.endswith('L')
1311        newnibbles = len(ashex) - njunkchars
1312        if newnibbles < nibbles:
1313            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
1314        if int(ashex[2], 16) < 8:
1315            # "looks positive", so need a byte of sign bits
1316            ashex = "0xff" + ashex[2:]
1317
1318    if ashex.endswith('L'):
1319        ashex = ashex[2:-1]
1320    else:
1321        ashex = ashex[2:]
1322    assert len(ashex) & 1 == 0, (x, ashex)
1323    binary = _binascii.unhexlify(ashex)
1324    return binary[::-1]
1325
1326def decode_long(data):
1327    r"""Decode a long from a two's complement little-endian binary string.
1328
1329    >>> decode_long('')
1330    0L
1331    >>> decode_long("\xff\x00")
1332    255L
1333    >>> decode_long("\xff\x7f")
1334    32767L
1335    >>> decode_long("\x00\xff")
1336    -256L
1337    >>> decode_long("\x00\x80")
1338    -32768L
1339    >>> decode_long("\x80")
1340    -128L
1341    >>> decode_long("\x7f")
1342    127L
1343    """
1344
1345    nbytes = len(data)
1346    if nbytes == 0:
1347        return 0L
1348    ashex = _binascii.hexlify(data[::-1])
1349    n = long(ashex, 16) # quadratic time before Python 2.3; linear now
1350    if data[-1] >= '\x80':
1351        n -= 1L << (nbytes * 8)
1352    return n
1353
1354# Shorthands
1355
1356try:
1357    from cStringIO import StringIO
1358except ImportError:
1359    from StringIO import StringIO
1360
1361def dump(obj, file, protocol=None):
1362    Pickler(file, protocol).dump(obj)
1363
1364def dumps(obj, protocol=None):
1365    file = StringIO()
1366    Pickler(file, protocol).dump(obj)
1367    return file.getvalue()
1368
1369def load(file):
1370    return Unpickler(file).load()
1371
1372def loads(str):
1373    file = StringIO(str)
1374    return Unpickler(file).load()
1375
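# Usage sketch (illustrative, not part of the original module): the four
# shorthands above match the functions listed in the module docstring.
# dumps()/loads() round-trip through an in-memory string; dump()/load() take
# any file-like object (opened in binary mode when protocol >= 1).
def _roundtrip_demo():
    obj = {'answer': 42L, 'title': u'spam'}
    data = dumps(obj, 2)                  # protocol 2 byte string
    assert loads(data) == obj
    assert load(StringIO(data)) == obj    # load() reads from a file-like object
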
1376# Doctest
1377
1378def _test():
1379    import doctest
1380    return doctest.testmod()
1381
1382if __name__ == "__main__":
1383    _test()