PageRenderTime 88ms CodeModel.GetById 26ms app.highlight 51ms RepoModel.GetById 1ms app.codeStats 0ms

/External.LCA_RESTRICTED/Languages/IronPython/27/Lib/pickle.py

http://github.com/IronLanguages/main
Python | 1397 lines | 1233 code | 62 blank | 102 comment | 42 complexity | 87151569eaebe6c44724c725064a21d4 MD5 | raw file
   1"""Create portable serialized representations of Python objects.
   2
   3See module cPickle for a (much) faster implementation.
   4See module copy_reg for a mechanism for registering custom picklers.
   5See module pickletools source for extensive comments.
   6
   7Classes:
   8
   9    Pickler
  10    Unpickler
  11
  12Functions:
  13
  14    dump(object, file)
  15    dumps(object) -> string
  16    load(file) -> object
  17    loads(string) -> object
  18
  19Misc variables:
  20
  21    __version__
  22    format_version
  23    compatible_formats
  24
  25"""
  26
  27__version__ = "$Revision: 72223 $"       # Code version
  28
  29from types import *
  30from copy_reg import dispatch_table
  31from copy_reg import _extension_registry, _inverted_registry, _extension_cache
  32import marshal
  33import sys
  34import struct
  35import re
  36
  37__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
  38           "Unpickler", "dump", "dumps", "load", "loads"]
  39
  40# These are purely informational; no code uses these.
  41format_version = "2.0"                  # File format version we write
  42compatible_formats = ["1.0",            # Original protocol 0
  43                      "1.1",            # Protocol 0 with INST added
  44                      "1.2",            # Original protocol 1
  45                      "1.3",            # Protocol 1 with BINFLOAT added
  46                      "2.0",            # Protocol 2
  47                      ]                 # Old format versions we can read
  48
  49# Keep in synch with cPickle.  This is the highest protocol number we
  50# know how to read.
  51HIGHEST_PROTOCOL = 2
  52
  53# Why use struct.pack() for pickling but marshal.loads() for
  54# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
  55# marshal.loads() is twice as fast as struct.unpack()!
  56mloads = marshal.loads
  57
  58class PickleError(Exception):
  59    """A common base class for the other pickling exceptions."""
  60    pass
  61
  62class PicklingError(PickleError):
  63    """This exception is raised when an unpicklable object is passed to the
  64    dump() method.
  65
  66    """
  67    pass
  68
  69class UnpicklingError(PickleError):
  70    """This exception is raised when there is a problem unpickling an object,
  71    such as a security violation.
  72
  73    Note that other exceptions may also be raised during unpickling, including
  74    (but not necessarily limited to) AttributeError, EOFError, ImportError,
  75    and IndexError.
  76
  77    """
  78    pass
  79
  80# An instance of _Stop is raised by Unpickler.load_stop() in response to
  81# the STOP opcode, passing the object that is the result of unpickling.
  82class _Stop(Exception):
  83    def __init__(self, value):
  84        self.value = value
  85
# Jython has PyStringMap; it's a dict subclass with string keys.
# When the import fails the name is bound to None instead; Pickler tests
# for None before registering a dispatch entry for PyStringMap.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# UnicodeType may or may not be exported (normally imported from types).
# If the name is missing it is bound to None so that later references to
# UnicodeType (e.g. the Pickler dispatch registration) do not raise
# NameError.
try:
    UnicodeType
except NameError:
    UnicodeType = None
  97
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK            = '('   # push special markobject on stack
STOP            = '.'   # every pickle ends with STOP
POP             = '0'   # discard topmost stack item
POP_MARK        = '1'   # discard stack top through topmost markobject
DUP             = '2'   # duplicate top stack item
FLOAT           = 'F'   # push float object; decimal string argument
INT             = 'I'   # push integer or bool; decimal string argument
BININT          = 'J'   # push four-byte signed int
BININT1         = 'K'   # push 1-byte unsigned int
LONG            = 'L'   # push long; decimal string argument
BININT2         = 'M'   # push 2-byte unsigned int
NONE            = 'N'   # push None
PERSID          = 'P'   # push persistent object; id is taken from string arg
BINPERSID       = 'Q'   # push persistent object; id is taken from stack
REDUCE          = 'R'   # apply callable to argtuple, both on stack
STRING          = 'S'   # push string; NL-terminated string argument
BINSTRING       = 'T'   # push string; counted binary string argument
SHORT_BINSTRING = 'U'   # push string; counted binary string argument < 256 bytes
UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE      = 'X'   # push Unicode string; counted UTF-8 string argument
APPEND          = 'a'   # append stack top to list below it
BUILD           = 'b'   # call __setstate__ or __dict__.update()
GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
DICT            = 'd'   # build a dict from stack items
EMPTY_DICT      = '}'   # push empty dict
APPENDS         = 'e'   # extend list on stack by topmost stack slice
GET             = 'g'   # push item from memo on stack; index is string arg
BINGET          = 'h'   # push item from memo on stack; index is 1-byte arg
INST            = 'i'   # build & push class instance
LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
LIST            = 'l'   # build list from topmost stack items
EMPTY_LIST      = ']'   # push empty list
OBJ             = 'o'   # build & push class instance
PUT             = 'p'   # store stack top in memo; index is string arg
BINPUT          = 'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT     = 'r'   # store stack top in memo; index is 4-byte arg
SETITEM         = 's'   # add key+value pair to dict
TUPLE           = 't'   # build tuple from topmost stack items
EMPTY_TUPLE     = ')'   # push empty tuple
SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding

TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO           = '\x80'  # identify pickle protocol
NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = '\x82'  # push object from extension registry; 1-byte index
EXT2            = '\x83'  # ditto, but 2-byte index
EXT4            = '\x84'  # ditto, but 4-byte index
TUPLE1          = '\x85'  # build 1-tuple from stack top
TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = '\x88'  # push True
NEWFALSE        = '\x89'  # push False
LONG1           = '\x8a'  # push long from < 256 bytes
LONG4           = '\x8b'  # push really big long

# Indexed by tuple length (0..3): the opcode that builds a tuple of that
# size directly from the stack.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


# Export every all-uppercase module-level name (the opcodes and the other
# uppercase constants defined above).  Under Python 2 the list comprehension
# leaves its loop variable x bound at module level, hence the explicit del.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
del x
 167
 168
 169# Pickling machinery
 170
 171class Pickler:
 172
 173    def __init__(self, file, protocol=None):
 174        """This takes a file-like object for writing a pickle data stream.
 175
 176        The optional protocol argument tells the pickler to use the
 177        given protocol; supported protocols are 0, 1, 2.  The default
 178        protocol is 0, to be backwards compatible.  (Protocol 0 is the
 179        only protocol that can be written to a file opened in text
 180        mode and read back successfully.  When using a protocol higher
 181        than 0, make sure the file is opened in binary mode, both when
 182        pickling and unpickling.)
 183
 184        Protocol 1 is more efficient than protocol 0; protocol 2 is
 185        more efficient than protocol 1.
 186
 187        Specifying a negative protocol version selects the highest
 188        protocol version supported.  The higher the protocol used, the
 189        more recent the version of Python needed to read the pickle
 190        produced.
 191
 192        The file parameter must have a write() method that accepts a single
 193        string argument.  It can thus be an open file object, a StringIO
 194        object, or any other custom object that meets this interface.
 195
 196        """
 197        if protocol is None:
 198            protocol = 0
 199        if protocol < 0:
 200            protocol = HIGHEST_PROTOCOL
 201        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
 202            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
 203        self.write = file.write
 204        self.memo = {}
 205        self.proto = int(protocol)
 206        self.bin = protocol >= 1
 207        self.fast = 0
 208
 209    def clear_memo(self):
 210        """Clears the pickler's "memo".
 211
 212        The memo is the data structure that remembers which objects the
 213        pickler has already seen, so that shared or recursive objects are
 214        pickled by reference and not by value.  This method is useful when
 215        re-using picklers.
 216
 217        """
 218        self.memo.clear()
 219
 220    def dump(self, obj):
 221        """Write a pickled representation of obj to the open file."""
 222        if self.proto >= 2:
 223            self.write(PROTO + chr(self.proto))
 224        self.save(obj)
 225        self.write(STOP)
 226
 227    def memoize(self, obj):
 228        """Store an object in the memo."""
 229
 230        # The Pickler memo is a dictionary mapping object ids to 2-tuples
 231        # that contain the Unpickler memo key and the object being memoized.
 232        # The memo key is written to the pickle and will become
 233        # the key in the Unpickler's memo.  The object is stored in the
 234        # Pickler memo so that transient objects are kept alive during
 235        # pickling.
 236
 237        # The use of the Unpickler memo length as the memo key is just a
 238        # convention.  The only requirement is that the memo values be unique.
 239        # But there appears no advantage to any other scheme, and this
 240        # scheme allows the Unpickler memo to be implemented as a plain (but
 241        # growable) array, indexed by memo key.
 242        if self.fast:
 243            return
 244        assert id(obj) not in self.memo
 245        memo_len = len(self.memo)
 246        self.write(self.put(memo_len))
 247        self.memo[id(obj)] = memo_len, obj
 248
 249    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
 250    def put(self, i, pack=struct.pack):
 251        if self.bin:
 252            if i < 256:
 253                return BINPUT + chr(i)
 254            else:
 255                return LONG_BINPUT + pack("<i", i)
 256
 257        return PUT + repr(i) + '\n'
 258
 259    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
 260    def get(self, i, pack=struct.pack):
 261        if self.bin:
 262            if i < 256:
 263                return BINGET + chr(i)
 264            else:
 265                return LONG_BINGET + pack("<i", i)
 266
 267        return GET + repr(i) + '\n'
 268
 269    def save(self, obj):
 270        # Check for persistent id (defined by a subclass)
 271        pid = self.persistent_id(obj)
 272        if pid is not None:
 273            self.save_pers(pid)
 274            return
 275
 276        # Check the memo
 277        x = self.memo.get(id(obj))
 278        if x:
 279            self.write(self.get(x[0]))
 280            return
 281
 282        # Check the type dispatch table
 283        t = type(obj)
 284        f = self.dispatch.get(t)
 285        if f:
 286            f(self, obj) # Call unbound method with explicit self
 287            return
 288
 289        # Check copy_reg.dispatch_table
 290        reduce = dispatch_table.get(t)
 291        if reduce:
 292            rv = reduce(obj)
 293        else:
 294            # Check for a class with a custom metaclass; treat as regular class
 295            try:
 296                issc = issubclass(t, TypeType)
 297            except TypeError: # t is not a class (old Boost; see SF #502085)
 298                issc = 0
 299            if issc:
 300                self.save_global(obj)
 301                return
 302
 303            # Check for a __reduce_ex__ method, fall back to __reduce__
 304            reduce = getattr(obj, "__reduce_ex__", None)
 305            if reduce:
 306                rv = reduce(self.proto)
 307            else:
 308                reduce = getattr(obj, "__reduce__", None)
 309                if reduce:
 310                    rv = reduce()
 311                else:
 312                    raise PicklingError("Can't pickle %r object: %r" %
 313                                        (t.__name__, obj))
 314
 315        # Check for string returned by reduce(), meaning "save as global"
 316        if type(rv) is StringType:
 317            self.save_global(obj, rv)
 318            return
 319
 320        # Assert that reduce() returned a tuple
 321        if type(rv) is not TupleType:
 322            raise PicklingError("%s must return string or tuple" % reduce)
 323
 324        # Assert that it returned an appropriately sized tuple
 325        l = len(rv)
 326        if not (2 <= l <= 5):
 327            raise PicklingError("Tuple returned by %s must have "
 328                                "two to five elements" % reduce)
 329
 330        # Save the reduce() output and finally memoize the object
 331        self.save_reduce(obj=obj, *rv)
 332
 333    def persistent_id(self, obj):
 334        # This exists so a subclass can override it
 335        return None
 336
 337    def save_pers(self, pid):
 338        # Save a persistent id reference
 339        if self.bin:
 340            self.save(pid)
 341            self.write(BINPERSID)
 342        else:
 343            self.write(PERSID + str(pid) + '\n')
 344
 345    def save_reduce(self, func, args, state=None,
 346                    listitems=None, dictitems=None, obj=None):
 347        # This API is called by some subclasses
 348
 349        # Assert that args is a tuple or None
 350        if not isinstance(args, TupleType):
 351            raise PicklingError("args from reduce() should be a tuple")
 352
 353        # Assert that func is callable
 354        if not hasattr(func, '__call__'):
 355            raise PicklingError("func from reduce should be callable")
 356
 357        save = self.save
 358        write = self.write
 359
 360        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
 361        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
 362            # A __reduce__ implementation can direct protocol 2 to
 363            # use the more efficient NEWOBJ opcode, while still
 364            # allowing protocol 0 and 1 to work normally.  For this to
 365            # work, the function returned by __reduce__ should be
 366            # called __newobj__, and its first argument should be a
 367            # new-style class.  The implementation for __newobj__
 368            # should be as follows, although pickle has no way to
 369            # verify this:
 370            #
 371            # def __newobj__(cls, *args):
 372            #     return cls.__new__(cls, *args)
 373            #
 374            # Protocols 0 and 1 will pickle a reference to __newobj__,
 375            # while protocol 2 (and above) will pickle a reference to
 376            # cls, the remaining args tuple, and the NEWOBJ code,
 377            # which calls cls.__new__(cls, *args) at unpickling time
 378            # (see load_newobj below).  If __reduce__ returns a
 379            # three-tuple, the state from the third tuple item will be
 380            # pickled regardless of the protocol, calling __setstate__
 381            # at unpickling time (see load_build below).
 382            #
 383            # Note that no standard __newobj__ implementation exists;
 384            # you have to provide your own.  This is to enforce
 385            # compatibility with Python 2.2 (pickles written using
 386            # protocol 0 or 1 in Python 2.3 should be unpicklable by
 387            # Python 2.2).
 388            cls = args[0]
 389            if not hasattr(cls, "__new__"):
 390                raise PicklingError(
 391                    "args[0] from __newobj__ args has no __new__")
 392            if obj is not None and cls is not obj.__class__:
 393                raise PicklingError(
 394                    "args[0] from __newobj__ args has the wrong class")
 395            args = args[1:]
 396            save(cls)
 397            save(args)
 398            write(NEWOBJ)
 399        else:
 400            save(func)
 401            save(args)
 402            write(REDUCE)
 403
 404        if obj is not None:
 405            # If the object is already in the memo, this means it is
 406            # recursive. In this case, throw away everything we put on the
 407            # stack, and fetch the object back from the memo.
 408            if id(obj) in self.memo:
 409                write(POP + self.get(self.memo[id(obj)][0]))
 410            else:
 411                self.memoize(obj)
 412
 413        # More new special cases (that work with older protocols as
 414        # well): when __reduce__ returns a tuple with 4 or 5 items,
 415        # the 4th and 5th item should be iterators that provide list
 416        # items and dict items (as (key, value) tuples), or None.
 417
 418        if listitems is not None:
 419            self._batch_appends(listitems)
 420
 421        if dictitems is not None:
 422            self._batch_setitems(dictitems)
 423
 424        if state is not None:
 425            save(state)
 426            write(BUILD)
 427
 428    # Methods below this point are dispatched through the dispatch table
 429
 430    dispatch = {}
 431
 432    def save_none(self, obj):
 433        self.write(NONE)
 434    dispatch[NoneType] = save_none
 435
 436    def save_bool(self, obj):
 437        if self.proto >= 2:
 438            self.write(obj and NEWTRUE or NEWFALSE)
 439        else:
 440            self.write(obj and TRUE or FALSE)
 441    dispatch[bool] = save_bool
 442
 443    def save_int(self, obj, pack=struct.pack):
 444        if self.bin:
 445            # If the int is small enough to fit in a signed 4-byte 2's-comp
 446            # format, we can store it more efficiently than the general
 447            # case.
 448            # First one- and two-byte unsigned ints:
 449            if obj >= 0:
 450                if obj <= 0xff:
 451                    self.write(BININT1 + chr(obj))
 452                    return
 453                if obj <= 0xffff:
 454                    self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
 455                    return
 456            # Next check for 4-byte signed ints:
 457            high_bits = obj >> 31  # note that Python shift sign-extends
 458            if high_bits == 0 or high_bits == -1:
 459                # All high bits are copies of bit 2**31, so the value
 460                # fits in a 4-byte signed int.
 461                self.write(BININT + pack("<i", obj))
 462                return
 463        # Text pickle, or int too big to fit in signed 4-byte format.
 464        self.write(INT + repr(obj) + '\n')
 465    dispatch[IntType] = save_int
 466
 467    def save_long(self, obj, pack=struct.pack):
 468        if self.proto >= 2:
 469            bytes = encode_long(obj)
 470            n = len(bytes)
 471            if n < 256:
 472                self.write(LONG1 + chr(n) + bytes)
 473            else:
 474                self.write(LONG4 + pack("<i", n) + bytes)
 475            return
 476        self.write(LONG + repr(obj) + '\n')
 477    dispatch[LongType] = save_long
 478
 479    def save_float(self, obj, pack=struct.pack):
 480        if self.bin:
 481            self.write(BINFLOAT + pack('>d', obj))
 482        else:
 483            self.write(FLOAT + repr(obj) + '\n')
 484    dispatch[FloatType] = save_float
 485
 486    def save_string(self, obj, pack=struct.pack):
 487        if self.bin:
 488            n = len(obj)
 489            if n < 256:
 490                self.write(SHORT_BINSTRING + chr(n) + obj)
 491            else:
 492                self.write(BINSTRING + pack("<i", n) + obj)
 493        else:
 494            self.write(STRING + repr(obj) + '\n')
 495        self.memoize(obj)
 496    dispatch[StringType] = save_string
 497
 498    def save_unicode(self, obj, pack=struct.pack):
 499        if self.bin:
 500            encoding = obj.encode('utf-8')
 501            n = len(encoding)
 502            self.write(BINUNICODE + pack("<i", n) + encoding)
 503        else:
 504            obj = obj.replace("\\", "\\u005c")
 505            obj = obj.replace("\n", "\\u000a")
 506            self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
 507        self.memoize(obj)
 508    dispatch[UnicodeType] = save_unicode
 509
 510    if StringType is UnicodeType:
 511        # This is true for Jython
 512        def save_string(self, obj, pack=struct.pack):
 513            unicode = obj.isunicode()
 514
 515            if self.bin:
 516                if unicode:
 517                    obj = obj.encode("utf-8")
 518                l = len(obj)
 519                if l < 256 and not unicode:
 520                    self.write(SHORT_BINSTRING + chr(l) + obj)
 521                else:
 522                    s = pack("<i", l)
 523                    if unicode:
 524                        self.write(BINUNICODE + s + obj)
 525                    else:
 526                        self.write(BINSTRING + s + obj)
 527            else:
 528                if unicode:
 529                    obj = obj.replace("\\", "\\u005c")
 530                    obj = obj.replace("\n", "\\u000a")
 531                    obj = obj.encode('raw-unicode-escape')
 532                    self.write(UNICODE + obj + '\n')
 533                else:
 534                    self.write(STRING + repr(obj) + '\n')
 535            self.memoize(obj)
 536        dispatch[StringType] = save_string
 537
 538    def save_tuple(self, obj):
 539        write = self.write
 540        proto = self.proto
 541
 542        n = len(obj)
 543        if n == 0:
 544            if proto:
 545                write(EMPTY_TUPLE)
 546            else:
 547                write(MARK + TUPLE)
 548            return
 549
 550        save = self.save
 551        memo = self.memo
 552        if n <= 3 and proto >= 2:
 553            for element in obj:
 554                save(element)
 555            # Subtle.  Same as in the big comment below.
 556            if id(obj) in memo:
 557                get = self.get(memo[id(obj)][0])
 558                write(POP * n + get)
 559            else:
 560                write(_tuplesize2code[n])
 561                self.memoize(obj)
 562            return
 563
 564        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
 565        # has more than 3 elements.
 566        write(MARK)
 567        for element in obj:
 568            save(element)
 569
 570        if id(obj) in memo:
 571            # Subtle.  d was not in memo when we entered save_tuple(), so
 572            # the process of saving the tuple's elements must have saved
 573            # the tuple itself:  the tuple is recursive.  The proper action
 574            # now is to throw away everything we put on the stack, and
 575            # simply GET the tuple (it's already constructed).  This check
 576            # could have been done in the "for element" loop instead, but
 577            # recursive tuples are a rare thing.
 578            get = self.get(memo[id(obj)][0])
 579            if proto:
 580                write(POP_MARK + get)
 581            else:   # proto 0 -- POP_MARK not available
 582                write(POP * (n+1) + get)
 583            return
 584
 585        # No recursion.
 586        self.write(TUPLE)
 587        self.memoize(obj)
 588
 589    dispatch[TupleType] = save_tuple
 590
 591    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
 592    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
 593    # to remove it.
 594    def save_empty_tuple(self, obj):
 595        self.write(EMPTY_TUPLE)
 596
 597    def save_list(self, obj):
 598        write = self.write
 599
 600        if self.bin:
 601            write(EMPTY_LIST)
 602        else:   # proto 0 -- can't use EMPTY_LIST
 603            write(MARK + LIST)
 604
 605        self.memoize(obj)
 606        self._batch_appends(iter(obj))
 607
 608    dispatch[ListType] = save_list
 609
 610    # Keep in synch with cPickle's BATCHSIZE.  Nothing will break if it gets
 611    # out of synch, though.
 612    _BATCHSIZE = 1000
 613
 614    def _batch_appends(self, items):
 615        # Helper to batch up APPENDS sequences
 616        save = self.save
 617        write = self.write
 618
 619        if not self.bin:
 620            for x in items:
 621                save(x)
 622                write(APPEND)
 623            return
 624
 625        r = xrange(self._BATCHSIZE)
 626        while items is not None:
 627            tmp = []
 628            for i in r:
 629                try:
 630                    x = items.next()
 631                    tmp.append(x)
 632                except StopIteration:
 633                    items = None
 634                    break
 635            n = len(tmp)
 636            if n > 1:
 637                write(MARK)
 638                for x in tmp:
 639                    save(x)
 640                write(APPENDS)
 641            elif n:
 642                save(tmp[0])
 643                write(APPEND)
 644            # else tmp is empty, and we're done
 645
 646    def save_dict(self, obj):
 647        write = self.write
 648
 649        if self.bin:
 650            write(EMPTY_DICT)
 651        else:   # proto 0 -- can't use EMPTY_DICT
 652            write(MARK + DICT)
 653
 654        self.memoize(obj)
 655        self._batch_setitems(obj.iteritems())
 656
 657    dispatch[DictionaryType] = save_dict
 658    if not PyStringMap is None:
 659        dispatch[PyStringMap] = save_dict
 660
 661    def _batch_setitems(self, items):
 662        # Helper to batch up SETITEMS sequences; proto >= 1 only
 663        save = self.save
 664        write = self.write
 665
 666        if not self.bin:
 667            for k, v in items:
 668                save(k)
 669                save(v)
 670                write(SETITEM)
 671            return
 672
 673        r = xrange(self._BATCHSIZE)
 674        while items is not None:
 675            tmp = []
 676            for i in r:
 677                try:
 678                    tmp.append(items.next())
 679                except StopIteration:
 680                    items = None
 681                    break
 682            n = len(tmp)
 683            if n > 1:
 684                write(MARK)
 685                for k, v in tmp:
 686                    save(k)
 687                    save(v)
 688                write(SETITEMS)
 689            elif n:
 690                k, v = tmp[0]
 691                save(k)
 692                save(v)
 693                write(SETITEM)
 694            # else tmp is empty, and we're done
 695
 696    def save_inst(self, obj):
 697        cls = obj.__class__
 698
 699        memo  = self.memo
 700        write = self.write
 701        save  = self.save
 702
 703        if hasattr(obj, '__getinitargs__'):
 704            args = obj.__getinitargs__()
 705            len(args) # XXX Assert it's a sequence
 706            _keep_alive(args, memo)
 707        else:
 708            args = ()
 709
 710        write(MARK)
 711
 712        if self.bin:
 713            save(cls)
 714            for arg in args:
 715                save(arg)
 716            write(OBJ)
 717        else:
 718            for arg in args:
 719                save(arg)
 720            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')
 721
 722        self.memoize(obj)
 723
 724        try:
 725            getstate = obj.__getstate__
 726        except AttributeError:
 727            stuff = obj.__dict__
 728        else:
 729            stuff = getstate()
 730            _keep_alive(stuff, memo)
 731        save(stuff)
 732        write(BUILD)
 733
 734    dispatch[InstanceType] = save_inst
 735
 736    def save_global(self, obj, name=None, pack=struct.pack):
 737        write = self.write
 738        memo = self.memo
 739
 740        if name is None:
 741            name = obj.__name__
 742
 743        module = getattr(obj, "__module__", None)
 744        if module is None:
 745            module = whichmodule(obj, name)
 746
 747        try:
 748            __import__(module)
 749            mod = sys.modules[module]
 750            klass = getattr(mod, name)
 751        except (ImportError, KeyError, AttributeError):
 752            raise PicklingError(
 753                "Can't pickle %r: it's not found as %s.%s" %
 754                (obj, module, name))
 755        else:
 756            if klass is not obj:
 757                raise PicklingError(
 758                    "Can't pickle %r: it's not the same object as %s.%s" %
 759                    (obj, module, name))
 760
 761        if self.proto >= 2:
 762            code = _extension_registry.get((module, name))
 763            if code:
 764                assert code > 0
 765                if code <= 0xff:
 766                    write(EXT1 + chr(code))
 767                elif code <= 0xffff:
 768                    write("%c%c%c" % (EXT2, code&0xff, code>>8))
 769                else:
 770                    write(EXT4 + pack("<i", code))
 771                return
 772
 773        write(GLOBAL + module + '\n' + name + '\n')
 774        self.memoize(obj)
 775
 776    dispatch[ClassType] = save_global
 777    dispatch[FunctionType] = save_global
 778    dispatch[BuiltinFunctionType] = save_global
 779    dispatch[TypeType] = save_global
 780
 781# Pickling helpers
 782
 783def _keep_alive(x, memo):
 784    """Keeps a reference to the object x in the memo.
 785
 786    Because we remember objects by their id, we have
 787    to assure that possibly temporary objects are kept
 788    alive by referencing them.
 789    We store a reference at the id of the memo, which should
 790    normally not be used unless someone tries to deepcopy
 791    the memo itself...
 792    """
 793    try:
 794        memo[id(memo)].append(x)
 795    except KeyError:
 796        # aha, this is the first one :-)
 797        memo[id(memo)]=[x]
 798
 799
 800# A cache for whichmodule(), mapping a function object to the name of
 801# the module in which the function was found.
 802
 803classmap = {} # called classmap for backwards compatibility
 804
 805def whichmodule(func, funcname):
 806    """Figure out the module in which a function occurs.
 807
 808    Search sys.modules for the module.
 809    Cache in classmap.
 810    Return a module name.
 811    If the function cannot be found, return "__main__".
 812    """
 813    # Python functions should always get an __module__ from their globals.
 814    mod = getattr(func, "__module__", None)
 815    if mod is not None:
 816        return mod
 817    if func in classmap:
 818        return classmap[func]
 819
 820    for name, module in sys.modules.items():
 821        if module is None:
 822            continue # skip dummy package entries
 823        if name != '__main__' and getattr(module, funcname, None) is func:
 824            break
 825    else:
 826        name = '__main__'
 827    classmap[func] = name
 828    return name
 829
 830
 831# Unpickling machinery
 832
 833class Unpickler:
 834
 835    def __init__(self, file):
 836        """This takes a file-like object for reading a pickle data stream.
 837
 838        The protocol version of the pickle is detected automatically, so no
 839        proto argument is needed.
 840
 841        The file-like object must have two methods, a read() method that
 842        takes an integer argument, and a readline() method that requires no
 843        arguments.  Both methods should return a string.  Thus file-like
 844        object can be a file object opened for reading, a StringIO object,
 845        or any other custom object that meets this interface.
 846        """
 847        self.readline = file.readline
 848        self.read = file.read
 849        self.memo = {}
 850
 851    def load(self):
 852        """Read a pickled object representation from the open file.
 853
 854        Return the reconstituted object hierarchy specified in the file.
 855        """
 856        self.mark = object() # any new unique object
 857        self.stack = []
 858        self.append = self.stack.append
 859        read = self.read
 860        dispatch = self.dispatch
 861        try:
 862            while 1:
 863                key = read(1)
 864                dispatch[key](self)
 865        except _Stop, stopinst:
 866            return stopinst.value
 867
 868    # Return largest index k such that self.stack[k] is self.mark.
 869    # If the stack doesn't contain a mark, eventually raises IndexError.
 870    # This could be sped by maintaining another stack, of indices at which
 871    # the mark appears.  For that matter, the latter stack would suffice,
 872    # and we wouldn't need to push mark objects on self.stack at all.
 873    # Doing so is probably a good thing, though, since if the pickle is
 874    # corrupt (or hostile) we may get a clue from finding self.mark embedded
 875    # in unpickled objects.
 876    def marker(self):
 877        stack = self.stack
 878        mark = self.mark
 879        k = len(stack)-1
 880        while stack[k] is not mark: k = k-1
 881        return k
 882
 883    dispatch = {}
 884
 885    def load_eof(self):
 886        raise EOFError
 887    dispatch[''] = load_eof
 888
 889    def load_proto(self):
 890        proto = ord(self.read(1))
 891        if not 0 <= proto <= 2:
 892            raise ValueError, "unsupported pickle protocol: %d" % proto
 893    dispatch[PROTO] = load_proto
 894
 895    def load_persid(self):
 896        pid = self.readline()[:-1]
 897        self.append(self.persistent_load(pid))
 898    dispatch[PERSID] = load_persid
 899
 900    def load_binpersid(self):
 901        pid = self.stack.pop()
 902        self.append(self.persistent_load(pid))
 903    dispatch[BINPERSID] = load_binpersid
 904
 905    def load_none(self):
 906        self.append(None)
 907    dispatch[NONE] = load_none
 908
 909    def load_false(self):
 910        self.append(False)
 911    dispatch[NEWFALSE] = load_false
 912
 913    def load_true(self):
 914        self.append(True)
 915    dispatch[NEWTRUE] = load_true
 916
 917    def load_int(self):
 918        data = self.readline()
 919        if data == FALSE[1:]:
 920            val = False
 921        elif data == TRUE[1:]:
 922            val = True
 923        else:
 924            try:
 925                val = int(data)
 926            except ValueError:
 927                val = long(data)
 928        self.append(val)
 929    dispatch[INT] = load_int
 930
 931    def load_binint(self):
 932        self.append(mloads('i' + self.read(4)))
 933    dispatch[BININT] = load_binint
 934
 935    def load_binint1(self):
 936        self.append(ord(self.read(1)))
 937    dispatch[BININT1] = load_binint1
 938
 939    def load_binint2(self):
 940        self.append(mloads('i' + self.read(2) + '\000\000'))
 941    dispatch[BININT2] = load_binint2
 942
 943    def load_long(self):
 944        self.append(long(self.readline()[:-1], 0))
 945    dispatch[LONG] = load_long
 946
 947    def load_long1(self):
 948        n = ord(self.read(1))
 949        bytes = self.read(n)
 950        self.append(decode_long(bytes))
 951    dispatch[LONG1] = load_long1
 952
 953    def load_long4(self):
 954        n = mloads('i' + self.read(4))
 955        bytes = self.read(n)
 956        self.append(decode_long(bytes))
 957    dispatch[LONG4] = load_long4
 958
 959    def load_float(self):
 960        self.append(float(self.readline()[:-1]))
 961    dispatch[FLOAT] = load_float
 962
 963    def load_binfloat(self, unpack=struct.unpack):
 964        self.append(unpack('>d', self.read(8))[0])
 965    dispatch[BINFLOAT] = load_binfloat
 966
 967    def load_string(self):
 968        rep = self.readline()[:-1]
 969        for q in "\"'": # double or single quote
 970            if rep.startswith(q):
 971                if len(rep) < 2 or not rep.endswith(q):
 972                    raise ValueError, "insecure string pickle"
 973                rep = rep[len(q):-len(q)]
 974                break
 975        else:
 976            raise ValueError, "insecure string pickle"
 977        self.append(rep.decode("string-escape"))
 978    dispatch[STRING] = load_string
 979
 980    def load_binstring(self):
 981        len = mloads('i' + self.read(4))
 982        self.append(self.read(len))
 983    dispatch[BINSTRING] = load_binstring
 984
 985    def load_unicode(self):
 986        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
 987    dispatch[UNICODE] = load_unicode
 988
 989    def load_binunicode(self):
 990        len = mloads('i' + self.read(4))
 991        self.append(unicode(self.read(len),'utf-8'))
 992    dispatch[BINUNICODE] = load_binunicode
 993
 994    def load_short_binstring(self):
 995        len = ord(self.read(1))
 996        self.append(self.read(len))
 997    dispatch[SHORT_BINSTRING] = load_short_binstring
 998
 999    def load_tuple(self):
1000        k = self.marker()
1001        self.stack[k:] = [tuple(self.stack[k+1:])]
1002    dispatch[TUPLE] = load_tuple
1003
1004    def load_empty_tuple(self):
1005        self.stack.append(())
1006    dispatch[EMPTY_TUPLE] = load_empty_tuple
1007
1008    def load_tuple1(self):
1009        self.stack[-1] = (self.stack[-1],)
1010    dispatch[TUPLE1] = load_tuple1
1011
1012    def load_tuple2(self):
1013        self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1014    dispatch[TUPLE2] = load_tuple2
1015
1016    def load_tuple3(self):
1017        self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1018    dispatch[TUPLE3] = load_tuple3
1019
1020    def load_empty_list(self):
1021        self.stack.append([])
1022    dispatch[EMPTY_LIST] = load_empty_list
1023
1024    def load_empty_dictionary(self):
1025        self.stack.append({})
1026    dispatch[EMPTY_DICT] = load_empty_dictionary
1027
1028    def load_list(self):
1029        k = self.marker()
1030        self.stack[k:] = [self.stack[k+1:]]
1031    dispatch[LIST] = load_list
1032
1033    def load_dict(self):
1034        k = self.marker()
1035        d = {}
1036        items = self.stack[k+1:]
1037        for i in range(0, len(items), 2):
1038            key = items[i]
1039            value = items[i+1]
1040            d[key] = value
1041        self.stack[k:] = [d]
1042    dispatch[DICT] = load_dict
1043
1044    # INST and OBJ differ only in how they get a class object.  It's not
1045    # only sensible to do the rest in a common routine, the two routines
1046    # previously diverged and grew different bugs.
1047    # klass is the class to instantiate, and k points to the topmost mark
1048    # object, following which are the arguments for klass.__init__.
1049    def _instantiate(self, klass, k):
1050        args = tuple(self.stack[k+1:])
1051        del self.stack[k:]
1052        instantiated = 0
1053        if (not args and
1054                type(klass) is ClassType and
1055                not hasattr(klass, "__getinitargs__")):
1056            try:
1057                value = _EmptyClass()
1058                value.__class__ = klass
1059                instantiated = 1
1060            except RuntimeError:
1061                # In restricted execution, assignment to inst.__class__ is
1062                # prohibited
1063                pass
1064        if not instantiated:
1065            try:
1066                value = klass(*args)
1067            except TypeError, err:
1068                raise TypeError, "in constructor for %s: %s" % (
1069                    klass.__name__, str(err)), sys.exc_info()[2]
1070        self.append(value)
1071
1072    def load_inst(self):
1073        module = self.readline()[:-1]
1074        name = self.readline()[:-1]
1075        klass = self.find_class(module, name)
1076        self._instantiate(klass, self.marker())
1077    dispatch[INST] = load_inst
1078
1079    def load_obj(self):
1080        # Stack is ... markobject classobject arg1 arg2 ...
1081        k = self.marker()
1082        klass = self.stack.pop(k+1)
1083        self._instantiate(klass, k)
1084    dispatch[OBJ] = load_obj
1085
1086    def load_newobj(self):
1087        args = self.stack.pop()
1088        cls = self.stack[-1]
1089        obj = cls.__new__(cls, *args)
1090        self.stack[-1] = obj
1091    dispatch[NEWOBJ] = load_newobj
1092
1093    def load_global(self):
1094        module = self.readline()[:-1]
1095        name = self.readline()[:-1]
1096        klass = self.find_class(module, name)
1097        self.append(klass)
1098    dispatch[GLOBAL] = load_global
1099
1100    def load_ext1(self):
1101        code = ord(self.read(1))
1102        self.get_extension(code)
1103    dispatch[EXT1] = load_ext1
1104
1105    def load_ext2(self):
1106        code = mloads('i' + self.read(2) + '\000\000')
1107        self.get_extension(code)
1108    dispatch[EXT2] = load_ext2
1109
1110    def load_ext4(self):
1111        code = mloads('i' + self.read(4))
1112        self.get_extension(code)
1113    dispatch[EXT4] = load_ext4
1114
1115    def get_extension(self, code):
1116        nil = []
1117        obj = _extension_cache.get(code, nil)
1118        if obj is not nil:
1119            self.append(obj)
1120            return
1121        key = _inverted_registry.get(code)
1122        if not key:
1123            raise ValueError("unregistered extension code %d" % code)
1124        obj = self.find_class(*key)
1125        _extension_cache[code] = obj
1126        self.append(obj)
1127
1128    def find_class(self, module, name):
1129        # Subclasses may override this
1130        __import__(module)
1131        mod = sys.modules[module]
1132        klass = getattr(mod, name)
1133        return klass
1134
1135    def load_reduce(self):
1136        stack = self.stack
1137        args = stack.pop()
1138        func = stack[-1]
1139        value = func(*args)
1140        stack[-1] = value
1141    dispatch[REDUCE] = load_reduce
1142
1143    def load_pop(self):
1144        del self.stack[-1]
1145    dispatch[POP] = load_pop
1146
1147    def load_pop_mark(self):
1148        k = self.marker()
1149        del self.stack[k:]
1150    dispatch[POP_MARK] = load_pop_mark
1151
1152    def load_dup(self):
1153        self.append(self.stack[-1])
1154    dispatch[DUP] = load_dup
1155
1156    def load_get(self):
1157        self.append(self.memo[self.readline()[:-1]])
1158    dispatch[GET] = load_get
1159
1160    def load_binget(self):
1161        i = ord(self.read(1))
1162        self.append(self.memo[repr(i)])
1163    dispatch[BINGET] = load_binget
1164
1165    def load_long_binget(self):
1166        i = mloads('i' + self.read(4))
1167        self.append(self.memo[repr(i)])
1168    dispatch[LONG_BINGET] = load_long_binget
1169
1170    def load_put(self):
1171        self.memo[self.readline()[:-1]] = self.stack[-1]
1172    dispatch[PUT] = load_put
1173
1174    def load_binput(self):
1175        i = ord(self.read(1))
1176        self.memo[repr(i)] = self.stack[-1]
1177    dispatch[BINPUT] = load_binput
1178
1179    def load_long_binput(self):
1180        i = mloads('i' + self.read(4))
1181        self.memo[repr(i)] = self.stack[-1]
1182    dispatch[LONG_BINPUT] = load_long_binput
1183
1184    def load_append(self):
1185        stack = self.stack
1186        value = stack.pop()
1187        list = stack[-1]
1188        list.append(value)
1189    dispatch[APPEND] = load_append
1190
1191    def load_appends(self):
1192        stack = self.stack
1193        mark = self.marker()
1194        list = stack[mark - 1]
1195        list.extend(stack[mark + 1:])
1196        del stack[mark:]
1197    dispatch[APPENDS] = load_appends
1198
1199    def load_setitem(self):
1200        stack = self.stack
1201        value = stack.pop()
1202        key = stack.pop()
1203        dict = stack[-1]
1204        dict[key] = value
1205    dispatch[SETITEM] = load_setitem
1206
1207    def load_setitems(self):
1208        stack = self.stack
1209        mark = self.marker()
1210        dict = stack[mark - 1]
1211        for i in range(mark + 1, len(stack), 2):
1212            dict[stack[i]] = stack[i + 1]
1213
1214        del stack[mark:]
1215    dispatch[SETITEMS] = load_setitems
1216
1217    def load_build(self):
1218        stack = self.stack
1219        state = stack.pop()
1220        inst = stack[-1]
1221        setstate = getattr(inst, "__setstate__", None)
1222        if setstate:
1223            setstate(state)
1224            return
1225        slotstate = None
1226        if isinstance(state, tuple) and len(state) == 2:
1227            state, slotstate = state
1228        if state:
1229            try:
1230                d = inst.__dict__
1231                try:
1232                    for k, v in state.iteritems():
1233                        d[intern(k)] = v
1234                # keys in state don't have to be strings
1235                # don't blow up, but don't go out of our way
1236                except TypeError:
1237                    d.update(state)
1238
1239            except RuntimeError:
1240                # XXX In restricted execution, the instance's __dict__
1241                # is not accessible.  Use the old way of unpickling
1242                # the instance variables.  This is a semantic
1243                # difference when unpickling in restricted
1244                # vs. unrestricted modes.
1245                # Note, however, that cPickle has never tried to do the
1246                # .update() business, and always uses
1247                #     PyObject_SetItem(inst.__dict__, key, value) in a
1248                # loop over state.items().
1249                for k, v in state.items():
1250                    setattr(inst, k, v)
1251        if slotstate:
1252            for k, v in slotstate.items():
1253                setattr(inst, k, v)
1254    dispatch[BUILD] = load_build
1255
1256    def load_mark(self):
1257        self.append(self.mark)
1258    dispatch[MARK] = load_mark
1259
1260    def load_stop(self):
1261        value = self.stack.pop()
1262        raise _Stop(value)
1263    dispatch[STOP] = load_stop
1264
1265# Helper class for load_inst/load_obj
1266
class _EmptyClass:
    # Deliberately bare old-style class.  Unpickler._instantiate() creates
    # an instance of it and then rebinds that instance's __class__ to the
    # class being unpickled, so the target class itself is never called.
    pass
1269
1270# Encode/decode longs in linear time.
1271
1272import binascii as _binascii
1273
def encode_long(x):
    r"""Return x as a little-endian two's complement binary string.

    The result is the shortest whole number of bytes whose top bit still
    carries the sign.  Zero is special-cased to the empty string, which
    saves a byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''

    if x > 0:
        prefixed = hex(x)
        assert prefixed.startswith("0x")
        # Work on the bare digits: no "0x" prefix, no long 'L' suffix.
        digits = prefixed[2:].rstrip('L')
        if len(digits) & 1:
            # unhexlify needs an even number of nibbles
            digits = "0" + digits
        elif int(digits[0], 16) >= 8:
            # top bit set would read as negative: add a zero sign byte
            digits = "00" + digits
    else:
        # Build the 256's-complement, (1 << nbits) + x, where nbits is the
        # magnitude's nibble count rounded up to whole bytes.
        prefixed = hex(-x)
        assert prefixed.startswith("0x")
        width = len(prefixed[2:].rstrip('L'))
        if width & 1:
            width += 1
        x += 1 << (width * 4)
        assert x > 0
        digits = hex(x)[2:].rstrip('L')
        shortfall = width - len(digits)
        if shortfall > 0:
            # hex() dropped leading zero nibbles; put them back
            digits = "0" * shortfall + digits
        if int(digits[0], 16) < 8:
            # top bit clear would read as positive: add an 0xff sign byte
            digits = "ff" + digits

    assert len(digits) & 1 == 0, (x, digits)
    big_endian = _binascii.unhexlify(digits)
    return big_endian[::-1]
1339
1340def decode_long(data):
1341    r"""Decode a long from a two's complement little-endian binary string.
1342
1343    >>> decode_long('')
1344    0L
1345    >>> decode_long("\xff\x00")
1346    255L
1347    >>> decode_long("\xff\x7f")
1348    32767L
1349    >>> decode_long("\x00\xff")
1350    -256L
1351    >>> decode_long("\x00\x80")
1352    -32768L
1353    >>> decode_long("\x80")
1354    -128L
1355    >>> decode_long("\x7f")
1356    127L
1357    """
1358
1359    nbytes = len(data)
1360    if nbytes == 0:
1361        return 0L
1362    ashex = _binascii.hexlify(data[::-1])
1363    n = long(ashex, 16) # quadratic time before Python 2.3; linear now
1364    if data[-1] >= '\x80':
1365        n -= 1L << (nbytes * 8)
1366    return n
1367
1368# Shorthands
1369
1370try:
1371    from cStringIO import StringIO
1372except ImportError:
1373    from StringIO import StringIO
1374
def dump(obj, file, protocol=None):
    """Pickle obj to file using a fresh Pickler(file, protocol)."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
1377
def dumps(obj, protocol=None):
    """Return the pickle of obj as a string.

    The data is written by Pickler(buffer, protocol) into an in-memory
    StringIO buffer whose contents are returned.
    """
    buf = StringIO()
    pickler = Pickler(buf, protocol)
    pickler.dump(obj)
    return buf.getvalue()
1382
def load(file):
    """Unpickle one object from file and return it.

    NOTE: this runs Unpickler on the stream, which imports modules and
    calls objects named by the data; do not use it on untrusted input.
    """
    unpickler = Unpickler(file)
    return unpickler.load()
1385
def loads(str):
    """Unpickle one object from the pickle string str and return it.

    NOTE: this runs Unpickler on the data, which imports modules and
    calls objects named by it; do not use it on untrusted input.
    """
    return Unpickler(StringIO(str)).load()
1389
1390# Doctest
1391
def _test():
    """Run doctest.testmod() with its defaults and return its result."""
    import doctest
    outcome = doctest.testmod()
    return outcome
1395
1396if __name__ == "__main__":
1397    _test()