PyXR

c:\python24\lib \ pickle.py



0001 """Create portable serialized representations of Python objects.
0002 
0003 See module cPickle for a (much) faster implementation.
0004 See module copy_reg for a mechanism for registering custom picklers.
0005 See module pickletools source for extensive comments.
0006 
0007 Classes:
0008 
0009     Pickler
0010     Unpickler
0011 
0012 Functions:
0013 
0014     dump(object, file)
0015     dumps(object) -> string
0016     load(file) -> object
0017     loads(string) -> object
0018 
0019 Misc variables:
0020 
0021     __version__
0022     format_version
0023     compatible_formats
0024 
0025 """
0026 
0027 __version__ = "$Revision: 1.158 $"       # Code version
0028 
0029 from types import *
0030 from copy_reg import dispatch_table
0031 from copy_reg import _extension_registry, _inverted_registry, _extension_cache
0032 import marshal
0033 import sys
0034 import struct
0035 import re
0036 import warnings
0037 
# Public API of the module.  The all-caps opcode names (and other all-caps
# module globals) are appended further down with __all__.extend().
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Keep in sync with cPickle.  This is the highest protocol number we
# know how to read.  Pickler.__init__ also uses it as the upper bound for
# the protocol argument and as the value selected by a negative protocol.
HIGHEST_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
0058 
class PickleError(Exception):
    """Root of this module's exception hierarchy.

    PicklingError and UnpicklingError both derive from it, so catching
    PickleError handles either one.
    """
0062 
class PicklingError(PickleError):
    """Raised when an object handed to dump() cannot be pickled."""
0069 
class UnpicklingError(PickleError):
    """Raised when unpickling an object fails, e.g. on a security violation.

    Unpickling is not limited to this exception: AttributeError, EOFError,
    ImportError and IndexError (among others) may also be raised.
    """
0080 
0081 # An instance of _Stop is raised by Unpickler.load_stop() in response to
0082 # the STOP opcode, passing the object that is the result of unpickling.
0083 class _Stop(Exception):
0084     def __init__(self, value):
0085         self.value = value
0086 
# Jython has PyStringMap; it's a dict subclass with string keys.
# On interpreters where the import fails, PyStringMap is set to None and the
# Pickler dispatch table skips registering it.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# UnicodeType may or may not be exported (normally imported from types).
# Referencing the bare name probes for it; when it is missing the name is
# bound to None so later code can still refer to UnicodeType.
try:
    UnicodeType
except NameError:
    UnicodeType = None
0098 
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK            = '('   # push special markobject on stack
STOP            = '.'   # every pickle ends with STOP
POP             = '0'   # discard topmost stack item
POP_MARK        = '1'   # discard stack top through topmost markobject
DUP             = '2'   # duplicate top stack item
FLOAT           = 'F'   # push float object; decimal string argument
INT             = 'I'   # push integer or bool; decimal string argument
BININT          = 'J'   # push four-byte signed int
BININT1         = 'K'   # push 1-byte unsigned int
LONG            = 'L'   # push long; decimal string argument
BININT2         = 'M'   # push 2-byte unsigned int
NONE            = 'N'   # push None
PERSID          = 'P'   # push persistent object; id is taken from string arg
BINPERSID       = 'Q'   #  "       "         "  ;  "  "   "     "  stack
REDUCE          = 'R'   # apply callable to argtuple, both on stack
STRING          = 'S'   # push string; NL-terminated string argument
BINSTRING       = 'T'   # push string; counted binary string argument
SHORT_BINSTRING = 'U'   #  "     "   ;    "      "       "      " < 256 bytes
UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE      = 'X'   #   "     "       "  ; counted UTF-8 string argument
APPEND          = 'a'   # append stack top to list below it
BUILD           = 'b'   # call __setstate__ or __dict__.update()
GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
DICT            = 'd'   # build a dict from stack items
EMPTY_DICT      = '}'   # push empty dict
APPENDS         = 'e'   # extend list on stack by topmost stack slice
GET             = 'g'   # push item from memo on stack; index is string arg
BINGET          = 'h'   #   "    "    "    "   "   "  ;   "    " 1-byte arg
INST            = 'i'   # build & push class instance
LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
LIST            = 'l'   # build list from topmost stack items
EMPTY_LIST      = ']'   # push empty list
OBJ             = 'o'   # build & push class instance
PUT             = 'p'   # store stack top in memo; index is string arg
BINPUT          = 'q'   #   "     "    "   "   " ;   "    " 1-byte arg
LONG_BINPUT     = 'r'   #   "     "    "   "   " ;   "    " 4-byte arg
SETITEM         = 's'   # add key+value pair to dict
TUPLE           = 't'   # build tuple from topmost stack items
EMPTY_TUPLE     = ')'   # push empty tuple
SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding

TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO           = '\x80'  # identify pickle protocol
NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = '\x82'  # push object from extension registry; 1-byte index
EXT2            = '\x83'  # ditto, but 2-byte index
EXT4            = '\x84'  # ditto, but 4-byte index
TUPLE1          = '\x85'  # build 1-tuple from stack top
TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = '\x88'  # push True
NEWFALSE        = '\x89'  # push False
LONG1           = '\x8a'  # push long from < 256 bytes
LONG4           = '\x8b'  # push really big long

# Indexed by tuple length (0 through 3): the protocol 2 opcode that builds a
# tuple of that size.  Pickler.save_tuple looks it up with the tuple's length.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]


# Export every module-level name made up solely of capitals, digits and
# underscores (the opcodes above, plus names such as HIGHEST_PROTOCOL).
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
# The list comprehension leaves its loop variable bound at module scope;
# remove it so it does not linger as a module attribute.
del x
0168 
0169 
0170 # Pickling machinery
0171 
class Pickler:
    """Write pickle data streams for Python objects.

    A Pickler wraps the write() method of a file-like object.  Each call to
    dump() emits one complete pickle, terminated by the STOP opcode.

    Type-specific save_* methods are found through the class-level
    ``dispatch`` table.  Types not listed there fall back to
    copy_reg.dispatch_table and then to the object's __reduce_ex__ /
    __reduce__ methods (see save()).

    Instance attributes set by __init__:

        write -- bound write() method of the output file
        memo  -- dict mapping id(obj) to (memo key, obj); see memoize().
                 _keep_alive() additionally stores a list under id(memo).
        proto -- protocol number in use (0, 1 or 2)
        bin   -- true when the protocol is >= 1 (binary opcodes allowed)
        fast  -- when true, memoize() does nothing
    """

    def __init__(self, file, protocol=None, bin=None):
        """This takes a file-like object for writing a pickle data stream.

        The optional protocol argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2.  The default
        protocol is 0, to be backwards compatible.  (Protocol 0 is the
        only protocol that can be written to a file opened in text
        mode and read back successfully.  When using a protocol higher
        than 0, make sure the file is opened in binary mode, both when
        pickling and unpickling.)

        Protocol 1 is more efficient than protocol 0; protocol 2 is
        more efficient than protocol 1.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The file parameter must have a write() method that accepts a single
        string argument.  It can thus be an open file object, a StringIO
        object, or any other custom object that meets this interface.

        The deprecated bin argument, when given, is used as the protocol
        and triggers a DeprecationWarning.  Passing both protocol and bin,
        or a protocol above HIGHEST_PROTOCOL, raises ValueError.

        """
        if protocol is not None and bin is not None:
            raise ValueError("can't specify both 'protocol' and 'bin'")
        if bin is not None:
            warnings.warn("The 'bin' argument to Pickler() is deprecated",
                          DeprecationWarning)
            protocol = bin
        if protocol is None:
            protocol = 0
        if protocol < 0:
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        self.write = file.write
        self.memo = {}
        self.proto = int(protocol)
        self.bin = protocol >= 1
        self.fast = 0

    def clear_memo(self):
        """Clears the pickler's "memo".

        The memo is the data structure that remembers which objects the
        pickler has already seen, so that shared or recursive objects are
        pickled by reference and not by value.  This method is useful when
        re-using picklers.

        """
        self.memo.clear()

    def dump(self, obj):
        """Write a pickled representation of obj to the open file."""
        # The PROTO header is only emitted for protocol 2 and above.
        if self.proto >= 2:
            self.write(PROTO + chr(self.proto))
        self.save(obj)
        self.write(STOP)

    def memoize(self, obj):
        """Store an object in the memo."""

        # The Pickler memo is a dictionary mapping object ids to 2-tuples
        # that contain the Unpickler memo key and the object being memoized.
        # The memo key is written to the pickle and will become
        # the key in the Unpickler's memo.  The object is stored in the
        # Pickler memo so that transient objects are kept alive during
        # pickling.

        # The use of the Unpickler memo length as the memo key is just a
        # convention.  The only requirement is that the memo values be unique.
        # But there appears no advantage to any other scheme, and this
        # scheme allows the Unpickler memo to be implemented as a plain (but
        # growable) array, indexed by memo key.
        if self.fast:
            return
        assert id(obj) not in self.memo
        memo_len = len(self.memo)
        self.write(self.put(memo_len))
        self.memo[id(obj)] = memo_len, obj

    # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
    def put(self, i, pack=struct.pack):
        """Return the opcode string that stores the stack top under memo key i.

        Binary protocols use BINPUT for keys below 256 and LONG_BINPUT
        (4-byte little-endian) otherwise; protocol 0 uses the textual PUT.
        """
        if self.bin:
            if i < 256:
                return BINPUT + chr(i)
            else:
                return LONG_BINPUT + pack("<i", i)

        return PUT + repr(i) + '\n'

    # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
    def get(self, i, pack=struct.pack):
        """Return the opcode string that pushes the memo entry with key i.

        Mirrors put(): BINGET / LONG_BINGET for binary protocols, the
        textual GET for protocol 0.
        """
        if self.bin:
            if i < 256:
                return BINGET + chr(i)
            else:
                return LONG_BINGET + pack("<i", i)

        return GET + repr(i) + '\n'

    def save(self, obj):
        """Write the opcodes for obj, picking a strategy from its type.

        The checks are made in this order:

          1. persistent_id() -- a non-None result is written by save_pers()
          2. the memo -- an already-pickled object becomes a GET
          3. the class-level dispatch table, keyed on type(obj)
          4. subclasses of TypeType (custom metaclasses) -- save_global()
          5. copy_reg.dispatch_table, then __reduce_ex__, then __reduce__

        A reduce result that is a string is saved as a global of that name;
        a tuple of two to five items is handed to save_reduce().

        Raises PicklingError when no strategy applies or when the reduce
        result is neither a string nor a 2..5-element tuple.
        """
        # Check for persistent id (defined by a subclass).  Only None means
        # "not persistent"; a falsy id such as 0 or "" is still an id and
        # must not be dropped.
        pid = self.persistent_id(obj)
        if pid is not None:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x:
            self.write(self.get(x[0]))
            return

        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f:
            f(self, obj) # Call unbound method with explicit self
            return

        # Check for a class with a custom metaclass; treat as regular class
        try:
            issc = issubclass(t, TypeType)
        except TypeError: # t is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            self.save_global(obj)
            return

        # Check copy_reg.dispatch_table
        reduce = dispatch_table.get(t)
        if reduce:
            rv = reduce(obj)
        else:
            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if type(rv) is StringType:
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if type(rv) is not TupleType:
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 5):
            raise PicklingError("Tuple returned by %s must have "
                                "two to five elements" % reduce)

        # Save the reduce() output and finally memoize the object
        self.save_reduce(obj=obj, *rv)

    def persistent_id(self, obj):
        """Return the persistent id for obj, or None if it has none.

        The base implementation always returns None.
        """
        # This exists so a subclass can override it
        return None

    def save_pers(self, pid):
        """Write a reference to the persistent id pid.

        Binary protocols pickle pid itself and follow it with BINPERSID;
        protocol 0 writes PERSID followed by str(pid) and a newline.
        """
        # Save a persistent id reference
        if self.bin:
            self.save(pid)
            self.write(BINPERSID)
        else:
            # NOTE(review): a pid whose str() contains a newline would
            # presumably break this newline-terminated argument -- confirm
            # against the unpickler.
            self.write(PERSID + str(pid) + '\n')

    def save_reduce(self, func, args, state=None,
                    listitems=None, dictitems=None, obj=None):
        """Write the opcodes for a reduce result (func, args, ...).

        func      -- callable that recreates the object
        args      -- tuple of arguments for func (None is tolerated with a
                     DeprecationWarning)
        state     -- if not None, pickled and followed by BUILD
        listitems -- if not None, an iterator fed to _batch_appends()
        dictitems -- if not None, an iterator fed to _batch_setitems()
        obj       -- if not None, the object being pickled; it is memoized
                     once the creating opcode has been written

        Raises PicklingError if args is neither a tuple nor None, if func
        is not callable, or if the __newobj__ special case is misused.
        """
        # This API is called by some subclasses

        # Assert that args is a tuple or None
        if not isinstance(args, TupleType):
            if args is None:
                # A hack for Jim Fulton's ExtensionClass, now deprecated.
                # See load_reduce()
                warnings.warn("__basicnew__ special case is deprecated",
                              DeprecationWarning)
            else:
                raise PicklingError(
                    "args from reduce() should be a tuple")

        # Assert that func is callable
        if not callable(func):
            raise PicklingError("func from reduce should be callable")

        save = self.save
        write = self.write

        # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
        if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
            # A __reduce__ implementation can direct protocol 2 to
            # use the more efficient NEWOBJ opcode, while still
            # allowing protocol 0 and 1 to work normally.  For this to
            # work, the function returned by __reduce__ should be
            # called __newobj__, and its first argument should be a
            # new-style class.  The implementation for __newobj__
            # should be as follows, although pickle has no way to
            # verify this:
            #
            # def __newobj__(cls, *args):
            #     return cls.__new__(cls, *args)
            #
            # Protocols 0 and 1 will pickle a reference to __newobj__,
            # while protocol 2 (and above) will pickle a reference to
            # cls, the remaining args tuple, and the NEWOBJ code,
            # which calls cls.__new__(cls, *args) at unpickling time
            # (see load_newobj below).  If __reduce__ returns a
            # three-tuple, the state from the third tuple item will be
            # pickled regardless of the protocol, calling __setstate__
            # at unpickling time (see load_build below).
            #
            # Note that no standard __newobj__ implementation exists;
            # you have to provide your own.  This is to enforce
            # compatibility with Python 2.2 (pickles written using
            # protocol 0 or 1 in Python 2.3 should be unpicklable by
            # Python 2.2).
            cls = args[0]
            if not hasattr(cls, "__new__"):
                raise PicklingError(
                    "args[0] from __newobj__ args has no __new__")
            if obj is not None and cls is not obj.__class__:
                raise PicklingError(
                    "args[0] from __newobj__ args has the wrong class")
            args = args[1:]
            save(cls)
            save(args)
            write(NEWOBJ)
        else:
            save(func)
            save(args)
            write(REDUCE)

        if obj is not None:
            self.memoize(obj)

        # More new special cases (that work with older protocols as
        # well): when __reduce__ returns a tuple with 4 or 5 items,
        # the 4th and 5th item should be iterators that provide list
        # items and dict items (as (key, value) tuples), or None.

        if listitems is not None:
            self._batch_appends(listitems)

        if dictitems is not None:
            self._batch_setitems(dictitems)

        if state is not None:
            save(state)
            write(BUILD)

    # Methods below this point are dispatched through the dispatch table

    # Maps a type object to the unbound save_* function handling it; save()
    # calls the function as f(self, obj).
    dispatch = {}

    def save_none(self, obj):
        """Write the NONE opcode."""
        self.write(NONE)
    dispatch[NoneType] = save_none

    def save_bool(self, obj):
        """Write a bool: NEWTRUE/NEWFALSE for protocol 2, else TRUE/FALSE."""
        # The and/or selection is safe because every opcode string involved
        # is non-empty and therefore true.
        if self.proto >= 2:
            self.write(obj and NEWTRUE or NEWFALSE)
        else:
            self.write(obj and TRUE or FALSE)
    dispatch[bool] = save_bool

    def save_int(self, obj, pack=struct.pack):
        """Write an int using the most compact opcode that can hold it.

        Binary protocols use BININT1 (0..0xff), BININT2 (..0xffff) or BININT
        (4-byte signed).  Protocol 0, and values too large for four bytes,
        use the textual INT opcode.
        """
        if self.bin:
            # If the int is small enough to fit in a signed 4-byte 2's-comp
            # format, we can store it more efficiently than the general
            # case.
            # First one- and two-byte unsigned ints:
            if obj >= 0:
                if obj <= 0xff:
                    self.write(BININT1 + chr(obj))
                    return
                if obj <= 0xffff:
                    self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
                    return
            # Next check for 4-byte signed ints:
            high_bits = obj >> 31  # note that Python shift sign-extends
            if high_bits == 0 or high_bits == -1:
                # All high bits are copies of bit 2**31, so the value
                # fits in a 4-byte signed int.
                self.write(BININT + pack("<i", obj))
                return
        # Text pickle, or int too big to fit in signed 4-byte format.
        self.write(INT + repr(obj) + '\n')
    dispatch[IntType] = save_int

    def save_long(self, obj, pack=struct.pack):
        """Write a long: LONG1/LONG4 for protocol 2, else the textual LONG."""
        if self.proto >= 2:
            # encode_long() is defined elsewhere in this module.
            bytes = encode_long(obj)
            n = len(bytes)
            if n < 256:
                self.write(LONG1 + chr(n) + bytes)
            else:
                self.write(LONG4 + pack("<i", n) + bytes)
            return
        self.write(LONG + repr(obj) + '\n')
    dispatch[LongType] = save_long

    def save_float(self, obj, pack=struct.pack):
        """Write a float: 8-byte big-endian BINFLOAT, or FLOAT plus repr()."""
        if self.bin:
            self.write(BINFLOAT + pack('>d', obj))
        else:
            self.write(FLOAT + repr(obj) + '\n')
    dispatch[FloatType] = save_float

    def save_string(self, obj, pack=struct.pack):
        """Write a str and memoize it.

        Binary protocols use SHORT_BINSTRING (< 256 bytes) or BINSTRING;
        protocol 0 writes STRING followed by repr(obj).
        """
        if self.bin:
            n = len(obj)
            if n < 256:
                self.write(SHORT_BINSTRING + chr(n) + obj)
            else:
                self.write(BINSTRING + pack("<i", n) + obj)
        else:
            self.write(STRING + repr(obj) + '\n')
        self.memoize(obj)
    dispatch[StringType] = save_string

    def save_unicode(self, obj, pack=struct.pack):
        """Write a unicode string and memoize it.

        Binary protocols write BINUNICODE with a length-prefixed UTF-8
        encoding.  Protocol 0 writes UNICODE with a raw-unicode-escape
        encoding, after escaping backslashes and newlines so that the
        newline-terminated argument stays on one line.
        """
        if self.bin:
            encoding = obj.encode('utf-8')
            n = len(encoding)
            self.write(BINUNICODE + pack("<i", n) + encoding)
        else:
            obj = obj.replace("\\", "\\u005c")
            obj = obj.replace("\n", "\\u000a")
            self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
        self.memoize(obj)
    # When UnicodeType is unavailable it is None; the entry is then keyed on
    # None, which type(obj) never equals, so it is never selected.
    dispatch[UnicodeType] = save_unicode

    if StringType == UnicodeType:
        # This is true for Jython
        def save_string(self, obj, pack=struct.pack):
            """Write a string that may be unicode, then memoize it.

            Replaces the save_string defined above when str and unicode are
            one type; obj.isunicode() decides between the string and the
            unicode opcodes.
            """
            unicode = obj.isunicode()

            if self.bin:
                if unicode:
                    obj = obj.encode("utf-8")
                l = len(obj)
                if l < 256 and not unicode:
                    self.write(SHORT_BINSTRING + chr(l) + obj)
                else:
                    s = pack("<i", l)
                    if unicode:
                        self.write(BINUNICODE + s + obj)
                    else:
                        self.write(BINSTRING + s + obj)
            else:
                if unicode:
                    obj = obj.replace("\\", "\\u005c")
                    obj = obj.replace("\n", "\\u000a")
                    obj = obj.encode('raw-unicode-escape')
                    self.write(UNICODE + obj + '\n')
                else:
                    self.write(STRING + repr(obj) + '\n')
            self.memoize(obj)
        dispatch[StringType] = save_string

    def save_tuple(self, obj):
        """Write a tuple, coping with tuples that contain themselves.

        An empty tuple is written directly and never memoized.  Protocol 2
        uses TUPLE1/TUPLE2/TUPLE3 for up to three elements.  Every other
        case brackets the elements with MARK ... TUPLE.  If saving the
        elements put the tuple itself in the memo, the elements just pushed
        are popped again and a GET is written instead.
        """
        write = self.write
        proto = self.proto

        n = len(obj)
        if n == 0:
            if proto:
                write(EMPTY_TUPLE)
            else:
                write(MARK + TUPLE)
            return

        save = self.save
        memo = self.memo
        if n <= 3 and proto >= 2:
            for element in obj:
                save(element)
            # Subtle.  Same as in the big comment below.
            if id(obj) in memo:
                get = self.get(memo[id(obj)][0])
                write(POP * n + get)
            else:
                write(_tuplesize2code[n])
                self.memoize(obj)
            return

        # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
        # has more than 3 elements.
        write(MARK)
        for element in obj:
            save(element)

        if id(obj) in memo:
            # Subtle.  obj was not in memo when we entered save_tuple(), so
            # the process of saving the tuple's elements must have saved
            # the tuple itself:  the tuple is recursive.  The proper action
            # now is to throw away everything we put on the stack, and
            # simply GET the tuple (it's already constructed).  This check
            # could have been done in the "for element" loop instead, but
            # recursive tuples are a rare thing.
            get = self.get(memo[id(obj)][0])
            if proto:
                write(POP_MARK + get)
            else:   # proto 0 -- POP_MARK not available
                write(POP * (n+1) + get)
            return

        # No recursion.
        write(TUPLE)
        self.memoize(obj)

    dispatch[TupleType] = save_tuple

    # save_empty_tuple() isn't used by anything in Python 2.3.  However, I
    # found a Pickler subclass in Zope3 that calls it, so it's not harmless
    # to remove it.
    def save_empty_tuple(self, obj):
        """Write the EMPTY_TUPLE opcode (kept for external subclasses)."""
        self.write(EMPTY_TUPLE)

    def save_list(self, obj):
        """Write a list: create it empty, memoize it, then append its items.

        Memoizing before the items are saved lets a list that contains
        itself be written as a reference.
        """
        write = self.write

        if self.bin:
            write(EMPTY_LIST)
        else:   # proto 0 -- can't use EMPTY_LIST
            write(MARK + LIST)

        self.memoize(obj)
        self._batch_appends(iter(obj))

    dispatch[ListType] = save_list

    # Keep in sync with cPickle's BATCHSIZE.  Nothing will break if it gets
    # out of sync, though.
    _BATCHSIZE = 1000

    def _batch_appends(self, items):
        """Write the objects produced by the iterator items as list appends.

        Protocol 0 writes one APPEND per item.  Binary protocols group up
        to _BATCHSIZE items between MARK and APPENDS; a group holding a
        single item uses a plain APPEND.
        """
        # Helper to batch up APPENDS sequences
        save = self.save
        write = self.write

        if not self.bin:
            for x in items:
                save(x)
                write(APPEND)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    x = items.next()
                    tmp.append(x)
                except StopIteration:
                    # Iterator exhausted: flush what we have, then stop.
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for x in tmp:
                    save(x)
                write(APPENDS)
            elif n:
                save(tmp[0])
                write(APPEND)
            # else tmp is empty, and we're done

    def save_dict(self, obj):
        """Write a dict: create it empty, memoize it, then set its items."""
        write = self.write

        if self.bin:
            write(EMPTY_DICT)
        else:   # proto 0 -- can't use EMPTY_DICT
            write(MARK + DICT)

        self.memoize(obj)
        self._batch_setitems(obj.iteritems())

    dispatch[DictionaryType] = save_dict
    if PyStringMap is not None:
        dispatch[PyStringMap] = save_dict

    def _batch_setitems(self, items):
        """Write the (key, value) pairs produced by the iterator items.

        Protocol 0 writes one SETITEM per pair.  Binary protocols group up
        to _BATCHSIZE pairs between MARK and SETITEMS; a group holding a
        single pair uses a plain SETITEM.
        """
        # Helper to batch up SETITEMS sequences; the batching itself is
        # used for proto >= 1 only.
        save = self.save
        write = self.write

        if not self.bin:
            for k, v in items:
                save(k)
                save(v)
                write(SETITEM)
            return

        r = xrange(self._BATCHSIZE)
        while items is not None:
            tmp = []
            for i in r:
                try:
                    tmp.append(items.next())
                except StopIteration:
                    # Iterator exhausted: flush what we have, then stop.
                    items = None
                    break
            n = len(tmp)
            if n > 1:
                write(MARK)
                for k, v in tmp:
                    save(k)
                    save(v)
                write(SETITEMS)
            elif n:
                k, v = tmp[0]
                save(k)
                save(v)
                write(SETITEM)
            # else tmp is empty, and we're done

    def save_inst(self, obj):
        """Write an instance of a classic class (registered for InstanceType).

        Constructor arguments come from obj.__getinitargs__() when it is
        defined, otherwise none are written.  Binary protocols write
        MARK, class, args, OBJ; protocol 0 writes MARK, args, then INST with
        the class's module and name.  The instance is memoized, then its
        state (obj.__getstate__() if defined, else obj.__dict__) is saved
        and applied with BUILD.
        """
        cls = obj.__class__

        memo  = self.memo
        write = self.write
        save  = self.save

        if hasattr(obj, '__getinitargs__'):
            args = obj.__getinitargs__()
            len(args) # XXX Assert it's a sequence
            # args may be a temporary; hold a reference so its id() cannot
            # be reused while the memo is in use.
            _keep_alive(args, memo)
        else:
            args = ()

        write(MARK)

        if self.bin:
            save(cls)
            for arg in args:
                save(arg)
            write(OBJ)
        else:
            for arg in args:
                save(arg)
            write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

        self.memoize(obj)

        try:
            getstate = obj.__getstate__
        except AttributeError:
            stuff = obj.__dict__
        else:
            stuff = getstate()
            _keep_alive(stuff, memo)
        save(stuff)
        write(BUILD)

    dispatch[InstanceType] = save_inst

    def save_global(self, obj, name=None, pack=struct.pack):
        """Write obj as a reference to a module-level name.

        name defaults to obj.__name__; the module comes from obj.__module__
        or, failing that, from whichmodule().  The module is imported and
        the name looked up to make sure it yields obj itself.

        Under protocol 2, an object whose (module, name) pair has a code in
        copy_reg's extension registry is written as EXT1/EXT2/EXT4 and is
        not memoized.  Otherwise GLOBAL is written and obj is memoized.

        Raises PicklingError if the name cannot be found in the module or
        refers to a different object.
        """
        write = self.write
        memo = self.memo

        if name is None:
            name = obj.__name__

        module = getattr(obj, "__module__", None)
        if module is None:
            module = whichmodule(obj, name)

        try:
            __import__(module)
            mod = sys.modules[module]
            klass = getattr(mod, name)
        except (ImportError, KeyError, AttributeError):
            raise PicklingError(
                "Can't pickle %r: it's not found as %s.%s" %
                (obj, module, name))
        else:
            if klass is not obj:
                raise PicklingError(
                    "Can't pickle %r: it's not the same object as %s.%s" %
                    (obj, module, name))

        if self.proto >= 2:
            code = _extension_registry.get((module, name))
            if code:
                assert code > 0
                if code <= 0xff:
                    write(EXT1 + chr(code))
                elif code <= 0xffff:
                    write("%c%c%c" % (EXT2, code&0xff, code>>8))
                else:
                    write(EXT4 + pack("<i", code))
                return

        write(GLOBAL + module + '\n' + name + '\n')
        self.memoize(obj)

    dispatch[ClassType] = save_global
    dispatch[FunctionType] = save_global
    dispatch[BuiltinFunctionType] = save_global
    dispatch[TypeType] = save_global
0788 
0789 # Pickling helpers
0790 
0791 def _keep_alive(x, memo):
0792     """Keeps a reference to the object x in the memo.
0793 
0794     Because we remember objects by their id, we have
0795     to assure that possibly temporary objects are kept
0796     alive by referencing them.
0797     We store a reference at the id of the memo, which should
0798     normally not be used unless someone tries to deepcopy
0799     the memo itself...
0800     """
0801     try:
0802         memo[id(memo)].append(x)
0803     except KeyError:
0804         # aha, this is the first one :-)
0805         memo[id(memo)]=[x]
0806 
0807 
0808 # A cache for whichmodule(), mapping a function object to the name of
0809 # the module in which the function was found.
0810 
classmap = {} # called classmap for backwards compatibility

def whichmodule(func, funcname):
    """Return the name of the module in which a function lives.

    If func carries a non-None __module__ attribute, that value is
    returned directly (and is not cached).  Otherwise the answer comes
    from the classmap cache or, failing that, from a scan of sys.modules
    for a module other than '__main__' whose attribute `funcname` is
    func itself.  The scan result is stored in classmap.

    If no module is found, "__main__" is returned (and cached).
    """
    # Python functions should always get an __module__ from their globals.
    declared = getattr(func, "__module__", None)
    if declared is not None:
        return declared

    try:
        return classmap[func]
    except KeyError:
        pass

    found = '__main__'
    for modname, modobj in sys.modules.items():
        if modobj is None:
            continue # skip dummy package entries
        if modname == '__main__':
            continue # only used as the fallback answer
        if getattr(modobj, funcname, None) is func:
            found = modname
            break
    classmap[func] = found
    return found
0837 
0838 
0839 # Unpickling machinery
0840 
0841 class Unpickler:
0842 
0843     def __init__(self, file):
0844         """This takes a file-like object for reading a pickle data stream.
0845 
0846         The protocol version of the pickle is detected automatically, so no
0847         proto argument is needed.
0848 
0849         The file-like object must have two methods, a read() method that
0850         takes an integer argument, and a readline() method that requires no
0851         arguments.  Both methods should return a string.  Thus file-like
0852         object can be a file object opened for reading, a StringIO object,
0853         or any other custom object that meets this interface.
0854         """
0855         self.readline = file.readline
0856         self.read = file.read
0857         self.memo = {}
0858 
0859     def load(self):
0860         """Read a pickled object representation from the open file.
0861 
0862         Return the reconstituted object hierarchy specified in the file.
0863         """
0864         self.mark = object() # any new unique object
0865         self.stack = []
0866         self.append = self.stack.append
0867         read = self.read
0868         dispatch = self.dispatch
0869         try:
0870             while 1:
0871                 key = read(1)
0872                 dispatch[key](self)
0873         except _Stop, stopinst:
0874             return stopinst.value
0875 
0876     # Return largest index k such that self.stack[k] is self.mark.
0877     # If the stack doesn't contain a mark, eventually raises IndexError.
0878     # This could be sped by maintaining another stack, of indices at which
0879     # the mark appears.  For that matter, the latter stack would suffice,
0880     # and we wouldn't need to push mark objects on self.stack at all.
0881     # Doing so is probably a good thing, though, since if the pickle is
0882     # corrupt (or hostile) we may get a clue from finding self.mark embedded
0883     # in unpickled objects.
0884     def marker(self):
0885         stack = self.stack
0886         mark = self.mark
0887         k = len(stack)-1
0888         while stack[k] is not mark: k = k-1
0889         return k
0890 
0891     dispatch = {}
0892 
0893     def load_eof(self):
0894         raise EOFError
0895     dispatch[''] = load_eof
0896 
0897     def load_proto(self):
0898         proto = ord(self.read(1))
0899         if not 0 <= proto <= 2:
0900             raise ValueError, "unsupported pickle protocol: %d" % proto
0901     dispatch[PROTO] = load_proto
0902 
0903     def load_persid(self):
0904         pid = self.readline()[:-1]
0905         self.append(self.persistent_load(pid))
0906     dispatch[PERSID] = load_persid
0907 
0908     def load_binpersid(self):
0909         pid = self.stack.pop()
0910         self.append(self.persistent_load(pid))
0911     dispatch[BINPERSID] = load_binpersid
0912 
0913     def load_none(self):
0914         self.append(None)
0915     dispatch[NONE] = load_none
0916 
0917     def load_false(self):
0918         self.append(False)
0919     dispatch[NEWFALSE] = load_false
0920 
0921     def load_true(self):
0922         self.append(True)
0923     dispatch[NEWTRUE] = load_true
0924 
0925     def load_int(self):
0926         data = self.readline()
0927         if data == FALSE[1:]:
0928             val = False
0929         elif data == TRUE[1:]:
0930             val = True
0931         else:
0932             try:
0933                 val = int(data)
0934             except ValueError:
0935                 val = long(data)
0936         self.append(val)
0937     dispatch[INT] = load_int
0938 
0939     def load_binint(self):
0940         self.append(mloads('i' + self.read(4)))
0941     dispatch[BININT] = load_binint
0942 
0943     def load_binint1(self):
0944         self.append(ord(self.read(1)))
0945     dispatch[BININT1] = load_binint1
0946 
0947     def load_binint2(self):
0948         self.append(mloads('i' + self.read(2) + '\000\000'))
0949     dispatch[BININT2] = load_binint2
0950 
0951     def load_long(self):
0952         self.append(long(self.readline()[:-1], 0))
0953     dispatch[LONG] = load_long
0954 
0955     def load_long1(self):
0956         n = ord(self.read(1))
0957         bytes = self.read(n)
0958         self.append(decode_long(bytes))
0959     dispatch[LONG1] = load_long1
0960 
0961     def load_long4(self):
0962         n = mloads('i' + self.read(4))
0963         bytes = self.read(n)
0964         self.append(decode_long(bytes))
0965     dispatch[LONG4] = load_long4
0966 
0967     def load_float(self):
0968         self.append(float(self.readline()[:-1]))
0969     dispatch[FLOAT] = load_float
0970 
0971     def load_binfloat(self, unpack=struct.unpack):
0972         self.append(unpack('>d', self.read(8))[0])
0973     dispatch[BINFLOAT] = load_binfloat
0974 
0975     def load_string(self):
0976         rep = self.readline()[:-1]
0977         for q in "\"'": # double or single quote
0978             if rep.startswith(q):
0979                 if not rep.endswith(q):
0980                     raise ValueError, "insecure string pickle"
0981                 rep = rep[len(q):-len(q)]
0982                 break
0983         else:
0984             raise ValueError, "insecure string pickle"
0985         self.append(rep.decode("string-escape"))
0986     dispatch[STRING] = load_string
0987 
0988     def load_binstring(self):
0989         len = mloads('i' + self.read(4))
0990         self.append(self.read(len))
0991     dispatch[BINSTRING] = load_binstring
0992 
0993     def load_unicode(self):
0994         self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
0995     dispatch[UNICODE] = load_unicode
0996 
0997     def load_binunicode(self):
0998         len = mloads('i' + self.read(4))
0999         self.append(unicode(self.read(len),'utf-8'))
1000     dispatch[BINUNICODE] = load_binunicode
1001 
1002     def load_short_binstring(self):
1003         len = ord(self.read(1))
1004         self.append(self.read(len))
1005     dispatch[SHORT_BINSTRING] = load_short_binstring
1006 
1007     def load_tuple(self):
1008         k = self.marker()
1009         self.stack[k:] = [tuple(self.stack[k+1:])]
1010     dispatch[TUPLE] = load_tuple
1011 
1012     def load_empty_tuple(self):
1013         self.stack.append(())
1014     dispatch[EMPTY_TUPLE] = load_empty_tuple
1015 
1016     def load_tuple1(self):
1017         self.stack[-1] = (self.stack[-1],)
1018     dispatch[TUPLE1] = load_tuple1
1019 
1020     def load_tuple2(self):
1021         self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
1022     dispatch[TUPLE2] = load_tuple2
1023 
1024     def load_tuple3(self):
1025         self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
1026     dispatch[TUPLE3] = load_tuple3
1027 
1028     def load_empty_list(self):
1029         self.stack.append([])
1030     dispatch[EMPTY_LIST] = load_empty_list
1031 
1032     def load_empty_dictionary(self):
1033         self.stack.append({})
1034     dispatch[EMPTY_DICT] = load_empty_dictionary
1035 
1036     def load_list(self):
1037         k = self.marker()
1038         self.stack[k:] = [self.stack[k+1:]]
1039     dispatch[LIST] = load_list
1040 
1041     def load_dict(self):
1042         k = self.marker()
1043         d = {}
1044         items = self.stack[k+1:]
1045         for i in range(0, len(items), 2):
1046             key = items[i]
1047             value = items[i+1]
1048             d[key] = value
1049         self.stack[k:] = [d]
1050     dispatch[DICT] = load_dict
1051 
1052     # INST and OBJ differ only in how they get a class object.  It's not
1053     # only sensible to do the rest in a common routine, the two routines
1054     # previously diverged and grew different bugs.
1055     # klass is the class to instantiate, and k points to the topmost mark
1056     # object, following which are the arguments for klass.__init__.
1057     def _instantiate(self, klass, k):
1058         args = tuple(self.stack[k+1:])
1059         del self.stack[k:]
1060         instantiated = 0
1061         if (not args and
1062                 type(klass) is ClassType and
1063                 not hasattr(klass, "__getinitargs__")):
1064             try:
1065                 value = _EmptyClass()
1066                 value.__class__ = klass
1067                 instantiated = 1
1068             except RuntimeError:
1069                 # In restricted execution, assignment to inst.__class__ is
1070                 # prohibited
1071                 pass
1072         if not instantiated:
1073             try:
1074                 value = klass(*args)
1075             except TypeError, err:
1076                 raise TypeError, "in constructor for %s: %s" % (
1077                     klass.__name__, str(err)), sys.exc_info()[2]
1078         self.append(value)
1079 
1080     def load_inst(self):
1081         module = self.readline()[:-1]
1082         name = self.readline()[:-1]
1083         klass = self.find_class(module, name)
1084         self._instantiate(klass, self.marker())
1085     dispatch[INST] = load_inst
1086 
1087     def load_obj(self):
1088         # Stack is ... markobject classobject arg1 arg2 ...
1089         k = self.marker()
1090         klass = self.stack.pop(k+1)
1091         self._instantiate(klass, k)
1092     dispatch[OBJ] = load_obj
1093 
1094     def load_newobj(self):
1095         args = self.stack.pop()
1096         cls = self.stack[-1]
1097         obj = cls.__new__(cls, *args)
1098         self.stack[-1] = obj
1099     dispatch[NEWOBJ] = load_newobj
1100 
1101     def load_global(self):
1102         module = self.readline()[:-1]
1103         name = self.readline()[:-1]
1104         klass = self.find_class(module, name)
1105         self.append(klass)
1106     dispatch[GLOBAL] = load_global
1107 
1108     def load_ext1(self):
1109         code = ord(self.read(1))
1110         self.get_extension(code)
1111     dispatch[EXT1] = load_ext1
1112 
1113     def load_ext2(self):
1114         code = mloads('i' + self.read(2) + '\000\000')
1115         self.get_extension(code)
1116     dispatch[EXT2] = load_ext2
1117 
1118     def load_ext4(self):
1119         code = mloads('i' + self.read(4))
1120         self.get_extension(code)
1121     dispatch[EXT4] = load_ext4
1122 
1123     def get_extension(self, code):
1124         nil = []
1125         obj = _extension_cache.get(code, nil)
1126         if obj is not nil:
1127             self.append(obj)
1128             return
1129         key = _inverted_registry.get(code)
1130         if not key:
1131             raise ValueError("unregistered extension code %d" % code)
1132         obj = self.find_class(*key)
1133         _extension_cache[code] = obj
1134         self.append(obj)
1135 
1136     def find_class(self, module, name):
1137         # Subclasses may override this
1138         __import__(module)
1139         mod = sys.modules[module]
1140         klass = getattr(mod, name)
1141         return klass
1142 
1143     def load_reduce(self):
1144         stack = self.stack
1145         args = stack.pop()
1146         func = stack[-1]
1147         if args is None:
1148             # A hack for Jim Fulton's ExtensionClass, now deprecated
1149             warnings.warn("__basicnew__ special case is deprecated",
1150                           DeprecationWarning)
1151             value = func.__basicnew__()
1152         else:
1153             value = func(*args)
1154         stack[-1] = value
1155     dispatch[REDUCE] = load_reduce
1156 
1157     def load_pop(self):
1158         del self.stack[-1]
1159     dispatch[POP] = load_pop
1160 
1161     def load_pop_mark(self):
1162         k = self.marker()
1163         del self.stack[k:]
1164     dispatch[POP_MARK] = load_pop_mark
1165 
1166     def load_dup(self):
1167         self.append(self.stack[-1])
1168     dispatch[DUP] = load_dup
1169 
1170     def load_get(self):
1171         self.append(self.memo[self.readline()[:-1]])
1172     dispatch[GET] = load_get
1173 
1174     def load_binget(self):
1175         i = ord(self.read(1))
1176         self.append(self.memo[repr(i)])
1177     dispatch[BINGET] = load_binget
1178 
1179     def load_long_binget(self):
1180         i = mloads('i' + self.read(4))
1181         self.append(self.memo[repr(i)])
1182     dispatch[LONG_BINGET] = load_long_binget
1183 
1184     def load_put(self):
1185         self.memo[self.readline()[:-1]] = self.stack[-1]
1186     dispatch[PUT] = load_put
1187 
1188     def load_binput(self):
1189         i = ord(self.read(1))
1190         self.memo[repr(i)] = self.stack[-1]
1191     dispatch[BINPUT] = load_binput
1192 
1193     def load_long_binput(self):
1194         i = mloads('i' + self.read(4))
1195         self.memo[repr(i)] = self.stack[-1]
1196     dispatch[LONG_BINPUT] = load_long_binput
1197 
1198     def load_append(self):
1199         stack = self.stack
1200         value = stack.pop()
1201         list = stack[-1]
1202         list.append(value)
1203     dispatch[APPEND] = load_append
1204 
1205     def load_appends(self):
1206         stack = self.stack
1207         mark = self.marker()
1208         list = stack[mark - 1]
1209         list.extend(stack[mark + 1:])
1210         del stack[mark:]
1211     dispatch[APPENDS] = load_appends
1212 
1213     def load_setitem(self):
1214         stack = self.stack
1215         value = stack.pop()
1216         key = stack.pop()
1217         dict = stack[-1]
1218         dict[key] = value
1219     dispatch[SETITEM] = load_setitem
1220 
1221     def load_setitems(self):
1222         stack = self.stack
1223         mark = self.marker()
1224         dict = stack[mark - 1]
1225         for i in range(mark + 1, len(stack), 2):
1226             dict[stack[i]] = stack[i + 1]
1227 
1228         del stack[mark:]
1229     dispatch[SETITEMS] = load_setitems
1230 
1231     def load_build(self):
1232         stack = self.stack
1233         state = stack.pop()
1234         inst = stack[-1]
1235         setstate = getattr(inst, "__setstate__", None)
1236         if setstate:
1237             setstate(state)
1238             return
1239         slotstate = None
1240         if isinstance(state, tuple) and len(state) == 2:
1241             state, slotstate = state
1242         if state:
1243             try:
1244                 inst.__dict__.update(state)
1245             except RuntimeError:
1246                 # XXX In restricted execution, the instance's __dict__
1247                 # is not accessible.  Use the old way of unpickling
1248                 # the instance variables.  This is a semantic
1249                 # difference when unpickling in restricted
1250                 # vs. unrestricted modes.
1251                 # Note, however, that cPickle has never tried to do the
1252                 # .update() business, and always uses
1253                 #     PyObject_SetItem(inst.__dict__, key, value) in a
1254                 # loop over state.items().
1255                 for k, v in state.items():
1256                     setattr(inst, k, v)
1257         if slotstate:
1258             for k, v in slotstate.items():
1259                 setattr(inst, k, v)
1260     dispatch[BUILD] = load_build
1261 
1262     def load_mark(self):
1263         self.append(self.mark)
1264     dispatch[MARK] = load_mark
1265 
1266     def load_stop(self):
1267         value = self.stack.pop()
1268         raise _Stop(value)
1269     dispatch[STOP] = load_stop
1270 
1271 # Helper class for load_inst/load_obj
1272 
1273 class _EmptyClass:
1274     pass
1275 
1276 # Encode/decode longs in linear time.
1277 
1278 import binascii as _binascii
1279 
def _hex_digits(value):
    # Return only the hex digits of hex(value): no "0x" prefix and no
    # trailing 'L' (which hex() adds for longs).
    text = hex(value)
    assert text.startswith("0x")
    if text.endswith('L'):
        return text[2:-1]
    return text[2:]

def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''

    if x > 0:
        digits = _hex_digits(x)
        if len(digits) & 1:
            # need an even # of nibbles for unhexlify
            digits = "0" + digits
        elif int(digits[0], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            digits = "00" + digits
    else:
        # Build the 256's-complement:  (1 << nbits) + x.  The width is
        # taken from the magnitude, rounded up to a whole number of bytes.
        width = len(_hex_digits(-x))
        if width & 1:
            # Extend to a full byte.
            width += 1
        x += 1 << (width * 4)
        assert x > 0
        digits = _hex_digits(x)
        missing = width - len(digits)
        if missing > 0:
            # hex() dropped leading zero nibbles; put them back.
            digits = "0" * missing + digits
        if int(digits[0], 16) < 8:
            # "looks positive", so need a byte of sign bits
            digits = "ff" + digits

    assert len(digits) & 1 == 0, (x, digits)
    # unhexlify gives big-endian bytes; reverse them for little-endian.
    return _binascii.unhexlify(digits)[::-1]
1345 
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long('')
    0L
    >>> decode_long("\xff\x00")
    255L
    >>> decode_long("\xff\x7f")
    32767L
    >>> decode_long("\x00\xff")
    -256L
    >>> decode_long("\x00\x80")
    -32768L
    >>> decode_long("\x80")
    -128L
    >>> decode_long("\x7f")
    127L
    """

    size = len(data)
    if size == 0:
        return long(0)
    # Reverse to big-endian so the hex text reads most significant first.
    bigendian_hex = _binascii.hexlify(data[::-1])
    value = long(bigendian_hex, 16) # quadratic time before Python 2.3; linear now
    # The sign bit is the top bit of the last (most significant) byte.
    if ord(data[-1:]) >= 0x80:
        value -= long(1) << (size * 8)
    return value
1373 
1374 # Shorthands
1375 
1376 try:
1377     from cStringIO import StringIO
1378 except ImportError:
1379     from StringIO import StringIO
1380 
def dump(obj, file, protocol=None, bin=None):
    """Pickle obj to file using a fresh Pickler(file, protocol, bin)."""
    pickler = Pickler(file, protocol, bin)
    pickler.dump(obj)
1383 
def dumps(obj, protocol=None, bin=None):
    """Return the pickled representation of obj as a string.

    The object is pickled into an in-memory StringIO buffer with a fresh
    Pickler(buffer, protocol, bin), and the buffer's contents are returned.
    """
    buf = StringIO()
    pickler = Pickler(buf, protocol, bin)
    pickler.dump(obj)
    return buf.getvalue()
1388 
def load(file):
    """Read one pickled object from file and return it.

    NOTE: never unpickle data that comes from an untrusted source.
    """
    unpickler = Unpickler(file)
    return unpickler.load()
1391 
def loads(str):
    """Read one pickled object from the string str and return it.

    The string is wrapped in a StringIO object and handed to an Unpickler.

    NOTE: never unpickle data that comes from an untrusted source.
    """
    return Unpickler(StringIO(str)).load()
1395 
1396 # Doctest
1397 
1398 def _test():
1399     import doctest
1400     return doctest.testmod()
1401 
1402 if __name__ == "__main__":
1403     _test()
1404 

Generated by PyXR 0.9.4
SourceForge.net Logo