0001 """Create portable serialized representations of Python objects. 0002 0003 See module cPickle for a (much) faster implementation. 0004 See module copy_reg for a mechanism for registering custom picklers. 0005 See module pickletools source for extensive comments. 0006 0007 Classes: 0008 0009 Pickler 0010 Unpickler 0011 0012 Functions: 0013 0014 dump(object, file) 0015 dumps(object) -> string 0016 load(file) -> object 0017 loads(string) -> object 0018 0019 Misc variables: 0020 0021 __version__ 0022 format_version 0023 compatible_formats 0024 0025 """ 0026 0027 __version__ = "$Revision: 1.158 $" # Code version 0028 0029 from types import * 0030 from copy_reg import dispatch_table 0031 from copy_reg import _extension_registry, _inverted_registry, _extension_cache 0032 import marshal 0033 import sys 0034 import struct 0035 import re 0036 import warnings 0037 0038 __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler", 0039 "Unpickler", "dump", "dumps", "load", "loads"] 0040 0041 # These are purely informational; no code uses these. 0042 format_version = "2.0" # File format version we write 0043 compatible_formats = ["1.0", # Original protocol 0 0044 "1.1", # Protocol 0 with INST added 0045 "1.2", # Original protocol 1 0046 "1.3", # Protocol 1 with BINFLOAT added 0047 "2.0", # Protocol 2 0048 ] # Old format versions we can read 0049 0050 # Keep in synch with cPickle. This is the highest protocol number we 0051 # know how to read. 0052 HIGHEST_PROTOCOL = 2 0053 0054 # Why use struct.pack() for pickling but marshal.loads() for 0055 # unpickling? struct.pack() is 40% faster than marshal.dumps(), but 0056 # marshal.loads() is twice as fast as struct.unpack()! 0057 mloads = marshal.loads 0058 0059 class PickleError(Exception): 0060 """A common base class for the other pickling exceptions.""" 0061 pass 0062 0063 class PicklingError(PickleError): 0064 """This exception is raised when an unpicklable object is passed to the 0065 dump() method. 0066 0067 """ 0068 pass 0069 0070 class UnpicklingError(PickleError): 0071 """This exception is raised when there is a problem unpickling an object, 0072 such as a security violation. 0073 0074 Note that other exceptions may also be raised during unpickling, including 0075 (but not necessarily limited to) AttributeError, EOFError, ImportError, 0076 and IndexError. 0077 0078 """ 0079 pass 0080 0081 # An instance of _Stop is raised by Unpickler.load_stop() in response to 0082 # the STOP opcode, passing the object that is the result of unpickling. 0083 class _Stop(Exception): 0084 def __init__(self, value): 0085 self.value = value 0086 0087 # Jython has PyStringMap; it's a dict subclass with string keys 0088 try: 0089 from org.python.core import PyStringMap 0090 except ImportError: 0091 PyStringMap = None 0092 0093 # UnicodeType may or may not be exported (normally imported from types) 0094 try: 0095 UnicodeType 0096 except NameError: 0097 UnicodeType = None 0098 0099 # Pickle opcodes. See pickletools.py for extensive docs. The listing 0100 # here is in kind-of alphabetical order of 1-character pickle code. 0101 # pickletools groups them by purpose. 0102 0103 MARK = '(' # push special markobject on stack 0104 STOP = '.' # every pickle ends with STOP 0105 POP = '0' # discard topmost stack item 0106 POP_MARK = '1' # discard stack top through topmost markobject 0107 DUP = '2' # duplicate top stack item 0108 FLOAT = 'F' # push float object; decimal string argument 0109 INT = 'I' # push integer or bool; decimal string argument 0110 BININT = 'J' # push four-byte signed int 0111 BININT1 = 'K' # push 1-byte unsigned int 0112 LONG = 'L' # push long; decimal string argument 0113 BININT2 = 'M' # push 2-byte unsigned int 0114 NONE = 'N' # push None 0115 PERSID = 'P' # push persistent object; id is taken from string arg 0116 BINPERSID = 'Q' # " " " ; " " " " stack 0117 REDUCE = 'R' # apply callable to argtuple, both on stack 0118 STRING = 'S' # push string; NL-terminated string argument 0119 BINSTRING = 'T' # push string; counted binary string argument 0120 SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes 0121 UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument 0122 BINUNICODE = 'X' # " " " ; counted UTF-8 string argument 0123 APPEND = 'a' # append stack top to list below it 0124 BUILD = 'b' # call __setstate__ or __dict__.update() 0125 GLOBAL = 'c' # push self.find_class(modname, name); 2 string args 0126 DICT = 'd' # build a dict from stack items 0127 EMPTY_DICT = '}' # push empty dict 0128 APPENDS = 'e' # extend list on stack by topmost stack slice 0129 GET = 'g' # push item from memo on stack; index is string arg 0130 BINGET = 'h' # " " " " " " ; " " 1-byte arg 0131 INST = 'i' # build & push class instance 0132 LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg 0133 LIST = 'l' # build list from topmost stack items 0134 EMPTY_LIST = ']' # push empty list 0135 OBJ = 'o' # build & push class instance 0136 PUT = 'p' # store stack top in memo; index is string arg 0137 BINPUT = 'q' # " " " " " ; " " 1-byte arg 0138 LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg 0139 SETITEM = 's' # add key+value pair to dict 0140 TUPLE = 't' # build tuple from topmost stack items 0141 EMPTY_TUPLE = ')' # push empty tuple 0142 SETITEMS = 'u' # modify dict by adding topmost key+value pairs 0143 BINFLOAT = 'G' # push float; arg is 8-byte float encoding 0144 0145 TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py 0146 FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py 0147 0148 # Protocol 2 0149 0150 PROTO = '\x80' # identify pickle protocol 0151 NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple 0152 EXT1 = '\x82' # push object from extension registry; 1-byte index 0153 EXT2 = '\x83' # ditto, but 2-byte index 0154 EXT4 = '\x84' # ditto, but 4-byte index 0155 TUPLE1 = '\x85' # build 1-tuple from stack top 0156 TUPLE2 = '\x86' # build 2-tuple from two topmost stack items 0157 TUPLE3 = '\x87' # build 3-tuple from three topmost stack items 0158 NEWTRUE = '\x88' # push True 0159 NEWFALSE = '\x89' # push False 0160 LONG1 = '\x8a' # push long from < 256 bytes 0161 LONG4 = '\x8b' # push really big long 0162 0163 _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3] 0164 0165 0166 __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)]) 0167 del x 0168 0169 0170 # Pickling machinery 0171 0172 class Pickler: 0173 0174 def __init__(self, file, protocol=None, bin=None): 0175 """This takes a file-like object for writing a pickle data stream. 0176 0177 The optional protocol argument tells the pickler to use the 0178 given protocol; supported protocols are 0, 1, 2. The default 0179 protocol is 0, to be backwards compatible. (Protocol 0 is the 0180 only protocol that can be written to a file opened in text 0181 mode and read back successfully. When using a protocol higher 0182 than 0, make sure the file is opened in binary mode, both when 0183 pickling and unpickling.) 0184 0185 Protocol 1 is more efficient than protocol 0; protocol 2 is 0186 more efficient than protocol 1. 0187 0188 Specifying a negative protocol version selects the highest 0189 protocol version supported. The higher the protocol used, the 0190 more recent the version of Python needed to read the pickle 0191 produced. 0192 0193 The file parameter must have a write() method that accepts a single 0194 string argument. It can thus be an open file object, a StringIO 0195 object, or any other custom object that meets this interface. 0196 0197 """ 0198 if protocol is not None and bin is not None: 0199 raise ValueError, "can't specify both 'protocol' and 'bin'" 0200 if bin is not None: 0201 warnings.warn("The 'bin' argument to Pickler() is deprecated", 0202 DeprecationWarning) 0203 protocol = bin 0204 if protocol is None: 0205 protocol = 0 0206 if protocol < 0: 0207 protocol = HIGHEST_PROTOCOL 0208 elif not 0 <= protocol <= HIGHEST_PROTOCOL: 0209 raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL) 0210 self.write = file.write 0211 self.memo = {} 0212 self.proto = int(protocol) 0213 self.bin = protocol >= 1 0214 self.fast = 0 0215 0216 def clear_memo(self): 0217 """Clears the pickler's "memo". 0218 0219 The memo is the data structure that remembers which objects the 0220 pickler has already seen, so that shared or recursive objects are 0221 pickled by reference and not by value. This method is useful when 0222 re-using picklers. 0223 0224 """ 0225 self.memo.clear() 0226 0227 def dump(self, obj): 0228 """Write a pickled representation of obj to the open file.""" 0229 if self.proto >= 2: 0230 self.write(PROTO + chr(self.proto)) 0231 self.save(obj) 0232 self.write(STOP) 0233 0234 def memoize(self, obj): 0235 """Store an object in the memo.""" 0236 0237 # The Pickler memo is a dictionary mapping object ids to 2-tuples 0238 # that contain the Unpickler memo key and the object being memoized. 0239 # The memo key is written to the pickle and will become 0240 # the key in the Unpickler's memo. The object is stored in the 0241 # Pickler memo so that transient objects are kept alive during 0242 # pickling. 0243 0244 # The use of the Unpickler memo length as the memo key is just a 0245 # convention. The only requirement is that the memo values be unique. 0246 # But there appears no advantage to any other scheme, and this 0247 # scheme allows the Unpickler memo to be implemented as a plain (but 0248 # growable) array, indexed by memo key. 0249 if self.fast: 0250 return 0251 assert id(obj) not in self.memo 0252 memo_len = len(self.memo) 0253 self.write(self.put(memo_len)) 0254 self.memo[id(obj)] = memo_len, obj 0255 0256 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i. 0257 def put(self, i, pack=struct.pack): 0258 if self.bin: 0259 if i < 256: 0260 return BINPUT + chr(i) 0261 else: 0262 return LONG_BINPUT + pack("<i", i) 0263 0264 return PUT + repr(i) + '\n' 0265 0266 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i. 0267 def get(self, i, pack=struct.pack): 0268 if self.bin: 0269 if i < 256: 0270 return BINGET + chr(i) 0271 else: 0272 return LONG_BINGET + pack("<i", i) 0273 0274 return GET + repr(i) + '\n' 0275 0276 def save(self, obj): 0277 # Check for persistent id (defined by a subclass) 0278 pid = self.persistent_id(obj) 0279 if pid: 0280 self.save_pers(pid) 0281 return 0282 0283 # Check the memo 0284 x = self.memo.get(id(obj)) 0285 if x: 0286 self.write(self.get(x[0])) 0287 return 0288 0289 # Check the type dispatch table 0290 t = type(obj) 0291 f = self.dispatch.get(t) 0292 if f: 0293 f(self, obj) # Call unbound method with explicit self 0294 return 0295 0296 # Check for a class with a custom metaclass; treat as regular class 0297 try: 0298 issc = issubclass(t, TypeType) 0299 except TypeError: # t is not a class (old Boost; see SF #502085) 0300 issc = 0 0301 if issc: 0302 self.save_global(obj) 0303 return 0304 0305 # Check copy_reg.dispatch_table 0306 reduce = dispatch_table.get(t) 0307 if reduce: 0308 rv = reduce(obj) 0309 else: 0310 # Check for a __reduce_ex__ method, fall back to __reduce__ 0311 reduce = getattr(obj, "__reduce_ex__", None) 0312 if reduce: 0313 rv = reduce(self.proto) 0314 else: 0315 reduce = getattr(obj, "__reduce__", None) 0316 if reduce: 0317 rv = reduce() 0318 else: 0319 raise PicklingError("Can't pickle %r object: %r" % 0320 (t.__name__, obj)) 0321 0322 # Check for string returned by reduce(), meaning "save as global" 0323 if type(rv) is StringType: 0324 self.save_global(obj, rv) 0325 return 0326 0327 # Assert that reduce() returned a tuple 0328 if type(rv) is not TupleType: 0329 raise PicklingError("%s must return string or tuple" % reduce) 0330 0331 # Assert that it returned an appropriately sized tuple 0332 l = len(rv) 0333 if not (2 <= l <= 5): 0334 raise PicklingError("Tuple returned by %s must have " 0335 "two to five elements" % reduce) 0336 0337 # Save the reduce() output and finally memoize the object 0338 self.save_reduce(obj=obj, *rv) 0339 0340 def persistent_id(self, obj): 0341 # This exists so a subclass can override it 0342 return None 0343 0344 def save_pers(self, pid): 0345 # Save a persistent id reference 0346 if self.bin: 0347 self.save(pid) 0348 self.write(BINPERSID) 0349 else: 0350 self.write(PERSID + str(pid) + '\n') 0351 0352 def save_reduce(self, func, args, state=None, 0353 listitems=None, dictitems=None, obj=None): 0354 # This API is called by some subclasses 0355 0356 # Assert that args is a tuple or None 0357 if not isinstance(args, TupleType): 0358 if args is None: 0359 # A hack for Jim Fulton's ExtensionClass, now deprecated. 0360 # See load_reduce() 0361 warnings.warn("__basicnew__ special case is deprecated", 0362 DeprecationWarning) 0363 else: 0364 raise PicklingError( 0365 "args from reduce() should be a tuple") 0366 0367 # Assert that func is callable 0368 if not callable(func): 0369 raise PicklingError("func from reduce should be callable") 0370 0371 save = self.save 0372 write = self.write 0373 0374 # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ 0375 if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__": 0376 # A __reduce__ implementation can direct protocol 2 to 0377 # use the more efficient NEWOBJ opcode, while still 0378 # allowing protocol 0 and 1 to work normally. For this to 0379 # work, the function returned by __reduce__ should be 0380 # called __newobj__, and its first argument should be a 0381 # new-style class. The implementation for __newobj__ 0382 # should be as follows, although pickle has no way to 0383 # verify this: 0384 # 0385 # def __newobj__(cls, *args): 0386 # return cls.__new__(cls, *args) 0387 # 0388 # Protocols 0 and 1 will pickle a reference to __newobj__, 0389 # while protocol 2 (and above) will pickle a reference to 0390 # cls, the remaining args tuple, and the NEWOBJ code, 0391 # which calls cls.__new__(cls, *args) at unpickling time 0392 # (see load_newobj below). If __reduce__ returns a 0393 # three-tuple, the state from the third tuple item will be 0394 # pickled regardless of the protocol, calling __setstate__ 0395 # at unpickling time (see load_build below). 0396 # 0397 # Note that no standard __newobj__ implementation exists; 0398 # you have to provide your own. This is to enforce 0399 # compatibility with Python 2.2 (pickles written using 0400 # protocol 0 or 1 in Python 2.3 should be unpicklable by 0401 # Python 2.2). 0402 cls = args[0] 0403 if not hasattr(cls, "__new__"): 0404 raise PicklingError( 0405 "args[0] from __newobj__ args has no __new__") 0406 if obj is not None and cls is not obj.__class__: 0407 raise PicklingError( 0408 "args[0] from __newobj__ args has the wrong class") 0409 args = args[1:] 0410 save(cls) 0411 save(args) 0412 write(NEWOBJ) 0413 else: 0414 save(func) 0415 save(args) 0416 write(REDUCE) 0417 0418 if obj is not None: 0419 self.memoize(obj) 0420 0421 # More new special cases (that work with older protocols as 0422 # well): when __reduce__ returns a tuple with 4 or 5 items, 0423 # the 4th and 5th item should be iterators that provide list 0424 # items and dict items (as (key, value) tuples), or None. 0425 0426 if listitems is not None: 0427 self._batch_appends(listitems) 0428 0429 if dictitems is not None: 0430 self._batch_setitems(dictitems) 0431 0432 if state is not None: 0433 save(state) 0434 write(BUILD) 0435 0436 # Methods below this point are dispatched through the dispatch table 0437 0438 dispatch = {} 0439 0440 def save_none(self, obj): 0441 self.write(NONE) 0442 dispatch[NoneType] = save_none 0443 0444 def save_bool(self, obj): 0445 if self.proto >= 2: 0446 self.write(obj and NEWTRUE or NEWFALSE) 0447 else: 0448 self.write(obj and TRUE or FALSE) 0449 dispatch[bool] = save_bool 0450 0451 def save_int(self, obj, pack=struct.pack): 0452 if self.bin: 0453 # If the int is small enough to fit in a signed 4-byte 2's-comp 0454 # format, we can store it more efficiently than the general 0455 # case. 0456 # First one- and two-byte unsigned ints: 0457 if obj >= 0: 0458 if obj <= 0xff: 0459 self.write(BININT1 + chr(obj)) 0460 return 0461 if obj <= 0xffff: 0462 self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8)) 0463 return 0464 # Next check for 4-byte signed ints: 0465 high_bits = obj >> 31 # note that Python shift sign-extends 0466 if high_bits == 0 or high_bits == -1: 0467 # All high bits are copies of bit 2**31, so the value 0468 # fits in a 4-byte signed int. 0469 self.write(BININT + pack("<i", obj)) 0470 return 0471 # Text pickle, or int too big to fit in signed 4-byte format. 0472 self.write(INT + repr(obj) + '\n') 0473 dispatch[IntType] = save_int 0474 0475 def save_long(self, obj, pack=struct.pack): 0476 if self.proto >= 2: 0477 bytes = encode_long(obj) 0478 n = len(bytes) 0479 if n < 256: 0480 self.write(LONG1 + chr(n) + bytes) 0481 else: 0482 self.write(LONG4 + pack("<i", n) + bytes) 0483 return 0484 self.write(LONG + repr(obj) + '\n') 0485 dispatch[LongType] = save_long 0486 0487 def save_float(self, obj, pack=struct.pack): 0488 if self.bin: 0489 self.write(BINFLOAT + pack('>d', obj)) 0490 else: 0491 self.write(FLOAT + repr(obj) + '\n') 0492 dispatch[FloatType] = save_float 0493 0494 def save_string(self, obj, pack=struct.pack): 0495 if self.bin: 0496 n = len(obj) 0497 if n < 256: 0498 self.write(SHORT_BINSTRING + chr(n) + obj) 0499 else: 0500 self.write(BINSTRING + pack("<i", n) + obj) 0501 else: 0502 self.write(STRING + repr(obj) + '\n') 0503 self.memoize(obj) 0504 dispatch[StringType] = save_string 0505 0506 def save_unicode(self, obj, pack=struct.pack): 0507 if self.bin: 0508 encoding = obj.encode('utf-8') 0509 n = len(encoding) 0510 self.write(BINUNICODE + pack("<i", n) + encoding) 0511 else: 0512 obj = obj.replace("\\", "\\u005c") 0513 obj = obj.replace("\n", "\\u000a") 0514 self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n') 0515 self.memoize(obj) 0516 dispatch[UnicodeType] = save_unicode 0517 0518 if StringType == UnicodeType: 0519 # This is true for Jython 0520 def save_string(self, obj, pack=struct.pack): 0521 unicode = obj.isunicode() 0522 0523 if self.bin: 0524 if unicode: 0525 obj = obj.encode("utf-8") 0526 l = len(obj) 0527 if l < 256 and not unicode: 0528 self.write(SHORT_BINSTRING + chr(l) + obj) 0529 else: 0530 s = pack("<i", l) 0531 if unicode: 0532 self.write(BINUNICODE + s + obj) 0533 else: 0534 self.write(BINSTRING + s + obj) 0535 else: 0536 if unicode: 0537 obj = obj.replace("\\", "\\u005c") 0538 obj = obj.replace("\n", "\\u000a") 0539 obj = obj.encode('raw-unicode-escape') 0540 self.write(UNICODE + obj + '\n') 0541 else: 0542 self.write(STRING + repr(obj) + '\n') 0543 self.memoize(obj) 0544 dispatch[StringType] = save_string 0545 0546 def save_tuple(self, obj): 0547 write = self.write 0548 proto = self.proto 0549 0550 n = len(obj) 0551 if n == 0: 0552 if proto: 0553 write(EMPTY_TUPLE) 0554 else: 0555 write(MARK + TUPLE) 0556 return 0557 0558 save = self.save 0559 memo = self.memo 0560 if n <= 3 and proto >= 2: 0561 for element in obj: 0562 save(element) 0563 # Subtle. Same as in the big comment below. 0564 if id(obj) in memo: 0565 get = self.get(memo[id(obj)][0]) 0566 write(POP * n + get) 0567 else: 0568 write(_tuplesize2code[n]) 0569 self.memoize(obj) 0570 return 0571 0572 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple 0573 # has more than 3 elements. 0574 write(MARK) 0575 for element in obj: 0576 save(element) 0577 0578 if id(obj) in memo: 0579 # Subtle. d was not in memo when we entered save_tuple(), so 0580 # the process of saving the tuple's elements must have saved 0581 # the tuple itself: the tuple is recursive. The proper action 0582 # now is to throw away everything we put on the stack, and 0583 # simply GET the tuple (it's already constructed). This check 0584 # could have been done in the "for element" loop instead, but 0585 # recursive tuples are a rare thing. 0586 get = self.get(memo[id(obj)][0]) 0587 if proto: 0588 write(POP_MARK + get) 0589 else: # proto 0 -- POP_MARK not available 0590 write(POP * (n+1) + get) 0591 return 0592 0593 # No recursion. 0594 self.write(TUPLE) 0595 self.memoize(obj) 0596 0597 dispatch[TupleType] = save_tuple 0598 0599 # save_empty_tuple() isn't used by anything in Python 2.3. However, I 0600 # found a Pickler subclass in Zope3 that calls it, so it's not harmless 0601 # to remove it. 0602 def save_empty_tuple(self, obj): 0603 self.write(EMPTY_TUPLE) 0604 0605 def save_list(self, obj): 0606 write = self.write 0607 0608 if self.bin: 0609 write(EMPTY_LIST) 0610 else: # proto 0 -- can't use EMPTY_LIST 0611 write(MARK + LIST) 0612 0613 self.memoize(obj) 0614 self._batch_appends(iter(obj)) 0615 0616 dispatch[ListType] = save_list 0617 0618 # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets 0619 # out of synch, though. 0620 _BATCHSIZE = 1000 0621 0622 def _batch_appends(self, items): 0623 # Helper to batch up APPENDS sequences 0624 save = self.save 0625 write = self.write 0626 0627 if not self.bin: 0628 for x in items: 0629 save(x) 0630 write(APPEND) 0631 return 0632 0633 r = xrange(self._BATCHSIZE) 0634 while items is not None: 0635 tmp = [] 0636 for i in r: 0637 try: 0638 x = items.next() 0639 tmp.append(x) 0640 except StopIteration: 0641 items = None 0642 break 0643 n = len(tmp) 0644 if n > 1: 0645 write(MARK) 0646 for x in tmp: 0647 save(x) 0648 write(APPENDS) 0649 elif n: 0650 save(tmp[0]) 0651 write(APPEND) 0652 # else tmp is empty, and we're done 0653 0654 def save_dict(self, obj): 0655 write = self.write 0656 0657 if self.bin: 0658 write(EMPTY_DICT) 0659 else: # proto 0 -- can't use EMPTY_DICT 0660 write(MARK + DICT) 0661 0662 self.memoize(obj) 0663 self._batch_setitems(obj.iteritems()) 0664 0665 dispatch[DictionaryType] = save_dict 0666 if not PyStringMap is None: 0667 dispatch[PyStringMap] = save_dict 0668 0669 def _batch_setitems(self, items): 0670 # Helper to batch up SETITEMS sequences; proto >= 1 only 0671 save = self.save 0672 write = self.write 0673 0674 if not self.bin: 0675 for k, v in items: 0676 save(k) 0677 save(v) 0678 write(SETITEM) 0679 return 0680 0681 r = xrange(self._BATCHSIZE) 0682 while items is not None: 0683 tmp = [] 0684 for i in r: 0685 try: 0686 tmp.append(items.next()) 0687 except StopIteration: 0688 items = None 0689 break 0690 n = len(tmp) 0691 if n > 1: 0692 write(MARK) 0693 for k, v in tmp: 0694 save(k) 0695 save(v) 0696 write(SETITEMS) 0697 elif n: 0698 k, v = tmp[0] 0699 save(k) 0700 save(v) 0701 write(SETITEM) 0702 # else tmp is empty, and we're done 0703 0704 def save_inst(self, obj): 0705 cls = obj.__class__ 0706 0707 memo = self.memo 0708 write = self.write 0709 save = self.save 0710 0711 if hasattr(obj, '__getinitargs__'): 0712 args = obj.__getinitargs__() 0713 len(args) # XXX Assert it's a sequence 0714 _keep_alive(args, memo) 0715 else: 0716 args = () 0717 0718 write(MARK) 0719 0720 if self.bin: 0721 save(cls) 0722 for arg in args: 0723 save(arg) 0724 write(OBJ) 0725 else: 0726 for arg in args: 0727 save(arg) 0728 write(INST + cls.__module__ + '\n' + cls.__name__ + '\n') 0729 0730 self.memoize(obj) 0731 0732 try: 0733 getstate = obj.__getstate__ 0734 except AttributeError: 0735 stuff = obj.__dict__ 0736 else: 0737 stuff = getstate() 0738 _keep_alive(stuff, memo) 0739 save(stuff) 0740 write(BUILD) 0741 0742 dispatch[InstanceType] = save_inst 0743 0744 def save_global(self, obj, name=None, pack=struct.pack): 0745 write = self.write 0746 memo = self.memo 0747 0748 if name is None: 0749 name = obj.__name__ 0750 0751 module = getattr(obj, "__module__", None) 0752 if module is None: 0753 module = whichmodule(obj, name) 0754 0755 try: 0756 __import__(module) 0757 mod = sys.modules[module] 0758 klass = getattr(mod, name) 0759 except (ImportError, KeyError, AttributeError): 0760 raise PicklingError( 0761 "Can't pickle %r: it's not found as %s.%s" % 0762 (obj, module, name)) 0763 else: 0764 if klass is not obj: 0765 raise PicklingError( 0766 "Can't pickle %r: it's not the same object as %s.%s" % 0767 (obj, module, name)) 0768 0769 if self.proto >= 2: 0770 code = _extension_registry.get((module, name)) 0771 if code: 0772 assert code > 0 0773 if code <= 0xff: 0774 write(EXT1 + chr(code)) 0775 elif code <= 0xffff: 0776 write("%c%c%c" % (EXT2, code&0xff, code>>8)) 0777 else: 0778 write(EXT4 + pack("<i", code)) 0779 return 0780 0781 write(GLOBAL + module + '\n' + name + '\n') 0782 self.memoize(obj) 0783 0784 dispatch[ClassType] = save_global 0785 dispatch[FunctionType] = save_global 0786 dispatch[BuiltinFunctionType] = save_global 0787 dispatch[TypeType] = save_global 0788 0789 # Pickling helpers 0790 0791 def _keep_alive(x, memo): 0792 """Keeps a reference to the object x in the memo. 0793 0794 Because we remember objects by their id, we have 0795 to assure that possibly temporary objects are kept 0796 alive by referencing them. 0797 We store a reference at the id of the memo, which should 0798 normally not be used unless someone tries to deepcopy 0799 the memo itself... 0800 """ 0801 try: 0802 memo[id(memo)].append(x) 0803 except KeyError: 0804 # aha, this is the first one :-) 0805 memo[id(memo)]=[x] 0806 0807 0808 # A cache for whichmodule(), mapping a function object to the name of 0809 # the module in which the function was found. 0810 0811 classmap = {} # called classmap for backwards compatibility 0812 0813 def whichmodule(func, funcname): 0814 """Figure out the module in which a function occurs. 0815 0816 Search sys.modules for the module. 0817 Cache in classmap. 0818 Return a module name. 0819 If the function cannot be found, return "__main__". 0820 """ 0821 # Python functions should always get an __module__ from their globals. 0822 mod = getattr(func, "__module__", None) 0823 if mod is not None: 0824 return mod 0825 if func in classmap: 0826 return classmap[func] 0827 0828 for name, module in sys.modules.items(): 0829 if module is None: 0830 continue # skip dummy package entries 0831 if name != '__main__' and getattr(module, funcname, None) is func: 0832 break 0833 else: 0834 name = '__main__' 0835 classmap[func] = name 0836 return name 0837 0838 0839 # Unpickling machinery 0840 0841 class Unpickler: 0842 0843 def __init__(self, file): 0844 """This takes a file-like object for reading a pickle data stream. 0845 0846 The protocol version of the pickle is detected automatically, so no 0847 proto argument is needed. 0848 0849 The file-like object must have two methods, a read() method that 0850 takes an integer argument, and a readline() method that requires no 0851 arguments. Both methods should return a string. Thus file-like 0852 object can be a file object opened for reading, a StringIO object, 0853 or any other custom object that meets this interface. 0854 """ 0855 self.readline = file.readline 0856 self.read = file.read 0857 self.memo = {} 0858 0859 def load(self): 0860 """Read a pickled object representation from the open file. 0861 0862 Return the reconstituted object hierarchy specified in the file. 0863 """ 0864 self.mark = object() # any new unique object 0865 self.stack = [] 0866 self.append = self.stack.append 0867 read = self.read 0868 dispatch = self.dispatch 0869 try: 0870 while 1: 0871 key = read(1) 0872 dispatch[key](self) 0873 except _Stop, stopinst: 0874 return stopinst.value 0875 0876 # Return largest index k such that self.stack[k] is self.mark. 0877 # If the stack doesn't contain a mark, eventually raises IndexError. 0878 # This could be sped by maintaining another stack, of indices at which 0879 # the mark appears. For that matter, the latter stack would suffice, 0880 # and we wouldn't need to push mark objects on self.stack at all. 0881 # Doing so is probably a good thing, though, since if the pickle is 0882 # corrupt (or hostile) we may get a clue from finding self.mark embedded 0883 # in unpickled objects. 0884 def marker(self): 0885 stack = self.stack 0886 mark = self.mark 0887 k = len(stack)-1 0888 while stack[k] is not mark: k = k-1 0889 return k 0890 0891 dispatch = {} 0892 0893 def load_eof(self): 0894 raise EOFError 0895 dispatch[''] = load_eof 0896 0897 def load_proto(self): 0898 proto = ord(self.read(1)) 0899 if not 0 <= proto <= 2: 0900 raise ValueError, "unsupported pickle protocol: %d" % proto 0901 dispatch[PROTO] = load_proto 0902 0903 def load_persid(self): 0904 pid = self.readline()[:-1] 0905 self.append(self.persistent_load(pid)) 0906 dispatch[PERSID] = load_persid 0907 0908 def load_binpersid(self): 0909 pid = self.stack.pop() 0910 self.append(self.persistent_load(pid)) 0911 dispatch[BINPERSID] = load_binpersid 0912 0913 def load_none(self): 0914 self.append(None) 0915 dispatch[NONE] = load_none 0916 0917 def load_false(self): 0918 self.append(False) 0919 dispatch[NEWFALSE] = load_false 0920 0921 def load_true(self): 0922 self.append(True) 0923 dispatch[NEWTRUE] = load_true 0924 0925 def load_int(self): 0926 data = self.readline() 0927 if data == FALSE[1:]: 0928 val = False 0929 elif data == TRUE[1:]: 0930 val = True 0931 else: 0932 try: 0933 val = int(data) 0934 except ValueError: 0935 val = long(data) 0936 self.append(val) 0937 dispatch[INT] = load_int 0938 0939 def load_binint(self): 0940 self.append(mloads('i' + self.read(4))) 0941 dispatch[BININT] = load_binint 0942 0943 def load_binint1(self): 0944 self.append(ord(self.read(1))) 0945 dispatch[BININT1] = load_binint1 0946 0947 def load_binint2(self): 0948 self.append(mloads('i' + self.read(2) + '\000\000')) 0949 dispatch[BININT2] = load_binint2 0950 0951 def load_long(self): 0952 self.append(long(self.readline()[:-1], 0)) 0953 dispatch[LONG] = load_long 0954 0955 def load_long1(self): 0956 n = ord(self.read(1)) 0957 bytes = self.read(n) 0958 self.append(decode_long(bytes)) 0959 dispatch[LONG1] = load_long1 0960 0961 def load_long4(self): 0962 n = mloads('i' + self.read(4)) 0963 bytes = self.read(n) 0964 self.append(decode_long(bytes)) 0965 dispatch[LONG4] = load_long4 0966 0967 def load_float(self): 0968 self.append(float(self.readline()[:-1])) 0969 dispatch[FLOAT] = load_float 0970 0971 def load_binfloat(self, unpack=struct.unpack): 0972 self.append(unpack('>d', self.read(8))[0]) 0973 dispatch[BINFLOAT] = load_binfloat 0974 0975 def load_string(self): 0976 rep = self.readline()[:-1] 0977 for q in "\"'": # double or single quote 0978 if rep.startswith(q): 0979 if not rep.endswith(q): 0980 raise ValueError, "insecure string pickle" 0981 rep = rep[len(q):-len(q)] 0982 break 0983 else: 0984 raise ValueError, "insecure string pickle" 0985 self.append(rep.decode("string-escape")) 0986 dispatch[STRING] = load_string 0987 0988 def load_binstring(self): 0989 len = mloads('i' + self.read(4)) 0990 self.append(self.read(len)) 0991 dispatch[BINSTRING] = load_binstring 0992 0993 def load_unicode(self): 0994 self.append(unicode(self.readline()[:-1],'raw-unicode-escape')) 0995 dispatch[UNICODE] = load_unicode 0996 0997 def load_binunicode(self): 0998 len = mloads('i' + self.read(4)) 0999 self.append(unicode(self.read(len),'utf-8')) 1000 dispatch[BINUNICODE] = load_binunicode 1001 1002 def load_short_binstring(self): 1003 len = ord(self.read(1)) 1004 self.append(self.read(len)) 1005 dispatch[SHORT_BINSTRING] = load_short_binstring 1006 1007 def load_tuple(self): 1008 k = self.marker() 1009 self.stack[k:] = [tuple(self.stack[k+1:])] 1010 dispatch[TUPLE] = load_tuple 1011 1012 def load_empty_tuple(self): 1013 self.stack.append(()) 1014 dispatch[EMPTY_TUPLE] = load_empty_tuple 1015 1016 def load_tuple1(self): 1017 self.stack[-1] = (self.stack[-1],) 1018 dispatch[TUPLE1] = load_tuple1 1019 1020 def load_tuple2(self): 1021 self.stack[-2:] = [(self.stack[-2], self.stack[-1])] 1022 dispatch[TUPLE2] = load_tuple2 1023 1024 def load_tuple3(self): 1025 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])] 1026 dispatch[TUPLE3] = load_tuple3 1027 1028 def load_empty_list(self): 1029 self.stack.append([]) 1030 dispatch[EMPTY_LIST] = load_empty_list 1031 1032 def load_empty_dictionary(self): 1033 self.stack.append({}) 1034 dispatch[EMPTY_DICT] = load_empty_dictionary 1035 1036 def load_list(self): 1037 k = self.marker() 1038 self.stack[k:] = [self.stack[k+1:]] 1039 dispatch[LIST] = load_list 1040 1041 def load_dict(self): 1042 k = self.marker() 1043 d = {} 1044 items = self.stack[k+1:] 1045 for i in range(0, len(items), 2): 1046 key = items[i] 1047 value = items[i+1] 1048 d[key] = value 1049 self.stack[k:] = [d] 1050 dispatch[DICT] = load_dict 1051 1052 # INST and OBJ differ only in how they get a class object. It's not 1053 # only sensible to do the rest in a common routine, the two routines 1054 # previously diverged and grew different bugs. 1055 # klass is the class to instantiate, and k points to the topmost mark 1056 # object, following which are the arguments for klass.__init__. 1057 def _instantiate(self, klass, k): 1058 args = tuple(self.stack[k+1:]) 1059 del self.stack[k:] 1060 instantiated = 0 1061 if (not args and 1062 type(klass) is ClassType and 1063 not hasattr(klass, "__getinitargs__")): 1064 try: 1065 value = _EmptyClass() 1066 value.__class__ = klass 1067 instantiated = 1 1068 except RuntimeError: 1069 # In restricted execution, assignment to inst.__class__ is 1070 # prohibited 1071 pass 1072 if not instantiated: 1073 try: 1074 value = klass(*args) 1075 except TypeError, err: 1076 raise TypeError, "in constructor for %s: %s" % ( 1077 klass.__name__, str(err)), sys.exc_info()[2] 1078 self.append(value) 1079 1080 def load_inst(self): 1081 module = self.readline()[:-1] 1082 name = self.readline()[:-1] 1083 klass = self.find_class(module, name) 1084 self._instantiate(klass, self.marker()) 1085 dispatch[INST] = load_inst 1086 1087 def load_obj(self): 1088 # Stack is ... markobject classobject arg1 arg2 ... 1089 k = self.marker() 1090 klass = self.stack.pop(k+1) 1091 self._instantiate(klass, k) 1092 dispatch[OBJ] = load_obj 1093 1094 def load_newobj(self): 1095 args = self.stack.pop() 1096 cls = self.stack[-1] 1097 obj = cls.__new__(cls, *args) 1098 self.stack[-1] = obj 1099 dispatch[NEWOBJ] = load_newobj 1100 1101 def load_global(self): 1102 module = self.readline()[:-1] 1103 name = self.readline()[:-1] 1104 klass = self.find_class(module, name) 1105 self.append(klass) 1106 dispatch[GLOBAL] = load_global 1107 1108 def load_ext1(self): 1109 code = ord(self.read(1)) 1110 self.get_extension(code) 1111 dispatch[EXT1] = load_ext1 1112 1113 def load_ext2(self): 1114 code = mloads('i' + self.read(2) + '\000\000') 1115 self.get_extension(code) 1116 dispatch[EXT2] = load_ext2 1117 1118 def load_ext4(self): 1119 code = mloads('i' + self.read(4)) 1120 self.get_extension(code) 1121 dispatch[EXT4] = load_ext4 1122 1123 def get_extension(self, code): 1124 nil = [] 1125 obj = _extension_cache.get(code, nil) 1126 if obj is not nil: 1127 self.append(obj) 1128 return 1129 key = _inverted_registry.get(code) 1130 if not key: 1131 raise ValueError("unregistered extension code %d" % code) 1132 obj = self.find_class(*key) 1133 _extension_cache[code] = obj 1134 self.append(obj) 1135 1136 def find_class(self, module, name): 1137 # Subclasses may override this 1138 __import__(module) 1139 mod = sys.modules[module] 1140 klass = getattr(mod, name) 1141 return klass 1142 1143 def load_reduce(self): 1144 stack = self.stack 1145 args = stack.pop() 1146 func = stack[-1] 1147 if args is None: 1148 # A hack for Jim Fulton's ExtensionClass, now deprecated 1149 warnings.warn("__basicnew__ special case is deprecated", 1150 DeprecationWarning) 1151 value = func.__basicnew__() 1152 else: 1153 value = func(*args) 1154 stack[-1] = value 1155 dispatch[REDUCE] = load_reduce 1156 1157 def load_pop(self): 1158 del self.stack[-1] 1159 dispatch[POP] = load_pop 1160 1161 def load_pop_mark(self): 1162 k = self.marker() 1163 del self.stack[k:] 1164 dispatch[POP_MARK] = load_pop_mark 1165 1166 def load_dup(self): 1167 self.append(self.stack[-1]) 1168 dispatch[DUP] = load_dup 1169 1170 def load_get(self): 1171 self.append(self.memo[self.readline()[:-1]]) 1172 dispatch[GET] = load_get 1173 1174 def load_binget(self): 1175 i = ord(self.read(1)) 1176 self.append(self.memo[repr(i)]) 1177 dispatch[BINGET] = load_binget 1178 1179 def load_long_binget(self): 1180 i = mloads('i' + self.read(4)) 1181 self.append(self.memo[repr(i)]) 1182 dispatch[LONG_BINGET] = load_long_binget 1183 1184 def load_put(self): 1185 self.memo[self.readline()[:-1]] = self.stack[-1] 1186 dispatch[PUT] = load_put 1187 1188 def load_binput(self): 1189 i = ord(self.read(1)) 1190 self.memo[repr(i)] = self.stack[-1] 1191 dispatch[BINPUT] = load_binput 1192 1193 def load_long_binput(self): 1194 i = mloads('i' + self.read(4)) 1195 self.memo[repr(i)] = self.stack[-1] 1196 dispatch[LONG_BINPUT] = load_long_binput 1197 1198 def load_append(self): 1199 stack = self.stack 1200 value = stack.pop() 1201 list = stack[-1] 1202 list.append(value) 1203 dispatch[APPEND] = load_append 1204 1205 def load_appends(self): 1206 stack = self.stack 1207 mark = self.marker() 1208 list = stack[mark - 1] 1209 list.extend(stack[mark + 1:]) 1210 del stack[mark:] 1211 dispatch[APPENDS] = load_appends 1212 1213 def load_setitem(self): 1214 stack = self.stack 1215 value = stack.pop() 1216 key = stack.pop() 1217 dict = stack[-1] 1218 dict[key] = value 1219 dispatch[SETITEM] = load_setitem 1220 1221 def load_setitems(self): 1222 stack = self.stack 1223 mark = self.marker() 1224 dict = stack[mark - 1] 1225 for i in range(mark + 1, len(stack), 2): 1226 dict[stack[i]] = stack[i + 1] 1227 1228 del stack[mark:] 1229 dispatch[SETITEMS] = load_setitems 1230 1231 def load_build(self): 1232 stack = self.stack 1233 state = stack.pop() 1234 inst = stack[-1] 1235 setstate = getattr(inst, "__setstate__", None) 1236 if setstate: 1237 setstate(state) 1238 return 1239 slotstate = None 1240 if isinstance(state, tuple) and len(state) == 2: 1241 state, slotstate = state 1242 if state: 1243 try: 1244 inst.__dict__.update(state) 1245 except RuntimeError: 1246 # XXX In restricted execution, the instance's __dict__ 1247 # is not accessible. Use the old way of unpickling 1248 # the instance variables. This is a semantic 1249 # difference when unpickling in restricted 1250 # vs. unrestricted modes. 1251 # Note, however, that cPickle has never tried to do the 1252 # .update() business, and always uses 1253 # PyObject_SetItem(inst.__dict__, key, value) in a 1254 # loop over state.items(). 1255 for k, v in state.items(): 1256 setattr(inst, k, v) 1257 if slotstate: 1258 for k, v in slotstate.items(): 1259 setattr(inst, k, v) 1260 dispatch[BUILD] = load_build 1261 1262 def load_mark(self): 1263 self.append(self.mark) 1264 dispatch[MARK] = load_mark 1265 1266 def load_stop(self): 1267 value = self.stack.pop() 1268 raise _Stop(value) 1269 dispatch[STOP] = load_stop 1270 1271 # Helper class for load_inst/load_obj 1272 1273 class _EmptyClass: 1274 pass 1275 1276 # Encode/decode longs in linear time. 1277 1278 import binascii as _binascii 1279 1280 def encode_long(x): 1281 r"""Encode a long to a two's complement little-endian binary string. 1282 Note that 0L is a special case, returning an empty string, to save a 1283 byte in the LONG1 pickling context. 1284 1285 >>> encode_long(0L) 1286 '' 1287 >>> encode_long(255L) 1288 '\xff\x00' 1289 >>> encode_long(32767L) 1290 '\xff\x7f' 1291 >>> encode_long(-256L) 1292 '\x00\xff' 1293 >>> encode_long(-32768L) 1294 '\x00\x80' 1295 >>> encode_long(-128L) 1296 '\x80' 1297 >>> encode_long(127L) 1298 '\x7f' 1299 >>> 1300 """ 1301 1302 if x == 0: 1303 return '' 1304 if x > 0: 1305 ashex = hex(x) 1306 assert ashex.startswith("0x") 1307 njunkchars = 2 + ashex.endswith('L') 1308 nibbles = len(ashex) - njunkchars 1309 if nibbles & 1: 1310 # need an even # of nibbles for unhexlify 1311 ashex = "0x0" + ashex[2:] 1312 elif int(ashex[2], 16) >= 8: 1313 # "looks negative", so need a byte of sign bits 1314 ashex = "0x00" + ashex[2:] 1315 else: 1316 # Build the 256's-complement: (1L << nbytes) + x. The trick is 1317 # to find the number of bytes in linear time (although that should 1318 # really be a constant-time task). 1319 ashex = hex(-x) 1320 assert ashex.startswith("0x") 1321 njunkchars = 2 + ashex.endswith('L') 1322 nibbles = len(ashex) - njunkchars 1323 if nibbles & 1: 1324 # Extend to a full byte. 1325 nibbles += 1 1326 nbits = nibbles * 4 1327 x += 1L << nbits 1328 assert x > 0 1329 ashex = hex(x) 1330 njunkchars = 2 + ashex.endswith('L') 1331 newnibbles = len(ashex) - njunkchars 1332 if newnibbles < nibbles: 1333 ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:] 1334 if int(ashex[2], 16) < 8: 1335 # "looks positive", so need a byte of sign bits 1336 ashex = "0xff" + ashex[2:] 1337 1338 if ashex.endswith('L'): 1339 ashex = ashex[2:-1] 1340 else: 1341 ashex = ashex[2:] 1342 assert len(ashex) & 1 == 0, (x, ashex) 1343 binary = _binascii.unhexlify(ashex) 1344 return binary[::-1] 1345 1346 def decode_long(data): 1347 r"""Decode a long from a two's complement little-endian binary string. 1348 1349 >>> decode_long('') 1350 0L 1351 >>> decode_long("\xff\x00") 1352 255L 1353 >>> decode_long("\xff\x7f") 1354 32767L 1355 >>> decode_long("\x00\xff") 1356 -256L 1357 >>> decode_long("\x00\x80") 1358 -32768L 1359 >>> decode_long("\x80") 1360 -128L 1361 >>> decode_long("\x7f") 1362 127L 1363 """ 1364 1365 nbytes = len(data) 1366 if nbytes == 0: 1367 return 0L 1368 ashex = _binascii.hexlify(data[::-1]) 1369 n = long(ashex, 16) # quadratic time before Python 2.3; linear now 1370 if data[-1] >= '\x80': 1371 n -= 1L << (nbytes * 8) 1372 return n 1373 1374 # Shorthands 1375 1376 try: 1377 from cStringIO import StringIO 1378 except ImportError: 1379 from StringIO import StringIO 1380 1381 def dump(obj, file, protocol=None, bin=None): 1382 Pickler(file, protocol, bin).dump(obj) 1383 1384 def dumps(obj, protocol=None, bin=None): 1385 file = StringIO() 1386 Pickler(file, protocol, bin).dump(obj) 1387 return file.getvalue() 1388 1389 def load(file): 1390 return Unpickler(file).load() 1391 1392 def loads(str): 1393 file = StringIO(str) 1394 return Unpickler(file).load() 1395 1396 # Doctest 1397 1398 def _test(): 1399 import doctest 1400 return doctest.testmod() 1401 1402 if __name__ == "__main__": 1403 _test() 1404
Generated by PyXR 0.9.4