blob: 25ffd031485eb456a03658f8d63d874ee42d1c7c [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Create portable serialized representations of Python objects.
Guido van Rossuma48061a1995-01-10 00:31:14 +00002
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +00003See module copyreg for a mechanism for registering custom picklers.
Tim Peters22a449a2003-01-27 20:16:36 +00004See module pickletools source for extensive comments.
Guido van Rossuma48061a1995-01-10 00:31:14 +00005
Guido van Rossume467be61997-12-05 19:42:42 +00006Classes:
Guido van Rossuma48061a1995-01-10 00:31:14 +00007
Guido van Rossume467be61997-12-05 19:42:42 +00008 Pickler
9 Unpickler
Guido van Rossuma48061a1995-01-10 00:31:14 +000010
Guido van Rossume467be61997-12-05 19:42:42 +000011Functions:
Guido van Rossuma48061a1995-01-10 00:31:14 +000012
Guido van Rossume467be61997-12-05 19:42:42 +000013 dump(object, file)
14 dumps(object) -> string
15 load(file) -> object
16 loads(string) -> object
Guido van Rossuma48061a1995-01-10 00:31:14 +000017
Guido van Rossume467be61997-12-05 19:42:42 +000018Misc variables:
Guido van Rossuma48061a1995-01-10 00:31:14 +000019
Fred Drakefe82acc1998-02-13 03:24:48 +000020 __version__
Guido van Rossume467be61997-12-05 19:42:42 +000021 format_version
22 compatible_formats
Guido van Rossuma48061a1995-01-10 00:31:14 +000023
Guido van Rossuma48061a1995-01-10 00:31:14 +000024"""
25
Guido van Rossum743d17e1998-09-15 20:25:57 +000026__version__ = "$Revision$" # Code version
Guido van Rossuma48061a1995-01-10 00:31:14 +000027
Guido van Rossum13257902007-06-07 23:15:56 +000028from types import FunctionType, BuiltinFunctionType
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +000029from copyreg import dispatch_table
30from copyreg import _extension_registry, _inverted_registry, _extension_cache
Guido van Rossumd3703791998-10-22 20:15:36 +000031import marshal
32import sys
33import struct
Skip Montanaro23bafc62001-02-18 03:10:09 +000034import re
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000035import io
Walter Dörwald42748a82007-06-12 16:40:17 +000036import codecs
Guido van Rossuma48061a1995-01-10 00:31:14 +000037
# Names exported by "from pickle import *".  The upper-case opcode names
# are appended to this list once the opcode table has been defined.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "3.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 3

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# We intentionally write a protocol that Python 2.x cannot read;
# there are too many issues with that.
DEFAULT_PROTOCOL = 3

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
Guido van Rossum0c891ce1995-03-14 15:09:05 +000066
class PickleError(Exception):
    """Root of the exception hierarchy used by the pickling machinery.

    PicklingError and UnpicklingError both derive from this class.
    """
70
class PicklingError(PickleError):
    """Raised when an unpicklable object is passed to the dump() method."""
77
class UnpicklingError(PickleError):
    """Raised when there is a problem unpickling an object, such as a
    security violation.

    Unpickling may also raise other exceptions, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
Guido van Rossum7849da81995-03-09 14:08:35 +000088
class _Stop(Exception):
    """Control-flow exception that carries the result of unpickling.

    An instance is raised by Unpickler.load_stop() in response to the
    STOP opcode; ``value`` is the object that is the result of unpickling.
    """

    def __init__(self, value):
        self.value = value
94
# Jython has PyStringMap; it's a dict subclass with string keys.
# Where org.python.core cannot be imported, the name is bound to None
# instead, so PyStringMap is always defined.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
100
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   # push persistent object; id is taken from stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   # push string; counted binary string argument < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE     = b'X'   # push Unicode string; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   # push item from memo on stack; index is 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT    = b'r'   # store stack top in memo; index is 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Indexed by tuple length (0-3): the opcode that builds a tuple of that size.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   # push bytes; counted binary string argument < 256 bytes

# Export every all-upper-case module-level name defined so far.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
173
Guido van Rossum1be31752003-01-28 15:19:53 +0000174
175# Pickling machinery
176
Guido van Rossuma48061a1995-01-10 00:31:14 +0000177class Pickler:
178
def __init__(self, file, protocol=None):
    """This takes a binary file for writing a pickle data stream.

    All protocols now read and write bytes.

    The optional protocol argument tells the pickler to use the
    given protocol; supported protocols are 0 through
    HIGHEST_PROTOCOL.  When protocol is omitted or None,
    DEFAULT_PROTOCOL is used.

    Protocol 1 is more efficient than protocol 0; protocol 2 is
    more efficient than protocol 1.

    Specifying a negative protocol version selects the highest
    protocol version supported.  The higher the protocol used, the
    more recent the version of Python needed to read the pickle
    produced.  A protocol greater than HIGHEST_PROTOCOL raises
    ValueError.

    The file parameter must have a write() method that accepts a single
    bytes argument.  It can thus be a file object opened for binary
    writing, an io.BytesIO object, or any other custom object that
    meets this interface.

    """
    if protocol is None:
        protocol = DEFAULT_PROTOCOL
    if protocol < 0:
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
    # Only the bound write method of the file is kept.
    self.write = file.write
    self.memo = {}
    self.proto = int(protocol)
    # True for every protocol except the text protocol 0.
    self.bin = protocol >= 1
    # When true, memoize() does nothing.
    self.fast = 0
Guido van Rossuma48061a1995-01-10 00:31:14 +0000212
def clear_memo(self):
    """Empty the pickler's "memo".

    The memo records which objects the pickler has already seen, so
    that shared or recursive objects are pickled by reference rather
    than by value.  Clearing it is useful when a pickler is re-used.
    """
    self.memo.clear()
223
def dump(self, obj):
    """Pickle obj and write the resulting stream to the open file."""
    protocol = self.proto
    if protocol >= 2:
        # Protocols 2 and up start with PROTO and the protocol number.
        self.write(PROTO + bytes([protocol]))
    self.save(obj)
    # Every pickle ends with STOP.
    self.write(STOP)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000230
def memoize(self, obj):
    """Store an object in the memo and write the matching PUT opcode."""

    # The Pickler memo maps id(obj) to a 2-tuple of (memo key, obj).
    # The memo key is written to the pickle and becomes the key in the
    # Unpickler's memo.  The object itself is kept in the tuple so that
    # transient objects stay alive during pickling.
    #
    # Using the current memo length as the memo key is only a
    # convention; the keys just have to be unique.  This scheme lets
    # the Unpickler memo be a plain (but growable) array indexed by key.
    if self.fast:
        return
    memo = self.memo
    ident = id(obj)
    assert ident not in memo
    index = len(memo)
    self.write(self.put(index))
    memo[ident] = index, obj
Jeremy Hylton3422c992003-01-24 19:29:52 +0000252
# Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
def put(self, i, pack=struct.pack):
    if not self.bin:
        # Text protocol: the index is a decimal ASCII line.
        return PUT + repr(i).encode("ascii") + b'\n'
    if i < 256:
        # Index fits in the 1-byte argument of BINPUT.
        return BINPUT + bytes([i])
    return LONG_BINPUT + pack("<i", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000262
# Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
def get(self, i, pack=struct.pack):
    if not self.bin:
        # Text protocol: the index is a decimal ASCII line.
        return GET + repr(i).encode("ascii") + b'\n'
    if i < 256:
        # Index fits in the 1-byte argument of BINGET.
        return BINGET + bytes([i])
    return LONG_BINGET + pack("<i", i)
Tim Peters2344fae2001-01-15 00:50:52 +0000272
def save(self, obj):
    """Write the pickled form of obj to the stream.

    The following are tried in order, and the first that applies wins:
    a persistent id from persistent_id(), a memo hit (written as a GET),
    a handler in self.dispatch for the exact type, save_global() for
    classes (including those with a custom metaclass), a reducer from
    copyreg.dispatch_table, and finally obj.__reduce_ex__ or
    obj.__reduce__.

    Raises PicklingError if no reducer is available, or if the reducer
    returns something other than a string or a tuple of two to five
    elements.
    """
    # Check for persistent id (defined by a subclass).  Only None means
    # "no persistent id"; falsy ids such as 0 or "" are legitimate.
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x is not None:
        self.write(self.get(x[0]))
        return

    # Check the type dispatch table
    t = type(obj)
    f = self.dispatch.get(t)
    if f is not None:
        f(self, obj) # Call unbound method with explicit self
        return

    # Check for a class with a custom metaclass; treat as regular class
    try:
        issc = issubclass(t, type)
    except TypeError: # t is not a class (old Boost; see SF #502085)
        issc = False
    if issc:
        self.save_global(obj)
        return

    # Check copyreg.dispatch_table
    reduce = dispatch_table.get(t)
    if reduce is not None:
        rv = reduce(obj)
    else:
        # Check for a __reduce_ex__ method, fall back to __reduce__
        reduce = getattr(obj, "__reduce_ex__", None)
        if reduce is not None:
            rv = reduce(self.proto)
        else:
            reduce = getattr(obj, "__reduce__", None)
            if reduce is not None:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, obj))

    # Check for string returned by reduce(), meaning "save as global"
    if isinstance(rv, str):
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if not isinstance(rv, tuple):
        raise PicklingError("%s must return string or tuple" % reduce)

    # Assert that it returned an appropriately sized tuple
    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reduce)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000336
def persistent_id(self, obj):
    """Hook that exists so a subclass can override it.

    The base implementation returns None for every object, which makes
    save() pickle the object in the usual way.
    """
    return None
340
def save_pers(self, pid):
    """Save a persistent id reference."""
    if not self.bin:
        # Text protocol: the id is written inline as an ASCII line.
        self.write(PERSID + str(pid).encode("ascii") + b'\n')
        return
    # Binary protocols: pickle the id itself, then BINPERSID takes it
    # from the stack.
    self.save(pid)
    self.write(BINPERSID)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000348
def save_reduce(self, func, args, state=None,
                listitems=None, dictitems=None, obj=None):
    """Pickle an object from the pieces of its reduce value.

    func and args are saved followed by REDUCE.  The exception is
    protocol 2 and above with a func whose __name__ is "__newobj__":
    then args[0] (the class) and args[1:] are saved followed by NEWOBJ.
    If obj is given it is memoized right after that opcode.  listitems
    and dictitems, when not None, are handed to _batch_appends() and
    _batch_setitems(); state, when not None, is saved followed by BUILD.

    Raises PicklingError if args is not a tuple, if func is not
    callable, or if the __newobj__ arguments are inconsistent.
    """
    # This API is called by some subclasses

    # args must be a tuple and func must be callable
    if not isinstance(args, tuple):
        raise PicklingError("args from reduce() should be a tuple")
    if not hasattr(func, '__call__'):
        raise PicklingError("func from reduce should be callable")

    pickle_it = self.save
    emit = self.write

    use_newobj = (self.proto >= 2 and
                  getattr(func, "__name__", "") == "__newobj__")
    if not use_newobj:
        pickle_it(func)
        pickle_it(args)
        emit(REDUCE)
    else:
        # A __reduce__ implementation can direct protocol 2 to use the
        # more efficient NEWOBJ opcode, while still allowing protocols
        # 0 and 1 to work normally.  For this to work, the function
        # returned by __reduce__ should be called __newobj__, and its
        # first argument should be a new-style class.  pickle cannot
        # verify it, but __newobj__ should be implemented as:
        #
        #     def __newobj__(cls, *args):
        #         return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 pickle a reference to __newobj__ itself;
        # protocol 2 (and above) pickles cls, the remaining args tuple
        # and the NEWOBJ code, which calls cls.__new__(cls, *args) at
        # unpickling time (see load_newobj).  If __reduce__ returns a
        # three-tuple, the state from the third item is pickled
        # regardless of the protocol, calling __setstate__ at
        # unpickling time (see load_build).
        #
        # No standard __newobj__ implementation exists; you have to
        # provide your own.  This is to enforce compatibility with
        # Python 2.2 (pickles written using protocol 0 or 1 in Python
        # 2.3 should be unpicklable by Python 2.2).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        pickle_it(cls)
        pickle_it(args[1:])
        emit(NEWOBJ)

    if obj is not None:
        self.memoize(obj)

    # More special cases (that work with older protocols as well):
    # when __reduce__ returns a tuple with 4 or 5 items, the 4th and
    # 5th item should be iterators that provide list items and dict
    # items (as (key, value) tuples), or None.
    if listitems is not None:
        self._batch_appends(listitems)
    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        pickle_it(state)
        emit(BUILD)
425
Guido van Rossumbc64e222003-01-28 16:34:19 +0000426 # Methods below this point are dispatched through the dispatch table
427
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000428 dispatch = {}
429
def save_none(self, obj):
    """Write the NONE opcode; obj itself is not inspected."""
    self.write(NONE)
Guido van Rossum13257902007-06-07 23:15:56 +0000432 dispatch[type(None)] = save_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000433
def save_bool(self, obj):
    """Write the opcode for a bool.

    Protocol 2 and above use NEWTRUE/NEWFALSE; older protocols write
    the TRUE/FALSE byte strings.
    """
    if self.proto >= 2:
        opcode = NEWTRUE if obj else NEWFALSE
    else:
        opcode = TRUE if obj else FALSE
    self.write(opcode)
Guido van Rossum77f6a652002-04-03 22:41:51 +0000439 dispatch[bool] = save_bool
440
def save_int(self, obj, pack=struct.pack):
    """Write an int using the INT family of opcodes.

    In binary mode the shortest fitting form is used: BININT1 for
    0..0xff, BININT2 for 0..0xffff, BININT for values that fit in a
    signed 4-byte int.  Everything else, and every value in text mode,
    is written as INT followed by the decimal repr.
    """
    if self.bin:
        encoded = None
        if 0 <= obj <= 0xff:
            encoded = BININT1 + bytes([obj])
        elif 0 <= obj <= 0xffff:
            encoded = BININT2 + bytes([obj & 0xff, obj >> 8])
        elif (obj >> 31) in (0, -1):
            # Python's shift sign-extends, so 0 or -1 here means all
            # high bits are copies of bit 2**31 and the value fits in
            # a 4-byte signed int.
            encoded = BININT + pack("<i", obj)
        if encoded is not None:
            self.write(encoded)
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    self.write(INT + repr(obj).encode("ascii") + b'\n')
# NOTE: save_int is deliberately not registered in the dispatch table;
# its logic is merged into save_long, which is the handler used for int.
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000465
def save_long(self, obj, pack=struct.pack):
    """Write an int, choosing the most compact opcode available.

    In binary mode, values that fit use BININT1 (0..0xff), BININT2
    (0..0xffff) or BININT (signed 4-byte).  Otherwise protocol 2 and
    above write the encode_long() bytes with LONG1 or LONG4, and older
    protocols write LONG followed by the decimal repr.
    """
    if self.bin:
        small = None
        if 0 <= obj <= 0xff:
            small = BININT1 + bytes([obj])
        elif 0 <= obj <= 0xffff:
            small = BININT2 + bytes([obj & 0xff, obj >> 8])
        elif (obj >> 31) in (0, -1):
            # Python's shift sign-extends, so 0 or -1 here means all
            # high bits are copies of bit 2**31 and the value fits in
            # a 4-byte signed int.
            small = BININT + pack("<i", obj)
        if small is not None:
            self.write(small)
            return
    if self.proto >= 2:
        encoded = encode_long(obj)
        n = len(encoded)
        # LONG1 carries a 1-byte length, LONG4 a 4-byte length.
        header = LONG1 + bytes([n]) if n < 256 else LONG4 + pack("<i", n)
        self.write(header + encoded)
        return
    self.write(LONG + repr(obj).encode("ascii") + b'\n')
Guido van Rossum13257902007-06-07 23:15:56 +0000495 dispatch[int] = save_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000496
def save_float(self, obj, pack=struct.pack):
    """Write a float: BINFLOAT in binary mode, FLOAT plus repr in text mode."""
    if not self.bin:
        self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
        return
    # 8-byte big-endian double.
    self.write(BINFLOAT + pack('>d', obj))
Guido van Rossum13257902007-06-07 23:15:56 +0000502 dispatch[float] = save_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000503
def save_bytes(self, obj, pack=struct.pack):
    """Write a bytes object.

    Protocol 3 and above use SHORT_BINBYTES (length < 256) or BINBYTES.
    Older protocols have no bytes opcode, so the object is written as a
    reduce call: bytes(list_of_ints).  In both cases the object is
    memoized, so shared references are pickled once and then referred
    to by GET.
    """
    if self.proto < 3:
        # Pass obj so that save_reduce() memoizes it; without it, every
        # reference to the same bytes object would be pickled again.
        self.save_reduce(bytes, (list(obj),), obj=obj)
        return
    n = len(obj)
    if n < 256:
        self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj))
    else:
        self.write(BINBYTES + pack("<i", n) + bytes(obj))
    self.memoize(obj)
Guido van Rossum3a41c612003-01-28 15:10:22 +0000513 self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000514 dispatch[bytes] = save_bytes
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000515
def save_str(self, obj, pack=struct.pack):
    """Write a str object and memoize it.

    Binary protocols write BINUNICODE with a 4-byte length and the
    UTF-8 encoding.  The text protocol writes UNICODE followed by the
    raw-unicode-escape encoding on one line; backslashes and newlines
    are escaped first because that codec leaves them untouched.
    """
    if self.bin:
        encoded = obj.encode('utf-8')
        n = len(encoded)
        self.write(BINUNICODE + pack("<i", n) + encoded)
    else:
        # Escape what raw-unicode-escape doesn't, but keep obj bound to
        # the original string: memoizing the escaped copy would file
        # the memo entry under the wrong object's id.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + escaped.encode('raw-unicode-escape') + b'\n')
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000527 dispatch[str] = save_str
Tim Peters658cba62001-02-09 20:06:00 +0000528
def save_tuple(self, obj):
    """Write a tuple, handling tuples that contain themselves.

    An empty tuple is written as EMPTY_TUPLE (protocol 1 and above) or
    MARK + TUPLE (protocol 0) and is not memoized.  With protocol 2 and
    above, tuples of one to three items are written as their items
    followed by TUPLE1/TUPLE2/TUPLE3.  All other tuples are written as
    MARK, the items, then TUPLE.  If saving the items put the tuple
    itself into the memo, the items are popped from the stack again and
    a GET of the memoized tuple is written instead.
    """
    write = self.write
    proto = self.proto

    n = len(obj)
    if n == 0:
        if proto:
            write(EMPTY_TUPLE)
        else:
            # Protocol 0 has no EMPTY_TUPLE opcode.
            write(MARK + TUPLE)
        return

    save = self.save
    memo = self.memo
    if n <= 3 and proto >= 2:
        for element in obj:
            save(element)
        # Subtle.  Same as in the big comment below.
        if id(obj) in memo:
            get = self.get(memo[id(obj)][0])
            # No MARK was written on this path, so pop the n items one
            # by one.
            write(POP * n + get)
        else:
            write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    write(MARK)
    for element in obj:
        save(element)

    if id(obj) in memo:
        # Subtle.  obj was not in memo when we entered save_tuple(), so
        # the process of saving the tuple's elements must have saved
        # the tuple itself:  the tuple is recursive.  The proper action
        # now is to throw away everything we put on the stack, and
        # simply GET the tuple (it's already constructed).  This check
        # could have been done in the "for element" loop instead, but
        # recursive tuples are a rare thing.
        get = self.get(memo[id(obj)][0])
        if proto:
            write(POP_MARK + get)
        else:   # proto 0 -- POP_MARK not available
            # Pop the n items plus the MARK itself.
            write(POP * (n+1) + get)
        return

    # No recursion.
    self.write(TUPLE)
    self.memoize(obj)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000579
Guido van Rossum13257902007-06-07 23:15:56 +0000580 dispatch[tuple] = save_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000581
# save_empty_tuple() isn't used by anything in Python 2.3.  However, a
# Pickler subclass in Zope3 was found to call it, so removing it is not
# harmless.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; *obj* itself is not inspected."""
    emit = self.write
    emit(EMPTY_TUPLE)
587
def save_list(self, obj):
    """Write an empty list, memoize it, then append *obj*'s elements."""
    # proto 0 -- can't use EMPTY_LIST
    header = EMPTY_LIST if self.bin else MARK + LIST
    self.write(header)

    # Memoize before saving the elements, as the original ordering does.
    self.memoize(obj)
    self._batch_appends(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000598
Guido van Rossum13257902007-06-07 23:15:56 +0000599 dispatch[list] = save_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000600
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000601 _BATCHSIZE = 1000
602
def _batch_appends(self, items):
    """Save every element of *items* followed by append opcodes.

    When self.bin is false each element gets its own APPEND.  Otherwise
    elements are grouped into batches of at most self._BATCHSIZE; a
    batch of several elements is written as MARK ... APPENDS and a batch
    of one as a single APPEND.
    """
    pickle_item = self.save
    emit = self.write

    if not self.bin:
        for item in items:
            pickle_item(item)
            emit(APPEND)
        return

    source = iter(items)
    limit = self._BATCHSIZE
    exhausted = False
    while not exhausted:
        # Pull up to `limit` elements from the iterator.
        batch = []
        while len(batch) < limit:
            try:
                batch.append(next(source))
            except StopIteration:
                exhausted = True
                break

        count = len(batch)
        if count > 1:
            emit(MARK)
            for item in batch:
                pickle_item(item)
            emit(APPENDS)
        elif count:
            pickle_item(batch[0])
            emit(APPEND)
        # else the batch is empty, and we're done
635
def save_dict(self, obj):
    """Write an empty dict, memoize it, then set *obj*'s items on it."""
    # proto 0 -- can't use EMPTY_DICT
    header = EMPTY_DICT if self.bin else MARK + DICT
    self.write(header)

    self.memoize(obj)
    self._batch_setitems(obj.items())
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000646
Guido van Rossum13257902007-06-07 23:15:56 +0000647 dispatch[dict] = save_dict
648 if PyStringMap is not None:
Jeremy Hylton2b9d0291998-05-27 22:38:22 +0000649 dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000650
def _batch_setitems(self, items):
    """Save every (key, value) pair of *items* followed by set-item opcodes.

    When self.bin is false each pair gets its own SETITEM.  Otherwise
    pairs are grouped into batches of at most self._BATCHSIZE; a batch
    of several pairs is written as MARK ... SETITEMS and a batch of one
    as a single SETITEM.
    """
    pickle_item = self.save
    emit = self.write

    if not self.bin:
        for key, value in items:
            pickle_item(key)
            pickle_item(value)
            emit(SETITEM)
        return

    source = iter(items)
    limit = self._BATCHSIZE
    exhausted = False
    while not exhausted:
        # Pull up to `limit` pairs from the iterator.
        batch = []
        while len(batch) < limit:
            try:
                batch.append(next(source))
            except StopIteration:
                exhausted = True
                break

        count = len(batch)
        if count > 1:
            emit(MARK)
            for key, value in batch:
                pickle_item(key)
                pickle_item(value)
            emit(SETITEMS)
        elif count:
            key, value = batch[0]
            pickle_item(key)
            pickle_item(value)
            emit(SETITEM)
        # else the batch is empty, and we're done
686
def save_global(self, obj, name=None, pack=struct.pack):
    """Pickle *obj* by reference to its module and name.

    *name* defaults to obj.__name__.  The module comes from
    obj.__module__, falling back to whichmodule().  PicklingError is
    raised when module.name cannot be imported/looked up or does not
    resolve to *obj* itself.  With proto >= 2 a registered extension
    code is written instead of the GLOBAL opcode.
    """
    emit = self.write

    if name is None:
        name = obj.__name__

    module = getattr(obj, "__module__", None)
    if module is None:
        module = whichmodule(obj, name)

    # Check that unpickling would find this very object again.
    try:
        __import__(module)
        found = getattr(sys.modules[module], name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module, name))
    if found is not obj:
        raise PicklingError(
            "Can't pickle %r: it's not the same object as %s.%s" %
            (obj, module, name))

    if self.proto >= 2:
        code = _extension_registry.get((module, name))
        if code:
            assert code > 0
            # Pick the smallest EXT opcode able to hold the code.
            if code <= 0xff:
                payload = EXT1 + bytes([code])
            elif code <= 0xffff:
                payload = EXT2 + bytes([code & 0xff, code >> 8])
            else:
                payload = EXT4 + pack("<i", code)
            emit(payload)
            return

    emit(GLOBAL + bytes(module, "utf-8") + b'\n' +
         bytes(name, "utf-8") + b'\n')
    self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +0000727
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000728 dispatch[FunctionType] = save_global
729 dispatch[BuiltinFunctionType] = save_global
Guido van Rossum13257902007-06-07 23:15:56 +0000730 dispatch[type] = save_global
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000731
Guido van Rossum1be31752003-01-28 15:19:53 +0000732# Pickling helpers
Guido van Rossuma48061a1995-01-10 00:31:14 +0000733
def _keep_alive(x, memo):
    """Keep a reference to the object x in the memo.

    Objects are remembered by their id, so possibly temporary objects
    must be kept alive by referencing them.  The references are
    collected in a list stored under id(memo), a key that should
    normally be unused unless someone tries to deepcopy the memo
    itself...
    """
    # The first call creates the list; later calls extend it.
    memo.setdefault(id(memo), []).append(x)
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000749
750
# A cache for whichmodule(), mapping a function object to the name of
# the module in which the function was found.

classmap = {}  # called classmap for backwards compatibility


def whichmodule(func, funcname):
    """Return the name of the module in which *func* is defined.

    func.__module__ is used when it is set.  Otherwise sys.modules is
    searched for a module (other than __main__) whose attribute
    *funcname* is *func*; "__main__" is returned when none matches.
    Results of the search are cached in classmap.
    """
    # Python functions should always get an __module__ from their globals.
    declared = getattr(func, "__module__", None)
    if declared is not None:
        return declared
    if func in classmap:
        return classmap[func]

    found = '__main__'
    # Iterate over a snapshot of sys.modules.
    for modname, module in list(sys.modules.items()):
        if module is None:
            continue  # skip dummy package entries
        if modname == '__main__':
            continue
        if getattr(module, funcname, None) is func:
            found = modname
            break

    classmap[func] = found
    return found
Guido van Rossuma48061a1995-01-10 00:31:14 +0000780
781
Guido van Rossum1be31752003-01-28 15:19:53 +0000782# Unpickling machinery
783
Guido van Rossuma48061a1995-01-10 00:31:14 +0000784class Unpickler:
785
def __init__(self, file, *, encoding="ASCII", errors="strict"):
    """Set up an unpickler reading a pickle data stream from *file*.

    The protocol version of the pickle is detected automatically, so
    no proto argument is needed.

    *file* must provide two methods: read(), which takes an integer
    argument, and readline(), which requires no arguments.  Both
    should return bytes.  A binary file opened for reading, a BytesIO
    object, or any other object with this interface will do.

    The keyword-only arguments encoding and errors are used to decode
    8-bit string instances pickled by Python 2.x.  They default to
    'ASCII' and 'strict', respectively.
    """
    # Only the two bound methods are kept, not the file object itself.
    self.read = file.read
    self.readline = file.readline
    self.encoding = encoding
    self.errors = errors
    self.memo = {}
Guido van Rossuma48061a1995-01-10 00:31:14 +0000808
def load(self):
    """Read a pickled object representation from the open file.

    Return the reconstituted object hierarchy specified in the file.
    Raises EOFError if the input ends before a _Stop is raised by an
    opcode handler.
    """
    self.mark = object()  # any new unique object
    self.stack = []
    self.append = self.stack.append

    read_bytes = self.read
    handlers = self.dispatch
    try:
        while True:
            opcode = read_bytes(1)
            if not opcode:
                raise EOFError
            assert isinstance(opcode, bytes_types)
            # Handlers are keyed by the integer value of the opcode byte.
            handler = handlers[opcode[0]]
            handler(self)
    except _Stop as finished:
        return finished.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000828
# Return largest index k such that self.stack[k] is self.mark.
# If the stack doesn't contain a mark, eventually raises IndexError.
# This could be sped by maintaining another stack, of indices at which
# the mark appears.  For that matter, the latter stack would suffice,
# and we wouldn't need to push mark objects on self.stack at all.
# Doing so is probably a good thing, though, since if the pickle is
# corrupt (or hostile) we may get a clue from finding self.mark embedded
# in unpickled objects.
def marker(self):
    """Return the index of the topmost mark object on the stack."""
    items = self.stack
    sentinel = self.mark
    index = len(items) - 1
    # Walk downwards from the top of the stack; indexing past the
    # bottom raises IndexError.
    while items[index] is not sentinel:
        index -= 1
    return index
843
844 dispatch = {}
845
def load_proto(self):
    """Read the one-byte protocol number and reject unsupported values."""
    version = ord(self.read(1))
    if version < 0 or version > HIGHEST_PROTOCOL:
        raise ValueError("unsupported pickle protocol: %d" % version)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000850 dispatch[PROTO[0]] = load_proto
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000851
def load_persid(self):
    """Read a persistent id line and push self.persistent_load(id)."""
    line = self.readline()
    pid = line[:-1]  # drop the last byte of the line (the terminator)
    self.append(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000855 dispatch[PERSID[0]] = load_persid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000856
def load_binpersid(self):
    """Pop a persistent id off the stack and push self.persistent_load(id)."""
    stack = self.stack
    pid = stack.pop()
    self.append(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000860 dispatch[BINPERSID[0]] = load_binpersid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000861
def load_none(self):
    """Push None onto the stack."""
    push = self.append
    push(None)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000864 dispatch[NONE[0]] = load_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000865
def load_false(self):
    """Push False onto the stack."""
    push = self.append
    push(False)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000868 dispatch[NEWFALSE[0]] = load_false
Guido van Rossum7d97d312003-01-28 04:25:27 +0000869
def load_true(self):
    """Push True onto the stack."""
    push = self.append
    push(True)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000872 dispatch[NEWTRUE[0]] = load_true
Guido van Rossum7d97d312003-01-28 04:25:27 +0000873
def load_int(self):
    """Read one line and push it as a bool or an int.

    The line is compared against FALSE[1:] and TRUE[1:] first; those
    two push False and True.  Any other line is converted with int(),
    which raises ValueError for text that is not an integer.
    """
    data = self.readline()
    if data == FALSE[1:]:
        val = False
    elif data == TRUE[1:]:
        val = True
    else:
        # A failed int() used to be retried with the identical call
        # (a leftover of the old long() fallback); a single conversion
        # raises the same ValueError.
        val = int(data)
    self.append(val)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000886 dispatch[INT[0]] = load_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000887
def load_binint(self):
    """Read four bytes, decode them with mloads as an 'i' value, push it."""
    payload = self.read(4)
    value = mloads(b'i' + payload)
    self.append(value)
890 dispatch[BININT[0]] = load_binint
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000891
def load_binint1(self):
    """Read one byte and push its integer value."""
    value = ord(self.read(1))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000894 dispatch[BININT1[0]] = load_binint1
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000895
def load_binint2(self):
    """Read two bytes, pad them to four, decode with mloads, push the result."""
    # Two zero bytes are appended so mloads sees a full 'i' record.
    payload = self.read(2) + b'\000\000'
    value = mloads(b'i' + payload)
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000898 dispatch[BININT2[0]] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +0000899
def load_long(self):
    """Read one line holding an integer literal and push its value.

    The text is parsed with int(text, 0), so prefixed literals such as
    0x1f are accepted.  A trailing 'L', as written by Python 2 for its
    long type, is stripped first so those pickles can be loaded too.
    Raises ValueError if the remaining text is not an integer literal.
    """
    text = self.readline()[:-1].decode("ascii")
    if text.endswith("L"):
        # int() rejects the Python 2 long suffix.
        text = text[:-1]
    self.append(int(text, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000903 dispatch[LONG[0]] = load_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000904
def load_long1(self):
    """Read a one-byte length, then that many bytes, and push decode_long() of them."""
    size = ord(self.read(1))
    payload = self.read(size)
    value = decode_long(payload)
    self.append(value)
909 dispatch[LONG1[0]] = load_long1
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000910
def load_long4(self):
    """Read a four-byte length, then that many bytes, and push decode_long() of them."""
    size = mloads(b'i' + self.read(4))
    payload = self.read(size)
    value = decode_long(payload)
    self.append(value)
915 dispatch[LONG4[0]] = load_long4
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000916
def load_float(self):
    """Read one line and push it converted with float()."""
    text = self.readline()[:-1]
    value = float(text)
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000919 dispatch[FLOAT[0]] = load_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000920
def load_binfloat(self, unpack=struct.unpack):
    """Read eight bytes and push them unpacked as a big-endian double."""
    payload = self.read(8)
    (value,) = unpack('>d', payload)
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000923 dispatch[BINFLOAT[0]] = load_binfloat
Guido van Rossumd3703791998-10-22 20:15:36 +0000924
def load_string(self):
    """Read a quoted, escaped 8-bit string line and push it as str.

    The line (minus its last byte) must start and end with the same
    quote character, either double or single; otherwise ValueError is
    raised.  The text between the quotes is unescaped with
    codecs.escape_decode() and then decoded with self.encoding and
    self.errors, matching load_binstring() and load_short_binstring().
    """
    orig = self.readline()
    rep = orig[:-1]
    opening = rep[:1]
    # Require two bytes so that a lone quote character cannot serve as
    # both the opening and the closing delimiter.
    if len(rep) < 2 or opening not in (b'"', b"'"):
        raise ValueError("insecure string pickle: %r" % orig)
    if rep[-1:] != opening:
        raise ValueError("insecure string pickle")
    raw = codecs.escape_decode(rep[1:-1])[0]
    self.append(str(raw, self.encoding, self.errors))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000937 dispatch[STRING[0]] = load_string
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000938
def load_binstring(self):
    """Read a four-byte length, then that many bytes, and push them decoded.

    Decoding uses self.encoding and self.errors.
    """
    size = mloads(b'i' + self.read(4))
    payload = self.read(size)
    self.append(str(payload, self.encoding, self.errors))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000944 dispatch[BINSTRING[0]] = load_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000945
def load_binbytes(self):
    """Read a four-byte length, then push that many bytes as read."""
    size = mloads(b'i' + self.read(4))
    payload = self.read(size)
    self.append(payload)
949 dispatch[BINBYTES[0]] = load_binbytes
950
def load_unicode(self):
    """Read one line and push it decoded as raw-unicode-escape text."""
    encoded = self.readline()[:-1]
    text = str(encoded, 'raw-unicode-escape')
    self.append(text)
953 dispatch[UNICODE[0]] = load_unicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000954
def load_binunicode(self):
    """Read a four-byte length, then that many bytes, and push them decoded as UTF-8."""
    size = mloads(b'i' + self.read(4))
    payload = self.read(size)
    self.append(str(payload, 'utf-8'))
958 dispatch[BINUNICODE[0]] = load_binunicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000959
def load_short_binstring(self):
    """Read a one-byte length, then that many bytes, and push them decoded.

    Decoding uses self.encoding and self.errors.
    """
    size = ord(self.read(1))
    payload = bytes(self.read(size))
    text = str(payload, self.encoding, self.errors)
    self.append(text)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000965 dispatch[SHORT_BINSTRING[0]] = load_short_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000966
def load_short_binbytes(self):
    """Read a one-byte length, then push that many bytes as a bytes object."""
    size = ord(self.read(1))
    payload = bytes(self.read(size))
    self.append(payload)
970 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
971
def load_tuple(self):
    """Replace the topmost mark and everything above it with one tuple."""
    start = self.marker()
    stack = self.stack
    built = tuple(stack[start + 1:])
    stack[start:] = [built]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000975 dispatch[TUPLE[0]] = load_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000976
def load_empty_tuple(self):
    """Push an empty tuple onto the stack."""
    stack = self.stack
    stack.append(())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000979 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000980
def load_tuple1(self):
    """Replace the top stack item with a one-element tuple holding it."""
    stack = self.stack
    top = stack[-1]
    stack[-1] = (top,)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000983 dispatch[TUPLE1[0]] = load_tuple1
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000984
def load_tuple2(self):
    """Replace the top two stack items with a pair holding them."""
    stack = self.stack
    # Explicit indexing so a too-short stack raises IndexError.
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000987 dispatch[TUPLE2[0]] = load_tuple2
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000988
def load_tuple3(self):
    """Replace the top three stack items with a triple holding them."""
    stack = self.stack
    # Explicit indexing so a too-short stack raises IndexError.
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000991 dispatch[TUPLE3[0]] = load_tuple3
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000992
def load_empty_list(self):
    """Push a new empty list onto the stack."""
    stack = self.stack
    stack.append([])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000995 dispatch[EMPTY_LIST[0]] = load_empty_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000996
def load_empty_dictionary(self):
    """Push a new empty dict onto the stack."""
    stack = self.stack
    stack.append({})
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000999 dispatch[EMPTY_DICT[0]] = load_empty_dictionary
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001000
def load_list(self):
    """Replace the topmost mark and everything above it with one list."""
    start = self.marker()
    stack = self.stack
    built = stack[start + 1:]
    stack[start:] = [built]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001004 dispatch[LIST[0]] = load_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001005
def load_dict(self):
    """Replace the topmost mark and the items above it with one dict.

    The items above the mark are taken as alternating keys and values.
    """
    start = self.marker()
    flat = self.stack[start + 1:]
    # Index-based pairing, so a key without a value raises IndexError.
    built = {flat[i]: flat[i + 1] for i in range(0, len(flat), 2)}
    self.stack[start:] = [built]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001015 dispatch[DICT[0]] = load_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001016
# INST and OBJ differ only in how they get a class object.  It's not
# only sensible to do the rest in a common routine, the two routines
# previously diverged and grew different bugs.
# klass is the class to instantiate, and k points to the topmost mark
# object, following which are the arguments for klass.__init__.
def _instantiate(self, klass, k):
    """Pop the mark at index *k* and the arguments above it; push an instance.

    With no arguments, a *klass* that is a type and has no
    __getinitargs__ attribute is instantiated without calling it: an
    _EmptyClass instance is created and its __class__ is reassigned.
    Otherwise klass(*args) is called.

    Raises TypeError, naming the class and chained to the original
    error, if the constructor call raises TypeError.
    """
    args = tuple(self.stack[k+1:])
    del self.stack[k:]
    if (not args and
            isinstance(klass, type) and
            not hasattr(klass, "__getinitargs__")):
        value = _EmptyClass()
        value.__class__ = klass
    else:
        try:
            value = klass(*args)
        except TypeError as err:
            # The traceback used to be passed as a second constructor
            # argument, which put it into the exception's args; chain
            # the original error instead.
            raise TypeError("in constructor for %s: %s" %
                            (klass.__name__, str(err))) from err
    self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001039
def load_inst(self):
    """Read a module line and a name line, then instantiate that class.

    The class is resolved with find_class() and built by _instantiate()
    from the arguments above the topmost mark.
    """
    module = self.readline()[:-1]
    name = self.readline()[:-1]
    klass = self.find_class(module, name)
    start = self.marker()
    self._instantiate(klass, start)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001045 dispatch[INST[0]] = load_inst
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001046
def load_obj(self):
    """Instantiate the class object stored just above the topmost mark."""
    # Stack is ... markobject classobject arg1 arg2 ...
    start = self.marker()
    stack = self.stack
    klass = stack.pop(start + 1)
    self._instantiate(klass, start)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001052 dispatch[OBJ[0]] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001053
def load_newobj(self):
    """Pop an argument tuple and replace the class below it with cls.__new__(cls, *args)."""
    stack = self.stack
    args = stack.pop()
    cls = stack[-1]
    stack[-1] = cls.__new__(cls, *args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001059 dispatch[NEWOBJ[0]] = load_newobj
Guido van Rossum3a41c612003-01-28 15:10:22 +00001060
def load_global(self):
    """Read a module line and a name line and push find_class(module, name)."""
    module = self.readline()[:-1]
    name = self.readline()[:-1]
    self.append(self.find_class(module, name))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001066 dispatch[GLOBAL[0]] = load_global
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001067
def load_ext1(self):
    """Read a one-byte extension code and hand it to get_extension()."""
    raw = self.read(1)
    self.get_extension(ord(raw))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001071 dispatch[EXT1[0]] = load_ext1
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001072
def load_ext2(self):
    """Read a two-byte extension code and hand it to get_extension()."""
    # Two zero bytes are appended so mloads sees a full 'i' record.
    raw = self.read(2) + b'\000\000'
    self.get_extension(mloads(b'i' + raw))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001076 dispatch[EXT2[0]] = load_ext2
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001077
def load_ext4(self):
    """Read a four-byte extension code and hand it to get_extension()."""
    raw = self.read(4)
    self.get_extension(mloads(b'i' + raw))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001081 dispatch[EXT4[0]] = load_ext4
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001082
def get_extension(self, code):
    """Push the object registered for extension *code*.

    _extension_cache is consulted first.  On a miss the (module, name)
    key is looked up in _inverted_registry, resolved with find_class(),
    and the result is stored in the cache.  Raises ValueError when
    *code* has no registered key.
    """
    # A membership test is used because a cached object may itself be falsy.
    if code in _extension_cache:
        self.append(_extension_cache[code])
        return

    key = _inverted_registry.get(code)
    if not key:
        raise ValueError("unregistered extension code %d" % code)

    resolved = self.find_class(*key)
    _extension_cache[code] = resolved
    self.append(resolved)
1095
def find_class(self, module, name):
    """Import *module* and return its attribute *name*.

    Either argument may be given as bytes, in which case it is decoded
    as UTF-8 first.  Subclasses may override this.
    """
    if isinstance(module, bytes_types):
        module = module.decode("utf-8")
    if isinstance(name, bytes_types):
        name = name.decode("utf-8")

    __import__(module)
    # Look the module up in sys.modules rather than using the return
    # value of __import__.
    return getattr(sys.modules[module], name)
1106
def load_reduce(self):
    """Pop an argument tuple and replace the callable below it with its result.

    The callable stays on the stack while it is called; any exception
    it raises propagates to the caller unchanged.
    """
    stack = self.stack
    args = stack.pop()
    func = stack[-1]
    # No try/except here: the old bare "except:" only printed debugging
    # output to stdout before re-raising.
    stack[-1] = func(*args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001118 dispatch[REDUCE[0]] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001119
def load_pop(self):
    """Discard the top item of the stack."""
    stack = self.stack
    del stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001122 dispatch[POP[0]] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001123
def load_pop_mark(self):
    """Discard the topmost mark and everything above it."""
    start = self.marker()
    stack = self.stack
    del stack[start:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001127 dispatch[POP_MARK[0]] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001128
def load_dup(self):
    """Push a second reference to the top item of the stack."""
    top = self.stack[-1]
    self.append(top)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001131 dispatch[DUP[0]] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001132
def load_get(self):
    """Read a memo key line and push the memo entry stored under it."""
    # Memo keys are strings; the line is decoded as ASCII.
    key = self.readline()[:-1].decode("ascii")
    self.append(self.memo[key])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001135 dispatch[GET[0]] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001136
def load_binget(self):
    """Read a one-byte memo index and push the memo entry stored under it."""
    index = ord(self.read(1))
    # Memo keys are the repr() of the index.
    key = repr(index)
    self.append(self.memo[key])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001140 dispatch[BINGET[0]] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001141
def load_long_binget(self):
    """Read a four-byte memo index and push the memo entry stored under it."""
    index = mloads(b'i' + self.read(4))
    # Memo keys are the repr() of the index.
    key = repr(index)
    self.append(self.memo[key])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001145 dispatch[LONG_BINGET[0]] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001146
def load_put(self):
    """Read a memo key line and store the top stack item under it."""
    # Memo keys are strings; the line is decoded as ASCII.
    key = self.readline()[:-1].decode("ascii")
    self.memo[key] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001149 dispatch[PUT[0]] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001150
def load_binput(self):
    """Read a one-byte memo index and store the top stack item under it."""
    index = ord(self.read(1))
    # Memo keys are the repr() of the index.
    key = repr(index)
    self.memo[key] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001154 dispatch[BINPUT[0]] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001155
def load_long_binput(self):
    """Handle the LONG_BINPUT opcode.

    Reads four bytes, prefixes them with ``b'i'`` and hands the result
    to ``mloads`` (defined elsewhere in this file; presumably
    ``marshal.loads`` -- confirm) to obtain an integer.  The object on
    top of ``self.stack`` is stored in ``self.memo`` under the repr()
    of that integer.  The stack is left unchanged.
    """
    payload = self.read(4)
    index = mloads(b'i' + payload)
    top = self.stack[-1]
    self.memo[repr(index)] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001159 dispatch[LONG_BINPUT[0]] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001160
def load_append(self):
    """Handle the APPEND opcode.

    Pops one value off ``self.stack`` and appends it to the object
    that is then on top of the stack, via that object's ``append``.
    """
    items = self.stack
    new_item = items.pop()
    items[-1].append(new_item)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001166 dispatch[APPEND[0]] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001167
def load_appends(self):
    """Handle the APPENDS opcode.

    ``self.marker()`` gives the stack index of the mark.  Everything
    after the mark is added, in order, to the object sitting just
    below the mark (via its ``extend``); the mark and the items are
    then removed from the stack.
    """
    stack = self.stack
    mark_pos = self.marker()
    target = stack[mark_pos - 1]
    pending = stack[mark_pos + 1:]
    target.extend(pending)
    del stack[mark_pos:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001174 dispatch[APPENDS[0]] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001175
def load_setitem(self):
    """Handle the SETITEM opcode.

    Pops a value, then a key, off ``self.stack`` and stores the pair
    by item assignment in the object that is then on top of the stack.
    """
    items = self.stack
    new_value = items.pop()
    new_key = items.pop()
    items[-1][new_key] = new_value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001182 dispatch[SETITEM[0]] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001183
def load_setitems(self):
    """Handle the SETITEMS opcode.

    ``self.marker()`` gives the stack index of the mark.  The entries
    after the mark are taken as alternating key, value, key, value...
    and stored by item assignment in the object sitting just below the
    mark; the mark and the entries are then removed from the stack.
    """
    stack = self.stack
    mark_pos = self.marker()
    target = stack[mark_pos - 1]
    pos = mark_pos + 1
    end = len(stack)
    # Walk the entries two at a time: stack[pos] is a key and
    # stack[pos + 1] its value.
    while pos < end:
        target[stack[pos]] = stack[pos + 1]
        pos += 2
    del stack[mark_pos:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001192 dispatch[SETITEMS[0]] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001193
def load_build(self):
    """Handle the BUILD opcode.

    Pops a state object off ``self.stack`` and applies it to the
    instance that is then on top of the stack:

    * If the instance has a truthy ``__setstate__`` attribute, it is
      called with the state and nothing else is done.
    * Otherwise a 2-tuple state is split into (dict state, slot state);
      any other state is treated as dict state with no slot state.
      A truthy dict state is merged into ``inst.__dict__``; a truthy
      slot state has each of its items applied with setattr().
    """
    pending = self.stack.pop()
    target = self.stack[-1]

    hook = getattr(target, "__setstate__", None)
    if hook:
        hook(pending)
        return

    if isinstance(pending, tuple) and len(pending) == 2:
        dict_state, slot_state = pending
    else:
        dict_state, slot_state = pending, None

    if dict_state:
        target.__dict__.update(dict_state)
    if slot_state:
        for attr_name, attr_value in slot_state.items():
            setattr(target, attr_name, attr_value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001210 dispatch[BUILD[0]] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001211
def load_mark(self):
    """Handle the MARK opcode: pass ``self.mark`` to ``self.append``."""
    sentinel = self.mark
    self.append(sentinel)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001214 dispatch[MARK[0]] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001215
def load_stop(self):
    """Handle the STOP opcode.

    Pops the top of ``self.stack`` and raises ``_Stop`` (defined
    elsewhere in this file) constructed with that value.
    """
    raise _Stop(self.stack.pop())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001219 dispatch[STOP[0]] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001220
Guido van Rossume467be61997-12-05 19:42:42 +00001221# Helper class for load_inst/load_obj
1222
1223class _EmptyClass:
1224 pass
Guido van Rossuma48061a1995-01-10 00:31:14 +00001225
Tim Peters91149822003-01-31 03:43:58 +00001226# Encode/decode longs in linear time.
1227
1228import binascii as _binascii
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001229
def encode_long(x):
    r"""Encode an int as a two's complement little-endian byte string.

    The result is the shortest such encoding that still carries the
    correct sign bit.  Note that 0 is a special case, returning an
    empty bytes object, to save a byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    """
    if x == 0:
        return b''
    # One byte more than the magnitude strictly needs whenever the bit
    # length is a multiple of 8, so there is always room for a sign bit.
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    if x < 0 and nbytes > 1:
        # For negative powers of 256 boundaries (e.g. -128, -32768) the
        # extra byte is pure sign extension: the top byte is 0xff and
        # the byte below it already has its high bit set.  Drop it.
        if result[-1] == 0xff and (result[-2] & 0x80) != 0:
            result = result[:-1]
    return result
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001295
def decode_long(data):
    r"""Decode an int from a two's complement little-endian byte string.

    An empty input decodes to 0.  Otherwise the high bit of the last
    byte is the sign bit.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    # int.from_bytes handles the empty case (returns 0) and the sign
    # extension itself, replacing the manual hex round-trip.
    return int.from_bytes(data, byteorder='little', signed=True)
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001323
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001324# Shorthands
1325
def dump(obj, file, protocol=None):
    """Pickle *obj* to *file*.

    Shorthand for ``Pickler(file, protocol).dump(obj)``; returns None.
    """
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001328
def dumps(obj, protocol=None):
    """Pickle *obj* into an in-memory buffer and return its contents.

    A ``Pickler`` writes into a fresh ``io.BytesIO``; the buffer's
    value is asserted to be an instance of ``bytes_types`` (defined
    elsewhere in this file) before being returned.
    """
    buffer = io.BytesIO()
    pickler = Pickler(buffer, protocol)
    pickler.dump(obj)
    payload = buffer.getvalue()
    assert isinstance(payload, bytes_types)
    return payload
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001335
def load(file):
    """Unpickle one object from *file*.

    Shorthand for ``Unpickler(file).load()``; returns its result.
    """
    unpickler = Unpickler(file)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001338
def loads(s):
    """Unpickle one object from the in-memory pickle *s*.

    *s* is wrapped in an ``io.BytesIO`` and read by an ``Unpickler``.

    Raises TypeError if *s* is a str rather than a bytes-like object.
    """
    if not isinstance(s, str):
        return Unpickler(io.BytesIO(s)).load()
    raise TypeError("Can't load pickle from unicode string")
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001344
1345# Doctest
1346
def _test():
    """Run ``doctest.testmod()`` with default arguments; return its result."""
    from doctest import testmod
    return testmod()


# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()