blob: 201dc9914bc72ac47d3e31269d13850c50433e53 [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Create portable serialized representations of Python objects.
Guido van Rossuma48061a1995-01-10 00:31:14 +00002
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +00003See module copyreg for a mechanism for registering custom picklers.
Tim Peters22a449a2003-01-27 20:16:36 +00004See module pickletools source for extensive comments.
Guido van Rossuma48061a1995-01-10 00:31:14 +00005
Guido van Rossume467be61997-12-05 19:42:42 +00006Classes:
Guido van Rossuma48061a1995-01-10 00:31:14 +00007
Guido van Rossume467be61997-12-05 19:42:42 +00008 Pickler
9 Unpickler
Guido van Rossuma48061a1995-01-10 00:31:14 +000010
Guido van Rossume467be61997-12-05 19:42:42 +000011Functions:
Guido van Rossuma48061a1995-01-10 00:31:14 +000012
Guido van Rossume467be61997-12-05 19:42:42 +000013 dump(object, file)
14 dumps(object) -> string
15 load(file) -> object
16 loads(string) -> object
Guido van Rossuma48061a1995-01-10 00:31:14 +000017
Guido van Rossume467be61997-12-05 19:42:42 +000018Misc variables:
Guido van Rossuma48061a1995-01-10 00:31:14 +000019
Fred Drakefe82acc1998-02-13 03:24:48 +000020 __version__
Guido van Rossume467be61997-12-05 19:42:42 +000021 format_version
22 compatible_formats
Guido van Rossuma48061a1995-01-10 00:31:14 +000023
Guido van Rossuma48061a1995-01-10 00:31:14 +000024"""
25
Guido van Rossum743d17e1998-09-15 20:25:57 +000026__version__ = "$Revision$" # Code version
Guido van Rossuma48061a1995-01-10 00:31:14 +000027
Guido van Rossum13257902007-06-07 23:15:56 +000028from types import FunctionType, BuiltinFunctionType
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +000029from copyreg import dispatch_table
30from copyreg import _extension_registry, _inverted_registry, _extension_cache
Guido van Rossumd3703791998-10-22 20:15:36 +000031import marshal
32import sys
33import struct
Skip Montanaro23bafc62001-02-18 03:10:09 +000034import re
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000035import io
Walter Dörwald42748a82007-06-12 16:40:17 +000036import codecs
Guido van Rossuma48061a1995-01-10 00:31:14 +000037
Skip Montanaro352674d2001-02-07 23:14:30 +000038__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
39 "Unpickler", "dump", "dumps", "load", "loads"]
40
Guido van Rossum98297ee2007-11-06 21:34:58 +000041# Shortcut for use in isinstance testing
Alexandre Vassalotti8cb02b62008-05-03 01:42:49 +000042bytes_types = (bytes, bytearray)
Guido van Rossum98297ee2007-11-06 21:34:58 +000043
Tim Petersc0c12b52003-01-29 00:56:17 +000044# These are purely informational; no code uses these.
Guido van Rossumf4169812008-03-17 22:56:06 +000045format_version = "3.0" # File format version we write
Guido van Rossumf29d3d62003-01-27 22:47:53 +000046compatible_formats = ["1.0", # Original protocol 0
Guido van Rossumbc64e222003-01-28 16:34:19 +000047 "1.1", # Protocol 0 with INST added
Guido van Rossumf29d3d62003-01-27 22:47:53 +000048 "1.2", # Original protocol 1
49 "1.3", # Protocol 1 with BINFLOAT added
50 "2.0", # Protocol 2
Guido van Rossumf4169812008-03-17 22:56:06 +000051 "3.0", # Protocol 3
Guido van Rossumf29d3d62003-01-27 22:47:53 +000052 ] # Old format versions we can read
Guido van Rossumb72cf2d1997-04-09 17:32:51 +000053
Guido van Rossum99603b02007-07-20 00:22:32 +000054# This is the highest protocol number we know how to read.
Guido van Rossumf4169812008-03-17 22:56:06 +000055HIGHEST_PROTOCOL = 3
Tim Peters8587b3c2003-02-13 15:44:41 +000056
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000057# The protocol we write by default. May be less than HIGHEST_PROTOCOL.
Guido van Rossumf4169812008-03-17 22:56:06 +000058# We intentionally write a protocol that Python 2.x cannot read;
59# there are too many issues with that.
60DEFAULT_PROTOCOL = 3
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000061
Guido van Rossume0b90422003-01-28 03:17:21 +000062# Why use struct.pack() for pickling but marshal.loads() for
Tim Petersc0c12b52003-01-29 00:56:17 +000063# unpickling? struct.pack() is 40% faster than marshal.dumps(), but
Guido van Rossume0b90422003-01-28 03:17:21 +000064# marshal.loads() is twice as fast as struct.unpack()!
Guido van Rossumb72cf2d1997-04-09 17:32:51 +000065mloads = marshal.loads
Guido van Rossum0c891ce1995-03-14 15:09:05 +000066
Raymond Hettingeraef22fb2002-05-29 16:18:42 +000067class PickleError(Exception):
Neal Norwitzefbb67b2002-05-30 12:12:04 +000068 """A common base class for the other pickling exceptions."""
Raymond Hettingeraef22fb2002-05-29 16:18:42 +000069 pass
70
71class PicklingError(PickleError):
72 """This exception is raised when an unpicklable object is passed to the
73 dump() method.
74
75 """
76 pass
77
78class UnpicklingError(PickleError):
79 """This exception is raised when there is a problem unpickling an object,
80 such as a security violation.
81
82 Note that other exceptions may also be raised during unpickling, including
83 (but not necessarily limited to) AttributeError, EOFError, ImportError,
84 and IndexError.
85
86 """
87 pass
Guido van Rossum7849da81995-03-09 14:08:35 +000088
Tim Petersc0c12b52003-01-29 00:56:17 +000089# An instance of _Stop is raised by Unpickler.load_stop() in response to
90# the STOP opcode, passing the object that is the result of unpickling.
Guido van Rossumff871742000-12-13 18:11:56 +000091class _Stop(Exception):
92 def __init__(self, value):
93 self.value = value
94
Guido van Rossum533dbcf2003-01-28 17:55:05 +000095# Jython has PyStringMap; it's a dict subclass with string keys
Jeremy Hylton2b9d0291998-05-27 22:38:22 +000096try:
97 from org.python.core import PyStringMap
98except ImportError:
99 PyStringMap = None
100
Tim Peters22a449a2003-01-27 20:16:36 +0000101# Pickle opcodes. See pickletools.py for extensive docs. The listing
102# here is in kind-of alphabetical order of 1-character pickle code.
103# pickletools groups them by purpose.
Guido van Rossumdbb718f2001-09-21 19:22:34 +0000104
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000105MARK = b'(' # push special markobject on stack
106STOP = b'.' # every pickle ends with STOP
107POP = b'0' # discard topmost stack item
108POP_MARK = b'1' # discard stack top through topmost markobject
109DUP = b'2' # duplicate top stack item
110FLOAT = b'F' # push float object; decimal string argument
111INT = b'I' # push integer or bool; decimal string argument
112BININT = b'J' # push four-byte signed int
113BININT1 = b'K' # push 1-byte unsigned int
114LONG = b'L' # push long; decimal string argument
115BININT2 = b'M' # push 2-byte unsigned int
116NONE = b'N' # push None
117PERSID = b'P' # push persistent object; id is taken from string arg
118BINPERSID = b'Q' # " " " ; " " " " stack
119REDUCE = b'R' # apply callable to argtuple, both on stack
120STRING = b'S' # push string; NL-terminated string argument
121BINSTRING = b'T' # push string; counted binary string argument
122SHORT_BINSTRING= b'U' # " " ; " " " " < 256 bytes
123UNICODE = b'V' # push Unicode string; raw-unicode-escaped'd argument
124BINUNICODE = b'X' # " " " ; counted UTF-8 string argument
125APPEND = b'a' # append stack top to list below it
126BUILD = b'b' # call __setstate__ or __dict__.update()
127GLOBAL = b'c' # push self.find_class(modname, name); 2 string args
128DICT = b'd' # build a dict from stack items
129EMPTY_DICT = b'}' # push empty dict
130APPENDS = b'e' # extend list on stack by topmost stack slice
131GET = b'g' # push item from memo on stack; index is string arg
132BINGET = b'h' # " " " " " " ; " " 1-byte arg
133INST = b'i' # build & push class instance
134LONG_BINGET = b'j' # push item from memo on stack; index is 4-byte arg
135LIST = b'l' # build list from topmost stack items
136EMPTY_LIST = b']' # push empty list
137OBJ = b'o' # build & push class instance
138PUT = b'p' # store stack top in memo; index is string arg
139BINPUT = b'q' # " " " " " ; " " 1-byte arg
140LONG_BINPUT = b'r' # " " " " " ; " " 4-byte arg
141SETITEM = b's' # add key+value pair to dict
142TUPLE = b't' # build tuple from topmost stack items
143EMPTY_TUPLE = b')' # push empty tuple
144SETITEMS = b'u' # modify dict by adding topmost key+value pairs
145BINFLOAT = b'G' # push float; arg is 8-byte float encoding
Tim Peters22a449a2003-01-27 20:16:36 +0000146
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000147TRUE = b'I01\n' # not an opcode; see INT docs in pickletools.py
148FALSE = b'I00\n' # not an opcode; see INT docs in pickletools.py
Guido van Rossum77f6a652002-04-03 22:41:51 +0000149
Guido van Rossum586c9e82003-01-29 06:16:12 +0000150# Protocol 2
Guido van Rossum5a2d8f52003-01-27 21:44:25 +0000151
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000152PROTO = b'\x80' # identify pickle protocol
153NEWOBJ = b'\x81' # build object by applying cls.__new__ to argtuple
154EXT1 = b'\x82' # push object from extension registry; 1-byte index
155EXT2 = b'\x83' # ditto, but 2-byte index
156EXT4 = b'\x84' # ditto, but 4-byte index
157TUPLE1 = b'\x85' # build 1-tuple from stack top
158TUPLE2 = b'\x86' # build 2-tuple from two topmost stack items
159TUPLE3 = b'\x87' # build 3-tuple from three topmost stack items
160NEWTRUE = b'\x88' # push True
161NEWFALSE = b'\x89' # push False
162LONG1 = b'\x8a' # push long from < 256 bytes
163LONG4 = b'\x8b' # push really big long
Guido van Rossum5a2d8f52003-01-27 21:44:25 +0000164
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000165_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
166
Guido van Rossumf4169812008-03-17 22:56:06 +0000167# Protocol 3 (Python 3.x)
168
169BINBYTES = b'B' # push bytes; counted binary string argument
170SHORT_BINBYTES = b'C' # " " ; " " " " < 256 bytes
Guido van Rossuma48061a1995-01-10 00:31:14 +0000171
Skip Montanaro23bafc62001-02-18 03:10:09 +0000172__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
173
Guido van Rossum1be31752003-01-28 15:19:53 +0000174
175# Pickling machinery
176
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000177class _Pickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +0000178
Raymond Hettinger3489cad2004-12-05 05:20:42 +0000179 def __init__(self, file, protocol=None):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000180 """This takes a binary file for writing a pickle data stream.
181
Guido van Rossumcf117b02003-02-09 17:19:41 +0000182 The optional protocol argument tells the pickler to use the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000183 given protocol; supported protocols are 0, 1, 2, 3. The default
184 protocol is 3; a backward-incompatible protocol designed for
185 Python 3.0.
Guido van Rossumf29d3d62003-01-27 22:47:53 +0000186
Guido van Rossum7eff63a2003-01-31 19:42:31 +0000187 Specifying a negative protocol version selects the highest
Tim Peters5bd2a792003-02-01 16:45:06 +0000188 protocol version supported. The higher the protocol used, the
189 more recent the version of Python needed to read the pickle
190 produced.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000191
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000192 The file argument must have a write() method that accepts a single
193 bytes argument. It can thus be a file object opened for binary
194 writing, a io.BytesIO instance, or any other custom object that
195 meets this interface.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000196 """
Guido van Rossumcf117b02003-02-09 17:19:41 +0000197 if protocol is None:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000198 protocol = DEFAULT_PROTOCOL
Guido van Rossumcf117b02003-02-09 17:19:41 +0000199 if protocol < 0:
Tim Peters8587b3c2003-02-13 15:44:41 +0000200 protocol = HIGHEST_PROTOCOL
201 elif not 0 <= protocol <= HIGHEST_PROTOCOL:
202 raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000203 try:
204 self.write = file.write
205 except AttributeError:
206 raise TypeError("file must have a 'write' attribute")
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000207 self.memo = {}
Guido van Rossumcf117b02003-02-09 17:19:41 +0000208 self.proto = int(protocol)
209 self.bin = protocol >= 1
Guido van Rossum5d9113d2003-01-29 17:58:45 +0000210 self.fast = 0
Guido van Rossuma48061a1995-01-10 00:31:14 +0000211
Fred Drake7f781c92002-05-01 20:33:53 +0000212 def clear_memo(self):
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000213 """Clears the pickler's "memo".
214
215 The memo is the data structure that remembers which objects the
Tim Petersb377f8a2003-01-28 00:23:36 +0000216 pickler has already seen, so that shared or recursive objects are
217 pickled by reference and not by value. This method is useful when
218 re-using picklers.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000219
220 """
Fred Drake7f781c92002-05-01 20:33:53 +0000221 self.memo.clear()
222
Guido van Rossum3a41c612003-01-28 15:10:22 +0000223 def dump(self, obj):
Tim Peters5bd2a792003-02-01 16:45:06 +0000224 """Write a pickled representation of obj to the open file."""
Alexandre Vassalotti3cfcab92008-12-27 09:30:39 +0000225 # Check whether Pickler was initialized correctly. This is
226 # only needed to mimic the behavior of _pickle.Pickler.dump().
227 if not hasattr(self, "write"):
228 raise PicklingError("Pickler.__init__() was not called by "
229 "%s.__init__()" % (self.__class__.__name__,))
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000230 if self.proto >= 2:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000231 self.write(PROTO + bytes([self.proto]))
Guido van Rossum3a41c612003-01-28 15:10:22 +0000232 self.save(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000233 self.write(STOP)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000234
Jeremy Hylton3422c992003-01-24 19:29:52 +0000235 def memoize(self, obj):
236 """Store an object in the memo."""
237
Tim Peterse46b73f2003-01-27 21:22:10 +0000238 # The Pickler memo is a dictionary mapping object ids to 2-tuples
239 # that contain the Unpickler memo key and the object being memoized.
240 # The memo key is written to the pickle and will become
Jeremy Hylton3422c992003-01-24 19:29:52 +0000241 # the key in the Unpickler's memo. The object is stored in the
Tim Peterse46b73f2003-01-27 21:22:10 +0000242 # Pickler memo so that transient objects are kept alive during
243 # pickling.
Jeremy Hylton3422c992003-01-24 19:29:52 +0000244
Tim Peterse46b73f2003-01-27 21:22:10 +0000245 # The use of the Unpickler memo length as the memo key is just a
246 # convention. The only requirement is that the memo values be unique.
247 # But there appears no advantage to any other scheme, and this
Tim Peterscbd0a322003-01-28 00:24:43 +0000248 # scheme allows the Unpickler memo to be implemented as a plain (but
Tim Peterse46b73f2003-01-27 21:22:10 +0000249 # growable) array, indexed by memo key.
Guido van Rossum5d9113d2003-01-29 17:58:45 +0000250 if self.fast:
251 return
Guido van Rossum9b40e802003-01-30 06:37:41 +0000252 assert id(obj) not in self.memo
Jeremy Hylton3422c992003-01-24 19:29:52 +0000253 memo_len = len(self.memo)
254 self.write(self.put(memo_len))
Tim Peters518df0d2003-01-28 01:00:38 +0000255 self.memo[id(obj)] = memo_len, obj
Jeremy Hylton3422c992003-01-24 19:29:52 +0000256
Tim Petersbb38e302003-01-27 21:25:41 +0000257 # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
Guido van Rossum5c938d02003-01-28 03:03:08 +0000258 def put(self, i, pack=struct.pack):
Tim Petersc32d8242001-04-10 02:48:53 +0000259 if self.bin:
Tim Petersc32d8242001-04-10 02:48:53 +0000260 if i < 256:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000261 return BINPUT + bytes([i])
Guido van Rossum5c938d02003-01-28 03:03:08 +0000262 else:
263 return LONG_BINPUT + pack("<i", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000264
Guido van Rossum39478e82007-08-27 17:23:59 +0000265 return PUT + repr(i).encode("ascii") + b'\n'
Guido van Rossuma48061a1995-01-10 00:31:14 +0000266
Tim Petersbb38e302003-01-27 21:25:41 +0000267 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
Guido van Rossum5c938d02003-01-28 03:03:08 +0000268 def get(self, i, pack=struct.pack):
Tim Petersc32d8242001-04-10 02:48:53 +0000269 if self.bin:
Tim Petersc32d8242001-04-10 02:48:53 +0000270 if i < 256:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000271 return BINGET + bytes([i])
Guido van Rossum5c938d02003-01-28 03:03:08 +0000272 else:
273 return LONG_BINGET + pack("<i", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000274
Guido van Rossum39478e82007-08-27 17:23:59 +0000275 return GET + repr(i).encode("ascii") + b'\n'
Tim Peters2344fae2001-01-15 00:50:52 +0000276
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 def save(self, obj, save_persistent_id=True):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000278 # Check for persistent id (defined by a subclass)
Guido van Rossum3a41c612003-01-28 15:10:22 +0000279 pid = self.persistent_id(obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000280 if pid is not None and save_persistent_id:
Jeremy Hylton5e0f4e72002-11-13 22:01:27 +0000281 self.save_pers(pid)
282 return
Guido van Rossuma48061a1995-01-10 00:31:14 +0000283
Guido van Rossumbc64e222003-01-28 16:34:19 +0000284 # Check the memo
285 x = self.memo.get(id(obj))
286 if x:
287 self.write(self.get(x[0]))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000288 return
289
Guido van Rossumbc64e222003-01-28 16:34:19 +0000290 # Check the type dispatch table
Guido van Rossum3a41c612003-01-28 15:10:22 +0000291 t = type(obj)
Guido van Rossumbc64e222003-01-28 16:34:19 +0000292 f = self.dispatch.get(t)
293 if f:
294 f(self, obj) # Call unbound method with explicit self
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000295 return
296
Guido van Rossumbc64e222003-01-28 16:34:19 +0000297 # Check for a class with a custom metaclass; treat as regular class
Tim Petersb32a8312003-01-28 00:48:09 +0000298 try:
Guido van Rossum13257902007-06-07 23:15:56 +0000299 issc = issubclass(t, type)
Guido van Rossumbc64e222003-01-28 16:34:19 +0000300 except TypeError: # t is not a class (old Boost; see SF #502085)
Tim Petersb32a8312003-01-28 00:48:09 +0000301 issc = 0
302 if issc:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000303 self.save_global(obj)
Tim Petersb32a8312003-01-28 00:48:09 +0000304 return
305
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +0000306 # Check copyreg.dispatch_table
Guido van Rossumbc64e222003-01-28 16:34:19 +0000307 reduce = dispatch_table.get(t)
Guido van Rossumc53f0092003-02-18 22:05:12 +0000308 if reduce:
309 rv = reduce(obj)
310 else:
311 # Check for a __reduce_ex__ method, fall back to __reduce__
312 reduce = getattr(obj, "__reduce_ex__", None)
313 if reduce:
314 rv = reduce(self.proto)
315 else:
316 reduce = getattr(obj, "__reduce__", None)
317 if reduce:
318 rv = reduce()
319 else:
320 raise PicklingError("Can't pickle %r object: %r" %
321 (t.__name__, obj))
Tim Petersb32a8312003-01-28 00:48:09 +0000322
Guido van Rossumbc64e222003-01-28 16:34:19 +0000323 # Check for string returned by reduce(), meaning "save as global"
Guido van Rossum3172c5d2007-10-16 18:12:55 +0000324 if isinstance(rv, str):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000325 self.save_global(obj, rv)
Tim Petersb32a8312003-01-28 00:48:09 +0000326 return
327
Guido van Rossumbc64e222003-01-28 16:34:19 +0000328 # Assert that reduce() returned a tuple
Guido van Rossum13257902007-06-07 23:15:56 +0000329 if not isinstance(rv, tuple):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000330 raise PicklingError("%s must return string or tuple" % reduce)
Tim Petersb32a8312003-01-28 00:48:09 +0000331
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000332 # Assert that it returned an appropriately sized tuple
Guido van Rossumbc64e222003-01-28 16:34:19 +0000333 l = len(rv)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000334 if not (2 <= l <= 5):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000335 raise PicklingError("Tuple returned by %s must have "
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000336 "two to five elements" % reduce)
Tim Petersb32a8312003-01-28 00:48:09 +0000337
Guido van Rossumbc64e222003-01-28 16:34:19 +0000338 # Save the reduce() output and finally memoize the object
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000339 self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000340
Guido van Rossum3a41c612003-01-28 15:10:22 +0000341 def persistent_id(self, obj):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000342 # This exists so a subclass can override it
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000343 return None
344
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000345 def save_pers(self, pid):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000346 # Save a persistent id reference
Tim Petersbd1cdb92003-01-28 01:03:10 +0000347 if self.bin:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000348 self.save(pid, save_persistent_id=False)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000349 self.write(BINPERSID)
Tim Petersbd1cdb92003-01-28 01:03:10 +0000350 else:
Guido van Rossum39478e82007-08-27 17:23:59 +0000351 self.write(PERSID + str(pid).encode("ascii") + b'\n')
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000352
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000353 def save_reduce(self, func, args, state=None,
354 listitems=None, dictitems=None, obj=None):
Jeremy Hyltone3a565e2003-06-29 16:59:59 +0000355 # This API is called by some subclasses
Guido van Rossumbc64e222003-01-28 16:34:19 +0000356
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000357 # Assert that args is a tuple
Guido van Rossum13257902007-06-07 23:15:56 +0000358 if not isinstance(args, tuple):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000359 raise PicklingError("args from save_reduce() should be a tuple")
Guido van Rossumbc64e222003-01-28 16:34:19 +0000360
361 # Assert that func is callable
Guido van Rossumd59da4b2007-05-22 18:11:13 +0000362 if not hasattr(func, '__call__'):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000363 raise PicklingError("func from save_reduce() should be callable")
Guido van Rossumbc64e222003-01-28 16:34:19 +0000364
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000365 save = self.save
Guido van Rossumbc64e222003-01-28 16:34:19 +0000366 write = self.write
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000367
Guido van Rossumd053b4b2003-01-31 16:51:45 +0000368 # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
369 if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
370 # A __reduce__ implementation can direct protocol 2 to
371 # use the more efficient NEWOBJ opcode, while still
372 # allowing protocol 0 and 1 to work normally. For this to
373 # work, the function returned by __reduce__ should be
374 # called __newobj__, and its first argument should be a
375 # new-style class. The implementation for __newobj__
376 # should be as follows, although pickle has no way to
377 # verify this:
378 #
379 # def __newobj__(cls, *args):
380 # return cls.__new__(cls, *args)
381 #
382 # Protocols 0 and 1 will pickle a reference to __newobj__,
383 # while protocol 2 (and above) will pickle a reference to
384 # cls, the remaining args tuple, and the NEWOBJ code,
385 # which calls cls.__new__(cls, *args) at unpickling time
386 # (see load_newobj below). If __reduce__ returns a
387 # three-tuple, the state from the third tuple item will be
388 # pickled regardless of the protocol, calling __setstate__
389 # at unpickling time (see load_build below).
390 #
391 # Note that no standard __newobj__ implementation exists;
392 # you have to provide your own. This is to enforce
393 # compatibility with Python 2.2 (pickles written using
394 # protocol 0 or 1 in Python 2.3 should be unpicklable by
395 # Python 2.2).
396 cls = args[0]
397 if not hasattr(cls, "__new__"):
398 raise PicklingError(
399 "args[0] from __newobj__ args has no __new__")
Guido van Rossumf7f45172003-01-31 17:17:49 +0000400 if obj is not None and cls is not obj.__class__:
401 raise PicklingError(
402 "args[0] from __newobj__ args has the wrong class")
Guido van Rossumd053b4b2003-01-31 16:51:45 +0000403 args = args[1:]
404 save(cls)
405 save(args)
406 write(NEWOBJ)
407 else:
408 save(func)
409 save(args)
410 write(REDUCE)
Tim Peters2344fae2001-01-15 00:50:52 +0000411
Guido van Rossumf7f45172003-01-31 17:17:49 +0000412 if obj is not None:
413 self.memoize(obj)
414
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000415 # More new special cases (that work with older protocols as
416 # well): when __reduce__ returns a tuple with 4 or 5 items,
417 # the 4th and 5th item should be iterators that provide list
418 # items and dict items (as (key, value) tuples), or None.
419
420 if listitems is not None:
421 self._batch_appends(listitems)
422
423 if dictitems is not None:
424 self._batch_setitems(dictitems)
425
Tim Petersc32d8242001-04-10 02:48:53 +0000426 if state is not None:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000427 save(state)
428 write(BUILD)
429
Guido van Rossumbc64e222003-01-28 16:34:19 +0000430 # Methods below this point are dispatched through the dispatch table
431
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000432 dispatch = {}
433
Guido van Rossum3a41c612003-01-28 15:10:22 +0000434 def save_none(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000435 self.write(NONE)
Guido van Rossum13257902007-06-07 23:15:56 +0000436 dispatch[type(None)] = save_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000437
Guido van Rossum3a41c612003-01-28 15:10:22 +0000438 def save_bool(self, obj):
Guido van Rossum7d97d312003-01-28 04:25:27 +0000439 if self.proto >= 2:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000440 self.write(obj and NEWTRUE or NEWFALSE)
Guido van Rossum7d97d312003-01-28 04:25:27 +0000441 else:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000442 self.write(obj and TRUE or FALSE)
Guido van Rossum77f6a652002-04-03 22:41:51 +0000443 dispatch[bool] = save_bool
444
Guido van Rossum3a41c612003-01-28 15:10:22 +0000445 def save_long(self, obj, pack=struct.pack):
Guido van Rossumddefaf32007-01-14 03:31:43 +0000446 if self.bin:
447 # If the int is small enough to fit in a signed 4-byte 2's-comp
448 # format, we can store it more efficiently than the general
449 # case.
450 # First one- and two-byte unsigned ints:
451 if obj >= 0:
452 if obj <= 0xff:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000453 self.write(BININT1 + bytes([obj]))
Guido van Rossumddefaf32007-01-14 03:31:43 +0000454 return
455 if obj <= 0xffff:
Guido van Rossumcfe5f202007-05-08 21:26:54 +0000456 self.write(BININT2 + bytes([obj&0xff, obj>>8]))
Guido van Rossumddefaf32007-01-14 03:31:43 +0000457 return
458 # Next check for 4-byte signed ints:
459 high_bits = obj >> 31 # note that Python shift sign-extends
460 if high_bits == 0 or high_bits == -1:
461 # All high bits are copies of bit 2**31, so the value
462 # fits in a 4-byte signed int.
463 self.write(BININT + pack("<i", obj))
464 return
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000465 if self.proto >= 2:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000466 encoded = encode_long(obj)
467 n = len(encoded)
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000468 if n < 256:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000469 self.write(LONG1 + bytes([n]) + encoded)
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000470 else:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000471 self.write(LONG4 + pack("<i", n) + encoded)
Tim Petersee1a53c2003-02-02 02:57:53 +0000472 return
Guido van Rossum39478e82007-08-27 17:23:59 +0000473 self.write(LONG + repr(obj).encode("ascii") + b'\n')
Guido van Rossum13257902007-06-07 23:15:56 +0000474 dispatch[int] = save_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000475
Guido van Rossum3a41c612003-01-28 15:10:22 +0000476 def save_float(self, obj, pack=struct.pack):
Guido van Rossumd3703791998-10-22 20:15:36 +0000477 if self.bin:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000478 self.write(BINFLOAT + pack('>d', obj))
Guido van Rossumd3703791998-10-22 20:15:36 +0000479 else:
Guido van Rossum39478e82007-08-27 17:23:59 +0000480 self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
Guido van Rossum13257902007-06-07 23:15:56 +0000481 dispatch[float] = save_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000482
Guido van Rossumf4169812008-03-17 22:56:06 +0000483 def save_bytes(self, obj, pack=struct.pack):
484 if self.proto < 3:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000485 self.save_reduce(bytes, (list(obj),), obj=obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000486 return
487 n = len(obj)
488 if n < 256:
489 self.write(SHORT_BINBYTES + bytes([n]) + bytes(obj))
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000490 else:
Guido van Rossumf4169812008-03-17 22:56:06 +0000491 self.write(BINBYTES + pack("<i", n) + bytes(obj))
Guido van Rossum3a41c612003-01-28 15:10:22 +0000492 self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000493 dispatch[bytes] = save_bytes
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000494
Guido van Rossumf4169812008-03-17 22:56:06 +0000495 def save_str(self, obj, pack=struct.pack):
Tim Petersc32d8242001-04-10 02:48:53 +0000496 if self.bin:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000497 encoded = obj.encode('utf-8')
498 n = len(encoded)
499 self.write(BINUNICODE + pack("<i", n) + encoded)
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000500 else:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000501 obj = obj.replace("\\", "\\u005c")
502 obj = obj.replace("\n", "\\u000a")
Guido van Rossum1255ed62007-05-04 20:30:19 +0000503 self.write(UNICODE + bytes(obj.encode('raw-unicode-escape')) +
504 b'\n')
Guido van Rossum3a41c612003-01-28 15:10:22 +0000505 self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000506 dispatch[str] = save_str
Tim Peters658cba62001-02-09 20:06:00 +0000507
Guido van Rossum3a41c612003-01-28 15:10:22 +0000508 def save_tuple(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000509 write = self.write
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000510 proto = self.proto
511
Guido van Rossum3a41c612003-01-28 15:10:22 +0000512 n = len(obj)
Tim Peters1d63c9f2003-02-02 20:29:39 +0000513 if n == 0:
514 if proto:
515 write(EMPTY_TUPLE)
516 else:
517 write(MARK + TUPLE)
Tim Petersd97da802003-01-28 05:48:29 +0000518 return
519
520 save = self.save
521 memo = self.memo
522 if n <= 3 and proto >= 2:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000523 for element in obj:
Tim Petersd97da802003-01-28 05:48:29 +0000524 save(element)
525 # Subtle. Same as in the big comment below.
Guido van Rossum3a41c612003-01-28 15:10:22 +0000526 if id(obj) in memo:
527 get = self.get(memo[id(obj)][0])
Tim Petersd97da802003-01-28 05:48:29 +0000528 write(POP * n + get)
529 else:
530 write(_tuplesize2code[n])
Guido van Rossum3a41c612003-01-28 15:10:22 +0000531 self.memoize(obj)
Tim Petersd97da802003-01-28 05:48:29 +0000532 return
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000533
Tim Peters1d63c9f2003-02-02 20:29:39 +0000534 # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
Tim Petersff57bff2003-01-28 05:34:53 +0000535 # has more than 3 elements.
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000536 write(MARK)
Guido van Rossum3a41c612003-01-28 15:10:22 +0000537 for element in obj:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000538 save(element)
539
Tim Peters1d63c9f2003-02-02 20:29:39 +0000540 if id(obj) in memo:
Tim Petersf558da02003-01-28 02:09:55 +0000541 # Subtle. d was not in memo when we entered save_tuple(), so
542 # the process of saving the tuple's elements must have saved
543 # the tuple itself: the tuple is recursive. The proper action
544 # now is to throw away everything we put on the stack, and
545 # simply GET the tuple (it's already constructed). This check
546 # could have been done in the "for element" loop instead, but
547 # recursive tuples are a rare thing.
Guido van Rossum3a41c612003-01-28 15:10:22 +0000548 get = self.get(memo[id(obj)][0])
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000549 if proto:
Tim Petersf558da02003-01-28 02:09:55 +0000550 write(POP_MARK + get)
551 else: # proto 0 -- POP_MARK not available
Tim Petersd97da802003-01-28 05:48:29 +0000552 write(POP * (n+1) + get)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000553 return
554
Tim Peters1d63c9f2003-02-02 20:29:39 +0000555 # No recursion.
Tim Peters518df0d2003-01-28 01:00:38 +0000556 self.write(TUPLE)
Tim Peters1d63c9f2003-02-02 20:29:39 +0000557 self.memoize(obj)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000558
Guido van Rossum13257902007-06-07 23:15:56 +0000559 dispatch[tuple] = save_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000560
Guido van Rossum3a41c612003-01-28 15:10:22 +0000561 def save_list(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000562 write = self.write
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000563
Tim Petersc32d8242001-04-10 02:48:53 +0000564 if self.bin:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000565 write(EMPTY_LIST)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000566 else: # proto 0 -- can't use EMPTY_LIST
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000567 write(MARK + LIST)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000568
569 self.memoize(obj)
Alexandre Vassalottic7db1d62008-05-14 21:57:18 +0000570 self._batch_appends(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000571
Guido van Rossum13257902007-06-07 23:15:56 +0000572 dispatch[list] = save_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000573
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000574 _BATCHSIZE = 1000
575
576 def _batch_appends(self, items):
577 # Helper to batch up APPENDS sequences
578 save = self.save
579 write = self.write
580
581 if not self.bin:
582 for x in items:
583 save(x)
584 write(APPEND)
585 return
586
Alexandre Vassalottic7db1d62008-05-14 21:57:18 +0000587 items = iter(items)
Guido van Rossum805365e2007-05-07 22:24:25 +0000588 r = range(self._BATCHSIZE)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000589 while items is not None:
590 tmp = []
591 for i in r:
592 try:
Georg Brandla18af4e2007-04-21 15:47:16 +0000593 x = next(items)
Guido van Rossum5aac4e62003-02-06 22:57:00 +0000594 tmp.append(x)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000595 except StopIteration:
596 items = None
597 break
598 n = len(tmp)
599 if n > 1:
600 write(MARK)
601 for x in tmp:
602 save(x)
603 write(APPENDS)
604 elif n:
605 save(tmp[0])
606 write(APPEND)
607 # else tmp is empty, and we're done
608
Guido van Rossum3a41c612003-01-28 15:10:22 +0000609 def save_dict(self, obj):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000610 write = self.write
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000611
Tim Petersc32d8242001-04-10 02:48:53 +0000612 if self.bin:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000613 write(EMPTY_DICT)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000614 else: # proto 0 -- can't use EMPTY_DICT
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000615 write(MARK + DICT)
616
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000617 self.memoize(obj)
Alexandre Vassalottic7db1d62008-05-14 21:57:18 +0000618 self._batch_setitems(obj.items())
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000619
Guido van Rossum13257902007-06-07 23:15:56 +0000620 dispatch[dict] = save_dict
621 if PyStringMap is not None:
Jeremy Hylton2b9d0291998-05-27 22:38:22 +0000622 dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000623
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000624 def _batch_setitems(self, items):
625 # Helper to batch up SETITEMS sequences; proto >= 1 only
626 save = self.save
627 write = self.write
628
629 if not self.bin:
630 for k, v in items:
631 save(k)
632 save(v)
633 write(SETITEM)
634 return
635
Alexandre Vassalottic7db1d62008-05-14 21:57:18 +0000636 items = iter(items)
Guido van Rossum805365e2007-05-07 22:24:25 +0000637 r = range(self._BATCHSIZE)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000638 while items is not None:
639 tmp = []
640 for i in r:
641 try:
Georg Brandla18af4e2007-04-21 15:47:16 +0000642 tmp.append(next(items))
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000643 except StopIteration:
644 items = None
645 break
646 n = len(tmp)
647 if n > 1:
648 write(MARK)
649 for k, v in tmp:
650 save(k)
651 save(v)
652 write(SETITEMS)
653 elif n:
654 k, v = tmp[0]
655 save(k)
656 save(v)
657 write(SETITEM)
658 # else tmp is empty, and we're done
659
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000660 def save_global(self, obj, name=None, pack=struct.pack):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000661 write = self.write
662 memo = self.memo
663
Tim Petersc32d8242001-04-10 02:48:53 +0000664 if name is None:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000665 name = obj.__name__
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000666
Jeremy Hylton4f0dcc92003-01-31 18:33:18 +0000667 module = getattr(obj, "__module__", None)
668 if module is None:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000669 module = whichmodule(obj, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000670
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000671 try:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000672 __import__(module, level=0)
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000673 mod = sys.modules[module]
674 klass = getattr(mod, name)
675 except (ImportError, KeyError, AttributeError):
676 raise PicklingError(
677 "Can't pickle %r: it's not found as %s.%s" %
Guido van Rossum3a41c612003-01-28 15:10:22 +0000678 (obj, module, name))
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000679 else:
Guido van Rossum3a41c612003-01-28 15:10:22 +0000680 if klass is not obj:
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000681 raise PicklingError(
682 "Can't pickle %r: it's not the same object as %s.%s" %
Guido van Rossum3a41c612003-01-28 15:10:22 +0000683 (obj, module, name))
Guido van Rossumb0a98e92001-08-17 18:49:52 +0000684
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000685 if self.proto >= 2:
Guido van Rossumd4b920c2003-02-04 01:54:49 +0000686 code = _extension_registry.get((module, name))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000687 if code:
688 assert code > 0
689 if code <= 0xff:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000690 write(EXT1 + bytes([code]))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000691 elif code <= 0xffff:
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000692 write(EXT2 + bytes([code&0xff, code>>8]))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000693 else:
694 write(EXT4 + pack("<i", code))
695 return
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000696 # Non-ASCII identifiers are supported only with protocols >= 3.
697 if self.proto >= 3:
698 write(GLOBAL + bytes(module, "utf-8") + b'\n' +
699 bytes(name, "utf-8") + b'\n')
700 else:
701 try:
702 write(GLOBAL + bytes(module, "ascii") + b'\n' +
703 bytes(name, "ascii") + b'\n')
704 except UnicodeEncodeError:
705 raise PicklingError(
706 "can't pickle global identifier '%s.%s' using "
707 "pickle protocol %i" % (module, name, self.proto))
Guido van Rossum255f3ee2003-01-29 06:14:11 +0000708
Guido van Rossum3a41c612003-01-28 15:10:22 +0000709 self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +0000710
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000711 dispatch[FunctionType] = save_global
712 dispatch[BuiltinFunctionType] = save_global
Guido van Rossum13257902007-06-07 23:15:56 +0000713 dispatch[type] = save_global
Guido van Rossum0c891ce1995-03-14 15:09:05 +0000714
Guido van Rossum1be31752003-01-28 15:19:53 +0000715# Pickling helpers
Guido van Rossuma48061a1995-01-10 00:31:14 +0000716
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000717def _keep_alive(x, memo):
718 """Keeps a reference to the object x in the memo.
719
720 Because we remember objects by their id, we have
721 to assure that possibly temporary objects are kept
722 alive by referencing them.
723 We store a reference at the id of the memo, which should
724 normally not be used unless someone tries to deepcopy
725 the memo itself...
726 """
727 try:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000728 memo[id(memo)].append(x)
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000729 except KeyError:
Guido van Rossum45e2fbc1998-03-26 21:13:24 +0000730 # aha, this is the first one :-)
731 memo[id(memo)]=[x]
Guido van Rossum5ed5c4c1997-09-03 00:23:54 +0000732
733
Tim Petersc0c12b52003-01-29 00:56:17 +0000734# A cache for whichmodule(), mapping a function object to the name of
735# the module in which the function was found.
736
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000737classmap = {} # called classmap for backwards compatibility
Guido van Rossuma48061a1995-01-10 00:31:14 +0000738
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000739def whichmodule(func, funcname):
740 """Figure out the module in which a function occurs.
Guido van Rossuma48061a1995-01-10 00:31:14 +0000741
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000742 Search sys.modules for the module.
743 Cache in classmap.
744 Return a module name.
Tim Petersc0c12b52003-01-29 00:56:17 +0000745 If the function cannot be found, return "__main__".
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000746 """
Jeremy Hylton4f0dcc92003-01-31 18:33:18 +0000747 # Python functions should always get an __module__ from their globals.
748 mod = getattr(func, "__module__", None)
749 if mod is not None:
750 return mod
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000751 if func in classmap:
752 return classmap[func]
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000753
Guido van Rossum634e53f2007-02-26 07:07:02 +0000754 for name, module in list(sys.modules.items()):
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000755 if module is None:
Jeremy Hylton065a5ab2002-09-19 22:57:26 +0000756 continue # skip dummy package entries
Jeremy Hyltoncc1fccb2003-02-06 16:23:01 +0000757 if name != '__main__' and getattr(module, funcname, None) is func:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000758 break
759 else:
760 name = '__main__'
Jeremy Hyltonf0cfdf72002-09-19 23:00:12 +0000761 classmap[func] = name
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000762 return name
Guido van Rossuma48061a1995-01-10 00:31:14 +0000763
764
Guido van Rossum1be31752003-01-28 15:19:53 +0000765# Unpickling machinery
766
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000767class _Unpickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +0000768
Guido van Rossumf4169812008-03-17 22:56:06 +0000769 def __init__(self, file, *, encoding="ASCII", errors="strict"):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000770 """This takes a binary file for reading a pickle data stream.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000771
Tim Peters5bd2a792003-02-01 16:45:06 +0000772 The protocol version of the pickle is detected automatically, so no
773 proto argument is needed.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000774
Guido van Rossumfeea0782007-10-10 18:00:50 +0000775 The file-like object must have two methods, a read() method
776 that takes an integer argument, and a readline() method that
777 requires no arguments. Both methods should return bytes.
778 Thus file-like object can be a binary file object opened for
779 reading, a BytesIO object, or any other custom object that
780 meets this interface.
Guido van Rossumf4169812008-03-17 22:56:06 +0000781
782 Optional keyword arguments are encoding and errors, which are
783 used to decode 8-bit string instances pickled by Python 2.x.
784 These default to 'ASCII' and 'strict', respectively.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000785 """
Guido van Rossumfeea0782007-10-10 18:00:50 +0000786 self.readline = file.readline
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000787 self.read = file.read
788 self.memo = {}
Guido van Rossumf4169812008-03-17 22:56:06 +0000789 self.encoding = encoding
790 self.errors = errors
Guido van Rossuma48061a1995-01-10 00:31:14 +0000791
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000792 def load(self):
Guido van Rossum3a41c612003-01-28 15:10:22 +0000793 """Read a pickled object representation from the open file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000794
Guido van Rossum3a41c612003-01-28 15:10:22 +0000795 Return the reconstituted object hierarchy specified in the file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000796 """
Alexandre Vassalotti3cfcab92008-12-27 09:30:39 +0000797 # Check whether Unpickler was initialized correctly. This is
798 # only needed to mimic the behavior of _pickle.Unpickler.dump().
799 if not hasattr(self, "read"):
800 raise UnpicklingError("Unpickler.__init__() was not called by "
801 "%s.__init__()" % (self.__class__.__name__,))
Jeremy Hylton20747fa2001-11-09 16:15:04 +0000802 self.mark = object() # any new unique object
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000803 self.stack = []
804 self.append = self.stack.append
805 read = self.read
806 dispatch = self.dispatch
807 try:
808 while 1:
809 key = read(1)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000810 if not key:
811 raise EOFError
Guido van Rossum98297ee2007-11-06 21:34:58 +0000812 assert isinstance(key, bytes_types)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000813 dispatch[key[0]](self)
Guido van Rossumb940e112007-01-10 16:19:56 +0000814 except _Stop as stopinst:
Guido van Rossumff871742000-12-13 18:11:56 +0000815 return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000816
Tim Petersc23d18a2003-01-28 01:41:51 +0000817 # Return largest index k such that self.stack[k] is self.mark.
818 # If the stack doesn't contain a mark, eventually raises IndexError.
819 # This could be sped by maintaining another stack, of indices at which
820 # the mark appears. For that matter, the latter stack would suffice,
821 # and we wouldn't need to push mark objects on self.stack at all.
822 # Doing so is probably a good thing, though, since if the pickle is
823 # corrupt (or hostile) we may get a clue from finding self.mark embedded
824 # in unpickled objects.
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000825 def marker(self):
826 stack = self.stack
827 mark = self.mark
828 k = len(stack)-1
829 while stack[k] is not mark: k = k-1
830 return k
831
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000832 def persistent_load(self, pid):
833 raise UnpickingError("unsupported persistent id encountered")
834
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000835 dispatch = {}
836
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000837 def load_proto(self):
838 proto = ord(self.read(1))
Guido van Rossumf4169812008-03-17 22:56:06 +0000839 if not 0 <= proto <= HIGHEST_PROTOCOL:
Guido van Rossum26d95c32007-08-27 23:18:54 +0000840 raise ValueError("unsupported pickle protocol: %d" % proto)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000841 dispatch[PROTO[0]] = load_proto
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000842
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000843 def load_persid(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000844 pid = self.readline()[:-1].decode("ascii")
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000845 self.append(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000846 dispatch[PERSID[0]] = load_persid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000847
848 def load_binpersid(self):
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +0000849 pid = self.stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000850 self.append(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000851 dispatch[BINPERSID[0]] = load_binpersid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000852
853 def load_none(self):
854 self.append(None)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000855 dispatch[NONE[0]] = load_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000856
Guido van Rossum7d97d312003-01-28 04:25:27 +0000857 def load_false(self):
858 self.append(False)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000859 dispatch[NEWFALSE[0]] = load_false
Guido van Rossum7d97d312003-01-28 04:25:27 +0000860
861 def load_true(self):
862 self.append(True)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000863 dispatch[NEWTRUE[0]] = load_true
Guido van Rossum7d97d312003-01-28 04:25:27 +0000864
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000865 def load_int(self):
Tim Peters19ef62d2001-08-28 22:21:18 +0000866 data = self.readline()
Guido van Rossume2763392002-04-05 19:30:08 +0000867 if data == FALSE[1:]:
868 val = False
869 elif data == TRUE[1:]:
870 val = True
871 else:
872 try:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000873 val = int(data, 0)
Guido van Rossume2763392002-04-05 19:30:08 +0000874 except ValueError:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000875 val = int(data, 0)
Guido van Rossume2763392002-04-05 19:30:08 +0000876 self.append(val)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000877 dispatch[INT[0]] = load_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000878
879 def load_binint(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000880 self.append(mloads(b'i' + self.read(4)))
881 dispatch[BININT[0]] = load_binint
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000882
883 def load_binint1(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +0000884 self.append(ord(self.read(1)))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000885 dispatch[BININT1[0]] = load_binint1
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000886
887 def load_binint2(self):
Guido van Rossumcfe5f202007-05-08 21:26:54 +0000888 self.append(mloads(b'i' + self.read(2) + b'\000\000'))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000889 dispatch[BININT2[0]] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +0000890
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000891 def load_long(self):
Guido van Rossumfeea0782007-10-10 18:00:50 +0000892 val = self.readline()[:-1].decode("ascii")
893 self.append(int(val, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000894 dispatch[LONG[0]] = load_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000895
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000896 def load_long1(self):
897 n = ord(self.read(1))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000898 data = self.read(n)
899 self.append(decode_long(data))
900 dispatch[LONG1[0]] = load_long1
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000901
902 def load_long4(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000903 n = mloads(b'i' + self.read(4))
904 data = self.read(n)
905 self.append(decode_long(data))
906 dispatch[LONG4[0]] = load_long4
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000907
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000908 def load_float(self):
Guido van Rossumff871742000-12-13 18:11:56 +0000909 self.append(float(self.readline()[:-1]))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000910 dispatch[FLOAT[0]] = load_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000911
Guido van Rossumd3703791998-10-22 20:15:36 +0000912 def load_binfloat(self, unpack=struct.unpack):
913 self.append(unpack('>d', self.read(8))[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000914 dispatch[BINFLOAT[0]] = load_binfloat
Guido van Rossumd3703791998-10-22 20:15:36 +0000915
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000916 def load_string(self):
Guido van Rossum98297ee2007-11-06 21:34:58 +0000917 orig = self.readline()
918 rep = orig[:-1]
Guido van Rossum26d95c32007-08-27 23:18:54 +0000919 for q in (b'"', b"'"): # double or single quote
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000920 if rep.startswith(q):
921 if not rep.endswith(q):
Guido van Rossum26d95c32007-08-27 23:18:54 +0000922 raise ValueError("insecure string pickle")
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000923 rep = rep[len(q):-len(q)]
924 break
925 else:
Guido van Rossum98297ee2007-11-06 21:34:58 +0000926 raise ValueError("insecure string pickle: %r" % orig)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000927 self.append(codecs.escape_decode(rep)[0]
928 .decode(self.encoding, self.errors))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000929 dispatch[STRING[0]] = load_string
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000930
931 def load_binstring(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000932 len = mloads(b'i' + self.read(4))
Guido van Rossumf4169812008-03-17 22:56:06 +0000933 data = self.read(len)
934 value = str(data, self.encoding, self.errors)
935 self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000936 dispatch[BINSTRING[0]] = load_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000937
Guido van Rossumf4169812008-03-17 22:56:06 +0000938 def load_binbytes(self):
939 len = mloads(b'i' + self.read(4))
940 self.append(self.read(len))
941 dispatch[BINBYTES[0]] = load_binbytes
942
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000943 def load_unicode(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000944 self.append(str(self.readline()[:-1], 'raw-unicode-escape'))
945 dispatch[UNICODE[0]] = load_unicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000946
947 def load_binunicode(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000948 len = mloads(b'i' + self.read(4))
949 self.append(str(self.read(len), 'utf-8'))
950 dispatch[BINUNICODE[0]] = load_binunicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +0000951
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000952 def load_short_binstring(self):
Tim Petersbbf63cd2003-01-27 21:15:36 +0000953 len = ord(self.read(1))
Guido van Rossumf4169812008-03-17 22:56:06 +0000954 data = bytes(self.read(len))
955 value = str(data, self.encoding, self.errors)
956 self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000957 dispatch[SHORT_BINSTRING[0]] = load_short_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000958
Guido van Rossumf4169812008-03-17 22:56:06 +0000959 def load_short_binbytes(self):
960 len = ord(self.read(1))
961 self.append(bytes(self.read(len)))
962 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
963
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000964 def load_tuple(self):
965 k = self.marker()
966 self.stack[k:] = [tuple(self.stack[k+1:])]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000967 dispatch[TUPLE[0]] = load_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000968
969 def load_empty_tuple(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000970 self.append(())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000971 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000972
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000973 def load_tuple1(self):
974 self.stack[-1] = (self.stack[-1],)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000975 dispatch[TUPLE1[0]] = load_tuple1
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000976
977 def load_tuple2(self):
978 self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000979 dispatch[TUPLE2[0]] = load_tuple2
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000980
981 def load_tuple3(self):
982 self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000983 dispatch[TUPLE3[0]] = load_tuple3
Guido van Rossum44f0ea52003-01-28 04:14:51 +0000984
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000985 def load_empty_list(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000986 self.append([])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000987 dispatch[EMPTY_LIST[0]] = load_empty_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000988
989 def load_empty_dictionary(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000990 self.append({})
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000991 dispatch[EMPTY_DICT[0]] = load_empty_dictionary
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000992
993 def load_list(self):
994 k = self.marker()
995 self.stack[k:] = [self.stack[k+1:]]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +0000996 dispatch[LIST[0]] = load_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000997
998 def load_dict(self):
999 k = self.marker()
1000 d = {}
1001 items = self.stack[k+1:]
1002 for i in range(0, len(items), 2):
1003 key = items[i]
1004 value = items[i+1]
1005 d[key] = value
1006 self.stack[k:] = [d]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001007 dispatch[DICT[0]] = load_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001008
Tim Petersd01c1e92003-01-30 15:41:46 +00001009 # INST and OBJ differ only in how they get a class object. It's not
1010 # only sensible to do the rest in a common routine, the two routines
1011 # previously diverged and grew different bugs.
1012 # klass is the class to instantiate, and k points to the topmost mark
1013 # object, following which are the arguments for klass.__init__.
1014 def _instantiate(self, klass, k):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001015 args = tuple(self.stack[k+1:])
1016 del self.stack[k:]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001017 instantiated = False
Tim Petersd01c1e92003-01-30 15:41:46 +00001018 if (not args and
Guido van Rossum13257902007-06-07 23:15:56 +00001019 isinstance(klass, type) and
Tim Petersd01c1e92003-01-30 15:41:46 +00001020 not hasattr(klass, "__getinitargs__")):
Guido van Rossuma8add0e2007-05-14 22:03:55 +00001021 value = _EmptyClass()
1022 value.__class__ = klass
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001023 instantiated = True
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001024 if not instantiated:
Guido van Rossum743d17e1998-09-15 20:25:57 +00001025 try:
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001026 value = klass(*args)
Guido van Rossumb940e112007-01-10 16:19:56 +00001027 except TypeError as err:
Guido van Rossum26d95c32007-08-27 23:18:54 +00001028 raise TypeError("in constructor for %s: %s" %
1029 (klass.__name__, str(err)), sys.exc_info()[2])
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001030 self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001031
1032 def load_inst(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001033 module = self.readline()[:-1].decode("ascii")
1034 name = self.readline()[:-1].decode("ascii")
Tim Petersd01c1e92003-01-30 15:41:46 +00001035 klass = self.find_class(module, name)
1036 self._instantiate(klass, self.marker())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001037 dispatch[INST[0]] = load_inst
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001038
1039 def load_obj(self):
Tim Petersd01c1e92003-01-30 15:41:46 +00001040 # Stack is ... markobject classobject arg1 arg2 ...
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001041 k = self.marker()
Tim Petersd01c1e92003-01-30 15:41:46 +00001042 klass = self.stack.pop(k+1)
1043 self._instantiate(klass, k)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001044 dispatch[OBJ[0]] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001045
Guido van Rossum3a41c612003-01-28 15:10:22 +00001046 def load_newobj(self):
1047 args = self.stack.pop()
1048 cls = self.stack[-1]
1049 obj = cls.__new__(cls, *args)
Guido van Rossum533dbcf2003-01-28 17:55:05 +00001050 self.stack[-1] = obj
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001051 dispatch[NEWOBJ[0]] = load_newobj
Guido van Rossum3a41c612003-01-28 15:10:22 +00001052
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001053 def load_global(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001054 module = self.readline()[:-1].decode("utf-8")
1055 name = self.readline()[:-1].decode("utf-8")
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001056 klass = self.find_class(module, name)
1057 self.append(klass)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001058 dispatch[GLOBAL[0]] = load_global
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001059
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001060 def load_ext1(self):
1061 code = ord(self.read(1))
1062 self.get_extension(code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001063 dispatch[EXT1[0]] = load_ext1
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001064
1065 def load_ext2(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001066 code = mloads(b'i' + self.read(2) + b'\000\000')
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001067 self.get_extension(code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001068 dispatch[EXT2[0]] = load_ext2
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001069
1070 def load_ext4(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001071 code = mloads(b'i' + self.read(4))
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001072 self.get_extension(code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001073 dispatch[EXT4[0]] = load_ext4
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001074
1075 def get_extension(self, code):
1076 nil = []
Guido van Rossumd4b920c2003-02-04 01:54:49 +00001077 obj = _extension_cache.get(code, nil)
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001078 if obj is not nil:
1079 self.append(obj)
1080 return
Guido van Rossumd4b920c2003-02-04 01:54:49 +00001081 key = _inverted_registry.get(code)
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001082 if not key:
1083 raise ValueError("unregistered extension code %d" % code)
1084 obj = self.find_class(*key)
Guido van Rossumd4b920c2003-02-04 01:54:49 +00001085 _extension_cache[code] = obj
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001086 self.append(obj)
1087
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001088 def find_class(self, module, name):
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001089 # Subclasses may override this
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001090 __import__(module, level=0)
Barry Warsawbf4d9592001-11-15 23:42:58 +00001091 mod = sys.modules[module]
1092 klass = getattr(mod, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001093 return klass
1094
1095 def load_reduce(self):
1096 stack = self.stack
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001097 args = stack.pop()
1098 func = stack[-1]
Guido van Rossum99603b02007-07-20 00:22:32 +00001099 try:
1100 value = func(*args)
1101 except:
1102 print(sys.exc_info())
1103 print(func, args)
1104 raise
Guido van Rossumb26a97a2003-01-28 22:29:13 +00001105 stack[-1] = value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001106 dispatch[REDUCE[0]] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001107
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001108 def load_pop(self):
1109 del self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001110 dispatch[POP[0]] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001111
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001112 def load_pop_mark(self):
1113 k = self.marker()
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001114 del self.stack[k:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001115 dispatch[POP_MARK[0]] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001116
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001117 def load_dup(self):
Guido van Rossumb1062fc1998-03-31 17:00:46 +00001118 self.append(self.stack[-1])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001119 dispatch[DUP[0]] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001120
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001121 def load_get(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001122 i = int(self.readline()[:-1])
1123 self.append(self.memo[i])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001124 dispatch[GET[0]] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001125
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001126 def load_binget(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001127 i = self.read(1)[0]
1128 self.append(self.memo[i])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001129 dispatch[BINGET[0]] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001130
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001131 def load_long_binget(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001132 i = mloads(b'i' + self.read(4))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001133 self.append(self.memo[i])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001134 dispatch[LONG_BINGET[0]] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001135
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001136 def load_put(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001137 i = int(self.readline()[:-1])
1138 self.memo[i] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001139 dispatch[PUT[0]] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001140
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001141 def load_binput(self):
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001142 i = self.read(1)[0]
1143 self.memo[i] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001144 dispatch[BINPUT[0]] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001145
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001146 def load_long_binput(self):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001147 i = mloads(b'i' + self.read(4))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001148 self.memo[i] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001149 dispatch[LONG_BINPUT[0]] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001150
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001151 def load_append(self):
1152 stack = self.stack
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001153 value = stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001154 list = stack[-1]
1155 list.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001156 dispatch[APPEND[0]] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001157
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001158 def load_appends(self):
1159 stack = self.stack
1160 mark = self.marker()
1161 list = stack[mark - 1]
Tim Peters209ad952003-01-28 01:44:45 +00001162 list.extend(stack[mark + 1:])
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001163 del stack[mark:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001164 dispatch[APPENDS[0]] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001165
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001166 def load_setitem(self):
1167 stack = self.stack
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001168 value = stack.pop()
1169 key = stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001170 dict = stack[-1]
1171 dict[key] = value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001172 dispatch[SETITEM[0]] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001173
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001174 def load_setitems(self):
1175 stack = self.stack
1176 mark = self.marker()
1177 dict = stack[mark - 1]
Guido van Rossum45e2fbc1998-03-26 21:13:24 +00001178 for i in range(mark + 1, len(stack), 2):
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001179 dict[stack[i]] = stack[i + 1]
Guido van Rossuma48061a1995-01-10 00:31:14 +00001180
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001181 del stack[mark:]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001182 dispatch[SETITEMS[0]] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001183
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001184 def load_build(self):
1185 stack = self.stack
Guido van Rossumac5b5d22003-01-28 22:01:16 +00001186 state = stack.pop()
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001187 inst = stack[-1]
Guido van Rossumac5b5d22003-01-28 22:01:16 +00001188 setstate = getattr(inst, "__setstate__", None)
1189 if setstate:
1190 setstate(state)
1191 return
1192 slotstate = None
1193 if isinstance(state, tuple) and len(state) == 2:
1194 state, slotstate = state
1195 if state:
Guido van Rossuma8add0e2007-05-14 22:03:55 +00001196 inst.__dict__.update(state)
Guido van Rossumac5b5d22003-01-28 22:01:16 +00001197 if slotstate:
1198 for k, v in slotstate.items():
1199 setattr(inst, k, v)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001200 dispatch[BUILD[0]] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001201
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001202 def load_mark(self):
1203 self.append(self.mark)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001204 dispatch[MARK[0]] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001205
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001206 def load_stop(self):
Raymond Hettinger46ac8eb2002-06-30 03:39:14 +00001207 value = self.stack.pop()
Guido van Rossumff871742000-12-13 18:11:56 +00001208 raise _Stop(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001209 dispatch[STOP[0]] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001210
Guido van Rossume467be61997-12-05 19:42:42 +00001211# Helper class for load_inst/load_obj
1212
1213class _EmptyClass:
1214 pass
Guido van Rossuma48061a1995-01-10 00:31:14 +00001215
Tim Peters91149822003-01-31 03:43:58 +00001216# Encode/decode longs in linear time.
1217
1218import binascii as _binascii
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001219
1220def encode_long(x):
Tim Peters91149822003-01-31 03:43:58 +00001221 r"""Encode a long to a two's complement little-endian binary string.
Guido van Rossume2a383d2007-01-15 16:59:06 +00001222 Note that 0 is a special case, returning an empty string, to save a
Tim Peters4b23f2b2003-01-31 16:43:39 +00001223 byte in the LONG1 pickling context.
1224
Guido van Rossume2a383d2007-01-15 16:59:06 +00001225 >>> encode_long(0)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001226 b''
Guido van Rossume2a383d2007-01-15 16:59:06 +00001227 >>> encode_long(255)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001228 b'\xff\x00'
Guido van Rossume2a383d2007-01-15 16:59:06 +00001229 >>> encode_long(32767)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001230 b'\xff\x7f'
Guido van Rossume2a383d2007-01-15 16:59:06 +00001231 >>> encode_long(-256)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001232 b'\x00\xff'
Guido van Rossume2a383d2007-01-15 16:59:06 +00001233 >>> encode_long(-32768)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001234 b'\x00\x80'
Guido van Rossume2a383d2007-01-15 16:59:06 +00001235 >>> encode_long(-128)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001236 b'\x80'
Guido van Rossume2a383d2007-01-15 16:59:06 +00001237 >>> encode_long(127)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001238 b'\x7f'
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001239 >>>
1240 """
Tim Peters91149822003-01-31 03:43:58 +00001241
1242 if x == 0:
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001243 return b''
Tim Peters91149822003-01-31 03:43:58 +00001244 if x > 0:
1245 ashex = hex(x)
1246 assert ashex.startswith("0x")
1247 njunkchars = 2 + ashex.endswith('L')
1248 nibbles = len(ashex) - njunkchars
1249 if nibbles & 1:
1250 # need an even # of nibbles for unhexlify
1251 ashex = "0x0" + ashex[2:]
Tim Peters4b23f2b2003-01-31 16:43:39 +00001252 elif int(ashex[2], 16) >= 8:
Tim Peters91149822003-01-31 03:43:58 +00001253 # "looks negative", so need a byte of sign bits
1254 ashex = "0x00" + ashex[2:]
1255 else:
1256 # Build the 256's-complement: (1L << nbytes) + x. The trick is
1257 # to find the number of bytes in linear time (although that should
1258 # really be a constant-time task).
1259 ashex = hex(-x)
1260 assert ashex.startswith("0x")
1261 njunkchars = 2 + ashex.endswith('L')
1262 nibbles = len(ashex) - njunkchars
1263 if nibbles & 1:
Tim Petersee1a53c2003-02-02 02:57:53 +00001264 # Extend to a full byte.
Tim Peters91149822003-01-31 03:43:58 +00001265 nibbles += 1
Tim Peters4b23f2b2003-01-31 16:43:39 +00001266 nbits = nibbles * 4
Guido van Rossume2a383d2007-01-15 16:59:06 +00001267 x += 1 << nbits
Tim Peters91149822003-01-31 03:43:58 +00001268 assert x > 0
1269 ashex = hex(x)
Tim Petersee1a53c2003-02-02 02:57:53 +00001270 njunkchars = 2 + ashex.endswith('L')
1271 newnibbles = len(ashex) - njunkchars
1272 if newnibbles < nibbles:
1273 ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
1274 if int(ashex[2], 16) < 8:
Tim Peters91149822003-01-31 03:43:58 +00001275 # "looks positive", so need a byte of sign bits
Tim Petersee1a53c2003-02-02 02:57:53 +00001276 ashex = "0xff" + ashex[2:]
Tim Peters91149822003-01-31 03:43:58 +00001277
1278 if ashex.endswith('L'):
1279 ashex = ashex[2:-1]
1280 else:
1281 ashex = ashex[2:]
Tim Petersee1a53c2003-02-02 02:57:53 +00001282 assert len(ashex) & 1 == 0, (x, ashex)
Tim Peters91149822003-01-31 03:43:58 +00001283 binary = _binascii.unhexlify(ashex)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001284 return bytes(binary[::-1])
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001285
1286def decode_long(data):
1287 r"""Decode a long from a two's complement little-endian binary string.
Tim Peters4b23f2b2003-01-31 16:43:39 +00001288
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001289 >>> decode_long(b'')
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001290 0
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001291 >>> decode_long(b"\xff\x00")
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001292 255
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001293 >>> decode_long(b"\xff\x7f")
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001294 32767
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001295 >>> decode_long(b"\x00\xff")
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001296 -256
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001297 >>> decode_long(b"\x00\x80")
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001298 -32768
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001299 >>> decode_long(b"\x80")
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001300 -128
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001301 >>> decode_long(b"\x7f")
Guido van Rossume2b70bc2006-08-18 22:13:04 +00001302 127
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001303 """
Tim Peters91149822003-01-31 03:43:58 +00001304
Tim Peters4b23f2b2003-01-31 16:43:39 +00001305 nbytes = len(data)
1306 if nbytes == 0:
Guido van Rossume2a383d2007-01-15 16:59:06 +00001307 return 0
Tim Peters91149822003-01-31 03:43:58 +00001308 ashex = _binascii.hexlify(data[::-1])
Guido van Rossume2a383d2007-01-15 16:59:06 +00001309 n = int(ashex, 16) # quadratic time before Python 2.3; linear now
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001310 if data[-1] >= 0x80:
Guido van Rossume2a383d2007-01-15 16:59:06 +00001311 n -= 1 << (nbytes * 8)
Tim Peters91149822003-01-31 03:43:58 +00001312 return n
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001313
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001314# Use the faster _pickle if possible
1315try:
1316 from _pickle import *
1317except ImportError:
1318 Pickler, Unpickler = _Pickler, _Unpickler
1319
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001320# Shorthands
1321
Raymond Hettinger3489cad2004-12-05 05:20:42 +00001322def dump(obj, file, protocol=None):
1323 Pickler(file, protocol).dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001324
Raymond Hettinger3489cad2004-12-05 05:20:42 +00001325def dumps(obj, protocol=None):
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001326 f = io.BytesIO()
1327 Pickler(f, protocol).dump(obj)
1328 res = f.getvalue()
Guido van Rossum98297ee2007-11-06 21:34:58 +00001329 assert isinstance(res, bytes_types)
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001330 return res
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001332def load(file, *, encoding="ASCII", errors="strict"):
1333 return Unpickler(file, encoding=encoding, errors=errors).load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001334
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001335def loads(s, *, encoding="ASCII", errors="strict"):
Guido van Rossumcfe5f202007-05-08 21:26:54 +00001336 if isinstance(s, str):
1337 raise TypeError("Can't load pickle from unicode string")
1338 file = io.BytesIO(s)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001339 return Unpickler(file, encoding=encoding, errors=errors).load()
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001340
1341# Doctest
1342
1343def _test():
1344 import doctest
1345 return doctest.testmod()
1346
1347if __name__ == "__main__":
1348 _test()