blob: cbac5f168b45eb8c87245d6f1318192b3bcb47f9 [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Create portable serialized representations of Python objects.
Guido van Rossuma48061a1995-01-10 00:31:14 +00002
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +00003See module copyreg for a mechanism for registering custom picklers.
Tim Peters22a449a2003-01-27 20:16:36 +00004See module pickletools source for extensive comments.
Guido van Rossuma48061a1995-01-10 00:31:14 +00005
Guido van Rossume467be61997-12-05 19:42:42 +00006Classes:
Guido van Rossuma48061a1995-01-10 00:31:14 +00007
Guido van Rossume467be61997-12-05 19:42:42 +00008 Pickler
9 Unpickler
Guido van Rossuma48061a1995-01-10 00:31:14 +000010
Guido van Rossume467be61997-12-05 19:42:42 +000011Functions:
Guido van Rossuma48061a1995-01-10 00:31:14 +000012
Guido van Rossume467be61997-12-05 19:42:42 +000013 dump(object, file)
    dumps(object) -> bytes
15 load(file) -> object
Serhiy Storchaka531d1e52020-05-02 09:38:01 +030016 loads(bytes) -> object
Guido van Rossuma48061a1995-01-10 00:31:14 +000017
Guido van Rossume467be61997-12-05 19:42:42 +000018Misc variables:
Guido van Rossuma48061a1995-01-10 00:31:14 +000019
Fred Drakefe82acc1998-02-13 03:24:48 +000020 __version__
Guido van Rossume467be61997-12-05 19:42:42 +000021 format_version
22 compatible_formats
Guido van Rossuma48061a1995-01-10 00:31:14 +000023
Guido van Rossuma48061a1995-01-10 00:31:14 +000024"""
25
Victor Stinner7fa767e2014-03-20 09:16:38 +010026from types import FunctionType
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +000027from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +030029from itertools import islice
Serhiy Storchaka0d554d72015-10-10 22:42:18 +030030from functools import partial
Guido van Rossumd3703791998-10-22 20:15:36 +000031import sys
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +030032from sys import maxsize
33from struct import pack, unpack
Skip Montanaro23bafc62001-02-18 03:10:09 +000034import re
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000035import io
Walter Dörwald42748a82007-06-12 16:40:17 +000036import codecs
Antoine Pitroud9dfaa92009-06-04 20:32:06 +000037import _compat_pickle
Guido van Rossuma48061a1995-01-10 00:31:14 +000038
# Names exported by ``from pickle import *``.
__all__ = [
    "PickleError",
    "PicklingError",
    "UnpicklingError",
    "Pickler",
    "Unpickler",
    "dump",
    "dumps",
    "load",
    "loads",
]

# PickleBuffer is provided by the _pickle module; when that module cannot be
# imported the name is simply not exported and the flag records its absence.
try:
    from _pickle import PickleBuffer
except ImportError:
    _HAVE_PICKLE_BUFFER = False
else:
    __all__.append("PickleBuffer")
    _HAVE_PICKLE_BUFFER = True
48
Skip Montanaro352674d2001-02-07 23:14:30 +000049
# Tuple of the bytes-like types, handy as the second argument of isinstance().
bytes_types = (bytes, bytearray)

# Informational metadata only (the original note states no code uses these).
format_version = "4.0"          # File format version we write
compatible_formats = [          # Old format versions we can read
    "1.0",                      # Original protocol 0
    "1.1",                      # Protocol 0 with INST added
    "1.2",                      # Original protocol 1
    "1.3",                      # Protocol 1 with BINFLOAT added
    "2.0",                      # Protocol 2
    "3.0",                      # Protocol 3
    "4.0",                      # Protocol 4
    "5.0",                      # Protocol 5
]

# Highest protocol number this module knows how to read.
HIGHEST_PROTOCOL = 5

# Protocol written when the caller does not choose one.  It may be lower than
# HIGHEST_PROTOCOL: only bump it once the oldest still supported version of
# Python already includes the new protocol.
DEFAULT_PROTOCOL = 4
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000072
class PickleError(Exception):
    """Common base class for the other pickling exceptions."""
76
class PicklingError(PickleError):
    """Raised when an unpicklable object is passed to the dump() method."""
83
class UnpicklingError(PickleError):
    """Raised when there is a problem unpickling an object.

    A security violation is one example of such a problem.

    Other exceptions may also be raised during unpickling, including (but
    not necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
Guido van Rossum7849da81995-03-09 14:08:35 +000094
# Unpickler.load_stop() raises an instance of _Stop in response to the STOP
# opcode; the exception carries the object that is the result of unpickling.
class _Stop(Exception):
    """Control-flow exception holding the unpickled result in ``value``."""

    def __init__(self, value):
        self.value = value
100
# PyStringMap is Jython's dict subclass with string keys.  Where it cannot be
# imported the name is bound to None instead.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None
106
# Pickle opcodes.  pickletools.py documents them extensively and groups them
# by purpose; the listing here is in kind-of alphabetical order of the
# 1-character pickle code.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   # push persistent object; id is taken from stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   # push string; counted binary string < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE     = b'X'   # push Unicode string; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   # push item from memo on stack; index is 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT    = b'r'   # store stack top in memo; index is 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

# Complete INT-opcode payloads rather than opcodes of their own; see the INT
# docs in pickletools.py.
TRUE           = b'I01\n'
FALSE          = b'I00\n'

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # same as EXT1, but 2-byte index
EXT4           = b'\x84'  # same as EXT1, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Indexed by tuple length (0 to 3): the opcode that builds a tuple of that size.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   # push bytes; counted binary string < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly
196
# Also export every name defined so far that starts with an upper-case letter
# and otherwise consists only of upper-case letters, digits and underscores.
__all__.extend(
    [name for name in dir() if re.match("[A-Z][A-Z0-9_]+$", name)]
)
198
199
class _Framer:
    """Buffer pickler output into frames before handing it to a writer.

    While framing is active, written data accumulates in an in-memory
    buffer.  The buffer is flushed to *file_write* once it reaches
    _FRAME_SIZE_TARGET bytes, or when a flush is forced.  A flushed buffer
    of at least _FRAME_SIZE_MIN bytes is preceded by a FRAME opcode and
    its length as an 8-byte little-endian integer.
    """

    _FRAME_SIZE_MIN = 4
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        # current_frame is None whenever framing is inactive.
        self.current_frame = None
        self.file_write = file_write

    def start_framing(self):
        """Start buffering subsequent writes in a fresh frame."""
        self.current_frame = io.BytesIO()

    def end_framing(self):
        """Flush a non-empty current frame and switch framing off."""
        frame = self.current_frame
        if not frame or frame.tell() <= 0:
            return
        self.commit_frame(force=True)
        self.current_frame = None

    def commit_frame(self, force=False):
        """Flush the current frame if it is large enough, or if *force*."""
        frame = self.current_frame
        if not frame:
            return
        if not force and frame.tell() < self._FRAME_SIZE_TARGET:
            return

        payload = frame.getbuffer()
        emit = self.file_write
        size = len(payload)
        if size >= self._FRAME_SIZE_MIN:
            # Opcode and frame size go out in one call: concatenating these
            # few bytes is expected to be cheaper than an extra write.
            emit(FRAME + pack("<Q", size))

        # The frame contents get their own call so that they are never
        # copied into a concatenated bytes object.
        emit(payload)

        # Continue with a brand new buffer: the file object may still hold
        # an unreleased memoryview of the previous one and read it later.
        self.current_frame = io.BytesIO()

    def write(self, data):
        """Write *data* to the current frame, or to the file if unframed."""
        frame = self.current_frame
        sink = frame.write if frame else self.file_write
        return sink(data)

    def write_large_bytes(self, header, payload):
        """Write a large object's *header* and *payload* outside any frame."""
        emit = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file first.
            self.commit_frame(force=True)

        # Header and payload are deliberately written separately, so that no
        # large temporary bytes object is allocated by concatenating them.
        # No protocol 4 FRAME opcode is inserted here, which lets the loader
        # optimize its file.read calls.
        emit(header)
        emit(payload)
261
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100262
class _Unframer:
    """Serve reads from the current frame, falling back to the file.

    *file_read* and *file_readline* are the underlying file's read
    callables.  *file_tell* is accepted but not stored by this class.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        self.file_read = file_read
        self.file_readline = file_readline
        # None whenever no frame is being consumed.
        self.current_frame = None

    def readinto(self, buf):
        """Fill *buf* completely and return the number of bytes stored."""
        frame = self.current_frame
        if frame:
            count = frame.readinto(buf)
            if count != 0 or len(buf) == 0:
                # The frame supplied data; it must cover the whole request.
                if count < len(buf):
                    raise UnpicklingError(
                        "pickle exhausted before end of frame")
                return count
            # Frame fully consumed: drop it and read from the file instead.
            self.current_frame = None
        wanted = len(buf)
        buf[:] = self.file_read(wanted)
        return wanted

    def read(self, n):
        """Return *n* bytes from the current frame or from the file."""
        frame = self.current_frame
        if frame:
            chunk = frame.read(n)
            if chunk or n == 0:
                if len(chunk) < n:
                    raise UnpicklingError(
                        "pickle exhausted before end of frame")
                return chunk
            # Frame fully consumed: drop it and read from the file instead.
            self.current_frame = None
        return self.file_read(n)

    def readline(self):
        """Return one newline-terminated line from the frame or the file."""
        frame = self.current_frame
        if frame:
            line = frame.readline()
            if line:
                # A line cut short by the end of the frame is an error.
                if not line.endswith(b'\n'):
                    raise UnpicklingError(
                        "pickle exhausted before end of frame")
                return line
            # Frame fully consumed: drop it and read from the file instead.
            self.current_frame = None
        return self.file_readline()

    def load_frame(self, frame_size):
        """Read *frame_size* bytes from the file as the new current frame.

        Raises UnpicklingError if the current frame still has unread data.
        """
        frame = self.current_frame
        if frame and frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100318
319
320# Tools used for pickling.
321
def _getattribute(obj, name):
    """Resolve the dotted path *name* starting from *obj*.

    Returns a ``(value, parent)`` pair, where *parent* is the object on
    which the last path component was looked up.  Raises AttributeError
    when a component is missing or is the ``<locals>`` marker.
    """
    target = obj
    for part in name.split('.'):
        if part == '<locals>':
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, target))
        parent = target
        try:
            target = getattr(parent, part)
        except AttributeError:
            # "from None" hides the low-level lookup error from the traceback.
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, parent)) from None
    return target, parent
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100334
def whichmodule(obj, name):
    """Find the name of the module an object belongs to.

    An object's own ``__module__`` wins when it is set.  Otherwise every
    loaded module except ``__main__`` is searched for an attribute path
    *name* that resolves to *obj*; ``'__main__'`` is the fallback.
    """
    declared = getattr(obj, '__module__', None)
    if declared is not None:
        return declared
    # Walk a snapshot of sys.modules: getattr on a dynamic module may trigger
    # imports of other modules, which would change sys.modules mid-iteration.
    for candidate, module in sys.modules.copy().items():
        if candidate == '__main__' or module is None:
            continue
        try:
            found, _ = _getattribute(module, name)
        except AttributeError:
            continue
        if found is obj:
            return candidate
    return '__main__'
351
def encode_long(x):
    r"""Encode an int as a two's complement little-endian byte string.

    Zero is a special case: it is encoded as the empty byte string, which
    saves a byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    """
    if x == 0:
        return b''
    width = x.bit_length() // 8 + 1
    encoded = x.to_bytes(width, byteorder='little', signed=True)
    # For a negative value the top 0xff byte is redundant when the byte below
    # it already has its sign bit set.
    redundant_sign_byte = (
        x < 0
        and width > 1
        and encoded[-1] == 0xff
        and (encoded[-2] & 0x80) != 0
    )
    return encoded[:-1] if redundant_sign_byte else encoded
381
def decode_long(data):
    r"""Decode an int from a two's complement little-endian byte string.

    The empty byte string decodes to zero.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    value = int.from_bytes(data, 'little', signed=True)
    return value
401
Skip Montanaro23bafc62001-02-18 03:10:09 +0000402
Guido van Rossum1be31752003-01-28 15:19:53 +0000403# Pickling machinery
404
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000405class _Pickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +0000406
def __init__(self, file, protocol=None, *, fix_imports=True,
             buffer_callback=None):
    """Create a pickler that writes a pickle data stream to *file*.

    *file* must have a write() method that accepts a single bytes
    argument.  A file object opened for binary writing, an io.BytesIO
    instance, or any other custom object with that interface will do.

    The optional *protocol* argument selects the pickle protocol;
    supported protocols are 0, 1, 2, 3, 4 and 5.  The default is
    protocol 4, which was introduced in Python 3.4 and is incompatible
    with previous versions.  A negative value selects the highest
    protocol version supported.  The higher the protocol used, the more
    recent the version of Python needed to read the pickle produced.

    If *fix_imports* is True and *protocol* is less than 3, pickle will
    try to map the new Python 3 names to the old module names used in
    Python 2, so that the pickle data stream is readable with Python 2.

    If *buffer_callback* is None (the default), buffer views are
    serialized into *file* as part of the pickle stream.  Otherwise it
    can be called any number of times with a buffer view: a false
    return value (such as None) makes the buffer out-of-band, anything
    else serializes it in-band, i.e. inside the pickle stream.

    Raises ValueError for a protocol above the highest supported one,
    or when *buffer_callback* is given with a protocol below 5, and
    TypeError when *file* has no 'write' attribute.
    """
    if protocol is None:
        protocol = DEFAULT_PROTOCOL
    if protocol < 0:
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)

    if buffer_callback is not None and protocol < 5:
        raise ValueError("buffer_callback needs protocol >= 5")
    self._buffer_callback = buffer_callback

    try:
        self._file_write = file.write
    except AttributeError:
        raise TypeError("file must have a 'write' attribute")

    # All output goes through the framer, which decides whether bytes are
    # buffered in a frame or written straight to the file.
    framer = _Framer(self._file_write)
    self.framer = framer
    self.write = framer.write
    self._write_large_bytes = framer.write_large_bytes

    self.memo = {}
    self.proto = int(protocol)
    self.bin = protocol >= 1
    self.fast = 0
    self.fix_imports = fix_imports and protocol < 3
Guido van Rossuma48061a1995-01-10 00:31:14 +0000463
def clear_memo(self):
    """Empty the pickler's "memo".

    The memo remembers which objects the pickler has already seen, so
    that shared or recursive objects are pickled by reference and not
    by value.  Clearing it is useful when re-using picklers.
    """
    memo = self.memo
    memo.clear()
473
def dump(self, obj):
    """Write a pickled representation of obj to the open file.

    For protocol 2 and above the stream starts with a PROTO header; for
    protocol 4 and above the rest of the stream is framed.  The stream
    always ends with STOP.

    Raises PicklingError if Pickler.__init__() was never called.  Any
    exception raised while saving *obj* propagates to the caller; in
    that case the unfinished frame is dropped so the pickler can be
    used again.
    """
    # Check whether Pickler was initialized correctly. This is
    # only needed to mimic the behavior of _pickle.Pickler.dump().
    if not hasattr(self, "_file_write"):
        raise PicklingError("Pickler.__init__() was not called by "
                            "%s.__init__()" % (self.__class__.__name__,))
    if self.proto >= 2:
        self.write(PROTO + pack("<B", self.proto))
    if self.proto >= 4:
        self.framer.start_framing()
    try:
        self.save(obj)
        self.write(STOP)
        self.framer.end_framing()
    finally:
        # Never leave a frame open behind us.  If saving failed part-way,
        # a stale frame would swallow the PROTO header of the next dump()
        # (self.write goes to the open frame) and start_framing() would
        # then throw that header away together with the stale data.
        self.framer.current_frame = None
Guido van Rossuma48061a1995-01-10 00:31:14 +0000488
def memoize(self, obj):
    """Store an object in the memo.

    The Pickler memo maps id(obj) to a 2-tuple of the Unpickler memo key
    and the object itself.  The key is written to the pickle and becomes
    the key in the Unpickler's memo; the object is kept in the tuple so
    that transient objects stay alive during pickling.

    Using the current memo length as the key is only a convention: the
    keys merely have to be unique.  No other scheme appears to have an
    advantage, and this one allows the Unpickler memo to be implemented
    as a plain (but growable) array indexed by memo key.
    """
    if self.fast:
        # Fast mode: nothing is memoized.
        return
    memo = self.memo
    key = id(obj)
    assert key not in memo
    slot = len(memo)
    self.write(self.put(slot))
    memo[key] = slot, obj
Jeremy Hylton3422c992003-01-24 19:29:52 +0000510
def put(self, idx):
    """Return the opcode bytes that store the stack top in memo slot *idx*.

    Protocol 4 and above use the argument-less MEMOIZE.  The other
    binary protocols use BINPUT with a 1-byte index, or LONG_BINPUT with
    a 4-byte index for indexes of 256 and up.  The text protocol uses
    PUT followed by the decimal index and a newline.
    """
    if self.proto >= 4:
        return MEMOIZE
    if not self.bin:
        return PUT + repr(idx).encode("ascii") + b'\n'
    if idx < 256:
        return BINPUT + pack("<B", idx)
    return LONG_BINPUT + pack("<I", idx)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000522
def get(self, i):
    """Return the opcode bytes that push memo item *i* onto the stack.

    Binary protocols use BINGET with a 1-byte index, or LONG_BINGET with
    a 4-byte index for indexes of 256 and up.  The text protocol uses
    GET followed by the decimal index and a newline.
    """
    if not self.bin:
        return GET + repr(i).encode("ascii") + b'\n'
    if i < 256:
        return BINGET + pack("<B", i)
    return LONG_BINGET + pack("<I", i)
Tim Peters2344fae2001-01-15 00:50:52 +0000532
def save(self, obj, save_persistent_id=True):
    """Write the pickle opcodes for *obj*.

    Strategies are tried in this order: persistent id (unless
    *save_persistent_id* is false), a memo hit, reducer_override(), the
    per-type dispatch table, the private or copyreg dispatch_table,
    "class with a custom metaclass", and finally __reduce_ex__ or
    __reduce__.

    Raises PicklingError when no strategy applies or when a reducer
    returns something other than a string or a 2- to 6-element tuple.
    """
    self.framer.commit_frame()

    # A subclass may assign a persistent id to the object.
    pid = self.persistent_id(obj)
    if pid is not None and save_persistent_id:
        self.save_pers(pid)
        return

    # Already pickled once: emit a reference to the memo entry.
    memo_entry = self.memo.get(id(obj))
    if memo_entry is not None:
        self.write(self.get(memo_entry[0]))
        return

    # `reduce` always names the reducer tried last; error messages use it.
    reduce = getattr(self, "reducer_override", None)
    rv = NotImplemented if reduce is None else reduce(obj)

    if rv is NotImplemented:
        # Type dispatch table first.
        cls = type(obj)
        handler = self.dispatch.get(cls)
        if handler is not None:
            handler(self, obj)  # unbound method, so self is passed explicitly
            return

        # Then the private dispatch table if any, or else
        # copyreg.dispatch_table.
        reduce = getattr(self, 'dispatch_table', dispatch_table).get(cls)
        if reduce is not None:
            rv = reduce(obj)
        elif issubclass(cls, type):
            # A class with a custom metaclass is treated as a regular class.
            self.save_global(obj)
            return
        else:
            # __reduce_ex__ is preferred, with __reduce__ as the fallback.
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce is not None:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce is None:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (cls.__name__, obj))
                rv = reduce()

    # A string returned by the reducer means "save as global".
    if isinstance(rv, str):
        self.save_global(obj, rv)
        return

    # Anything else has to be a tuple ...
    if not isinstance(rv, tuple):
        raise PicklingError("%s must return string or tuple" % reduce)

    # ... of an appropriate size.
    if not (2 <= len(rv) <= 6):
        raise PicklingError("Tuple returned by %s must have "
                            "two to six elements" % reduce)

    # Save the reducer's output; save_reduce() also memoizes the object.
    self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000602
def persistent_id(self, obj):
    """Return None: by default no object has a persistent id.

    This exists so that a subclass can override it.
    """
    return None
606
def save_pers(self, pid):
    """Write a reference to the persistent id *pid*.

    Binary protocols pickle *pid* itself (without consulting
    persistent_id again) and follow it with BINPERSID.  The text
    protocol writes PERSID and str(pid) encoded as ASCII on one line,
    and raises PicklingError when that encoding fails.
    """
    if not self.bin:
        try:
            self.write(PERSID + str(pid).encode("ascii") + b'\n')
        except UnicodeEncodeError:
            raise PicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        return
    self.save(pid, save_persistent_id=False)
    self.write(BINPERSID)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000618
def save_reduce(self, func, args, state=None, listitems=None,
                dictitems=None, state_setter=None, obj=None):
    """Write the opcodes that rebuild an object from a reduce value.

    This API is called by some subclasses.

    func          callable used to create the object
    args          tuple of arguments for *func*
    state         optional state, applied with BUILD or *state_setter*
    listitems     optional iterable handed to _batch_appends()
    dictitems     optional iterable handed to _batch_setitems()
    state_setter  optional callable invoked as state_setter(obj, state)
    obj           the object being pickled; memoized when not None

    Raises PicklingError when *args* is not a tuple, *func* is not
    callable, or the class passed to a __newobj__/__newobj_ex__ style
    constructor has no __new__ or is not obj.__class__.
    """
    if not isinstance(args, tuple):
        raise PicklingError("args from save_reduce() must be a tuple")
    if not callable(func):
        raise PicklingError("func from save_reduce() must be callable")

    put = self.save
    emit = self.write

    ctor_name = getattr(func, "__name__", "")
    new_style = self.proto >= 2
    if new_style and ctor_name == "__newobj_ex__":
        cls, args, kwargs = args
        if not hasattr(cls, "__new__"):
            raise PicklingError("args[0] from {} args has no __new__"
                                .format(ctor_name))
        if obj is not None and cls is not obj.__class__:
            raise PicklingError("args[0] from {} args has the wrong class"
                                .format(ctor_name))
        if self.proto >= 4:
            put(cls)
            put(args)
            put(kwargs)
            emit(NEWOBJ_EX)
        else:
            # Protocols 2 and 3 take the REDUCE route: the keyword
            # arguments are bound into a partial that is called with an
            # empty argument tuple.
            func = partial(cls.__new__, cls, *args, **kwargs)
            put(func)
            put(())
            emit(REDUCE)
    elif new_style and ctor_name == "__newobj__":
        # A __reduce__ implementation can ask protocol 2 and newer to use
        # the more compact NEWOBJ opcode while protocols 0 and 1 keep
        # working normally.  To do so it returns a function *named*
        # __newobj__ whose first argument is a class.  That function
        # should be written like this (pickle cannot verify it):
        #
        # def __newobj__(cls, *args):
        #     return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 pickle a reference to __newobj__ itself;
        # protocol 2 and above pickle cls, the remaining args tuple and
        # the NEWOBJ code, which calls cls.__new__(cls, *args) at
        # unpickling time (see load_newobj).  When __reduce__ returns a
        # three-tuple, the third item (the state) is pickled whatever the
        # protocol and __setstate__ is called at unpickling time (see
        # load_build).
        #
        # No standard __newobj__ implementation exists; you have to
        # provide your own.  This is to enforce compatibility with
        # Python 2.2 (pickles written using protocol 0 or 1 in Python 2.3
        # should be unpicklable by Python 2.2).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]
        put(cls)
        put(args)
        emit(NEWOBJ)
    else:
        put(func)
        put(args)
        emit(REDUCE)

    if obj is not None:
        # Finding the object in the memo at this point means it is
        # recursive: discard what was just put on the stack and fetch the
        # object back from the memo instead.
        key = id(obj)
        if key in self.memo:
            emit(POP + self.get(self.memo[key][0]))
        else:
            self.memoize(obj)

    # Further special cases that also work with older protocols: the 4th
    # and 5th items of the reduce tuple are iterators providing list
    # items and dict items (as (key, value) tuples), or None.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is None:
        return

    if state_setter is None:
        put(state)
        emit(BUILD)
        return

    # A state_setter replaces load_build: push the setter and the
    # (obj, state) pair it expects onto the stack ...
    put(state_setter)
    put(obj)  # simple BINGET opcode as obj is already memoized.
    put(state)
    emit(TUPLE2)
    # ... trigger the state_setter(obj, state) call ...
    emit(REDUCE)
    # ... and drop its result: the setter modifies obj in place, so
    # whatever it returns is not wanted on the stack.
    emit(POP)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000733
Guido van Rossumbc64e222003-01-28 16:34:19 +0000734 # Methods below this point are dispatched through the dispatch table
735
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000736 dispatch = {}
737
def save_none(self, obj):
    """Write the NONE opcode; *obj* itself is not inspected."""
    emit = self.write
    emit(NONE)
Guido van Rossum13257902007-06-07 23:15:56 +0000740 dispatch[type(None)] = save_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000741
def save_bool(self, obj):
    """Write the opcode for the truth value of *obj*.

    Protocol 2 and newer use NEWTRUE/NEWFALSE; older protocols use
    TRUE/FALSE.
    """
    emit = self.write
    if self.proto >= 2:
        if obj:
            emit(NEWTRUE)
        else:
            emit(NEWFALSE)
    elif obj:
        emit(TRUE)
    else:
        emit(FALSE)
Guido van Rossum77f6a652002-04-03 22:41:51 +0000747 dispatch[bool] = save_bool
748
def save_long(self, obj):
    """Write the int *obj* using the most compact opcode available.

    Binary protocols use BININT1 / BININT2 for one- and two-byte unsigned
    values and BININT for anything fitting a signed 4-byte two's
    complement integer.  Other values use LONG1 / LONG4 on protocol 2 and
    newer, and the textual INT / LONG opcodes otherwise.
    """
    emit = self.write

    if self.bin:
        # Try the fixed-width binary encodings, smallest first.
        if 0 <= obj <= 0xff:
            emit(BININT1 + pack("<B", obj))
            return
        if 0 <= obj <= 0xffff:
            emit(BININT2 + pack("<H", obj))
            return
        if -0x80000000 <= obj <= 0x7fffffff:
            emit(BININT + pack("<i", obj))
            return

    if self.proto >= 2:
        payload = encode_long(obj)
        size = len(payload)
        # LONG1 carries a one-byte length, LONG4 a four-byte length.
        if size < 256:
            header = LONG1 + pack("<B", size)
        else:
            header = LONG4 + pack("<i", size)
        emit(header + payload)
        return

    digits = repr(obj).encode("ascii")
    if -0x80000000 <= obj <= 0x7fffffff:
        emit(INT + digits + b'\n')
    else:
        emit(LONG + digits + b'L\n')
Guido van Rossum13257902007-06-07 23:15:56 +0000778 dispatch[int] = save_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000779
def save_float(self, obj):
    """Write the float *obj*.

    Binary protocols write BINFLOAT followed by the value packed with the
    struct format '>d'; otherwise FLOAT is followed by repr(obj) in ASCII
    and a newline.
    """
    if not self.bin:
        self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
Guido van Rossum13257902007-06-07 23:15:56 +0000785 dispatch[float] = save_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000786
def save_bytes(self, obj):
    """Write the bytes object *obj*.

    Below protocol 3 the object goes through save_reduce(): as
    ``bytes()`` when empty, otherwise as ``codecs.encode(text, 'latin1')``
    where *text* is *obj* decoded as latin1.  From protocol 3 on, one of
    SHORT_BINBYTES, BINBYTES8 or BINBYTES is chosen from the length and
    the object is memoized afterwards.
    """
    if self.proto < 3:
        if obj:
            self.save_reduce(codecs.encode,
                             (str(obj, 'latin1'), 'latin1'), obj=obj)
        else:  # bytes object is empty
            self.save_reduce(bytes, (), obj=obj)
        return

    size = len(obj)
    if size <= 0xff:
        self.write(SHORT_BINBYTES + pack("<B", size) + obj)
    elif size > 0xffffffff and self.proto >= 4:
        self._write_large_bytes(BINBYTES8 + pack("<Q", size), obj)
    elif size >= self.framer._FRAME_SIZE_TARGET:
        # Payloads at least as large as the frame size target are handed
        # to _write_large_bytes() as separate header and data.
        self._write_large_bytes(BINBYTES + pack("<I", size), obj)
    else:
        self.write(BINBYTES + pack("<I", size) + obj)
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000805 dispatch[bytes] = save_bytes
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000806
def save_bytearray(self, obj):
    """Write the bytearray *obj*.

    Below protocol 5 the object goes through save_reduce(): as
    ``bytearray()`` when empty, otherwise as ``bytearray(bytes(obj))``.
    From protocol 5 on it is written with BYTEARRAY8 and an 8-byte
    little-endian length, then memoized so that a bytearray referenced
    more than once is written only once.
    """
    if self.proto < 5:
        if not obj:  # bytearray is empty
            self.save_reduce(bytearray, (), obj=obj)
        else:
            self.save_reduce(bytearray, (bytes(obj),), obj=obj)
        return
    n = len(obj)
    if n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
    else:
        self.write(BYTEARRAY8 + pack("<Q", n) + obj)
    # The protocol < 5 path memoizes through save_reduce(obj=obj) and
    # save_bytes() memoizes too; do the same here.  The membership check
    # covers direct callers such as save_picklebuffer(), which pass a
    # temporary that may already be in the memo; memoizing it a second
    # time is not allowed.
    if id(obj) not in self.memo:
        self.memoize(obj)
819 dispatch[bytearray] = save_bytearray
820
if _HAVE_PICKLE_BUFFER:
    def save_picklebuffer(self, obj):
        """Write the PickleBuffer *obj*, in-band or out-of-band.

        The buffer is written in-band unless a buffer callback is set
        and returns a false value for *obj*.  In-band data is copied
        with tobytes() and written by save_bytes() when the buffer is
        read-only, by save_bytearray() otherwise.  Out-of-band buffers
        are represented by NEXT_BUFFER, followed by READONLY_BUFFER
        when the buffer is read-only.

        Raises PicklingError when the protocol is below 5 or the buffer
        is not contiguous.
        """
        if self.proto < 5:
            raise PicklingError("PickleBuffer can only be pickled with "
                                "protocol >= 5")
        with obj.raw() as m:
            if not m.contiguous:
                raise PicklingError("PickleBuffer can not be pickled when "
                                    "pointing to a non-contiguous buffer")
            in_band = True
            if self._buffer_callback is not None:
                in_band = bool(self._buffer_callback(obj))
            if in_band:
                # Write data in-band
                # XXX The C implementation avoids a copy here
                if m.readonly:
                    self.save_bytes(m.tobytes())
                else:
                    self.save_bytearray(m.tobytes())
            else:
                # Write data out-of-band
                self.write(NEXT_BUFFER)
                if m.readonly:
                    self.write(READONLY_BUFFER)

    dispatch[PickleBuffer] = save_picklebuffer
Antoine Pitrou91f43802019-05-26 17:10:09 +0200847
def save_str(self, obj):
    """Write the str *obj* and memoize it.

    Binary protocols encode the text as UTF-8 (with 'surrogatepass') and
    pick SHORT_BINUNICODE, BINUNICODE8 or BINUNICODE from the encoded
    length.  The text protocol writes a UNICODE line in
    raw-unicode-escape form, after escaping the characters that codec
    leaves alone but that would break the line-oriented format.
    """
    if self.bin:
        encoded = obj.encode('utf-8', 'surrogatepass')
        n = len(encoded)
        if n <= 0xff and self.proto >= 4:
            self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
        else:
            self.write(BINUNICODE + pack("<I", n) + encoded)
    else:
        # Escape what raw-unicode-escape doesn't, working on a separate
        # name: rebinding obj here would make memoize() below register
        # the escaped temporary instead of the string being pickled, and
        # later references to the original would not be found in the memo.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\0", "\\u0000")
        escaped = escaped.replace("\n", "\\u000a")
        escaped = escaped.replace("\r", "\\u000d")
        escaped = escaped.replace("\x1a", "\\u001a")  # EOF on DOS
        self.write(UNICODE + escaped.encode('raw-unicode-escape') +
                   b'\n')
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000869 dispatch[str] = save_str
Tim Peters658cba62001-02-09 20:06:00 +0000870
def save_tuple(self, obj):
    """Write the tuple *obj*.

    An empty tuple becomes EMPTY_TUPLE (binary) or MARK + TUPLE.  On
    protocol 2 and newer, tuples of up to three elements use the
    size-specific opcode from _tuplesize2code.  Everything else is
    written as MARK, the elements, then TUPLE.  Non-empty tuples are
    memoized unless saving their elements already memoized them.
    """
    if not obj:  # tuple is empty
        self.write(EMPTY_TUPLE if self.bin else MARK + TUPLE)
        return

    size = len(obj)
    put = self.save
    memo = self.memo

    if size <= 3 and self.proto >= 2:
        for member in obj:
            put(member)
        # Subtle: same reasoning as in the long comment further down.
        if id(obj) in memo:
            fetch = self.get(memo[id(obj)][0])
            self.write(POP * size + fetch)
        else:
            self.write(_tuplesize2code[size])
            self.memoize(obj)
        return

    # Reached for a non-empty tuple on protocol 0 or 1, or for a tuple
    # with more than 3 elements on newer protocols.
    emit = self.write
    emit(MARK)
    for member in obj:
        put(member)

    if id(obj) not in memo:
        # No recursion.
        emit(TUPLE)
        self.memoize(obj)
        return

    # Subtle.  The tuple was not in the memo when save_tuple() was
    # entered, so saving its elements must have saved the tuple itself:
    # the tuple is recursive.  The right thing to do now is to throw away
    # everything put on the stack and simply GET the tuple, which is
    # already constructed.  The check could live inside the element loop,
    # but recursive tuples are a rare thing.
    fetch = self.get(memo[id(obj)][0])
    if self.bin:
        emit(POP_MARK + fetch)
    else:  # proto 0 -- POP_MARK not available
        emit(POP * (size + 1) + fetch)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000919
Guido van Rossum13257902007-06-07 23:15:56 +0000920 dispatch[tuple] = save_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000921
def save_list(self, obj):
    """Write the list *obj*: an empty list, memoized, then its items.

    The empty list is EMPTY_LIST on binary protocols and MARK + LIST
    otherwise; the items are appended through _batch_appends().
    """
    # proto 0 -- can't use EMPTY_LIST
    opener = EMPTY_LIST if self.bin else MARK + LIST
    self.write(opener)

    self.memoize(obj)
    self._batch_appends(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000930
Guido van Rossum13257902007-06-07 23:15:56 +0000931 dispatch[list] = save_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000932
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000933 _BATCHSIZE = 1000
934
935 def _batch_appends(self, items):
936 # Helper to batch up APPENDS sequences
937 save = self.save
938 write = self.write
939
940 if not self.bin:
941 for x in items:
942 save(x)
943 write(APPEND)
944 return
945
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +0300946 it = iter(items)
947 while True:
948 tmp = list(islice(it, self._BATCHSIZE))
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000949 n = len(tmp)
950 if n > 1:
951 write(MARK)
952 for x in tmp:
953 save(x)
954 write(APPENDS)
955 elif n:
956 save(tmp[0])
957 write(APPEND)
958 # else tmp is empty, and we're done
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +0300959 if n < self._BATCHSIZE:
960 return
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000961
def save_dict(self, obj):
    """Write the dict *obj*: an empty dict, memoized, then its items.

    The empty dict is EMPTY_DICT on binary protocols and MARK + DICT
    otherwise; the items are stored through _batch_setitems().
    """
    # proto 0 -- can't use EMPTY_DICT
    opener = EMPTY_DICT if self.bin else MARK + DICT
    self.write(opener)

    self.memoize(obj)
    self._batch_setitems(obj.items())
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000970
Guido van Rossum13257902007-06-07 23:15:56 +0000971 dispatch[dict] = save_dict
972 if PyStringMap is not None:
Jeremy Hylton2b9d0291998-05-27 22:38:22 +0000973 dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000974
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000975 def _batch_setitems(self, items):
976 # Helper to batch up SETITEMS sequences; proto >= 1 only
977 save = self.save
978 write = self.write
979
980 if not self.bin:
981 for k, v in items:
982 save(k)
983 save(v)
984 write(SETITEM)
985 return
986
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +0300987 it = iter(items)
988 while True:
989 tmp = list(islice(it, self._BATCHSIZE))
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000990 n = len(tmp)
991 if n > 1:
992 write(MARK)
993 for k, v in tmp:
994 save(k)
995 save(v)
996 write(SETITEMS)
997 elif n:
998 k, v = tmp[0]
999 save(k)
1000 save(v)
1001 write(SETITEM)
1002 # else tmp is empty, and we're done
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +03001003 if n < self._BATCHSIZE:
1004 return
Guido van Rossum25cb7df2003-01-31 18:53:21 +00001005
def save_set(self, obj):
    """Write the set *obj*.

    Below protocol 4 the set goes through save_reduce() as
    ``set(list(obj))``.  Otherwise EMPTY_SET is written, the set is
    memoized, and its items follow in MARK ... ADDITEMS groups of at most
    _BATCHSIZE items.
    """
    put = self.save
    emit = self.write

    if self.proto < 4:
        self.save_reduce(set, (list(obj),), obj=obj)
        return

    emit(EMPTY_SET)
    self.memoize(obj)

    source = iter(obj)
    while True:
        chunk = list(islice(source, self._BATCHSIZE))
        count = len(chunk)
        if count:
            emit(MARK)
            for member in chunk:
                put(member)
            emit(ADDITEMS)
        # A short (possibly empty) chunk means the iterator is exhausted.
        if count < self._BATCHSIZE:
            return
1028 dispatch[set] = save_set
1029
def save_frozenset(self, obj):
    """Write the frozenset *obj*.

    Below protocol 4 it goes through save_reduce() as
    ``frozenset(list(obj))``.  Otherwise the items are saved after a
    MARK and closed with FROZENSET, after which the object is memoized.
    """
    put = self.save
    emit = self.write

    if self.proto < 4:
        self.save_reduce(frozenset, (list(obj),), obj=obj)
        return

    emit(MARK)
    for member in obj:
        put(member)

    key = id(obj)
    if key not in self.memo:
        emit(FROZENSET)
        self.memoize(obj)
        return

    # Finding the object in the memo at this point means it is
    # recursive: discard what was just put on the stack and fetch the
    # object back from the memo instead.
    emit(POP_MARK + self.get(self.memo[key][0]))
1051 dispatch[frozenset] = save_frozenset
1052
def save_global(self, obj, name=None):
    """Write *obj* as a reference to a module-level (or nested) name.

    *name* defaults to obj.__qualname__, falling back to obj.__name__.
    The module is located with whichmodule() and imported, and the name
    is resolved again to check that it really leads back to *obj*.

    On protocol 2 and newer, a code registered for (module, name) in the
    extension registry is written as EXT1/EXT2/EXT4 and nothing else is
    done.  Otherwise the reference is written with STACK_GLOBAL
    (protocol 4+), as a getattr() reduction when the object is not a
    direct attribute of the module, or with GLOBAL; the object is then
    memoized.

    Raises PicklingError when the name cannot be resolved, resolves to a
    different object, or cannot be encoded as ASCII on a protocol below 3.
    """
    write = self.write
    memo = self.memo

    if name is None:
        name = getattr(obj, '__qualname__', None)
    if name is None:
        name = obj.__name__

    module_name = whichmodule(obj, name)
    try:
        __import__(module_name, level=0)
        module = sys.modules[module_name]
        obj2, parent = _getattribute(module, name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module_name, name)) from None
    else:
        if obj2 is not obj:
            raise PicklingError(
                "Can't pickle %r: it's not the same object as %s.%s" %
                (obj, module_name, name))

    if self.proto >= 2:
        code = _extension_registry.get((module_name, name))
        if code:
            assert code > 0
            # Use the narrowest EXT opcode that can hold the code.
            if code <= 0xff:
                write(EXT1 + pack("<B", code))
            elif code <= 0xffff:
                write(EXT2 + pack("<H", code))
            else:
                write(EXT4 + pack("<i", code))
            return
    lastname = name.rpartition('.')[2]
    if parent is module:
        name = lastname
    # Non-ASCII identifiers are supported only with protocols >= 3.
    if self.proto >= 4:
        self.save(module_name)
        self.save(name)
        write(STACK_GLOBAL)
    elif parent is not module:
        # Nested name on an older protocol: pickle it as
        # getattr(parent, lastname).
        self.save_reduce(getattr, (parent, lastname))
    elif self.proto >= 3:
        write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
              bytes(name, "utf-8") + b'\n')
    else:
        if self.fix_imports:
            # Translate to the names listed in _compat_pickle's reverse
            # mappings.
            r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
            r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
            if (module_name, name) in r_name_mapping:
                module_name, name = r_name_mapping[(module_name, name)]
            elif module_name in r_import_mapping:
                module_name = r_import_mapping[module_name]
        try:
            write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                  bytes(name, "ascii") + b'\n')
        except UnicodeEncodeError:
            # Report the dotted identifier that failed to encode; the
            # module object itself would show up as its repr here.
            raise PicklingError(
                "can't pickle global identifier '%s.%s' using "
                "pickle protocol %i" %
                (module_name, name, self.proto)) from None

    self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +00001118
def save_type(self, obj):
    """Write the class *obj*.

    The types of None, NotImplemented and Ellipsis are written through
    save_reduce() as ``type(<singleton>)``; every other class is
    delegated to save_global().
    """
    for singleton in (None, NotImplemented, ...):
        if obj is type(singleton):
            return self.save_reduce(type, (singleton,), obj=obj)
    return self.save_global(obj)
1127
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001128 dispatch[FunctionType] = save_global
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08001129 dispatch[type] = save_type
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001130
Guido van Rossuma48061a1995-01-10 00:31:14 +00001131
Guido van Rossum1be31752003-01-28 15:19:53 +00001132# Unpickling machinery
1133
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001134class _Unpickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +00001135
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00001136 def __init__(self, file, *, fix_imports=True,
Antoine Pitrou91f43802019-05-26 17:10:09 +02001137 encoding="ASCII", errors="strict", buffers=None):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001138 """This takes a binary file for reading a pickle data stream.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001139
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08001140 The protocol version of the pickle is detected automatically, so
1141 no proto argument is needed.
1142
1143 The argument *file* must have two methods, a read() method that
1144 takes an integer argument, and a readline() method that requires
1145 no arguments. Both methods should return bytes. Thus *file*
Martin Panter7462b6492015-11-02 03:37:02 +00001146 can be a binary file object opened for reading, an io.BytesIO
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08001147 object, or any other custom object that meets this interface.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001148
Guido van Rossumfeea0782007-10-10 18:00:50 +00001149 The file-like object must have two methods, a read() method
1150 that takes an integer argument, and a readline() method that
1151 requires no arguments. Both methods should return bytes.
1152 Thus file-like object can be a binary file object opened for
1153 reading, a BytesIO object, or any other custom object that
1154 meets this interface.
Guido van Rossumf4169812008-03-17 22:56:06 +00001155
Antoine Pitrou91f43802019-05-26 17:10:09 +02001156 If *buffers* is not None, it should be an iterable of buffer-enabled
1157 objects that is consumed each time the pickle stream references
1158 an out-of-band buffer view. Such buffers have been given in order
1159 to the *buffer_callback* of a Pickler object.
1160
1161 If *buffers* is None (the default), then the buffers are taken
1162 from the pickle stream, assuming they are serialized there.
1163 It is an error for *buffers* to be None if the pickle stream
1164 was produced with a non-None *buffer_callback*.
1165
1166 Other optional arguments are *fix_imports*, *encoding* and
Martin Panter46f50722016-05-26 05:35:26 +00001167 *errors*, which are used to control compatibility support for
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08001168 pickle stream generated by Python 2. If *fix_imports* is True,
1169 pickle will try to map the old Python 2 names to the new names
1170 used in Python 3. The *encoding* and *errors* tell pickle how
1171 to decode 8-bit string instances pickled by Python 2; these
1172 default to 'ASCII' and 'strict', respectively. *encoding* can be
1173 'bytes' to read theses 8-bit string instances as bytes objects.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001174 """
Antoine Pitrou91f43802019-05-26 17:10:09 +02001175 self._buffers = iter(buffers) if buffers is not None else None
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001176 self._file_readline = file.readline
1177 self._file_read = file.read
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001178 self.memo = {}
Guido van Rossumf4169812008-03-17 22:56:06 +00001179 self.encoding = encoding
1180 self.errors = errors
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00001181 self.proto = 0
1182 self.fix_imports = fix_imports
Guido van Rossuma48061a1995-01-10 00:31:14 +00001183
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001184 def load(self):
Guido van Rossum3a41c612003-01-28 15:10:22 +00001185 """Read a pickled object representation from the open file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001186
Guido van Rossum3a41c612003-01-28 15:10:22 +00001187 Return the reconstituted object hierarchy specified in the file.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001188 """
Alexandre Vassalotti3cfcab92008-12-27 09:30:39 +00001189 # Check whether Unpickler was initialized correctly. This is
1190 # only needed to mimic the behavior of _pickle.Unpickler.dump().
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001191 if not hasattr(self, "_file_read"):
Alexandre Vassalotti3cfcab92008-12-27 09:30:39 +00001192 raise UnpicklingError("Unpickler.__init__() was not called by "
1193 "%s.__init__()" % (self.__class__.__name__,))
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001194 self._unframer = _Unframer(self._file_read, self._file_readline)
1195 self.read = self._unframer.read
Antoine Pitrou91f43802019-05-26 17:10:09 +02001196 self.readinto = self._unframer.readinto
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001197 self.readline = self._unframer.readline
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02001198 self.metastack = []
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001199 self.stack = []
1200 self.append = self.stack.append
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001201 self.proto = 0
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001202 read = self.read
1203 dispatch = self.dispatch
1204 try:
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +03001205 while True:
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001206 key = read(1)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001207 if not key:
1208 raise EOFError
Guido van Rossum98297ee2007-11-06 21:34:58 +00001209 assert isinstance(key, bytes_types)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001210 dispatch[key[0]](self)
Guido van Rossumb940e112007-01-10 16:19:56 +00001211 except _Stop as stopinst:
Guido van Rossumff871742000-12-13 18:11:56 +00001212 return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001213
def pop_mark(self):
    """Return the current stack and reinstate the one saved on metastack.

    The returned list holds the items pushed after the last MARK
    instruction.
    """
    popped = self.stack
    restored = self.metastack.pop()
    self.stack = restored
    # Keep the cached bound method pointing at the stack now in use.
    self.append = restored.append
    return popped
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001220
def persistent_load(self, pid):
    """Resolve a persistent id; this implementation always raises
    UnpicklingError."""
    message = "unsupported persistent id encountered"
    raise UnpicklingError(message)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001224 dispatch = {}
1225
def load_proto(self):
    """Read the one-byte protocol number and store it in self.proto.

    Raises ValueError when the number is outside 0..HIGHEST_PROTOCOL.
    """
    version = self.read(1)[0]
    if version < 0 or version > HIGHEST_PROTOCOL:
        raise ValueError("unsupported pickle protocol: %d" % version)
    self.proto = version
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001231 dispatch[PROTO[0]] = load_proto
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001232
def load_frame(self):
    """Read an 8-byte little-endian frame size and hand it to the unframer.

    Raises ValueError if the size is larger than sys.maxsize.
    """
    header = self.read(8)
    (size,) = unpack('<Q', header)
    if size > sys.maxsize:
        raise ValueError("frame size > sys.maxsize: %d" % size)
    self._unframer.load_frame(size)
1238 dispatch[FRAME[0]] = load_frame
1239
def load_persid(self):
    """Read a newline-terminated ASCII persistent id and push the object
    that self.persistent_load() returns for it.

    Raises UnpicklingError if the id is not valid ASCII.
    """
    try:
        raw = self.readline()[:-1]
        pid = raw.decode("ascii")
    except UnicodeDecodeError:
        raise UnpicklingError(
            "persistent IDs in protocol 0 must be ASCII strings")
    resolved = self.persistent_load(pid)
    self.append(resolved)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001247 dispatch[PERSID[0]] = load_persid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001248
def load_binpersid(self):
    """Pop a persistent id off the stack and push the object that
    self.persistent_load() returns for it."""
    pid = self.stack.pop()
    resolved = self.persistent_load(pid)
    self.append(resolved)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001252 dispatch[BINPERSID[0]] = load_binpersid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001253
def load_none(self):
    """Push None on the stack."""
    value = None
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001256 dispatch[NONE[0]] = load_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001257
def load_false(self):
    """Push False on the stack."""
    value = False
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001260 dispatch[NEWFALSE[0]] = load_false
Guido van Rossum7d97d312003-01-28 04:25:27 +00001261
def load_true(self):
    """Push True on the stack."""
    value = True
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001264 dispatch[NEWTRUE[0]] = load_true
Guido van Rossum7d97d312003-01-28 04:25:27 +00001265
def load_int(self):
    """Read a text line and push it as a bool or an int.

    A line equal to FALSE[1:] or TRUE[1:] pushes False or True; any other
    line is parsed with int(line, 0).
    """
    line = self.readline()
    if line == FALSE[1:]:
        self.append(False)
    elif line == TRUE[1:]:
        self.append(True)
    else:
        self.append(int(line, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001275 dispatch[INT[0]] = load_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001276
def load_binint(self):
    """Push a signed 32-bit little-endian integer read from the stream."""
    (value,) = unpack('<i', self.read(4))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001279 dispatch[BININT[0]] = load_binint
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001280
def load_binint1(self):
    """Push the value of the next single byte as an integer."""
    byte = self.read(1)
    self.append(byte[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001283 dispatch[BININT1[0]] = load_binint1
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001284
def load_binint2(self):
    """Push an unsigned 16-bit little-endian integer read from the stream."""
    (value,) = unpack('<H', self.read(2))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001287 dispatch[BININT2[0]] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +00001288
def load_long(self):
    """Read a text line, drop one trailing 'L' if present, and push the
    integer parsed with int(text, 0)."""
    text = self.readline()[:-1]
    if text[-1:] == b'L':
        text = text[:-1]
    self.append(int(text, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001294 dispatch[LONG[0]] = load_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001295
def load_long1(self):
    """Read a one-byte length, then that many bytes, and push the integer
    that decode_long() produces from them."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(decode_long(payload))
1300 dispatch[LONG1[0]] = load_long1
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001301
def load_long4(self):
    """Read a signed 32-bit little-endian length, then that many bytes,
    and push the integer that decode_long() produces from them.

    Raises UnpicklingError if the length is negative.
    """
    (size,) = unpack('<i', self.read(4))
    if size < 0:
        # Corrupt or hostile pickle -- we never write one like this
        raise UnpicklingError("LONG pickle has negative byte count")
    payload = self.read(size)
    self.append(decode_long(payload))
1309 dispatch[LONG4[0]] = load_long4
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001310
def load_float(self):
    """Read a text line and push it converted with float()."""
    text = self.readline()[:-1]
    self.append(float(text))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001313 dispatch[FLOAT[0]] = load_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001314
def load_binfloat(self):
    """Push a big-endian 8-byte double read from the stream."""
    (value,) = unpack('>d', self.read(8))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001317 dispatch[BINFLOAT[0]] = load_binfloat
Guido van Rossumd3703791998-10-22 20:15:36 +00001318
def _decode_string(self, value):
    """Return *value* as bytes or as a decoded str.

    Used to allow strings from Python 2 to be decoded either as bytes or
    Unicode strings.  This should be used only with the STRING, BINSTRING
    and SHORT_BINSTRING opcodes.  When self.encoding is "bytes" the value
    is returned unchanged; otherwise it is decoded with self.encoding and
    self.errors.
    """
    if self.encoding != "bytes":
        return value.decode(self.encoding, self.errors)
    return value
1327
def load_string(self):
    """Read a quoted, escaped string line and push its decoded value.

    The line must start and end with the same quote character (' or ");
    otherwise UnpicklingError is raised.  The unquoted text is unescaped
    with codecs.escape_decode() and passed through self._decode_string().
    """
    line = self.readline()[:-1]
    quoted = len(line) >= 2 and line[0] == line[-1] and line[0] in b'"\''
    if not quoted:
        raise UnpicklingError("the STRING opcode argument must be quoted")
    # Strip outermost quotes
    inner = line[1:-1]
    unescaped = codecs.escape_decode(inner)[0]
    self.append(self._decode_string(unescaped))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001336 dispatch[STRING[0]] = load_string
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001337
def load_binstring(self):
    """Read a length-prefixed byte string and push it through
    self._decode_string().

    Raises UnpicklingError if the length is negative.
    """
    # Deprecated BINSTRING uses signed 32-bit length
    (size,) = unpack('<i', self.read(4))
    if size < 0:
        raise UnpicklingError("BINSTRING pickle has negative byte count")
    payload = self.read(size)
    self.append(self._decode_string(payload))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001345 dispatch[BINSTRING[0]] = load_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001346
def load_binbytes(self):
    """Read an unsigned 32-bit little-endian length, then push that many
    bytes as read from the stream.

    Raises UnpicklingError if the length exceeds maxsize.
    """
    (size,) = unpack('<I', self.read(4))
    if size > maxsize:
        raise UnpicklingError("BINBYTES exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(payload)
1353 dispatch[BINBYTES[0]] = load_binbytes
1354
def load_unicode(self):
    """Read a text line and push it decoded as 'raw-unicode-escape'."""
    raw = self.readline()[:-1]
    self.append(str(raw, 'raw-unicode-escape'))
1357 dispatch[UNICODE[0]] = load_unicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +00001358
def load_binunicode(self):
    """Read an unsigned 32-bit little-endian length, then push that many
    bytes decoded as UTF-8 with the 'surrogatepass' error handler.

    Raises UnpicklingError if the length exceeds maxsize.
    """
    (size,) = unpack('<I', self.read(4))
    if size > maxsize:
        raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001365 dispatch[BINUNICODE[0]] = load_binunicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +00001366
def load_binunicode8(self):
    """Read an unsigned 64-bit little-endian length, then push that many
    bytes decoded as UTF-8 with the 'surrogatepass' error handler.

    Raises UnpicklingError if the length exceeds maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
1373 dispatch[BINUNICODE8[0]] = load_binunicode8
1374
def load_binbytes8(self):
    """Read an unsigned 64-bit little-endian length, then push that many
    bytes as read from the stream.

    Raises UnpicklingError if the length exceeds maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(payload)
1381 dispatch[BINBYTES8[0]] = load_binbytes8
1382
def load_bytearray8(self):
    """Read an unsigned 64-bit little-endian length, fill a new bytearray
    of that size via self.readinto(), and push it.

    Raises UnpicklingError if the length exceeds maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    target = bytearray(size)
    self.readinto(target)
    self.append(target)
1391 dispatch[BYTEARRAY8[0]] = load_bytearray8
1392
def load_next_buffer(self):
    """Push the next object taken from the self._buffers iterator.

    Raises UnpicklingError if self._buffers is None or is exhausted.
    """
    buffers = self._buffers
    if buffers is None:
        raise UnpicklingError("pickle stream refers to out-of-band data "
                              "but no *buffers* argument was given")
    try:
        following = next(buffers)
    except StopIteration:
        raise UnpicklingError("not enough out-of-band buffers")
    self.append(following)
1402 dispatch[NEXT_BUFFER[0]] = load_next_buffer
1403
def load_readonly_buffer(self):
    """Replace the top-of-stack buffer with a read-only memoryview of it,
    unless a memoryview of it is already read-only."""
    stack = self.stack
    with memoryview(stack[-1]) as view:
        if view.readonly:
            return
        stack[-1] = view.toreadonly()
1409 dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1410
def load_short_binstring(self):
    """Read a one-byte length, then that many bytes, and push them through
    self._decode_string()."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(self._decode_string(payload))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001415 dispatch[SHORT_BINSTRING[0]] = load_short_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001416
def load_short_binbytes(self):
    """Read a one-byte length, then push that many bytes as read."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(payload)
Guido van Rossumf4169812008-03-17 22:56:06 +00001420 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1421
def load_short_binunicode(self):
    """Read a one-byte length, then push that many bytes decoded as UTF-8
    with the 'surrogatepass' error handler."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
1425 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1426
def load_tuple(self):
    """Push a tuple built from the items above the last MARK."""
    # pop_mark() rebinds self.append, so it has to run before
    # self.append is looked up.
    marked = self.pop_mark()
    result = tuple(marked)
    self.append(result)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001430 dispatch[TUPLE[0]] = load_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001431
def load_empty_tuple(self):
    """Push an empty tuple on the stack."""
    empty = ()
    self.append(empty)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001434 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001435
def load_tuple1(self):
    """Replace the top stack item with a 1-tuple containing it."""
    stack = self.stack
    stack[-1] = (stack[-1],)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001438 dispatch[TUPLE1[0]] = load_tuple1
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001439
def load_tuple2(self):
    """Replace the top two stack items with a 2-tuple of them."""
    stack = self.stack
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001442 dispatch[TUPLE2[0]] = load_tuple2
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001443
def load_tuple3(self):
    """Replace the top three stack items with a 3-tuple of them."""
    stack = self.stack
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001446 dispatch[TUPLE3[0]] = load_tuple3
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001447
def load_empty_list(self):
    """Push a new empty list on the stack."""
    fresh = []
    self.append(fresh)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001450 dispatch[EMPTY_LIST[0]] = load_empty_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001451
def load_empty_dictionary(self):
    """Push a new empty dict on the stack."""
    fresh = {}
    self.append(fresh)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001454 dispatch[EMPTY_DICT[0]] = load_empty_dictionary
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001455
def load_empty_set(self):
    """Push a new empty set on the stack."""
    fresh = set()
    self.append(fresh)
1458 dispatch[EMPTY_SET[0]] = load_empty_set
1459
def load_frozenset(self):
    """Push a frozenset built from the items above the last MARK."""
    # pop_mark() rebinds self.append, so it has to run before
    # self.append is looked up.
    marked = self.pop_mark()
    result = frozenset(marked)
    self.append(result)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001463 dispatch[FROZENSET[0]] = load_frozenset
1464
def load_list(self):
    """Push the list of items above the last MARK as a single list object."""
    # pop_mark() rebinds self.append, so it has to run before
    # self.append is looked up.
    marked = self.pop_mark()
    self.append(marked)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001468 dispatch[LIST[0]] = load_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001469
def load_dict(self):
    """Push a dict built from the items above the last MARK.

    The items alternate key, value, key, value, ...
    """
    marked = self.pop_mark()
    result = {}
    for pos in range(0, len(marked), 2):
        result[marked[pos]] = marked[pos + 1]
    self.append(result)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001475 dispatch[DICT[0]] = load_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001476
# INST and OBJ differ only in how they get a class object.  It's not
# only sensible to do the rest in a common routine, the two routines
# previously diverged and grew different bugs.
# klass is the class to instantiate, and args is the list of arguments
# for klass.__init__.
def _instantiate(self, klass, args):
    """Create an instance of *klass* and push it on the stack.

    klass(*args) is called when args is non-empty, when klass is not a
    type, or when klass has a __getinitargs__ attribute; otherwise the
    instance is created with klass.__new__(klass) and no constructor call.

    Raises TypeError, chained to the original error, if calling klass
    fails with TypeError.
    """
    if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
        try:
            value = klass(*args)
        except TypeError as err:
            # Chain to the original error so its traceback is kept,
            # instead of passing a traceback object as a second
            # exception argument (which would end up in exc.args).
            raise TypeError("in constructor for %s: %s" %
                            (klass.__name__, err)) from err
    else:
        value = klass.__new__(klass)
    self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001493
def load_inst(self):
    """Read ASCII module and class name lines, look the class up with
    self.find_class(), and instantiate it with the items above the last
    MARK as arguments."""
    module_name = self.readline()[:-1].decode("ascii")
    class_name = self.readline()[:-1].decode("ascii")
    klass = self.find_class(module_name, class_name)
    ctor_args = self.pop_mark()
    self._instantiate(klass, ctor_args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001499 dispatch[INST[0]] = load_inst
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001500
def load_obj(self):
    """Instantiate the class found just above the last MARK, using the
    items that follow it as arguments."""
    # Stack is ... markobject classobject arg1 arg2 ...
    marked = self.pop_mark()
    klass = marked.pop(0)
    self._instantiate(klass, marked)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001506 dispatch[OBJ[0]] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001507
def load_newobj(self):
    """Pop an argument tuple and a class, and push the object returned by
    cls.__new__(cls, *args)."""
    stack = self.stack
    new_args = stack.pop()
    cls = stack.pop()
    self.append(cls.__new__(cls, *new_args))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001513 dispatch[NEWOBJ[0]] = load_newobj
Guido van Rossum3a41c612003-01-28 15:10:22 +00001514
def load_newobj_ex(self):
    """Pop keyword arguments, positional arguments and a class, and push
    the object returned by cls.__new__(cls, *args, **kwargs)."""
    stack = self.stack
    new_kwargs = stack.pop()
    new_args = stack.pop()
    cls = stack.pop()
    self.append(cls.__new__(cls, *new_args, **new_kwargs))
1521 dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1522
def load_global(self):
    """Read UTF-8 module and attribute name lines and push whatever
    self.find_class() returns for them."""
    module_name = self.readline()[:-1].decode("utf-8")
    attr_name = self.readline()[:-1].decode("utf-8")
    found = self.find_class(module_name, attr_name)
    self.append(found)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001528 dispatch[GLOBAL[0]] = load_global
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001529
def load_stack_global(self):
    """Pop an attribute name and a module name off the stack and push
    whatever self.find_class() returns for them.

    Raises UnpicklingError unless both are exactly of type str.
    """
    stack = self.stack
    name = stack.pop()
    module = stack.pop()
    if not (type(name) is str and type(module) is str):
        raise UnpicklingError("STACK_GLOBAL requires str")
    self.append(self.find_class(module, name))
1536 dispatch[STACK_GLOBAL[0]] = load_stack_global
1537
def load_ext1(self):
    """Read a one-byte extension code and pass it to self.get_extension()."""
    byte = self.read(1)
    self.get_extension(byte[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001541 dispatch[EXT1[0]] = load_ext1
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001542
def load_ext2(self):
    """Read an unsigned 16-bit little-endian extension code and pass it to
    self.get_extension()."""
    (ext_code,) = unpack('<H', self.read(2))
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001546 dispatch[EXT2[0]] = load_ext2
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001547
def load_ext4(self):
    """Read a signed 32-bit little-endian extension code and pass it to
    self.get_extension()."""
    (ext_code,) = unpack('<i', self.read(4))
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001551 dispatch[EXT4[0]] = load_ext4
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001552
def get_extension(self, code):
    """Push the object registered for extension *code*.

    A value already present in _extension_cache is pushed directly.
    Otherwise the (module, name) key from _inverted_registry is resolved
    with self.find_class(), stored in the cache, and pushed.

    Raises UnpicklingError for an unregistered code <= 0 and ValueError
    for any other unregistered code.
    """
    missing = object()  # sentinel: a cached value may be any object
    cached = _extension_cache.get(code, missing)
    if cached is not missing:
        self.append(cached)
        return
    key = _inverted_registry.get(code)
    if not key:
        if code <= 0:  # note that 0 is forbidden
            # Corrupt or hostile pickle.
            raise UnpicklingError("EXT specifies code <= 0")
        raise ValueError("unregistered extension code %d" % code)
    resolved = self.find_class(*key)
    _extension_cache[code] = resolved
    self.append(resolved)
1568
def find_class(self, module, name):
    """Import *module* and return its attribute *name*.

    Emits the 'pickle.find_class' audit event first.  For protocols
    below 3 with fix_imports set, the names are first translated through
    _compat_pickle.NAME_MAPPING / IMPORT_MAPPING.  For protocol 4 and
    above the lookup goes through _getattribute(); otherwise plain
    getattr() is used.
    """
    # Subclasses may override this.
    sys.audit('pickle.find_class', module, name)
    if self.proto < 3 and self.fix_imports:
        qualified = (module, name)
        if qualified in _compat_pickle.NAME_MAPPING:
            module, name = _compat_pickle.NAME_MAPPING[qualified]
        elif module in _compat_pickle.IMPORT_MAPPING:
            module = _compat_pickle.IMPORT_MAPPING[module]
    __import__(module, level=0)
    if self.proto >= 4:
        return _getattribute(sys.modules[module], name)[0]
    return getattr(sys.modules[module], name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001582
def load_reduce(self):
    """Pop an argument tuple, call the callable below it with those
    arguments, and replace the callable with the result."""
    frames = self.stack
    call_args = frames.pop()
    callee = frames[-1]
    frames[-1] = callee(*call_args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001588 dispatch[REDUCE[0]] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001589
def load_pop(self):
    """Discard the top stack item, or, when the current stack is empty,
    discard the last MARK via self.pop_mark()."""
    if not self.stack:
        self.pop_mark()
        return
    del self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001595 dispatch[POP[0]] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001596
def load_pop_mark(self):
    """Discard the last MARK and everything pushed after it."""
    _discarded = self.pop_mark()
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001599 dispatch[POP_MARK[0]] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001600
def load_dup(self):
    """Push a second reference to the top stack item."""
    top = self.stack[-1]
    self.append(top)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001603 dispatch[DUP[0]] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001604
def load_get(self):
    """Read a decimal memo index from a text line and push the memo entry.

    Raises UnpicklingError if the memo has no entry at that index.
    """
    index = int(self.readline()[:-1])
    try:
        remembered = self.memo[index]
    except KeyError:
        msg = f'Memo value not found at index {index}'
        raise UnpicklingError(msg) from None
    self.append(remembered)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001612 dispatch[GET[0]] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001613
def load_binget(self):
    """Read a one-byte memo index and push the memo entry.

    Raises UnpicklingError if the memo has no entry at that index.
    """
    index = self.read(1)[0]
    try:
        remembered = self.memo[index]
    except KeyError:
        msg = f'Memo value not found at index {index}'
        raise UnpicklingError(msg) from None
    self.append(remembered)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001621 dispatch[BINGET[0]] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001622
def load_long_binget(self):
    """Read an unsigned 32-bit little-endian memo index and push the memo
    entry.

    Raises UnpicklingError if the memo has no entry at that index.
    """
    (index,) = unpack('<I', self.read(4))
    try:
        remembered = self.memo[index]
    except KeyError:
        msg = f'Memo value not found at index {index}'
        raise UnpicklingError(msg) from None
    self.append(remembered)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001630 dispatch[LONG_BINGET[0]] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001631
def load_put(self):
    """Read a decimal memo index from a text line and store the top stack
    item in the memo under it.

    Raises ValueError if the index is negative.
    """
    index = int(self.readline()[:-1])
    if index < 0:
        raise ValueError("negative PUT argument")
    top = self.stack[-1]
    self.memo[index] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001637 dispatch[PUT[0]] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001638
def load_binput(self):
    """Read a one-byte memo index and store the top stack item in the memo
    under it.

    The index comes from indexing a single byte, so it is always in the
    range 0..255 and needs no negativity check.
    """
    index = self.read(1)[0]
    self.memo[index] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001644 dispatch[BINPUT[0]] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001645
def load_long_binput(self):
    """Read an unsigned 32-bit little-endian memo index and store the top
    stack item in the memo under it.

    Raises ValueError if the index is larger than maxsize.
    """
    (index,) = unpack('<I', self.read(4))
    if index > maxsize:
        raise ValueError("negative LONG_BINPUT argument")
    top = self.stack[-1]
    self.memo[index] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001651 dispatch[LONG_BINPUT[0]] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001652
def load_memoize(self):
    """Store the top stack item in the memo, keyed by the memo's current
    size."""
    top = self.stack[-1]
    table = self.memo
    table[len(table)] = top
1656 dispatch[MEMOIZE[0]] = load_memoize
1657
def load_append(self):
    """Pop the top stack item and append it to the object below it."""
    stack = self.stack
    item = stack.pop()
    container = stack[-1]
    container.append(item)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001663 dispatch[APPEND[0]] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001664
def load_appends(self):
    """Add the items above the last MARK to the object below the mark.

    The object's extend() method is used when it has one; otherwise each
    item is passed to its append() method in order.
    """
    pending = self.pop_mark()
    container = self.stack[-1]
    try:
        bulk_add = container.extend
    except AttributeError:
        pass
    else:
        bulk_add(pending)
        return
    # Even if the PEP 307 requires extend() and append() methods,
    # fall back on append() if the object has no extend() method
    # for backward compatibility.
    add_one = container.append
    for element in pending:
        add_one(element)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001681 dispatch[APPENDS[0]] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001682
def load_setitem(self):
    """Pop a value and a key, and store them in the mapping below them."""
    stack = self.stack
    value = stack.pop()
    key = stack.pop()
    mapping = stack[-1]
    mapping[key] = value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001689 dispatch[SETITEM[0]] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001690
def load_setitems(self):
    """Store the key/value pairs above the last MARK in the mapping below
    the mark.

    The items alternate key, value, key, value, ...
    """
    pairs = self.pop_mark()
    mapping = self.stack[-1]
    for pos in range(0, len(pairs), 2):
        mapping[pairs[pos]] = pairs[pos + 1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001696 dispatch[SETITEMS[0]] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001697
def load_additems(self):
    """Add the items above the last MARK to the object below the mark.

    A set instance is updated in one call; any other object receives the
    items one by one through its add() method.
    """
    pending = self.pop_mark()
    target = self.stack[-1]
    if isinstance(target, set):
        target.update(pending)
        return
    add_one = target.add
    for element in pending:
        add_one(element)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001707 dispatch[ADDITEMS[0]] = load_additems
1708
def load_build(self):
    """Pop a state object and apply it to the instance below it.

    If the instance has a __setstate__ attribute that is not None, it is
    called with the state and nothing else happens.  Otherwise a 2-tuple
    state is split into (state, slotstate); a truthy state's items are
    stored in the instance __dict__ (exact-str keys are interned) and a
    truthy slotstate's items are applied with setattr().
    """
    stack = self.stack
    state = stack.pop()
    target = stack[-1]
    setter = getattr(target, "__setstate__", None)
    if setter is not None:
        setter(state)
        return
    slot_state = None
    if isinstance(state, tuple) and len(state) == 2:
        state, slot_state = state
    if state:
        attrs = target.__dict__
        intern_str = sys.intern
        for key, value in state.items():
            if type(key) is str:
                key = intern_str(key)
            attrs[key] = value
    if slot_state:
        for key, value in slot_state.items():
            setattr(target, key, value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001731 dispatch[BUILD[0]] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001732
def load_mark(self):
    """Save the current stack on the metastack and start a new empty
    stack, rebinding self.append to it."""
    fresh = []
    self.metastack.append(self.stack)
    self.stack = fresh
    self.append = fresh.append
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001737 dispatch[MARK[0]] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001738
def load_stop(self):
    """Pop the top stack item and raise _Stop carrying it."""
    result = self.stack.pop()
    stop_signal = _Stop(result)
    raise stop_signal
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001742 dispatch[STOP[0]] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001743
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001744
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001745# Shorthands
1746
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Pickle *obj* to *file* with a _Pickler built from the given options."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001750
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Pickle *obj* into an in-memory buffer with a _Pickler and return the
    buffer's contents."""
    sink = io.BytesIO()
    pickler = _Pickler(sink, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    data = sink.getvalue()
    assert isinstance(data, bytes_types)
    return data
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001758
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Return the result of load() on a _Unpickler built over *file* with
    the given options."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001763
def _loads(s, /, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Return the result of load() on a _Unpickler reading from an
    in-memory copy of *s*.

    Raises TypeError if *s* is a str.
    """
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    source = io.BytesIO(s)
    unpickler = _Unpickler(source, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001771
# Use the faster _pickle if possible.  When the import succeeds, the names
# below are bound to the _pickle versions and become this module's public
# API.
try:
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # _pickle is not importable: expose the pure-Python classes and the
    # shorthand functions of this module under the public names instead.
    # The exception classes are not rebound here (presumably they are
    # already defined earlier in this module -- confirm).
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001788
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001789# Doctest
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001790def _test():
1791 import doctest
1792 return doctest.testmod()
1793
if __name__ == "__main__":
    # Command-line entry point: either run the doctest self-test (-t/--test)
    # or unpickle each file named on the command line and pretty-print it.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    elif not args.pickle_file:
        # Nothing to display and no self-test requested.
        parser.print_help()
    else:
        import pprint
        # argparse.FileType may hand back the standard input stream for
        # '-'; that stream is not ours to close.
        stdin_buffer = getattr(sys.stdin, 'buffer', None)
        for f in args.pickle_file:
            try:
                obj = load(f)
            finally:
                # argparse.FileType opened the file for us; close it as soon
                # as it has been read instead of leaking the handle.
                if f is not stdin_buffer and f is not sys.stdin:
                    f.close()
            pprint.pprint(obj)