blob: a67ac7dd8b686ace0bf4e9510b7683eb40e87c4b [file] [log] [blame]
"""Create portable serialized representations of Python objects.

See module copyreg for a mechanism for registering custom picklers.
See module pickletools source for extensive comments.

Classes:

    Pickler
    Unpickler

Functions:

    dump(object, file)
    dumps(object) -> string
    load(file) -> object
    loads(string) -> object

Misc variables:

    __version__
    format_version
    compatible_formats

"""
25
Victor Stinner7fa767e2014-03-20 09:16:38 +010026from types import FunctionType
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +000027from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +030029from itertools import islice
Serhiy Storchaka0d554d72015-10-10 22:42:18 +030030from functools import partial
Guido van Rossumd3703791998-10-22 20:15:36 +000031import sys
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +030032from sys import maxsize
33from struct import pack, unpack
Skip Montanaro23bafc62001-02-18 03:10:09 +000034import re
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000035import io
Walter Dörwald42748a82007-06-12 16:40:17 +000036import codecs
Antoine Pitroud9dfaa92009-06-04 20:32:06 +000037import _compat_pickle
Guido van Rossuma48061a1995-01-10 00:31:14 +000038
Antoine Pitrou91f43802019-05-26 17:10:09 +020039from _pickle import PickleBuffer
40
# Public names exported by ``from pickle import *``.  The opcode constants
# are appended further down, once they have all been defined.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]
Skip Montanaro352674d2001-02-07 23:14:30 +000043
# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000066
class PickleError(Exception):
    """A common base class for the other pickling exceptions."""
    pass
70
class PicklingError(PickleError):
    """This exception is raised when an unpicklable object is passed to the
    dump() method.

    """
    pass
77
class UnpicklingError(PickleError):
    """This exception is raised when there is a problem unpickling an object,
    such as a security violation.

    Note that other exceptions may also be raised during unpickling, including
    (but not necessarily limited to) AttributeError, EOFError, ImportError,
    and IndexError.

    """
    pass
Guido van Rossum7849da81995-03-09 14:08:35 +000088
# An instance of _Stop is raised by Unpickler.load_stop() in response to
# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Control-flow exception that carries the finished object in ``value``."""

    def __init__(self, value):
        # The payload is kept on the instance rather than in ``args``.
        self.value = value
94
# Jython has PyStringMap; it's a dict subclass with string keys
try:
    from org.python.core import PyStringMap
except ImportError:
    # The import is unavailable on this interpreter: use None as the marker.
    PyStringMap = None
100
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   # push persistent object; id is taken from stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   # push string; counted binary string argument < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped'd argument
BINUNICODE     = b'X'   # push Unicode string; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   # push item from memo on stack; index is 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT    = b'r'   # store stack top in memo; index is 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Indexed by tuple length (0..3): the opcode that builds a tuple of that size.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   # push bytes; counted binary string argument < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly
190
# Export every module-level name defined so far that looks like an opcode
# constant: an upper-case letter followed by upper-case letters, digits or
# underscores.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
192
193
class _Framer:
    """Buffer pickler output into frames before handing it to *file_write*.

    While framing is active, writes accumulate in an in-memory io.BytesIO
    (``current_frame``).  When framing is inactive (``current_frame`` is
    None), writes go straight to the underlying ``file_write`` callable.
    """

    # Frames smaller than this many bytes are emitted without a FRAME header.
    _FRAME_SIZE_MIN = 4
    # A frame is flushed once it has grown to at least this many bytes.
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        """Remember the ``write`` callable of the destination; framing is off."""
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        """Begin buffering subsequent writes into a fresh frame."""
        self.current_frame = io.BytesIO()

    def end_framing(self):
        """Flush any buffered frame content and switch framing off."""
        if self.current_frame and self.current_frame.tell() > 0:
            self.commit_frame(force=True)
            self.current_frame = None

    def commit_frame(self, force=False):
        """Flush the current frame if it reached the target size, or if *force*.

        Does nothing when framing is inactive.
        """
        if self.current_frame:
            f = self.current_frame
            if f.tell() >= self._FRAME_SIZE_TARGET or force:
                data = f.getbuffer()
                write = self.file_write
                if len(data) >= self._FRAME_SIZE_MIN:
                    # Issue a single call to the write method of the underlying
                    # file object for the frame opcode with the size of the
                    # frame. The concatenation is expected to be less expensive
                    # than issuing an additional call to write.
                    write(FRAME + pack("<Q", len(data)))

                # Issue a separate call to write to append the frame
                # contents without concatenation to the above to avoid a
                # memory copy.
                write(data)

                # Start the new frame with a new io.BytesIO instance so that
                # the file object can have delayed access to the previous frame
                # contents via an unreleased memoryview of the previous
                # io.BytesIO instance.
                self.current_frame = io.BytesIO()

    def write(self, data):
        """Write *data* to the current frame, or to the file if not framing.

        Returns whatever the underlying write call returns.
        """
        if self.current_frame:
            return self.current_frame.write(data)
        else:
            return self.file_write(data)

    def write_large_bytes(self, header, payload):
        """Write *header* then *payload* directly to the file, outside a frame.

        Any frame currently being buffered is flushed first.
        """
        write = self.file_write
        if self.current_frame:
            # Terminate the current frame and flush it to the file.
            self.commit_frame(force=True)

        # Perform direct write of the header and payload of the large binary
        # object. Be careful not to concatenate the header and the payload
        # prior to calling 'write' as we do not want to allocate a large
        # temporary bytes object.
        # We intentionally do not insert a protocol 4 frame opcode to make
        # it possible to optimize file.read calls in the loader.
        write(header)
        write(payload)
255
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100256
class _Unframer:
    """Serve reads from the current frame, falling back to the file.

    ``current_frame`` is an io.BytesIO holding the frame being consumed, or
    None when no frame is active.  Once a frame is exhausted, reads fall
    through to the ``file_read`` / ``file_readline`` callables.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        """Store the file's read callables.  *file_tell* is accepted but unused here."""
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def readinto(self, buf):
        """Fill *buf* and return the number of bytes stored.

        Raises UnpicklingError if the frame ends part-way through the read.
        """
        if self.current_frame:
            n = self.current_frame.readinto(buf)
            if n == 0 and len(buf) != 0:
                # Frame is exhausted: drop it and read from the file instead.
                self.current_frame = None
                n = len(buf)
                buf[:] = self.file_read(n)
                return n
            if n < len(buf):
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return n
        else:
            n = len(buf)
            buf[:] = self.file_read(n)
            return n

    def read(self, n):
        """Return *n* bytes from the current frame, or from the file.

        Raises UnpicklingError if the frame ends part-way through the read.
        """
        if self.current_frame:
            data = self.current_frame.read(n)
            if not data and n != 0:
                # Frame is exhausted: drop it and read from the file instead.
                self.current_frame = None
                return self.file_read(n)
            if len(data) < n:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_read(n)

    def readline(self):
        """Return one newline-terminated line from the frame, or from the file.

        Raises UnpicklingError if the frame ends before a newline is found.
        """
        if self.current_frame:
            data = self.current_frame.readline()
            if not data:
                # Frame is exhausted: drop it and read from the file instead.
                self.current_frame = None
                return self.file_readline()
            if data[-1] != b'\n'[0]:
                raise UnpicklingError(
                    "pickle exhausted before end of frame")
            return data
        else:
            return self.file_readline()

    def load_frame(self, frame_size):
        """Read *frame_size* bytes from the file and make them the current frame.

        Raises UnpicklingError if the previous frame still has unread data.
        """
        if self.current_frame and self.current_frame.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100312
313
314# Tools used for pickling.
315
def _getattribute(obj, name):
    """Resolve the dotted path *name* starting from *obj*.

    Returns a 2-tuple ``(attribute, parent)`` where *parent* is the object
    on which the last component of the path was looked up.

    Raises AttributeError if a component is ``'<locals>'`` or if any
    component cannot be found.
    """
    for subpath in name.split('.'):
        if subpath == '<locals>':
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, obj))
        try:
            parent = obj
            obj = getattr(obj, subpath)
        except AttributeError:
            # "from None" hides the low-level AttributeError from the traceback.
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, obj)) from None
    return obj, parent
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100328
def whichmodule(obj, name):
    """Find the module an object belongs to.

    Returns ``obj.__module__`` when that attribute exists and is not None.
    Otherwise searches the loaded modules for one where the dotted path
    *name* resolves to *obj*, and returns ``'__main__'`` if none matches.
    """
    module_name = getattr(obj, '__module__', None)
    if module_name is not None:
        return module_name
    # Protect the iteration by using a list copy of sys.modules against dynamic
    # modules that trigger imports of other modules upon calls to getattr.
    for module_name, module in list(sys.modules.items()):
        if module_name == '__main__' or module is None:
            continue
        try:
            if _getattribute(module, name)[0] is obj:
                return module_name
        except AttributeError:
            pass
    return '__main__'
345
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0 is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    # One byte more than bit_length() // 8 always leaves room for the sign bit.
    nbytes = (x.bit_length() >> 3) + 1
    result = x.to_bytes(nbytes, byteorder='little', signed=True)
    if x < 0 and nbytes > 1:
        # Drop a trailing 0xff when the byte before it already carries the
        # sign bit, so the encoding stays minimal.
        if result[-1] == 0xff and (result[-2] & 0x80) != 0:
            result = result[:-1]
    return result
375
def decode_long(data):
    r"""Decode a long from a two's complement little-endian binary string.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, byteorder='little', signed=True)
395
Skip Montanaro23bafc62001-02-18 03:10:09 +0000396
Guido van Rossum1be31752003-01-28 15:19:53 +0000397# Pickling machinery
398
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000399class _Pickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +0000400
def __init__(self, file, protocol=None, *, fix_imports=True,
             buffer_callback=None):
    """This takes a binary file for writing a pickle data stream.

    The optional *protocol* argument tells the pickler to use the
    given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.  The
    default protocol is 4. It was introduced in Python 3.4, it is
    incompatible with previous versions.

    Specifying a negative protocol version selects the highest
    protocol version supported.  The higher the protocol used, the
    more recent the version of Python needed to read the pickle
    produced.

    The *file* argument must have a write() method that accepts a
    single bytes argument. It can thus be a file object opened for
    binary writing, an io.BytesIO instance, or any other custom
    object that meets this interface.

    If *fix_imports* is True and *protocol* is less than 3, pickle
    will try to map the new Python 3 names to the old module names
    used in Python 2, so that the pickle data stream is readable
    with Python 2.

    If *buffer_callback* is None (the default), buffer views are
    serialized into *file* as part of the pickle stream.

    If *buffer_callback* is not None, then it can be called any number
    of times with a buffer view.  If the callback returns a false value
    (such as None), the given buffer is out-of-band; otherwise the
    buffer is serialized in-band, i.e. inside the pickle stream.

    It is an error if *buffer_callback* is not None and *protocol*
    is None or smaller than 5.

    Raises ValueError for a protocol above HIGHEST_PROTOCOL or for a
    *buffer_callback* combined with a protocol below 5, and TypeError
    when *file* has no ``write`` attribute.
    """
    if protocol is None:
        protocol = DEFAULT_PROTOCOL
    if protocol < 0:
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
    if buffer_callback is not None and protocol < 5:
        raise ValueError("buffer_callback needs protocol >= 5")
    self._buffer_callback = buffer_callback
    try:
        self._file_write = file.write
    except AttributeError:
        raise TypeError("file must have a 'write' attribute")
    # All output goes through the framer, which buffers it into frames
    # whenever framing has been started.
    self.framer = _Framer(self._file_write)
    self.write = self.framer.write
    self._write_large_bytes = self.framer.write_large_bytes
    self.memo = {}
    self.proto = int(protocol)
    self.bin = protocol >= 1
    self.fast = 0
    self.fix_imports = fix_imports and protocol < 3
Guido van Rossuma48061a1995-01-10 00:31:14 +0000457
def clear_memo(self):
    """Clears the pickler's "memo".

    The memo is the data structure that remembers which objects the
    pickler has already seen, so that shared or recursive objects
    are pickled by reference and not by value.  This method is
    useful when re-using picklers.
    """
    self.memo.clear()
467
def dump(self, obj):
    """Write a pickled representation of obj to the open file.

    Raises PicklingError if the instance was never initialised by
    ``__init__`` (detected by the missing ``_file_write`` attribute).
    """
    # Check whether Pickler was initialized correctly. This is
    # only needed to mimic the behavior of _pickle.Pickler.dump().
    if not hasattr(self, "_file_write"):
        raise PicklingError("Pickler.__init__() was not called by "
                            "%s.__init__()" % (self.__class__.__name__,))
    if self.proto >= 2:
        # Protocol header: PROTO opcode followed by the 1-byte version.
        self.write(PROTO + pack("<B", self.proto))
    if self.proto >= 4:
        self.framer.start_framing()
    self.save(obj)
    self.write(STOP)
    self.framer.end_framing()
Guido van Rossuma48061a1995-01-10 00:31:14 +0000482
def memoize(self, obj):
    """Store an object in the memo."""

    # The Pickler memo is a dictionary mapping object ids to 2-tuples
    # that contain the Unpickler memo key and the object being memoized.
    # The memo key is written to the pickle and will become
    # the key in the Unpickler's memo.  The object is stored in the
    # Pickler memo so that transient objects are kept alive during
    # pickling.

    # The use of the Unpickler memo length as the memo key is just a
    # convention.  The only requirement is that the memo values be unique.
    # But there appears no advantage to any other scheme, and this
    # scheme allows the Unpickler memo to be implemented as a plain (but
    # growable) array, indexed by memo key.
    if self.fast:
        # Fast mode: nothing is written and nothing is recorded.
        return
    assert id(obj) not in self.memo
    idx = len(self.memo)
    self.write(self.put(idx))
    self.memo[id(obj)] = idx, obj
Jeremy Hylton3422c992003-01-24 19:29:52 +0000504
# Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
def put(self, idx):
    """Return the bytes that store the stack top in the memo under *idx*.

    Protocol 4 and above use the argument-less MEMOIZE opcode.  Other
    binary protocols use BINPUT (1-byte index) below 256 and LONG_BINPUT
    (4-byte little-endian index) otherwise.  Protocol 0 uses the text
    form: PUT, the decimal index, and a newline.
    """
    if self.proto >= 4:
        return MEMOIZE
    elif self.bin:
        if idx < 256:
            return BINPUT + pack("<B", idx)
        else:
            return LONG_BINPUT + pack("<I", idx)
    else:
        return PUT + repr(idx).encode("ascii") + b'\n'
Guido van Rossuma48061a1995-01-10 00:31:14 +0000516
# Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
def get(self, i):
    """Return the bytes that push memo entry *i* onto the stack.

    Binary protocols use BINGET (1-byte index) below 256 and LONG_BINGET
    (4-byte little-endian index) otherwise.  Protocol 0 uses the text
    form: GET, the decimal index, and a newline.
    """
    if self.bin:
        if i < 256:
            return BINGET + pack("<B", i)
        else:
            return LONG_BINGET + pack("<I", i)

    return GET + repr(i).encode("ascii") + b'\n'
Tim Peters2344fae2001-01-15 00:50:52 +0000526
def save(self, obj, save_persistent_id=True):
    """Write the pickle representation of *obj*.

    The first strategy that applies wins, in this order: persistent id
    (unless *save_persistent_id* is false), a memo hit, a
    ``reducer_override`` hook on the pickler, ``self.dispatch`` keyed by
    type, the pickler's ``dispatch_table`` (or copyreg's), classes with a
    custom metaclass, and finally ``__reduce_ex__`` / ``__reduce__``.

    Raises PicklingError if *obj* has no reduction method, or if the
    reduction result is neither a string nor a tuple of two to six items.
    """
    self.framer.commit_frame()

    # Check for persistent id (defined by a subclass)
    pid = self.persistent_id(obj)
    if pid is not None and save_persistent_id:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x is not None:
        self.write(self.get(x[0]))
        return

    rv = NotImplemented
    reduce = getattr(self, "reducer_override", None)
    if reduce is not None:
        rv = reduce(obj)

    if rv is NotImplemented:
        # Check the type dispatch table
        t = type(obj)
        f = self.dispatch.get(t)
        if f is not None:
            f(self, obj)  # Call unbound method with explicit self
            return

        # Check private dispatch table if any, or else
        # copyreg.dispatch_table
        reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
        if reduce is not None:
            rv = reduce(obj)
        else:
            # Check for a class with a custom metaclass; treat as regular
            # class
            if issubclass(t, type):
                self.save_global(obj)
                return

            # Check for a __reduce_ex__ method, fall back to __reduce__
            reduce = getattr(obj, "__reduce_ex__", None)
            if reduce is not None:
                rv = reduce(self.proto)
            else:
                reduce = getattr(obj, "__reduce__", None)
                if reduce is not None:
                    rv = reduce()
                else:
                    raise PicklingError("Can't pickle %r object: %r" %
                                        (t.__name__, obj))

    # Check for string returned by reduce(), meaning "save as global"
    if isinstance(rv, str):
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if not isinstance(rv, tuple):
        raise PicklingError("%s must return string or tuple" % reduce)

    # Assert that it returned an appropriately sized tuple
    l = len(rv)
    if not (2 <= l <= 6):
        raise PicklingError("Tuple returned by %s must have "
                            "two to six elements" % reduce)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000596
def persistent_id(self, obj):
    """Return the persistent id for *obj*, or None to pickle it normally.

    This default implementation always returns None.
    """
    # This exists so a subclass can override it
    return None
600
def save_pers(self, pid):
    """Write a reference to the persistent id *pid*.

    Binary protocols pickle *pid* itself (with persistent-id lookup
    disabled for that nested save) and then emit BINPERSID.  Protocol 0
    writes PERSID followed by ``str(pid)`` encoded as ASCII and a newline.

    Raises PicklingError under protocol 0 if ``str(pid)`` is not ASCII.
    """
    # Save a persistent id reference
    if self.bin:
        self.save(pid, save_persistent_id=False)
        self.write(BINPERSID)
    else:
        try:
            self.write(PERSID + str(pid).encode("ascii") + b'\n')
        except UnicodeEncodeError:
            raise PicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000612
def save_reduce(self, func, args, state=None, listitems=None,
                dictitems=None, state_setter=None, obj=None):
    """Write the opcodes that rebuild an object from a reduce value.

    func -- callable used to recreate the object.
    args -- tuple of arguments for func.
    state -- optional state; written with BUILD, or handed to
        state_setter when one is given.
    listitems -- optional iterable of items, written via _batch_appends.
    dictitems -- optional iterable of (key, value) pairs, written via
        _batch_setitems.
    state_setter -- optional callable invoked as state_setter(obj, state)
        at unpickling time instead of using BUILD.
    obj -- the object being pickled, if known; it is memoized after the
        construction opcodes are written, and used to validate the class
        passed to __newobj__ / __newobj_ex__.

    Raises PicklingError if args is not a tuple, func is not callable, or
    the class given to __newobj__ / __newobj_ex__ is unusable.
    """
    # This API is called by some subclasses

    if not isinstance(args, tuple):
        raise PicklingError("args from save_reduce() must be a tuple")
    if not callable(func):
        raise PicklingError("func from save_reduce() must be callable")

    save = self.save
    write = self.write

    # The special cases below are selected purely by the function's name.
    func_name = getattr(func, "__name__", "")
    if self.proto >= 2 and func_name == "__newobj_ex__":
        cls, args, kwargs = args
        if not hasattr(cls, "__new__"):
            raise PicklingError("args[0] from {} args has no __new__"
                                .format(func_name))
        if obj is not None and cls is not obj.__class__:
            raise PicklingError("args[0] from {} args has the wrong class"
                                .format(func_name))
        if self.proto >= 4:
            save(cls)
            save(args)
            save(kwargs)
            write(NEWOBJ_EX)
        else:
            # NEWOBJ_EX is only written for protocol 4 and up; for
            # protocols 2 and 3 bind everything into a partial and call
            # it with no arguments through REDUCE.
            func = partial(cls.__new__, cls, *args, **kwargs)
            save(func)
            save(())
            write(REDUCE)
    elif self.proto >= 2 and func_name == "__newobj__":
        # A __reduce__ implementation can direct protocol 2 or newer to
        # use the more efficient NEWOBJ opcode, while still
        # allowing protocol 0 and 1 to work normally.  For this to
        # work, the function returned by __reduce__ should be
        # called __newobj__, and its first argument should be a
        # class.  The implementation for __newobj__
        # should be as follows, although pickle has no way to
        # verify this:
        #
        # def __newobj__(cls, *args):
        #     return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 will pickle a reference to __newobj__,
        # while protocol 2 (and above) will pickle a reference to
        # cls, the remaining args tuple, and the NEWOBJ code,
        # which calls cls.__new__(cls, *args) at unpickling time
        # (see load_newobj below).  If __reduce__ returns a
        # three-tuple, the state from the third tuple item will be
        # pickled regardless of the protocol, calling __setstate__
        # at unpickling time (see load_build below).
        #
        # Note that no standard __newobj__ implementation exists;
        # you have to provide your own.  This is to enforce
        # compatibility with Python 2.2 (pickles written using
        # protocol 0 or 1 in Python 2.3 should be unpicklable by
        # Python 2.2).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]
        save(cls)
        save(args)
        write(NEWOBJ)
    else:
        save(func)
        save(args)
        write(REDUCE)

    if obj is not None:
        # If the object is already in the memo, this means it is
        # recursive. In this case, throw away everything we put on the
        # stack, and fetch the object back from the memo.
        if id(obj) in self.memo:
            write(POP + self.get(self.memo[id(obj)][0]))
        else:
            self.memoize(obj)

    # More new special cases (that work with older protocols as
    # well):  when __reduce__ returns a tuple with 4 or 5 items,
    # the 4th and 5th item should be iterators that provide list
    # items and dict items (as (key, value) tuples), or None.

    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        if state_setter is None:
            save(state)
            write(BUILD)
        else:
            # If a state_setter is specified, call it instead of load_build
            # to update obj's with its previous state.
            # First, push state_setter and its tuple of expected arguments
            # (obj, state) onto the stack.
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(REDUCE)
            # The purpose of state_setter is to carry-out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(POP)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000727
# Methods below this point are dispatched through the dispatch table.
# Each save_* method registers itself right after its definition, keyed
# by the type it handles (e.g. dispatch[type(None)] = save_none).

dispatch = {}
731
def save_none(self, obj):
    """Pickle None by writing the NONE opcode; *obj* is not inspected."""
    emit = self.write
    emit(NONE)
dispatch[type(None)] = save_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000735
def save_bool(self, obj):
    """Pickle a bool.

    Protocol 2 and newer write NEWTRUE / NEWFALSE; older protocols
    write TRUE / FALSE.
    """
    if self.proto >= 2:
        true_code, false_code = NEWTRUE, NEWFALSE
    else:
        true_code, false_code = TRUE, FALSE

    if obj:
        self.write(true_code)
    else:
        self.write(false_code)
dispatch[bool] = save_bool
742
def save_long(self, obj):
    """Pickle an int using the most compact form the protocol permits.

    Binary mode: values that fit a signed 4-byte int are written as
    BININT1 (0..0xff), BININT2 (0..0xffff) or BININT.  Anything else is
    written as LONG1/LONG4 for protocol >= 2, or as a text INT/LONG line
    for older protocols.
    """
    write = self.write
    fits_int32 = -0x80000000 <= obj <= 0x7fffffff

    if self.bin and fits_int32:
        # One- and two-byte unsigned forms first, then 4-byte signed.
        if 0 <= obj <= 0xff:
            write(BININT1 + pack("<B", obj))
        elif 0 <= obj <= 0xffff:
            write(BININT2 + pack("<H", obj))
        else:
            write(BININT + pack("<i", obj))
        return

    if self.proto >= 2:
        payload = encode_long(obj)
        size = len(payload)
        if size < 256:
            write(LONG1 + pack("<B", size) + payload)
        else:
            write(LONG4 + pack("<i", size) + payload)
        return

    # Text form: decimal repr on its own line, with a trailing 'L' for
    # values outside the signed 4-byte range.
    digits = repr(obj).encode("ascii")
    if fits_int32:
        write(INT + digits + b'\n')
    else:
        write(LONG + digits + b'L\n')
dispatch[int] = save_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000773
def save_float(self, obj):
    """Pickle a float.

    Binary mode writes BINFLOAT followed by the value packed as a
    big-endian double; text mode writes FLOAT followed by repr(obj) on
    its own line.
    """
    write = self.write
    if self.bin:
        record = BINFLOAT + pack('>d', obj)
    else:
        record = FLOAT + repr(obj).encode("ascii") + b'\n'
    write(record)
dispatch[float] = save_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000780
def save_bytes(self, obj):
    """Pickle a bytes object.

    Below protocol 3 the value is written through save_reduce(): an
    empty object as bytes(), anything else as
    codecs.encode(<latin1 text>, 'latin1').  From protocol 3 on, one of
    SHORT_BINBYTES, BINBYTES or BINBYTES8 is written depending on the
    length, and the object is then memoized.
    """
    if self.proto < 3:
        if obj:
            rebuild = codecs.encode
            rebuild_args = (str(obj, 'latin1'), 'latin1')
        else:  # bytes object is empty
            rebuild = bytes
            rebuild_args = ()
        self.save_reduce(rebuild, rebuild_args, obj=obj)
        return

    size = len(obj)
    if size <= 0xff:
        self.write(SHORT_BINBYTES + pack("<B", size) + obj)
    elif size > 0xffffffff and self.proto >= 4:
        self._write_large_bytes(BINBYTES8 + pack("<Q", size), obj)
    elif size >= self.framer._FRAME_SIZE_TARGET:
        # Payloads at or above the framer's target size are handed to
        # _write_large_bytes with the header kept separate.
        self._write_large_bytes(BINBYTES + pack("<I", size), obj)
    else:
        self.write(BINBYTES + pack("<I", size) + obj)
    self.memoize(obj)
dispatch[bytes] = save_bytes
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000800
def save_bytearray(self, obj):
    """Pickle a bytearray.

    Below protocol 5 the value is written through save_reduce() as
    bytearray() (when empty) or bytearray(bytes(obj)).  With protocol 5
    and up a BYTEARRAY8 opcode with an 8-byte length is written.
    """
    if self.proto < 5:
        # An empty bytearray needs no argument to be rebuilt.
        rebuild_args = (bytes(obj),) if obj else ()
        self.save_reduce(bytearray, rebuild_args, obj=obj)
        return

    size = len(obj)
    header = BYTEARRAY8 + pack("<Q", size)
    if size >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(header, obj)
    else:
        self.write(header + obj)
dispatch[bytearray] = save_bytearray
814
def save_picklebuffer(self, obj):
    """Pickle a PickleBuffer, in-band or out-of-band.

    The buffer is written out-of-band (NEXT_BUFFER, plus READONLY_BUFFER
    for read-only buffers) when self._buffer_callback is set and returns
    a false value for *obj*.  Otherwise its contents are copied and
    written in-band through save_bytes() (read-only buffers) or
    save_bytearray() (writable buffers).

    Raises PicklingError for protocols below 5 and for non-contiguous
    buffers.
    """
    if self.proto < 5:
        raise PicklingError("PickleBuffer can only be pickled with "
                            "protocol >= 5")
    with obj.raw() as m:
        if not m.contiguous:
            raise PicklingError("PickleBuffer can not be pickled when "
                                "pointing to a non-contiguous buffer")
        # Default to in-band; the callback's truthiness decides otherwise.
        in_band = True
        if self._buffer_callback is not None:
            in_band = bool(self._buffer_callback(obj))
        if in_band:
            # Write data in-band
            # XXX The C implementation avoids a copy here
            if m.readonly:
                self.save_bytes(m.tobytes())
            else:
                self.save_bytearray(m.tobytes())
        else:
            # Write data out-of-band
            self.write(NEXT_BUFFER)
            if m.readonly:
                self.write(READONLY_BUFFER)

dispatch[PickleBuffer] = save_picklebuffer
840
def save_str(self, obj):
    """Pickle a str and memoize it.

    Binary mode writes the UTF-8 encoding (with 'surrogatepass') after a
    SHORT_BINUNICODE, BINUNICODE or BINUNICODE8 header chosen by length
    and protocol.  Text mode writes a UNICODE line encoded with
    raw-unicode-escape, after escaping the characters that codec leaves
    alone but that would corrupt a line-oriented pickle.

    In both modes the object passed to memoize() is the original *obj*,
    never the escaped temporary.
    """
    if self.bin:
        encoded = obj.encode('utf-8', 'surrogatepass')
        n = len(encoded)
        if n <= 0xff and self.proto >= 4:
            self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
        else:
            self.write(BINUNICODE + pack("<I", n) + encoded)
    else:
        # Escape what raw-unicode-escape doesn't, but keep `obj` bound to
        # the original string: rebinding it here would make memoize()
        # below record the escaped copy instead of the caller's object.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\0", "\\u0000")
        escaped = escaped.replace("\n", "\\u000a")
        escaped = escaped.replace("\r", "\\u000d")
        escaped = escaped.replace("\x1a", "\\u001a")  # EOF on DOS
        self.write(UNICODE + escaped.encode('raw-unicode-escape') +
                   b'\n')
    self.memoize(obj)
dispatch[str] = save_str
Tim Peters658cba62001-02-09 20:06:00 +0000863
def save_tuple(self, obj):
    """Pickle a tuple, coping with tuples that (indirectly) contain themselves.

    An empty tuple is written directly and is not memoized.  For
    protocol >= 2, tuples of up to three items use the opcode looked up
    in _tuplesize2code; every other case uses MARK ... TUPLE.  If saving
    the elements caused the tuple itself to be memoized, the elements
    just written are popped again and the tuple is fetched from the memo.
    """
    if not obj: # tuple is empty
        if self.bin:
            self.write(EMPTY_TUPLE)
        else:
            self.write(MARK + TUPLE)
        return

    n = len(obj)
    save = self.save
    memo = self.memo
    if n <= 3 and self.proto >= 2:
        for element in obj:
            save(element)
        # Subtle.  Same as in the big comment below.
        if id(obj) in memo:
            get = self.get(memo[id(obj)][0])
            # No MARK was written on this path, so pop the n elements
            # one by one.
            self.write(POP * n + get)
        else:
            self.write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    write = self.write
    write(MARK)
    for element in obj:
        save(element)

    if id(obj) in memo:
        # Subtle.  obj was not in memo when we entered save_tuple(), so
        # the process of saving the tuple's elements must have saved
        # the tuple itself:  the tuple is recursive.  The proper action
        # now is to throw away everything we put on the stack, and
        # simply GET the tuple (it's already constructed).  This check
        # could have been done in the "for element" loop instead, but
        # recursive tuples are a rare thing.
        get = self.get(memo[id(obj)][0])
        if self.bin:
            write(POP_MARK + get)
        else:   # proto 0 -- POP_MARK not available
            # n elements plus the MARK itself
            write(POP * (n+1) + get)
        return

    # No recursion.
    write(TUPLE)
    self.memoize(obj)

dispatch[tuple] = save_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000914
def save_list(self, obj):
    """Pickle a list: write an empty list, memoize it, then append items.

    The list is memoized before its items are written through
    _batch_appends().
    """
    # proto 0 can't use EMPTY_LIST, so it spells it as MARK + LIST
    opening = EMPTY_LIST if self.bin else MARK + LIST
    self.write(opening)

    self.memoize(obj)
    self._batch_appends(obj)

dispatch[list] = save_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000925
# Maximum number of items taken per islice() batch by _batch_appends(),
# _batch_setitems() and save_set().
_BATCHSIZE = 1000
927
def _batch_appends(self, items):
    """Write the opcodes that append every element of *items*.

    Text mode writes one APPEND per element.  Binary mode consumes
    *items* in chunks of at most self._BATCHSIZE elements: a chunk of
    several elements becomes MARK ... APPENDS, a single-element chunk
    becomes one APPEND, and an empty chunk writes nothing.
    """
    save = self.save
    write = self.write

    if self.bin:
        source = iter(items)
        while True:
            chunk = list(islice(source, self._BATCHSIZE))
            count = len(chunk)
            if count == 1:
                save(chunk[0])
                write(APPEND)
            elif count > 1:
                write(MARK)
                for element in chunk:
                    save(element)
                write(APPENDS)
            # A short (or empty) chunk means the iterator is exhausted.
            if count < self._BATCHSIZE:
                return
    else:
        for element in items:
            save(element)
            write(APPEND)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000954
def save_dict(self, obj):
    """Pickle a dict: write an empty dict, memoize it, then set items.

    The dict is memoized before its (key, value) pairs are written
    through _batch_setitems().
    """
    # proto 0 can't use EMPTY_DICT, so it spells it as MARK + DICT
    opening = EMPTY_DICT if self.bin else MARK + DICT
    self.write(opening)

    self.memoize(obj)
    self._batch_setitems(obj.items())

dispatch[dict] = save_dict
if PyStringMap is not None:
    dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000967
def _batch_setitems(self, items):
    """Write the opcodes that store every (key, value) pair of *items*.

    Text mode writes one SETITEM per pair.  Binary mode consumes *items*
    in chunks of at most self._BATCHSIZE pairs: a chunk of several pairs
    becomes MARK ... SETITEMS, a single-pair chunk becomes one SETITEM,
    and an empty chunk writes nothing.
    """
    save = self.save
    write = self.write

    if self.bin:
        pairs = iter(items)
        while True:
            chunk = list(islice(pairs, self._BATCHSIZE))
            count = len(chunk)
            if count == 1:
                key, value = chunk[0]
                save(key)
                save(value)
                write(SETITEM)
            elif count > 1:
                write(MARK)
                for key, value in chunk:
                    save(key)
                    save(value)
                write(SETITEMS)
            # A short (or empty) chunk means the iterator is exhausted.
            if count < self._BATCHSIZE:
                return
    else:
        for key, value in items:
            save(key)
            save(value)
            write(SETITEM)
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000998
def save_set(self, obj):
    """Pickle a set.

    Below protocol 4 the set is written through save_reduce() as
    set(list(obj)).  From protocol 4 on, EMPTY_SET is written, the set
    is memoized, and its members follow in MARK ... ADDITEMS groups of
    at most self._BATCHSIZE members each.
    """
    save = self.save
    write = self.write

    if self.proto >= 4:
        write(EMPTY_SET)
        self.memoize(obj)

        members = iter(obj)
        while True:
            chunk = list(islice(members, self._BATCHSIZE))
            count = len(chunk)
            if chunk:
                write(MARK)
                for member in chunk:
                    save(member)
                write(ADDITEMS)
            # A short (or empty) chunk means the iterator is exhausted.
            if count < self._BATCHSIZE:
                return
    else:
        self.save_reduce(set, (list(obj),), obj=obj)
dispatch[set] = save_set
1022
def save_frozenset(self, obj):
    """Pickle a frozenset.

    Below protocol 4 the frozenset is written through save_reduce() as
    frozenset(list(obj)).  From protocol 4 on, its members are written
    after a MARK and collected with FROZENSET, after which the object is
    memoized.
    """
    save = self.save
    write = self.write

    if self.proto >= 4:
        write(MARK)
        for member in obj:
            save(member)

        memo_key = id(obj)
        if memo_key in self.memo:
            # Already in the memo, so the object is recursive: discard
            # everything pushed since the MARK and fetch it from the
            # memo instead.
            write(POP_MARK + self.get(self.memo[memo_key][0]))
        else:
            write(FROZENSET)
            self.memoize(obj)
    else:
        self.save_reduce(frozenset, (list(obj),), obj=obj)
dispatch[frozenset] = save_frozenset
1045
def save_global(self, obj, name=None):
    """Pickle *obj* as a reference to a module-level (or nested) name.

    name -- dotted name to look the object up by; defaults to
        obj.__qualname__ and falls back to obj.__name__.

    The owning module is found with whichmodule() and imported, and the
    name is resolved again to check that it yields *obj* itself.  The
    reference is then written as an extension code (protocol >= 2, when
    one is registered), STACK_GLOBAL (protocol >= 4), a getattr reduce
    (nested names below protocol 4) or a GLOBAL line, and *obj* is
    memoized.

    Raises PicklingError if the object cannot be found under that name,
    if the name resolves to a different object, or if the identifier
    cannot be encoded as ASCII for protocols below 3.
    """
    write = self.write

    if name is None:
        name = getattr(obj, '__qualname__', None)
    if name is None:
        name = obj.__name__

    module_name = whichmodule(obj, name)
    try:
        __import__(module_name, level=0)
        module = sys.modules[module_name]
        obj2, parent = _getattribute(module, name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module_name, name)) from None
    else:
        if obj2 is not obj:
            raise PicklingError(
                "Can't pickle %r: it's not the same object as %s.%s" %
                (obj, module_name, name))

    if self.proto >= 2:
        code = _extension_registry.get((module_name, name))
        if code:
            assert code > 0
            if code <= 0xff:
                write(EXT1 + pack("<B", code))
            elif code <= 0xffff:
                write(EXT2 + pack("<H", code))
            else:
                write(EXT4 + pack("<i", code))
            # Extension codes return without memoizing obj.
            return
    lastname = name.rpartition('.')[2]
    if parent is module:
        name = lastname
    # Non-ASCII identifiers are supported only with protocols >= 3.
    if self.proto >= 4:
        self.save(module_name)
        self.save(name)
        write(STACK_GLOBAL)
    elif parent is not module:
        # Nested name: look it up on its parent object at load time.
        self.save_reduce(getattr, (parent, lastname))
    elif self.proto >= 3:
        write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
              bytes(name, "utf-8") + b'\n')
    else:
        if self.fix_imports:
            r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
            r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
            if (module_name, name) in r_name_mapping:
                module_name, name = r_name_mapping[(module_name, name)]
            elif module_name in r_import_mapping:
                module_name = r_import_mapping[module_name]
        try:
            write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                  bytes(name, "ascii") + b'\n')
        except UnicodeEncodeError:
            # Report the module *name*; formatting the module object
            # would embed its full repr in the message.
            raise PicklingError(
                "can't pickle global identifier '%s.%s' using "
                "pickle protocol %i"
                % (module_name, name, self.proto)) from None

    self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +00001111
def save_type(self, obj):
    """Pickle a class object.

    The types of None, NotImplemented and Ellipsis are written through
    save_reduce() as type(<singleton>); every other class is delegated
    to save_global().  Returns whatever the delegated call returns.
    """
    for singleton in (None, NotImplemented, ...):
        if obj is type(singleton):
            return self.save_reduce(type, (singleton,), obj=obj)
    return self.save_global(obj)
1120
# Plain Python functions are written by save_global(); class objects go
# through save_type() first.
dispatch[FunctionType] = save_global
dispatch[type] = save_type
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001123
Guido van Rossuma48061a1995-01-10 00:31:14 +00001124
Guido van Rossum1be31752003-01-28 15:19:53 +00001125# Unpickling machinery
1126
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001127class _Unpickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +00001128
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00001129 def __init__(self, file, *, fix_imports=True,
Antoine Pitrou91f43802019-05-26 17:10:09 +02001130 encoding="ASCII", errors="strict", buffers=None):
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001131 """This takes a binary file for reading a pickle data stream.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001132
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08001133 The protocol version of the pickle is detected automatically, so
1134 no proto argument is needed.
1135
1136 The argument *file* must have two methods, a read() method that
1137 takes an integer argument, and a readline() method that requires
1138 no arguments. Both methods should return bytes. Thus *file*
Martin Panter7462b6492015-11-02 03:37:02 +00001139 can be a binary file object opened for reading, an io.BytesIO
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08001140 object, or any other custom object that meets this interface.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001141
Guido van Rossumfeea0782007-10-10 18:00:50 +00001142 The file-like object must have two methods, a read() method
1143 that takes an integer argument, and a readline() method that
1144 requires no arguments. Both methods should return bytes.
1145 Thus file-like object can be a binary file object opened for
1146 reading, a BytesIO object, or any other custom object that
1147 meets this interface.
Guido van Rossumf4169812008-03-17 22:56:06 +00001148
Antoine Pitrou91f43802019-05-26 17:10:09 +02001149 If *buffers* is not None, it should be an iterable of buffer-enabled
1150 objects that is consumed each time the pickle stream references
1151 an out-of-band buffer view. Such buffers have been given in order
1152 to the *buffer_callback* of a Pickler object.
1153
1154 If *buffers* is None (the default), then the buffers are taken
1155 from the pickle stream, assuming they are serialized there.
1156 It is an error for *buffers* to be None if the pickle stream
1157 was produced with a non-None *buffer_callback*.
1158
1159 Other optional arguments are *fix_imports*, *encoding* and
Martin Panter46f50722016-05-26 05:35:26 +00001160 *errors*, which are used to control compatibility support for
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08001161 pickle stream generated by Python 2. If *fix_imports* is True,
1162 pickle will try to map the old Python 2 names to the new names
1163 used in Python 3. The *encoding* and *errors* tell pickle how
1164 to decode 8-bit string instances pickled by Python 2; these
1165 default to 'ASCII' and 'strict', respectively. *encoding* can be
1166 'bytes' to read theses 8-bit string instances as bytes objects.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +00001167 """
Antoine Pitrou91f43802019-05-26 17:10:09 +02001168 self._buffers = iter(buffers) if buffers is not None else None
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001169 self._file_readline = file.readline
1170 self._file_read = file.read
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001171 self.memo = {}
Guido van Rossumf4169812008-03-17 22:56:06 +00001172 self.encoding = encoding
1173 self.errors = errors
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00001174 self.proto = 0
1175 self.fix_imports = fix_imports
Guido van Rossuma48061a1995-01-10 00:31:14 +00001176
def load(self):
    """Read a pickled object representation from the open file.

    Return the reconstituted object hierarchy specified in the file.

    Raises UnpicklingError if __init__() was never run on this instance,
    and EOFError if the stream ends where an opcode was expected.
    """
    # Check whether Unpickler was initialized correctly. This is
    # only needed to mimic the behavior of _pickle.Unpickler.load().
    if not hasattr(self, "_file_read"):
        raise UnpicklingError("Unpickler.__init__() was not called by "
                              "%s.__init__()" % (self.__class__.__name__,))
    # Every load() starts from a fresh unframer, stack and protocol.
    self._unframer = _Unframer(self._file_read, self._file_readline)
    self.read = self._unframer.read
    self.readinto = self._unframer.readinto
    self.readline = self._unframer.readline
    self.metastack = []
    self.stack = []
    self.append = self.stack.append
    self.proto = 0
    read = self.read
    dispatch = self.dispatch
    try:
        while True:
            key = read(1)
            if not key:
                raise EOFError
            assert isinstance(key, bytes_types)
            # Handlers are keyed by the integer value of the opcode byte.
            dispatch[key[0]](self)
    except _Stop as stopinst:
        # The loop only ends through an exception; _Stop carries the
        # final result in its .value attribute.
        return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001206
def pop_mark(self):
    """Return the items pushed on the stack since the last MARK.

    The current stack list is handed back to the caller, and the stack
    saved on top of self.metastack becomes the current one again
    (self.append is rebound to it).
    """
    since_mark, self.stack = self.stack, self.metastack.pop()
    self.append = self.stack.append
    return since_mark
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001213
def persistent_load(self, pid):
    """Resolve the persistent id *pid*.

    This base implementation supports no persistent ids and always
    raises UnpicklingError.
    """
    message = "unsupported persistent id encountered"
    raise UnpicklingError(message)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001216
# Maps the integer value of an opcode byte to the load_* method that
# handles it; load() looks handlers up as dispatch[key[0]].
dispatch = {}
1218
def load_proto(self):
    """PROTO: read the one-byte protocol number and store it in self.proto.

    Raises ValueError when the number is outside 0..HIGHEST_PROTOCOL.
    """
    version = self.read(1)[0]
    if version < 0 or version > HIGHEST_PROTOCOL:
        raise ValueError("unsupported pickle protocol: %d" % version)
    self.proto = version
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001224 dispatch[PROTO[0]] = load_proto
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001225
def load_frame(self):
    """FRAME: read an 8-byte little-endian size and pass it to the unframer.

    Raises ValueError when the size exceeds sys.maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > sys.maxsize:
        raise ValueError("frame size > sys.maxsize: %d" % size)
    self._unframer.load_frame(size)
1231 dispatch[FRAME[0]] = load_frame
1232
def load_persid(self):
    """PERSID: read an ASCII persistent id line and push the resolved object.

    Raises UnpicklingError when the id is not ASCII.
    """
    try:
        ident = self.readline()[:-1].decode("ascii")
    except UnicodeDecodeError:
        raise UnpicklingError(
            "persistent IDs in protocol 0 must be ASCII strings")
    else:
        self.append(self.persistent_load(ident))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001240 dispatch[PERSID[0]] = load_persid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001241
def load_binpersid(self):
    """BINPERSID: pop a persistent id off the stack and push the resolved object."""
    ident = self.stack.pop()
    resolved = self.persistent_load(ident)
    self.append(resolved)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001245 dispatch[BINPERSID[0]] = load_binpersid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001246
def load_none(self):
    """NONE: push None."""
    push = self.append
    push(None)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001249 dispatch[NONE[0]] = load_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001250
def load_false(self):
    """NEWFALSE: push False."""
    push = self.append
    push(False)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001253 dispatch[NEWFALSE[0]] = load_false
Guido van Rossum7d97d312003-01-28 04:25:27 +00001254
def load_true(self):
    """NEWTRUE: push True."""
    push = self.append
    push(True)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001257 dispatch[NEWTRUE[0]] = load_true
Guido van Rossum7d97d312003-01-28 04:25:27 +00001258
def load_int(self):
    """INT: read one line and push it as a bool or an int.

    A line equal to the argument of the FALSE or TRUE opcode strings yields
    the matching bool; anything else is parsed with int(line, 0).
    """
    line = self.readline()
    if line == TRUE[1:]:
        result = True
    elif line == FALSE[1:]:
        result = False
    else:
        result = int(line, 0)
    self.append(result)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001268 dispatch[INT[0]] = load_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001269
def load_binint(self):
    """BININT: push a 4-byte little-endian signed integer."""
    (number,) = unpack('<i', self.read(4))
    self.append(number)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001272 dispatch[BININT[0]] = load_binint
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001273
def load_binint1(self):
    """BININT1: push a one-byte unsigned integer."""
    byte = self.read(1)[0]
    self.append(byte)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001276 dispatch[BININT1[0]] = load_binint1
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001277
def load_binint2(self):
    """BININT2: push a 2-byte little-endian unsigned integer."""
    (number,) = unpack('<H', self.read(2))
    self.append(number)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001280 dispatch[BININT2[0]] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +00001281
def load_long(self):
    """LONG: read an integer line, dropping one trailing 'L' if present."""
    digits = self.readline()[:-1]
    # Slicing keeps the comparison safe when the line is empty.
    if digits[-1:] == b'L':
        digits = digits[:-1]
    self.append(int(digits, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001287 dispatch[LONG[0]] = load_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001288
def load_long1(self):
    """LONG1: read a one-byte length, then that many bytes, and push decode_long() of them."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(decode_long(payload))
1293 dispatch[LONG1[0]] = load_long1
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001294
def load_long4(self):
    """LONG4: read a 4-byte signed length, then that many bytes, and push decode_long() of them.

    Raises UnpicklingError when the length is negative.
    """
    (size,) = unpack('<i', self.read(4))
    if size < 0:
        # Corrupt or hostile pickle -- we never write one like this
        raise UnpicklingError("LONG pickle has negative byte count")
    payload = self.read(size)
    self.append(decode_long(payload))
1302 dispatch[LONG4[0]] = load_long4
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001303
def load_float(self):
    """FLOAT: read one line (minus its last byte) and push it as a float."""
    text = self.readline()[:-1]
    self.append(float(text))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001306 dispatch[FLOAT[0]] = load_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001307
def load_binfloat(self):
    """BINFLOAT: push an 8-byte big-endian double."""
    (number,) = unpack('>d', self.read(8))
    self.append(number)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001310 dispatch[BINFLOAT[0]] = load_binfloat
Guido van Rossumd3703791998-10-22 20:15:36 +00001311
def _decode_string(self, value):
    """Return value decoded per self.encoding and self.errors.

    Lets strings from Python 2 be decoded either as bytes or as Unicode
    strings: when self.encoding is "bytes" the value is returned as is.
    This should be used only with the STRING, BINSTRING and
    SHORT_BINSTRING opcodes.
    """
    if self.encoding != "bytes":
        return value.decode(self.encoding, self.errors)
    return value
1320
def load_string(self):
    """STRING: read a quoted, escaped line and push it via _decode_string().

    Raises UnpicklingError unless the argument is wrapped in a matching
    pair of single or double quotes.
    """
    line = self.readline()[:-1]
    quoted = len(line) >= 2 and line[0] == line[-1] and line[0] in b'"\''
    if not quoted:
        raise UnpicklingError("the STRING opcode argument must be quoted")
    # Strip outermost quotes, then resolve the escapes
    unescaped = codecs.escape_decode(line[1:-1])[0]
    self.append(self._decode_string(unescaped))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001329 dispatch[STRING[0]] = load_string
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001330
def load_binstring(self):
    """BINSTRING: read a length-prefixed string and push it via _decode_string().

    Raises UnpicklingError when the length is negative.
    """
    # Deprecated BINSTRING uses signed 32-bit length
    (size,) = unpack('<i', self.read(4))
    if size < 0:
        raise UnpicklingError("BINSTRING pickle has negative byte count")
    payload = self.read(size)
    self.append(self._decode_string(payload))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001338 dispatch[BINSTRING[0]] = load_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001339
def load_binbytes(self):
    """BINBYTES: read a 4-byte unsigned length and push that many bytes.

    Raises UnpicklingError when the length exceeds maxsize.
    """
    (size,) = unpack('<I', self.read(4))
    if size > maxsize:
        raise UnpicklingError("BINBYTES exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(payload)
1346 dispatch[BINBYTES[0]] = load_binbytes
1347
def load_unicode(self):
    """UNICODE: read one line and push it decoded as raw-unicode-escape."""
    raw = self.readline()[:-1]
    self.append(str(raw, 'raw-unicode-escape'))
1350 dispatch[UNICODE[0]] = load_unicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +00001351
def load_binunicode(self):
    """BINUNICODE: read a 4-byte unsigned length and push that many bytes as UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.  Raises UnpicklingError
    when the length exceeds maxsize.
    """
    (size,) = unpack('<I', self.read(4))
    if size > maxsize:
        raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001358 dispatch[BINUNICODE[0]] = load_binunicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +00001359
def load_binunicode8(self):
    """BINUNICODE8: read an 8-byte unsigned length and push that many bytes as UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.  Raises UnpicklingError
    when the length exceeds maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
1366 dispatch[BINUNICODE8[0]] = load_binunicode8
1367
def load_binbytes8(self):
    """BINBYTES8: read an 8-byte unsigned length and push that many bytes.

    Raises UnpicklingError when the length exceeds maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(payload)
1374 dispatch[BINBYTES8[0]] = load_binbytes8
1375
def load_bytearray8(self):
    """BYTEARRAY8: read an 8-byte unsigned length and push a bytearray of that size.

    The bytearray is allocated up front and filled with self.readinto().
    Raises UnpicklingError when the length exceeds maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    target = bytearray(size)
    self.readinto(target)
    self.append(target)
1384 dispatch[BYTEARRAY8[0]] = load_bytearray8
1385
def load_next_buffer(self):
    """NEXT_BUFFER: push the next item of the self._buffers iterator.

    Raises UnpicklingError when self._buffers is None or is exhausted.
    """
    source = self._buffers
    if source is None:
        raise UnpicklingError("pickle stream refers to out-of-band data "
                              "but no *buffers* argument was given")
    try:
        item = next(source)
    except StopIteration:
        raise UnpicklingError("not enough out-of-band buffers")
    else:
        self.append(item)
1395 dispatch[NEXT_BUFFER[0]] = load_next_buffer
1396
def load_readonly_buffer(self):
    """READONLY_BUFFER: make the buffer on top of the stack read-only.

    A writable buffer is replaced by a read-only memoryview of it; a buffer
    that is already read-only is left untouched.
    """
    stack = self.stack
    with memoryview(stack[-1]) as view:
        if view.readonly:
            return
        stack[-1] = view.toreadonly()
1402 dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1403
def load_short_binstring(self):
    """SHORT_BINSTRING: read a one-byte length, then push that many bytes via _decode_string()."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(self._decode_string(payload))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001408 dispatch[SHORT_BINSTRING[0]] = load_short_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001409
def load_short_binbytes(self):
    """SHORT_BINBYTES: read a one-byte length and push that many bytes."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(payload)
Guido van Rossumf4169812008-03-17 22:56:06 +00001413 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1414
def load_short_binunicode(self):
    """SHORT_BINUNICODE: read a one-byte length and push that many bytes as UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.
    """
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
1418 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1419
def load_tuple(self):
    """TUPLE: push a tuple of the items collected since the last MARK."""
    packed = tuple(self.pop_mark())
    # self.append must be looked up only after pop_mark() has rebound it.
    self.append(packed)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001423 dispatch[TUPLE[0]] = load_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001424
def load_empty_tuple(self):
    """EMPTY_TUPLE: push an empty tuple."""
    push = self.append
    push(())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001427 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001428
def load_tuple1(self):
    """TUPLE1: replace the top stack item with a 1-tuple containing it."""
    stack = self.stack
    single = (stack[-1],)
    stack[-1] = single
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001431 dispatch[TUPLE1[0]] = load_tuple1
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001432
def load_tuple2(self):
    """TUPLE2: replace the top two stack items with a 2-tuple of them."""
    stack = self.stack
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001435 dispatch[TUPLE2[0]] = load_tuple2
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001436
def load_tuple3(self):
    """TUPLE3: replace the top three stack items with a 3-tuple of them."""
    stack = self.stack
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001439 dispatch[TUPLE3[0]] = load_tuple3
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001440
def load_empty_list(self):
    """EMPTY_LIST: push a new empty list."""
    fresh = list()
    self.append(fresh)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001443 dispatch[EMPTY_LIST[0]] = load_empty_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001444
def load_empty_dictionary(self):
    """EMPTY_DICT: push a new empty dict."""
    fresh = dict()
    self.append(fresh)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001447 dispatch[EMPTY_DICT[0]] = load_empty_dictionary
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001448
def load_empty_set(self):
    """EMPTY_SET: push a new empty set."""
    fresh = set()
    self.append(fresh)
1451 dispatch[EMPTY_SET[0]] = load_empty_set
1452
def load_frozenset(self):
    """FROZENSET: push a frozenset of the items collected since the last MARK."""
    frozen = frozenset(self.pop_mark())
    # self.append must be looked up only after pop_mark() has rebound it.
    self.append(frozen)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001456 dispatch[FROZENSET[0]] = load_frozenset
1457
def load_list(self):
    """LIST: push the list of items collected since the last MARK.

    The list returned by pop_mark() is pushed itself, not a copy.
    """
    collected = self.pop_mark()
    push = self.append  # bound after pop_mark() restored the outer stack
    push(collected)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001461 dispatch[LIST[0]] = load_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001462
def load_dict(self):
    """DICT: push a dict built from the alternating keys and values since the last MARK."""
    flat = self.pop_mark()
    mapping = {}
    for pos in range(0, len(flat), 2):
        mapping[flat[pos]] = flat[pos + 1]
    self.append(mapping)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001468 dispatch[DICT[0]] = load_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001469
# INST and OBJ differ only in how they get a class object.  It's not
# only sensible to do the rest in a common routine, the two routines
# previously diverged and grew different bugs.
# klass is the class to instantiate, and args holds the arguments that
# are passed to klass when it is called.
def _instantiate(self, klass, args):
    """Create an instance of klass and push it on the stack.

    klass is called with *args when args is non-empty, when klass is not
    a type, or when klass has a __getinitargs__ attribute.  Otherwise the
    instance is created with klass.__new__(klass), bypassing __init__.

    Raises TypeError, with the class name prepended to the message, when
    calling klass raises TypeError.
    """
    if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
        try:
            value = klass(*args)
        except TypeError as err:
            # Keep the message as the exception's only argument and attach
            # the original traceback explicitly.  Passing the traceback as
            # a second constructor argument made str(exc) render a tuple.
            message = ("in constructor for %s: %s" %
                       (klass.__name__, str(err)))
            raise TypeError(message).with_traceback(err.__traceback__)
    else:
        value = klass.__new__(klass)
    self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001486
def load_inst(self):
    """INST: read ASCII module and class name lines, then instantiate the class.

    The class is resolved with find_class() and instantiated with the items
    collected since the last MARK as arguments.
    """
    module_name = self.readline()[:-1].decode("ascii")
    class_name = self.readline()[:-1].decode("ascii")
    target = self.find_class(module_name, class_name)
    ctor_args = self.pop_mark()
    self._instantiate(target, ctor_args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001492 dispatch[INST[0]] = load_inst
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001493
def load_obj(self):
    """OBJ: instantiate the class found just above the last MARK.

    The remaining items above the mark are the arguments.
    """
    # Stack is ... markobject classobject arg1 arg2 ...
    marked = self.pop_mark()
    klass = marked.pop(0)
    self._instantiate(klass, marked)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001499 dispatch[OBJ[0]] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001500
def load_newobj(self):
    """NEWOBJ: pop args and cls, then push cls.__new__(cls, *args)."""
    stack = self.stack
    new_args = stack.pop()
    klass = stack.pop()
    self.append(klass.__new__(klass, *new_args))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001506 dispatch[NEWOBJ[0]] = load_newobj
Guido van Rossum3a41c612003-01-28 15:10:22 +00001507
def load_newobj_ex(self):
    """NEWOBJ_EX: pop kwargs, args and cls, then push cls.__new__(cls, *args, **kwargs)."""
    stack = self.stack
    new_kwargs = stack.pop()
    new_args = stack.pop()
    klass = stack.pop()
    self.append(klass.__new__(klass, *new_args, **new_kwargs))
1514 dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1515
def load_global(self):
    """GLOBAL: read UTF-8 module and name lines and push find_class(module, name)."""
    module_name = self.readline()[:-1].decode("utf-8")
    attr_name = self.readline()[:-1].decode("utf-8")
    found = self.find_class(module_name, attr_name)
    self.append(found)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001521 dispatch[GLOBAL[0]] = load_global
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001522
def load_stack_global(self):
    """STACK_GLOBAL: pop name and module, then push find_class(module, name).

    Raises UnpicklingError unless both popped values are exactly str.
    """
    stack = self.stack
    name = stack.pop()
    module = stack.pop()
    if not (type(name) is str and type(module) is str):
        raise UnpicklingError("STACK_GLOBAL requires str")
    self.append(self.find_class(module, name))
1529 dispatch[STACK_GLOBAL[0]] = load_stack_global
1530
def load_ext1(self):
    """EXT1: read a one-byte extension code and pass it to get_extension()."""
    ext_code = self.read(1)[0]
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001534 dispatch[EXT1[0]] = load_ext1
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001535
def load_ext2(self):
    """EXT2: read a 2-byte unsigned extension code and pass it to get_extension()."""
    (ext_code,) = unpack('<H', self.read(2))
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001539 dispatch[EXT2[0]] = load_ext2
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001540
def load_ext4(self):
    """EXT4: read a 4-byte signed extension code and pass it to get_extension()."""
    (ext_code,) = unpack('<i', self.read(4))
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001544 dispatch[EXT4[0]] = load_ext4
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001545
def get_extension(self, code):
    """Push the object registered under extension code `code`.

    A hit in _extension_cache is pushed directly.  Otherwise the key found
    in _inverted_registry is resolved with find_class(), and the result is
    cached and pushed.

    Raises UnpicklingError for an unregistered code <= 0 and ValueError for
    any other unregistered code.
    """
    missing = []  # unique sentinel distinguishing "not cached" from any value
    cached = _extension_cache.get(code, missing)
    if cached is not missing:
        self.append(cached)
        return
    key = _inverted_registry.get(code)
    if key:
        resolved = self.find_class(*key)
        _extension_cache[code] = resolved
        self.append(resolved)
        return
    if code <= 0:  # note that 0 is forbidden
        # Corrupt or hostile pickle.
        raise UnpicklingError("EXT specifies code <= 0")
    raise ValueError("unregistered extension code %d" % code)
1561
def find_class(self, module, name):
    """Import `module` and return its attribute `name`.

    For protocols below 3 with fix_imports set, the names are first
    translated through the _compat_pickle mapping tables.  For protocol 4
    and above the attribute is looked up with _getattribute(); otherwise a
    plain getattr() is used.
    """
    # Subclasses may override this.
    sys.audit('pickle.find_class', module, name)
    if self.proto < 3 and self.fix_imports:
        try:
            module, name = _compat_pickle.NAME_MAPPING[(module, name)]
        except KeyError:
            module = _compat_pickle.IMPORT_MAPPING.get(module, module)
    __import__(module, level=0)
    owner = sys.modules[module]
    if self.proto >= 4:
        return _getattribute(owner, name)[0]
    return getattr(owner, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001575
def load_reduce(self):
    """REDUCE: pop args and replace the callable below them with callable(*args)."""
    stack = self.stack
    call_args = stack.pop()
    callee = stack[-1]
    result = callee(*call_args)
    stack[-1] = result
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001581 dispatch[REDUCE[0]] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001582
def load_pop(self):
    """POP: discard the top stack item, or pop a mark if the current stack is empty."""
    if not self.stack:
        self.pop_mark()
        return
    del self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001588 dispatch[POP[0]] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001589
def load_pop_mark(self):
    """POP_MARK: discard everything pushed since the last MARK."""
    discard = self.pop_mark
    discard()
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001592 dispatch[POP_MARK[0]] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001593
def load_dup(self):
    """DUP: push the top stack item a second time."""
    top = self.stack[-1]
    self.append(top)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001596 dispatch[DUP[0]] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001597
def load_get(self):
    """GET: read a decimal memo index line and push the memoized object."""
    index = int(self.readline()[:-1])
    remembered = self.memo[index]
    self.append(remembered)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001601 dispatch[GET[0]] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001602
def load_binget(self):
    """BINGET: read a one-byte memo index and push the memoized object."""
    index = self.read(1)[0]
    remembered = self.memo[index]
    self.append(remembered)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001606 dispatch[BINGET[0]] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001607
def load_long_binget(self):
    """LONG_BINGET: read a 4-byte unsigned memo index and push the memoized object."""
    (index,) = unpack('<I', self.read(4))
    remembered = self.memo[index]
    self.append(remembered)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001611 dispatch[LONG_BINGET[0]] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001612
def load_put(self):
    """PUT: read a decimal memo index line and memoize the top stack item under it.

    Raises ValueError when the index is negative.
    """
    index = int(self.readline()[:-1])
    if index < 0:
        raise ValueError("negative PUT argument")
    top = self.stack[-1]
    self.memo[index] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001618 dispatch[PUT[0]] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001619
def load_binput(self):
    """BINPUT: read a one-byte memo index and memoize the top stack item under it.

    The index comes from indexing the bytes returned by self.read(1), so it
    is always in range(256).  The former ``i < 0`` check could never fire
    and has been removed.
    """
    index = self.read(1)[0]
    self.memo[index] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001625 dispatch[BINPUT[0]] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001626
def load_long_binput(self):
    """LONG_BINPUT: read a 4-byte unsigned memo index and memoize the top stack item under it.

    Raises ValueError when the index exceeds maxsize.
    """
    (index,) = unpack('<I', self.read(4))
    if index > maxsize:
        raise ValueError("negative LONG_BINPUT argument")
    top = self.stack[-1]
    self.memo[index] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001632 dispatch[LONG_BINPUT[0]] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001633
def load_memoize(self):
    """MEMOIZE: store the top stack item in the memo under the key len(memo)."""
    table = self.memo
    top = self.stack[-1]
    table[len(table)] = top
1637 dispatch[MEMOIZE[0]] = load_memoize
1638
def load_append(self):
    """APPEND: pop a value and append it to the object now on top of the stack."""
    stack = self.stack
    item = stack.pop()
    container = stack[-1]
    container.append(item)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001644 dispatch[APPEND[0]] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001645
def load_appends(self):
    """APPENDS: add the items since the last MARK to the object below the mark.

    The target's extend() is used when it exists; otherwise each item is
    added with append().
    """
    pending = self.pop_mark()
    target = self.stack[-1]
    try:
        bulk_add = target.extend
    except AttributeError:
        pass
    else:
        bulk_add(pending)
        return
    # Even if the PEP 307 requires extend() and append() methods,
    # fall back on append() if the object has no extend() method
    # for backward compatibility.
    add_one = target.append
    for element in pending:
        add_one(element)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001662 dispatch[APPENDS[0]] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001663
def load_setitem(self):
    """SETITEM: pop a value and a key and store them in the object now on top of the stack."""
    stack = self.stack
    item_value = stack.pop()
    item_key = stack.pop()
    mapping = stack[-1]
    mapping[item_key] = item_value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001670 dispatch[SETITEM[0]] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001671
def load_setitems(self):
    """SETITEMS: store the alternating keys and values since the last MARK in the object below it."""
    flat = self.pop_mark()
    mapping = self.stack[-1]
    for pos in range(0, len(flat), 2):
        entry_key = flat[pos]
        mapping[entry_key] = flat[pos + 1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001677 dispatch[SETITEMS[0]] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001678
def load_additems(self):
    """ADDITEMS: add the items since the last MARK to the object below the mark.

    A real set is updated in one call; any other object gets one add() call
    per item.
    """
    pending = self.pop_mark()
    target = self.stack[-1]
    if isinstance(target, set):
        target.update(pending)
        return
    add_one = target.add
    for element in pending:
        add_one(element)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001688 dispatch[ADDITEMS[0]] = load_additems
1689
def load_build(self):
    """BUILD: pop a state and apply it to the object now on top of the stack.

    When the object has a __setstate__ that is not None, it receives the
    state and nothing else happens.  Otherwise a 2-tuple state is split into
    (state, slotstate).  A truthy state is copied into the instance
    __dict__, interning keys that are exactly str, and a truthy slotstate is
    applied with setattr().
    """
    stack = self.stack
    state = stack.pop()
    target = stack[-1]
    custom = getattr(target, "__setstate__", None)
    if custom is not None:
        custom(state)
        return
    slotstate = None
    if isinstance(state, tuple) and len(state) == 2:
        state, slotstate = state
    if state:
        attrs = target.__dict__
        for key, value in state.items():
            attrs[sys.intern(key) if type(key) is str else key] = value
    if slotstate:
        for key, value in slotstate.items():
            setattr(target, key, value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001712 dispatch[BUILD[0]] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001713
def load_mark(self):
    """MARK: save the current stack on the metastack and start a new empty one."""
    self.metastack.append(self.stack)
    fresh = []
    self.stack = fresh
    self.append = fresh.append
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001718 dispatch[MARK[0]] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001719
def load_stop(self):
    """STOP: pop the top stack item and raise _Stop carrying it."""
    result = self.stack.pop()
    raise _Stop(result)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001723 dispatch[STOP[0]] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001724
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001725
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001726# Shorthands
1727
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Pickle obj to file using a _Pickler built from the given options."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001731
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickled representation of obj as a bytes object."""
    sink = io.BytesIO()
    pickler = _Pickler(sink, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    data = sink.getvalue()
    assert isinstance(data, bytes_types)
    return data
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001739
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read one pickled object from file with a _Unpickler and return it."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001744
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Unpickle and return the object stored in the bytes-like object s.

    Raises TypeError when s is a str.
    """
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    stream = io.BytesIO(s)
    unpickler = _Unpickler(stream, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001752
# Use the faster _pickle if possible; otherwise expose the pure-Python
# implementations under the public names.
try:
    from _pickle import PickleError, PicklingError, UnpicklingError
    from _pickle import Pickler, Unpickler
    from _pickle import dump, dumps, load, loads
except ImportError:
    Pickler = _Pickler
    Unpickler = _Unpickler
    dump = _dump
    dumps = _dumps
    load = _load
    loads = _loads
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001769
# Doctest
def _test():
    """Run doctest.testmod() and return its result."""
    from doctest import testmod
    return testmod()
1774
if __name__ == "__main__":
    # Command-line entry point: run the self-test suite with -t/--test,
    # otherwise pretty-print the object stored in each given pickle file.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    elif not args.pickle_file:
        parser.print_help()
    else:
        import pprint
        try:
            for f in args.pickle_file:
                obj = load(f)
                pprint.pprint(obj)
        finally:
            # argparse.FileType opened every file while parsing; close
            # them all, even when loading one of them fails.
            for f in args.pickle_file:
                f.close()