blob: cb768b28586a1de6acd146de288e9a9785c13a02 [file] [log] [blame]
Guido van Rossum54f22ed2000-02-04 15:10:34 +00001"""Create portable serialized representations of Python objects.
Guido van Rossuma48061a1995-01-10 00:31:14 +00002
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +00003See module copyreg for a mechanism for registering custom picklers.
Tim Peters22a449a2003-01-27 20:16:36 +00004See module pickletools source for extensive comments.
Guido van Rossuma48061a1995-01-10 00:31:14 +00005
Guido van Rossume467be61997-12-05 19:42:42 +00006Classes:
Guido van Rossuma48061a1995-01-10 00:31:14 +00007
Guido van Rossume467be61997-12-05 19:42:42 +00008 Pickler
9 Unpickler
Guido van Rossuma48061a1995-01-10 00:31:14 +000010
Guido van Rossume467be61997-12-05 19:42:42 +000011Functions:
Guido van Rossuma48061a1995-01-10 00:31:14 +000012
    dump(object, file)
    dumps(object) -> bytes
    load(file) -> object
    loads(bytes) -> object
Guido van Rossuma48061a1995-01-10 00:31:14 +000017
Guido van Rossume467be61997-12-05 19:42:42 +000018Misc variables:
Guido van Rossuma48061a1995-01-10 00:31:14 +000019
Fred Drakefe82acc1998-02-13 03:24:48 +000020 __version__
Guido van Rossume467be61997-12-05 19:42:42 +000021 format_version
22 compatible_formats
Guido van Rossuma48061a1995-01-10 00:31:14 +000023
Guido van Rossuma48061a1995-01-10 00:31:14 +000024"""
25
Victor Stinner7fa767e2014-03-20 09:16:38 +010026from types import FunctionType
Alexandre Vassalottif7fa63d2008-05-11 08:55:36 +000027from copyreg import dispatch_table
28from copyreg import _extension_registry, _inverted_registry, _extension_cache
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +030029from itertools import islice
Serhiy Storchaka0d554d72015-10-10 22:42:18 +030030from functools import partial
Guido van Rossumd3703791998-10-22 20:15:36 +000031import sys
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +030032from sys import maxsize
33from struct import pack, unpack
Skip Montanaro23bafc62001-02-18 03:10:09 +000034import re
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000035import io
Walter Dörwald42748a82007-06-12 16:40:17 +000036import codecs
Antoine Pitroud9dfaa92009-06-04 20:32:06 +000037import _compat_pickle
Guido van Rossuma48061a1995-01-10 00:31:14 +000038
Antoine Pitrou91f43802019-05-26 17:10:09 +020039from _pickle import PickleBuffer
40
# Names exported by "from pickle import *".  The upper-case opcode constants
# are appended to this list further down, once they have been defined.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads", "PickleBuffer"]

# Shortcut for use in isinstance testing
bytes_types = (bytes, bytearray)

# These are purely informational; no code uses these.
format_version = "4.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      "3.0",            # Protocol 3
                      "4.0",            # Protocol 4
                      "5.0",            # Protocol 5
                      ]                 # Old format versions we can read

# This is the highest protocol number we know how to read.
HIGHEST_PROTOCOL = 5

# The protocol we write by default.  May be less than HIGHEST_PROTOCOL.
# Only bump this if the oldest still supported version of Python already
# includes it.
DEFAULT_PROTOCOL = 4
Guido van Rossum2e6a4b32007-05-04 19:56:22 +000066
class PickleError(Exception):
    """Root of the pickle exception hierarchy.

    PicklingError and UnpicklingError both derive from this class, so
    catching PickleError handles either of them.
    """
70
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is passed to dump()."""
77
class UnpicklingError(PickleError):
    """Raised when an object cannot be unpickled.

    Typical causes are a corrupted data stream or a security violation.

    Unpickling may raise other exceptions as well, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError and
    IndexError.
    """
Guido van Rossum7849da81995-03-09 14:08:35 +000088
Tim Petersc0c12b52003-01-29 00:56:17 +000089# An instance of _Stop is raised by Unpickler.load_stop() in response to
90# the STOP opcode, passing the object that is the result of unpickling.
class _Stop(Exception):
    """Control-flow exception that carries the finished unpickled object.

    The object handed to the constructor is kept on the ``value``
    attribute so that whoever catches the exception can retrieve it.
    """

    def __init__(self, value):
        # Deliberately does not chain to Exception.__init__; only the
        # payload attribute is needed.
        self.value = value
94
Guido van Rossum533dbcf2003-01-28 17:55:05 +000095# Jython has PyStringMap; it's a dict subclass with string keys
# Default for interpreters other than Jython: no PyStringMap type exists.
PyStringMap = None
try:
    from org.python.core import PyStringMap
except ImportError:
    # Not running on Jython; keep the None placeholder assigned above.
    pass
100
Tim Peters22a449a2003-01-27 20:16:36 +0000101# Pickle opcodes. See pickletools.py for extensive docs. The listing
102# here is in kind-of alphabetical order of 1-character pickle code.
103# pickletools groups them by purpose.
Guido van Rossumdbb718f2001-09-21 19:22:34 +0000104
# Protocol 0 and 1 opcodes.  Each constant is the single byte that
# identifies the opcode in a pickle stream.

MARK           = b'('   # push special markobject on stack
STOP           = b'.'   # every pickle ends with STOP
POP            = b'0'   # discard topmost stack item
POP_MARK       = b'1'   # discard stack top through topmost markobject
DUP            = b'2'   # duplicate top stack item
FLOAT          = b'F'   # push float object; decimal string argument
INT            = b'I'   # push integer or bool; decimal string argument
BININT         = b'J'   # push four-byte signed int
BININT1        = b'K'   # push 1-byte unsigned int
LONG           = b'L'   # push long; decimal string argument
BININT2        = b'M'   # push 2-byte unsigned int
NONE           = b'N'   # push None
PERSID         = b'P'   # push persistent object; id is taken from string arg
BINPERSID      = b'Q'   # push persistent object; id is taken from stack
REDUCE         = b'R'   # apply callable to argtuple, both on stack
STRING         = b'S'   # push string; NL-terminated string argument
BINSTRING      = b'T'   # push string; counted binary string argument
SHORT_BINSTRING= b'U'   # push string; counted binary string argument < 256 bytes
UNICODE        = b'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE     = b'X'   # push Unicode string; counted UTF-8 string argument
APPEND         = b'a'   # append stack top to list below it
BUILD          = b'b'   # call __setstate__ or __dict__.update()
GLOBAL         = b'c'   # push self.find_class(modname, name); 2 string args
DICT           = b'd'   # build a dict from stack items
EMPTY_DICT     = b'}'   # push empty dict
APPENDS        = b'e'   # extend list on stack by topmost stack slice
GET            = b'g'   # push item from memo on stack; index is string arg
BINGET         = b'h'   # push item from memo on stack; index is 1-byte arg
INST           = b'i'   # build & push class instance
LONG_BINGET    = b'j'   # push item from memo on stack; index is 4-byte arg
LIST           = b'l'   # build list from topmost stack items
EMPTY_LIST     = b']'   # push empty list
OBJ            = b'o'   # build & push class instance
PUT            = b'p'   # store stack top in memo; index is string arg
BINPUT         = b'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT    = b'r'   # store stack top in memo; index is 4-byte arg
SETITEM        = b's'   # add key+value pair to dict
TUPLE          = b't'   # build tuple from topmost stack items
EMPTY_TUPLE    = b')'   # push empty tuple
SETITEMS       = b'u'   # modify dict by adding topmost key+value pairs
BINFLOAT       = b'G'   # push float; arg is 8-byte float encoding

TRUE           = b'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE          = b'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO          = b'\x80'  # identify pickle protocol
NEWOBJ         = b'\x81'  # build object by applying cls.__new__ to argtuple
EXT1           = b'\x82'  # push object from extension registry; 1-byte index
EXT2           = b'\x83'  # ditto, but 2-byte index
EXT4           = b'\x84'  # ditto, but 4-byte index
TUPLE1         = b'\x85'  # build 1-tuple from stack top
TUPLE2         = b'\x86'  # build 2-tuple from two topmost stack items
TUPLE3         = b'\x87'  # build 3-tuple from three topmost stack items
NEWTRUE        = b'\x88'  # push True
NEWFALSE       = b'\x89'  # push False
LONG1          = b'\x8a'  # push long from < 256 bytes
LONG4          = b'\x8b'  # push really big long

# Indexed by tuple length (0 through 3): the opcode that builds a tuple of
# exactly that many items.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]

# Protocol 3 (Python 3.x)

BINBYTES       = b'B'   # push bytes; counted binary string argument
SHORT_BINBYTES = b'C'   # push bytes; counted binary string argument < 256 bytes

# Protocol 4

SHORT_BINUNICODE = b'\x8c'  # push short string; UTF-8 length < 256 bytes
BINUNICODE8      = b'\x8d'  # push very long string
BINBYTES8        = b'\x8e'  # push very long bytes string
EMPTY_SET        = b'\x8f'  # push empty set on the stack
ADDITEMS         = b'\x90'  # modify set by adding topmost stack items
FROZENSET        = b'\x91'  # build frozenset from topmost stack items
NEWOBJ_EX        = b'\x92'  # like NEWOBJ but work with keyword only arguments
STACK_GLOBAL     = b'\x93'  # same as GLOBAL but using names on the stacks
MEMOIZE          = b'\x94'  # store top of the stack in memo
FRAME            = b'\x95'  # indicate the beginning of a new frame

# Protocol 5

BYTEARRAY8       = b'\x96'  # push bytearray
NEXT_BUFFER      = b'\x97'  # push next out-of-band buffer
READONLY_BUFFER  = b'\x98'  # make top of stack readonly

# Export every module-level name that is spelled entirely in upper-case
# letters, digits and underscores (at least two characters long).  That
# covers the opcodes above as well as names like HIGHEST_PROTOCOL.
__all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$", x)])
192
193
class _Framer:
    """Buffer pickle output and emit it to a write callable in frames.

    While framing is active, written data accumulates in an in-memory
    io.BytesIO buffer.  commit_frame() flushes that buffer to the
    underlying write callable, preceded by a FRAME opcode and an 8-byte
    little-endian length unless the buffer is shorter than
    _FRAME_SIZE_MIN.  While framing is inactive, writes go straight to
    the underlying callable.
    """

    # Buffers shorter than this are flushed without a FRAME header.
    _FRAME_SIZE_MIN = 4
    # An unforced commit_frame() flushes only once the buffer holds at
    # least this many bytes.
    _FRAME_SIZE_TARGET = 64 * 1024

    def __init__(self, file_write):
        """Remember *file_write*, the callable that receives the output."""
        self.file_write = file_write
        self.current_frame = None

    def start_framing(self):
        """Begin buffering subsequent writes in a fresh frame."""
        self.current_frame = io.BytesIO()

    def end_framing(self):
        """Flush any buffered data and switch back to direct writes."""
        frame = self.current_frame
        if frame and frame.tell() > 0:
            self.commit_frame(force=True)
        self.current_frame = None

    def commit_frame(self, force=False):
        """Flush the current frame if it is large enough, or if *force*.

        Does nothing when framing is inactive.
        """
        frame = self.current_frame
        if not frame:
            return
        if frame.tell() < self._FRAME_SIZE_TARGET and not force:
            return

        payload = frame.getbuffer()
        emit = self.file_write
        if len(payload) >= self._FRAME_SIZE_MIN:
            # The opcode and the length go out in one call: building the
            # tiny header by concatenation is expected to cost less than
            # a second call to the file's write method.
            emit(FRAME + pack("<Q", len(payload)))

        # The payload goes out in its own call so that it never has to
        # be copied into a combined header+payload bytes object.
        emit(payload)

        # Switch to a brand-new buffer rather than rewinding the old
        # one: the file object may still hold the memoryview of the
        # previous buffer and read from it later.
        self.current_frame = io.BytesIO()

    def write(self, data):
        """Write *data* to the current frame, or to the file if unframed."""
        sink = self.current_frame.write if self.current_frame else self.file_write
        return sink(data)

    def write_large_bytes(self, header, payload):
        """Write a large binary object directly, outside of any frame.

        The pending frame (if any) is flushed first so that the stream
        order is preserved.
        """
        emit = self.file_write
        if self.current_frame:
            # Close off whatever has been buffered so far.
            self.commit_frame(force=True)

        # Header and payload are written separately on purpose: joining
        # them would allocate a large temporary bytes object.  No
        # protocol 4 FRAME opcode is inserted, which makes it possible
        # to optimize file.read calls in the loader.
        emit(header)
        emit(payload)
255
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100256
class _Unframer:
    """Serve reads from the current frame, falling back to the file.

    load_frame() pulls a whole frame into an in-memory io.BytesIO.  The
    read methods consume that buffer first; once it is exhausted they
    drop it and read from the underlying file callables instead.  A read
    that is only partially satisfied by the frame raises UnpicklingError.
    """

    def __init__(self, file_read, file_readline, file_tell=None):
        # file_tell is accepted but not stored or used by this class.
        self.file_read = file_read
        self.file_readline = file_readline
        self.current_frame = None

    def readinto(self, buf):
        """Fill *buf* completely and return the number of bytes stored."""
        frame = self.current_frame
        if not frame:
            wanted = len(buf)
            buf[:] = self.file_read(wanted)
            return wanted

        got = frame.readinto(buf)
        if got == 0 and len(buf) != 0:
            # Frame fully consumed: discard it and read from the file.
            self.current_frame = None
            wanted = len(buf)
            buf[:] = self.file_read(wanted)
            return wanted
        if got < len(buf):
            raise UnpicklingError(
                "pickle exhausted before end of frame")
        return got

    def read(self, n):
        """Return *n* bytes from the frame, or from the file if unframed."""
        frame = self.current_frame
        if not frame:
            return self.file_read(n)

        chunk = frame.read(n)
        if not chunk and n != 0:
            # Frame fully consumed: discard it and read from the file.
            self.current_frame = None
            return self.file_read(n)
        if len(chunk) < n:
            raise UnpicklingError(
                "pickle exhausted before end of frame")
        return chunk

    def readline(self):
        """Return one newline-terminated line from the frame or the file."""
        frame = self.current_frame
        if not frame:
            return self.file_readline()

        line = frame.readline()
        if not line:
            # Frame fully consumed: discard it and read from the file.
            self.current_frame = None
            return self.file_readline()
        if not line.endswith(b'\n'):
            # The frame ended in the middle of a line.
            raise UnpicklingError(
                "pickle exhausted before end of frame")
        return line

    def load_frame(self, frame_size):
        """Read *frame_size* bytes from the file as the new current frame.

        Raises UnpicklingError if the previous frame still has unread data.
        """
        previous = self.current_frame
        if previous and previous.read() != b'':
            raise UnpicklingError(
                "beginning of a new frame before end of current frame")
        self.current_frame = io.BytesIO(self.file_read(frame_size))
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100312
313
314# Tools used for pickling.
315
def _getattribute(obj, name):
    """Resolve the dotted attribute path *name* starting from *obj*.

    Returns a ``(value, parent)`` pair, where *parent* is the object the
    final attribute was looked up on.  Raises AttributeError if a path
    component is missing or is the literal ``'<locals>'``.
    """
    current = obj
    for part in name.split('.'):
        if part == '<locals>':
            raise AttributeError("Can't get local attribute {!r} on {!r}"
                                 .format(name, current))
        parent = current
        try:
            current = getattr(parent, part)
        except AttributeError:
            # Replace the low-level error with one naming the full path.
            raise AttributeError("Can't get attribute {!r} on {!r}"
                                 .format(name, parent)) from None
    return current, parent
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100328
def whichmodule(obj, name):
    """Find the module an object belongs to.

    The object's own ``__module__`` attribute wins when it is set.
    Otherwise every loaded module except ``__main__`` is searched for an
    attribute path *name* that resolves to *obj*; ``'__main__'`` is
    returned when none matches.
    """
    declared = getattr(obj, '__module__', None)
    if declared is not None:
        return declared

    # Iterate over a snapshot of sys.modules: dynamic modules may import
    # other modules as a side effect of getattr, which would otherwise
    # mutate the dict during iteration.
    for candidate, module in list(sys.modules.items()):
        if candidate == '__main__' or module is None:
            continue
        try:
            found = _getattribute(module, name)[0]
        except AttributeError:
            continue
        if found is obj:
            return candidate
    return '__main__'
345
def encode_long(x):
    r"""Encode an int as a two's complement little-endian bytes string.

    Zero is special-cased to the empty string, which saves a byte in
    the LONG1 pickling context.

    >>> encode_long(0)
    b''
    >>> encode_long(255)
    b'\xff\x00'
    >>> encode_long(32767)
    b'\xff\x7f'
    >>> encode_long(-256)
    b'\x00\xff'
    >>> encode_long(-32768)
    b'\x00\x80'
    >>> encode_long(-128)
    b'\x80'
    >>> encode_long(127)
    b'\x7f'
    >>>
    """
    if x == 0:
        return b''
    # One byte more than bit_length() // 8 always leaves room for the sign.
    size = (x.bit_length() >> 3) + 1
    encoded = x.to_bytes(size, byteorder='little', signed=True)
    # For negative numbers that estimate can be one byte too generous:
    # drop a trailing 0xff when the byte before it already has its sign
    # bit set.
    redundant_sign_byte = (
        x < 0
        and size > 1
        and encoded[-1] == 0xff
        and (encoded[-2] & 0x80) != 0
    )
    if redundant_sign_byte:
        encoded = encoded[:-1]
    return encoded
375
def decode_long(data):
    r"""Decode an int from a two's complement little-endian bytes string.

    This is the inverse of encode_long(); the empty string decodes to 0.

    >>> decode_long(b'')
    0
    >>> decode_long(b"\xff\x00")
    255
    >>> decode_long(b"\xff\x7f")
    32767
    >>> decode_long(b"\x00\xff")
    -256
    >>> decode_long(b"\x00\x80")
    -32768
    >>> decode_long(b"\x80")
    -128
    >>> decode_long(b"\x7f")
    127
    """
    return int.from_bytes(data, 'little', signed=True)
395
Skip Montanaro23bafc62001-02-18 03:10:09 +0000396
Guido van Rossum1be31752003-01-28 15:19:53 +0000397# Pickling machinery
398
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000399class _Pickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +0000400
    def __init__(self, file, protocol=None, *, fix_imports=True,
                 buffer_callback=None):
        """This takes a binary file for writing a pickle data stream.

        The optional *protocol* argument tells the pickler to use the
        given protocol; supported protocols are 0, 1, 2, 3, 4 and 5.
        The default protocol is 4.  It was introduced in Python 3.4, and
        is incompatible with previous versions.

        Specifying a negative protocol version selects the highest
        protocol version supported.  The higher the protocol used, the
        more recent the version of Python needed to read the pickle
        produced.

        The *file* argument must have a write() method that accepts a
        single bytes argument. It can thus be a file object opened for
        binary writing, an io.BytesIO instance, or any other custom
        object that meets this interface.

        If *fix_imports* is True and *protocol* is less than 3, pickle
        will try to map the new Python 3 names to the old module names
        used in Python 2, so that the pickle data stream is readable
        with Python 2.

        If *buffer_callback* is None (the default), buffer views are
        serialized into *file* as part of the pickle stream.

        If *buffer_callback* is not None, then it can be called any number
        of times with a buffer view.  If the callback returns a false value
        (such as None), the given buffer is out-of-band; otherwise the
        buffer is serialized in-band, i.e. inside the pickle stream.

        It is an error if *buffer_callback* is not None and *protocol*
        is None or smaller than 5.

        Raises ValueError if *protocol* is greater than HIGHEST_PROTOCOL
        or if *buffer_callback* is given with a protocol below 5, and
        TypeError if *file* has no 'write' attribute.
        """
        if protocol is None:
            protocol = DEFAULT_PROTOCOL
        if protocol < 0:
            # Any negative value is shorthand for "newest protocol".
            protocol = HIGHEST_PROTOCOL
        elif not 0 <= protocol <= HIGHEST_PROTOCOL:
            raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
        if buffer_callback is not None and protocol < 5:
            raise ValueError("buffer_callback needs protocol >= 5")
        self._buffer_callback = buffer_callback
        try:
            # dump() later checks for this attribute to detect subclasses
            # that forgot to call this __init__.
            self._file_write = file.write
        except AttributeError:
            raise TypeError("file must have a 'write' attribute")
        # All output goes through the framer, which either buffers it in
        # frames or passes it straight to file.write.
        self.framer = _Framer(self._file_write)
        self.write = self.framer.write
        self._write_large_bytes = self.framer.write_large_bytes
        # Maps id(obj) to a (memo index, obj) pair; see memoize().
        self.memo = {}
        self.proto = int(protocol)
        # True for every binary protocol, i.e. everything except protocol 0.
        self.bin = protocol >= 1
        # When true, memoize() records nothing.
        self.fast = 0
        self.fix_imports = fix_imports and protocol < 3
Guido van Rossuma48061a1995-01-10 00:31:14 +0000457
Fred Drake7f781c92002-05-01 20:33:53 +0000458 def clear_memo(self):
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000459 """Clears the pickler's "memo".
460
461 The memo is the data structure that remembers which objects the
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -0800462 pickler has already seen, so that shared or recursive objects
463 are pickled by reference and not by value. This method is
464 useful when re-using picklers.
Raymond Hettingeraef22fb2002-05-29 16:18:42 +0000465 """
Fred Drake7f781c92002-05-01 20:33:53 +0000466 self.memo.clear()
467
Guido van Rossum3a41c612003-01-28 15:10:22 +0000468 def dump(self, obj):
Tim Peters5bd2a792003-02-01 16:45:06 +0000469 """Write a pickled representation of obj to the open file."""
Alexandre Vassalotti3cfcab92008-12-27 09:30:39 +0000470 # Check whether Pickler was initialized correctly. This is
471 # only needed to mimic the behavior of _pickle.Pickler.dump().
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100472 if not hasattr(self, "_file_write"):
Alexandre Vassalotti3cfcab92008-12-27 09:30:39 +0000473 raise PicklingError("Pickler.__init__() was not called by "
474 "%s.__init__()" % (self.__class__.__name__,))
Guido van Rossumd6c9e632003-01-28 03:49:52 +0000475 if self.proto >= 2:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -0800476 self.write(PROTO + pack("<B", self.proto))
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100477 if self.proto >= 4:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -0800478 self.framer.start_framing()
Guido van Rossum3a41c612003-01-28 15:10:22 +0000479 self.save(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000480 self.write(STOP)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -0800481 self.framer.end_framing()
Guido van Rossuma48061a1995-01-10 00:31:14 +0000482
Jeremy Hylton3422c992003-01-24 19:29:52 +0000483 def memoize(self, obj):
484 """Store an object in the memo."""
485
Tim Peterse46b73f2003-01-27 21:22:10 +0000486 # The Pickler memo is a dictionary mapping object ids to 2-tuples
487 # that contain the Unpickler memo key and the object being memoized.
488 # The memo key is written to the pickle and will become
Jeremy Hylton3422c992003-01-24 19:29:52 +0000489 # the key in the Unpickler's memo. The object is stored in the
Tim Peterse46b73f2003-01-27 21:22:10 +0000490 # Pickler memo so that transient objects are kept alive during
491 # pickling.
Jeremy Hylton3422c992003-01-24 19:29:52 +0000492
Tim Peterse46b73f2003-01-27 21:22:10 +0000493 # The use of the Unpickler memo length as the memo key is just a
494 # convention. The only requirement is that the memo values be unique.
495 # But there appears no advantage to any other scheme, and this
Tim Peterscbd0a322003-01-28 00:24:43 +0000496 # scheme allows the Unpickler memo to be implemented as a plain (but
Tim Peterse46b73f2003-01-27 21:22:10 +0000497 # growable) array, indexed by memo key.
Guido van Rossum5d9113d2003-01-29 17:58:45 +0000498 if self.fast:
499 return
Guido van Rossum9b40e802003-01-30 06:37:41 +0000500 assert id(obj) not in self.memo
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100501 idx = len(self.memo)
502 self.write(self.put(idx))
503 self.memo[id(obj)] = idx, obj
Jeremy Hylton3422c992003-01-24 19:29:52 +0000504
    # Return a PUT (BINPUT, LONG_BINPUT) opcode string with argument idx, or
    # the argument-less MEMOIZE opcode for protocol 4 and higher.
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100506 def put(self, idx):
507 if self.proto >= 4:
508 return MEMOIZE
509 elif self.bin:
510 if idx < 256:
511 return BINPUT + pack("<B", idx)
Guido van Rossum5c938d02003-01-28 03:03:08 +0000512 else:
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100513 return LONG_BINPUT + pack("<I", idx)
514 else:
515 return PUT + repr(idx).encode("ascii") + b'\n'
Guido van Rossuma48061a1995-01-10 00:31:14 +0000516
Tim Petersbb38e302003-01-27 21:25:41 +0000517 # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +0300518 def get(self, i):
Tim Petersc32d8242001-04-10 02:48:53 +0000519 if self.bin:
Tim Petersc32d8242001-04-10 02:48:53 +0000520 if i < 256:
Serhiy Storchakaa3e32c92013-04-14 13:37:02 +0300521 return BINGET + pack("<B", i)
Guido van Rossum5c938d02003-01-28 03:03:08 +0000522 else:
Antoine Pitroubf6ecf92012-11-24 20:40:21 +0100523 return LONG_BINGET + pack("<I", i)
Guido van Rossuma48061a1995-01-10 00:31:14 +0000524
Guido van Rossum39478e82007-08-27 17:23:59 +0000525 return GET + repr(i).encode("ascii") + b'\n'
Tim Peters2344fae2001-01-15 00:50:52 +0000526
    def save(self, obj, save_persistent_id=True):
        """Write the pickle representation of *obj* to the output.

        The strategies below are tried in order and the first that
        applies wins: a persistent id, a reference to an already
        memoized object, the reducer_override hook, the per-type
        dispatch table, a reduction function from the private or
        copyreg dispatch table, saving a class as a global, and finally
        the object's own __reduce_ex__ or __reduce__.

        When *save_persistent_id* is false, a persistent id reported for
        *obj* is ignored.

        Raises PicklingError if no reduction method is found or if the
        reduction result is neither a string nor a tuple of two to six
        elements.
        """
        # Flush the pending frame first if it has reached the target size.
        self.framer.commit_frame()

        # Check for persistent id (defined by a subclass)
        pid = self.persistent_id(obj)
        if pid is not None and save_persistent_id:
            self.save_pers(pid)
            return

        # Check the memo
        x = self.memo.get(id(obj))
        if x is not None:
            # Already pickled: emit a reference to its memo index.
            self.write(self.get(x[0]))
            return

        # NotImplemented means "no reduction value yet"; it is also what
        # reducer_override returns to request the regular machinery.
        rv = NotImplemented
        reduce = getattr(self, "reducer_override", None)
        if reduce is not None:
            rv = reduce(obj)

        if rv is NotImplemented:
            # Check the type dispatch table
            # NOTE(review): self.dispatch is defined outside this view;
            # presumably it maps exact types to save_* methods.
            t = type(obj)
            f = self.dispatch.get(t)
            if f is not None:
                f(self, obj)  # Call unbound method with explicit self
                return

            # Check private dispatch table if any, or else
            # copyreg.dispatch_table
            reduce = getattr(self, 'dispatch_table', dispatch_table).get(t)
            if reduce is not None:
                rv = reduce(obj)
            else:
                # Check for a class with a custom metaclass; treat as regular
                # class
                if issubclass(t, type):
                    self.save_global(obj)
                    return

                # Check for a __reduce_ex__ method, fall back to __reduce__
                reduce = getattr(obj, "__reduce_ex__", None)
                if reduce is not None:
                    rv = reduce(self.proto)
                else:
                    reduce = getattr(obj, "__reduce__", None)
                    if reduce is not None:
                        rv = reduce()
                    else:
                        raise PicklingError("Can't pickle %r object: %r" %
                                            (t.__name__, obj))

        # Check for string returned by reduce(), meaning "save as global"
        if isinstance(rv, str):
            self.save_global(obj, rv)
            return

        # Assert that reduce() returned a tuple
        if not isinstance(rv, tuple):
            raise PicklingError("%s must return string or tuple" % reduce)

        # Assert that it returned an appropriately sized tuple
        l = len(rv)
        if not (2 <= l <= 6):
            raise PicklingError("Tuple returned by %s must have "
                                "two to six elements" % reduce)

        # Save the reduce() output and finally memoize the object
        # The tuple items fill save_reduce()'s positional parameters.
        self.save_reduce(obj=obj, *rv)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000596
Guido van Rossum3a41c612003-01-28 15:10:22 +0000597 def persistent_id(self, obj):
Guido van Rossumbc64e222003-01-28 16:34:19 +0000598 # This exists so a subclass can override it
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000599 return None
600
def save_pers(self, pid):
    """Write a reference to the persistent ID *pid*.

    In binary mode *pid* is saved as an ordinary object (passing
    save_persistent_id=False to the nested save) and followed by
    BINPERSID.  In text mode str(pid) is written inline after PERSID,
    so it must be ASCII-encodable; otherwise PicklingError is raised.
    """
    if not self.bin:
        # Text form: the ID travels on a single newline-terminated line.
        try:
            self.write(PERSID + str(pid).encode("ascii") + b'\n')
        except UnicodeEncodeError:
            raise PicklingError(
                "persistent IDs in protocol 0 must be ASCII strings")
        return

    self.save(pid, save_persistent_id=False)
    self.write(BINPERSID)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000612
def save_reduce(self, func, args, state=None, listitems=None,
                dictitems=None, state_setter=None, obj=None):
    """Write the opcodes that rebuild an object from a reduce-style tuple.

    func, args -- callable and its argument tuple.  PicklingError is
        raised if *args* is not a tuple or *func* is not callable.
    state -- if not None, saved and applied with BUILD, or through a
        state_setter(obj, state) call when *state_setter* is given.
    listitems, dictitems -- if not None, handed to _batch_appends() and
        _batch_setitems() respectively.
    state_setter -- optional callable used instead of BUILD (see above).
    obj -- the object being pickled, if known.  When given it is
        memoized after the construction opcodes, or, if it is already
        in the memo, the freshly built value is popped and the
        memoized one is fetched instead.

    This API is called by some subclasses.
    """
    # This API is called by some subclasses

    if not isinstance(args, tuple):
        raise PicklingError("args from save_reduce() must be a tuple")
    if not callable(func):
        raise PicklingError("func from save_reduce() must be callable")

    save = self.save
    write = self.write

    # The special cases below are selected purely by the callable's name.
    func_name = getattr(func, "__name__", "")
    if self.proto >= 2 and func_name == "__newobj_ex__":
        # args is expected to be (cls, positional_args, keyword_args).
        cls, args, kwargs = args
        if not hasattr(cls, "__new__"):
            raise PicklingError("args[0] from {} args has no __new__"
                                .format(func_name))
        if obj is not None and cls is not obj.__class__:
            raise PicklingError("args[0] from {} args has the wrong class"
                                .format(func_name))
        if self.proto >= 4:
            save(cls)
            save(args)
            save(kwargs)
            write(NEWOBJ_EX)
        else:
            # Protocols 2 and 3 take the REDUCE route instead: pickle a
            # partial that already carries every argument and call it
            # with an empty argument tuple.
            func = partial(cls.__new__, cls, *args, **kwargs)
            save(func)
            save(())
            write(REDUCE)
    elif self.proto >= 2 and func_name == "__newobj__":
        # A __reduce__ implementation can direct protocol 2 or newer to
        # use the more efficient NEWOBJ opcode, while still
        # allowing protocol 0 and 1 to work normally.  For this to
        # work, the function returned by __reduce__ should be
        # called __newobj__, and its first argument should be a
        # class.  The implementation for __newobj__
        # should be as follows, although pickle has no way to
        # verify this:
        #
        # def __newobj__(cls, *args):
        #     return cls.__new__(cls, *args)
        #
        # Protocols 0 and 1 will pickle a reference to __newobj__,
        # while protocol 2 (and above) will pickle a reference to
        # cls, the remaining args tuple, and the NEWOBJ code,
        # which calls cls.__new__(cls, *args) at unpickling time
        # (see load_newobj below).  If __reduce__ returns a
        # three-tuple, the state from the third tuple item will be
        # pickled regardless of the protocol, calling __setstate__
        # at unpickling time (see load_build below).
        #
        # Note that no standard __newobj__ implementation exists;
        # you have to provide your own.  This is to enforce
        # compatibility with Python 2.2 (pickles written using
        # protocol 0 or 1 in Python 2.3 should be unpicklable by
        # Python 2.2).
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        args = args[1:]
        save(cls)
        save(args)
        write(NEWOBJ)
    else:
        # Generic case: call func(*args) at unpickling time.
        save(func)
        save(args)
        write(REDUCE)

    if obj is not None:
        # If the object is already in the memo, this means it is
        # recursive. In this case, throw away everything we put on the
        # stack, and fetch the object back from the memo.
        if id(obj) in self.memo:
            write(POP + self.get(self.memo[id(obj)][0]))
        else:
            self.memoize(obj)

    # More new special cases (that work with older protocols as
    # well):  when __reduce__ returns a tuple with 4 or 5 items,
    # the 4th and 5th item should be iterators that provide list
    # items and dict items (as (key, value) tuples), or None.

    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        if state_setter is None:
            save(state)
            write(BUILD)
        else:
            # If a state_setter is specified, call it instead of load_build
            # to update obj's with its previous state.
            # First, push state_setter and its tuple of expected arguments
            # (obj, state) onto the stack.
            save(state_setter)
            save(obj)  # simple BINGET opcode as obj is already memoized.
            save(state)
            write(TUPLE2)
            # Trigger a state_setter(obj, state) function call.
            write(REDUCE)
            # The purpose of state_setter is to carry-out an
            # inplace modification of obj. We do not care about what the
            # method might return, so its output is eventually removed from
            # the stack.
            write(POP)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000727
Guido van Rossumbc64e222003-01-28 16:34:19 +0000728 # Methods below this point are dispatched through the dispatch table
729
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000730 dispatch = {}
731
def save_none(self, obj):
    """Write the NONE opcode; *obj* itself is not inspected."""
    emit = self.write
    emit(NONE)
Guido van Rossum13257902007-06-07 23:15:56 +0000734 dispatch[type(None)] = save_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000735
def save_bool(self, obj):
    """Write the opcode for the truth value of *obj*.

    Protocol 2 and newer use NEWTRUE/NEWFALSE; older protocols use
    TRUE/FALSE.
    """
    if self.proto >= 2:
        opcode = NEWTRUE if obj else NEWFALSE
    else:
        opcode = TRUE if obj else FALSE
    self.write(opcode)
Guido van Rossum77f6a652002-04-03 22:41:51 +0000741 dispatch[bool] = save_bool
742
def save_long(self, obj):
    """Write the int *obj*, choosing the opcode by magnitude and protocol.

    In binary mode, non-negative values up to 0xff / 0xffff use
    BININT1 / BININT2 and any other value in the signed 32-bit range
    uses BININT.  Remaining values are written as LONG1 / LONG4 when
    the protocol is at least 2, and otherwise as the text opcodes
    INT (signed 32-bit range) or LONG.
    """
    emit = self.write
    in_int32 = -0x80000000 <= obj <= 0x7fffffff

    if self.bin:
        # Unsigned one- and two-byte forms first, then signed four-byte.
        if 0 <= obj <= 0xff:
            emit(BININT1 + pack("<B", obj))
            return
        if 0 <= obj <= 0xffff:
            emit(BININT2 + pack("<H", obj))
            return
        if in_int32:
            emit(BININT + pack("<i", obj))
            return

    if self.proto >= 2:
        payload = encode_long(obj)
        size = len(payload)
        if size < 256:
            header = LONG1 + pack("<B", size)
        else:
            header = LONG4 + pack("<i", size)
        emit(header + payload)
        return

    # Text representation for protocols 0 and 1.
    digits = repr(obj).encode("ascii")
    if in_int32:
        emit(INT + digits + b'\n')
    else:
        emit(LONG + digits + b'L\n')
Guido van Rossum13257902007-06-07 23:15:56 +0000772 dispatch[int] = save_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000773
def save_float(self, obj):
    """Write the float *obj*.

    Binary mode emits BINFLOAT followed by the value packed with the
    '>d' struct format; text mode emits FLOAT followed by repr(obj).
    """
    if not self.bin:
        self.write(FLOAT + repr(obj).encode("ascii") + b'\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
Guido van Rossum13257902007-06-07 23:15:56 +0000779 dispatch[float] = save_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000780
def save_bytes(self, obj):
    """Write the bytes object *obj*.

    Below protocol 3 the value is saved through save_reduce(): as a
    bytes() call when empty, otherwise as a codecs.encode() call on
    its latin1 text.  From protocol 3 on, the opcode is picked by
    length (SHORT_BINBYTES, BINBYTES8 or BINBYTES), payloads at or
    above the framer's _FRAME_SIZE_TARGET go through
    _write_large_bytes(), and the object is memoized afterwards.
    """
    if self.proto < 3:
        if obj:
            self.save_reduce(codecs.encode,
                             (str(obj, 'latin1'), 'latin1'), obj=obj)
        else:
            # bytes object is empty
            self.save_reduce(bytes, (), obj=obj)
        return

    size = len(obj)
    if size <= 0xff:
        self.write(SHORT_BINBYTES + pack("<B", size) + obj)
    elif size > 0xffffffff and self.proto >= 4:
        self._write_large_bytes(BINBYTES8 + pack("<Q", size), obj)
    elif size >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(BINBYTES + pack("<I", size), obj)
    else:
        self.write(BINBYTES + pack("<I", size) + obj)
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000799 dispatch[bytes] = save_bytes
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000800
def save_bytearray(self, obj):
    """Write the bytearray *obj*.

    Below protocol 5 the value is saved through save_reduce() as a
    bytearray() call (with no argument when empty, otherwise with a
    bytes copy of the data).  From protocol 5 on, BYTEARRAY8 and an
    8-byte length are written, followed by the payload; payloads at or
    above the framer's _FRAME_SIZE_TARGET go through
    _write_large_bytes().

    A genuine bytearray is memoized after it has been written, so that
    a second reference to the same (mutable) object is pickled as a
    memo lookup rather than as an independent copy.
    """
    if self.proto < 5:
        if not obj:  # bytearray is empty
            self.save_reduce(bytearray, (), obj=obj)
        else:
            self.save_reduce(bytearray, (bytes(obj),), obj=obj)
        return
    n = len(obj)
    if n >= self.framer._FRAME_SIZE_TARGET:
        self._write_large_bytes(BYTEARRAY8 + pack("<Q", n), obj)
    else:
        self.write(BYTEARRAY8 + pack("<Q", n) + obj)
    # save_picklebuffer() also routes temporary bytes objects through
    # this method; those must not be registered in the memo under the
    # identity of a bytes object, so only real bytearrays are memoized.
    if isinstance(obj, bytearray):
        self.memoize(obj)
813 dispatch[bytearray] = save_bytearray
814
def save_picklebuffer(self, obj):
    """Write the PickleBuffer *obj*, in-band or out-of-band.

    Raises PicklingError when the protocol is below 5 or when the
    underlying buffer is not contiguous.  If a buffer callback is set,
    its truth value decides whether the data is written in-band; with
    no callback the data is always in-band.  In-band data is copied
    with tobytes() and saved through save_bytes() (read-only buffers)
    or save_bytearray() (writable buffers).  Out-of-band buffers emit
    NEXT_BUFFER, followed by READONLY_BUFFER when read-only.
    """
    if self.proto < 5:
        raise PicklingError("PickleBuffer can only be pickled with "
                            "protocol >= 5")
    with obj.raw() as m:
        if not m.contiguous:
            raise PicklingError("PickleBuffer can not be pickled when "
                                "pointing to a non-contiguous buffer")
        in_band = True
        if self._buffer_callback is not None:
            in_band = bool(self._buffer_callback(obj))
        if in_band:
            # Write data in-band
            # XXX The C implementation avoids a copy here
            if m.readonly:
                self.save_bytes(m.tobytes())
            else:
                self.save_bytearray(m.tobytes())
        else:
            # Write data out-of-band
            self.write(NEXT_BUFFER)
            if m.readonly:
                self.write(READONLY_BUFFER)
838
839 dispatch[PickleBuffer] = save_picklebuffer
840
def save_str(self, obj):
    """Write the str *obj* and memoize it.

    Binary mode encodes the text as UTF-8 with 'surrogatepass' and
    picks the opcode by encoded length (SHORT_BINUNICODE and
    BINUNICODE8 need protocol 4; BINUNICODE otherwise); payloads at or
    above the framer's _FRAME_SIZE_TARGET go through
    _write_large_bytes().  Text mode writes UNICODE followed by the
    raw-unicode-escape encoding, with backslashes and newlines escaped
    first so that the value stays on one line.

    The object memoized is always the caller's *obj*, never the
    escaped temporary.
    """
    if self.bin:
        encoded = obj.encode('utf-8', 'surrogatepass')
        n = len(encoded)
        if n <= 0xff and self.proto >= 4:
            self.write(SHORT_BINUNICODE + pack("<B", n) + encoded)
        elif n > 0xffffffff and self.proto >= 4:
            self._write_large_bytes(BINUNICODE8 + pack("<Q", n), encoded)
        elif n >= self.framer._FRAME_SIZE_TARGET:
            self._write_large_bytes(BINUNICODE + pack("<I", n), encoded)
        else:
            self.write(BINUNICODE + pack("<I", n) + encoded)
    else:
        # Escape into a separate name.  Memo lookups elsewhere in this
        # class are keyed on id(obj), so memoizing the escaped copy
        # would register an identity that no later lookup can match.
        escaped = obj.replace("\\", "\\u005c")
        escaped = escaped.replace("\n", "\\u000a")
        self.write(UNICODE + escaped.encode('raw-unicode-escape') +
                   b'\n')
    self.memoize(obj)
Guido van Rossumf4169812008-03-17 22:56:06 +0000859 dispatch[str] = save_str
Tim Peters658cba62001-02-09 20:06:00 +0000860
def save_tuple(self, obj):
    """Write the tuple *obj*.

    An empty tuple is written as EMPTY_TUPLE (binary) or MARK + TUPLE.
    With protocol >= 2, tuples of up to three items use the sized
    tuple opcodes from _tuplesize2code; everything else is written as
    MARK, the items, then TUPLE.

    If saving the items caused the tuple itself to be memoized (the
    tuple is recursive), the items just pushed are discarded and the
    memoized tuple is fetched instead.
    """
    emit = self.write

    if not obj:  # tuple is empty
        emit(EMPTY_TUPLE if self.bin else MARK + TUPLE)
        return

    size = len(obj)
    save_item = self.save
    memo = self.memo

    if size <= 3 and self.proto >= 2:
        for item in obj:
            save_item(item)
        # Same recursion check as in the general case below.
        if id(obj) in memo:
            fetch = self.get(memo[id(obj)][0])
            emit(POP * size + fetch)
        else:
            emit(_tuplesize2code[size])
            self.memoize(obj)
        return

    # Protocol 0 or 1 with a non-empty tuple, or a newer protocol with
    # more than three items.
    emit(MARK)
    for item in obj:
        save_item(item)

    if id(obj) not in memo:
        # No recursion.
        emit(TUPLE)
        self.memoize(obj)
        return

    # obj was not in the memo on entry, so saving its items must have
    # saved the tuple itself: it is recursive.  Throw away what was
    # pushed and simply GET the already-constructed tuple.  Checking
    # inside the item loop would also work, but recursive tuples are
    # rare.
    fetch = self.get(memo[id(obj)][0])
    if self.bin:
        emit(POP_MARK + fetch)
    else:  # proto 0 -- POP_MARK not available
        emit(POP * (size + 1) + fetch)
Jeremy Hylton3422c992003-01-24 19:29:52 +0000909
Guido van Rossum13257902007-06-07 23:15:56 +0000910 dispatch[tuple] = save_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000911
def save_list(self, obj):
    """Write the list *obj*: an empty list first, then its items.

    The empty list is memoized before the items are appended through
    _batch_appends().
    """
    # proto 0 cannot use EMPTY_LIST
    opener = EMPTY_LIST if self.bin else MARK + LIST
    self.write(opener)

    self.memoize(obj)
    self._batch_appends(obj)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000920
Guido van Rossum13257902007-06-07 23:15:56 +0000921 dispatch[list] = save_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000922
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000923 _BATCHSIZE = 1000
924
def _batch_appends(self, items):
    """Save every element of *items* and append it to the list on the stack.

    In text mode each element is followed by APPEND.  In binary mode
    the elements are consumed in chunks of up to _BATCHSIZE: a chunk
    of several elements is wrapped in MARK ... APPENDS, a chunk of one
    uses APPEND.
    """
    save_item = self.save
    emit = self.write

    if not self.bin:
        for element in items:
            save_item(element)
            emit(APPEND)
        return

    source = iter(items)
    while True:
        chunk = list(islice(source, self._BATCHSIZE))
        count = len(chunk)
        if count == 1:
            save_item(chunk[0])
            emit(APPEND)
        elif count > 1:
            emit(MARK)
            for element in chunk:
                save_item(element)
            emit(APPENDS)
        # A short (or empty) chunk means the iterator is exhausted.
        if count < self._BATCHSIZE:
            return
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000951
def save_dict(self, obj):
    """Write the dict *obj*: an empty dict first, then its items.

    The empty dict is memoized before the items are stored through
    _batch_setitems().
    """
    # proto 0 cannot use EMPTY_DICT
    opener = EMPTY_DICT if self.bin else MARK + DICT
    self.write(opener)

    self.memoize(obj)
    self._batch_setitems(obj.items())
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000960
Guido van Rossum13257902007-06-07 23:15:56 +0000961 dispatch[dict] = save_dict
962 if PyStringMap is not None:
Jeremy Hylton2b9d0291998-05-27 22:38:22 +0000963 dispatch[PyStringMap] = save_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +0000964
def _batch_setitems(self, items):
    """Save every (key, value) pair of *items* into the dict on the stack.

    In text mode each pair is followed by SETITEM.  In binary mode the
    pairs are consumed in chunks of up to _BATCHSIZE: a chunk of
    several pairs is wrapped in MARK ... SETITEMS, a chunk of one uses
    SETITEM.
    """
    save_item = self.save
    emit = self.write

    if not self.bin:
        for key, value in items:
            save_item(key)
            save_item(value)
            emit(SETITEM)
        return

    source = iter(items)
    while True:
        chunk = list(islice(source, self._BATCHSIZE))
        count = len(chunk)
        if count == 1:
            key, value = chunk[0]
            save_item(key)
            save_item(value)
            emit(SETITEM)
        elif count > 1:
            emit(MARK)
            for key, value in chunk:
                save_item(key)
                save_item(value)
            emit(SETITEMS)
        # A short (or empty) chunk means the iterator is exhausted.
        if count < self._BATCHSIZE:
            return
Guido van Rossum25cb7df2003-01-31 18:53:21 +0000995
def save_set(self, obj):
    """Write the set *obj*.

    Below protocol 4 the set is saved through save_reduce() as a
    set(list) call.  Otherwise EMPTY_SET is written and memoized, and
    the members follow in MARK ... ADDITEMS chunks of up to
    _BATCHSIZE.
    """
    save_item = self.save
    emit = self.write

    if self.proto < 4:
        self.save_reduce(set, (list(obj),), obj=obj)
        return

    emit(EMPTY_SET)
    self.memoize(obj)

    members = iter(obj)
    while True:
        chunk = list(islice(members, self._BATCHSIZE))
        count = len(chunk)
        if chunk:
            emit(MARK)
            for member in chunk:
                save_item(member)
            emit(ADDITEMS)
        # A short (or empty) chunk means the iterator is exhausted.
        if count < self._BATCHSIZE:
            return
1018 dispatch[set] = save_set
1019
def save_frozenset(self, obj):
    """Write the frozenset *obj*.

    Below protocol 4 the value is saved through save_reduce() as a
    frozenset(list) call.  Otherwise the members are written between
    MARK and FROZENSET and the result is memoized.  If saving the
    members already memoized *obj* (it is recursive), everything
    pushed since MARK is dropped and the memoized object is fetched.
    """
    save_item = self.save
    emit = self.write

    if self.proto < 4:
        self.save_reduce(frozenset, (list(obj),), obj=obj)
        return

    emit(MARK)
    for member in obj:
        save_item(member)

    if id(obj) not in self.memo:
        emit(FROZENSET)
        self.memoize(obj)
        return

    # Recursive case: discard what was pushed and reuse the memo entry.
    emit(POP_MARK + self.get(self.memo[id(obj)][0]))
1041 dispatch[frozenset] = save_frozenset
1042
def save_global(self, obj, name=None):
    """Write *obj* as a reference to a module-level (or nested) name.

    name -- dotted name to use; defaults to obj.__qualname__ when
        present, otherwise obj.__name__.

    The owning module is found with whichmodule(), imported, and the
    name is resolved again; PicklingError is raised if the lookup
    fails or yields a different object.  With protocol >= 2 a
    registered extension code, if any, is written as EXT1/EXT2/EXT4
    and nothing else is emitted.  Otherwise the reference is written
    as STACK_GLOBAL (protocol >= 4), as a getattr() reduce when the
    object is not a direct attribute of the module, or as GLOBAL
    (UTF-8 for protocol 3, ASCII below that, after the optional
    fix_imports name remapping).  *obj* is memoized at the end, except
    on the extension-code path which returns early.
    """
    write = self.write

    if name is None:
        name = getattr(obj, '__qualname__', None)
    if name is None:
        name = obj.__name__

    module_name = whichmodule(obj, name)
    try:
        __import__(module_name, level=0)
        module = sys.modules[module_name]
        obj2, parent = _getattribute(module, name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module_name, name)) from None
    else:
        if obj2 is not obj:
            raise PicklingError(
                "Can't pickle %r: it's not the same object as %s.%s" %
                (obj, module_name, name))

    if self.proto >= 2:
        code = _extension_registry.get((module_name, name))
        if code:
            assert code > 0
            if code <= 0xff:
                write(EXT1 + pack("<B", code))
            elif code <= 0xffff:
                write(EXT2 + pack("<H", code))
            else:
                write(EXT4 + pack("<i", code))
            return
    lastname = name.rpartition('.')[2]
    if parent is module:
        name = lastname
    # Non-ASCII identifiers are supported only with protocols >= 3.
    if self.proto >= 4:
        self.save(module_name)
        self.save(name)
        write(STACK_GLOBAL)
    elif parent is not module:
        self.save_reduce(getattr, (parent, lastname))
    elif self.proto >= 3:
        write(GLOBAL + bytes(module_name, "utf-8") + b'\n' +
              bytes(name, "utf-8") + b'\n')
    else:
        if self.fix_imports:
            r_name_mapping = _compat_pickle.REVERSE_NAME_MAPPING
            r_import_mapping = _compat_pickle.REVERSE_IMPORT_MAPPING
            if (module_name, name) in r_name_mapping:
                module_name, name = r_name_mapping[(module_name, name)]
            elif module_name in r_import_mapping:
                module_name = r_import_mapping[module_name]
        try:
            write(GLOBAL + bytes(module_name, "ascii") + b'\n' +
                  bytes(name, "ascii") + b'\n')
        except UnicodeEncodeError:
            # Report the identifier that could not be encoded: the
            # module *name*, not the repr of the module object.
            raise PicklingError(
                "can't pickle global identifier '%s.%s' using "
                "pickle protocol %i"
                % (module_name, name, self.proto)) from None

    self.memoize(obj)
Tim Peters3b769832003-01-28 03:51:36 +00001108
def save_type(self, obj):
    """Write the class *obj*.

    The types of None, NotImplemented and Ellipsis are saved through
    save_reduce() as type(<singleton>) calls; every other class is
    delegated to save_global().  The delegate's result is returned.
    """
    for singleton in (None, NotImplemented, ...):
        if obj is type(singleton):
            return self.save_reduce(type, (singleton,), obj=obj)
    return self.save_global(obj)
1117
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001118 dispatch[FunctionType] = save_global
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08001119 dispatch[type] = save_type
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001120
Guido van Rossuma48061a1995-01-10 00:31:14 +00001121
Guido van Rossum1be31752003-01-28 15:19:53 +00001122# Unpickling machinery
1123
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001124class _Unpickler:
Guido van Rossuma48061a1995-01-10 00:31:14 +00001125
def __init__(self, file, *, fix_imports=True,
             encoding="ASCII", errors="strict", buffers=None):
    """This takes a binary file for reading a pickle data stream.

    The protocol version of the pickle is detected automatically, so
    no proto argument is needed.

    The argument *file* must have two methods, a read() method that
    takes an integer argument, and a readline() method that requires
    no arguments.  Both methods should return bytes.  Thus *file*
    can be a binary file object opened for reading, an io.BytesIO
    object, or any other custom object that meets this interface.

    If *buffers* is not None, it should be an iterable of buffer-enabled
    objects that is consumed each time the pickle stream references
    an out-of-band buffer view.  Such buffers have been given in order
    to the *buffer_callback* of a Pickler object.

    If *buffers* is None (the default), then the buffers are taken
    from the pickle stream, assuming they are serialized there.
    It is an error for *buffers* to be None if the pickle stream
    was produced with a non-None *buffer_callback*.

    Other optional arguments are *fix_imports*, *encoding* and
    *errors*, which are used to control compatibility support for
    pickle streams generated by Python 2.  If *fix_imports* is True,
    pickle will try to map the old Python 2 names to the new names
    used in Python 3.  The *encoding* and *errors* tell pickle how
    to decode 8-bit string instances pickled by Python 2; these
    default to 'ASCII' and 'strict', respectively.  *encoding* can be
    'bytes' to read these 8-bit string instances as bytes objects.
    """
    if buffers is not None:
        self._buffers = iter(buffers)
    else:
        self._buffers = None

    # Only the bound methods of *file* are kept, not the file itself.
    self._file_readline = file.readline
    self._file_read = file.read

    self.memo = {}
    self.encoding = encoding
    self.errors = errors
    self.proto = 0
    self.fix_imports = fix_imports
Guido van Rossuma48061a1995-01-10 00:31:14 +00001173
def load(self):
    """Read a pickled object representation from the open file.

    Return the reconstituted object hierarchy specified in the file.

    Opcodes are read one byte at a time and dispatched through the
    dispatch table until a handler raises _Stop, whose value is
    returned.  EOFError is raised if the stream ends first.
    """
    # Check whether Unpickler was initialized correctly.  This is only
    # needed to mirror the equivalent check in _pickle.Unpickler.
    if not hasattr(self, "_file_read"):
        raise UnpicklingError("Unpickler.__init__() was not called by "
                              "%s.__init__()" % (self.__class__.__name__,))

    # Fresh reader and stacks for every call.
    self._unframer = _Unframer(self._file_read, self._file_readline)
    self.read = self._unframer.read
    self.readinto = self._unframer.readinto
    self.readline = self._unframer.readline
    self.metastack = []
    self.stack = []
    self.append = self.stack.append
    self.proto = 0

    next_bytes = self.read
    handlers = self.dispatch
    try:
        while True:
            opcode = next_bytes(1)
            if not opcode:
                raise EOFError
            assert isinstance(opcode, bytes_types)
            handlers[opcode[0]](self)
    except _Stop as stopinst:
        return stopinst.value
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001203
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02001204 # Return a list of items pushed in the stack after last MARK instruction.
def pop_mark(self):
    """Return the current stack and restore the one saved on the metastack.

    The returned list holds the items pushed since the last MARK; the
    previous stack (and its append method) becomes current again.
    """
    items, self.stack = self.stack, self.metastack.pop()
    self.append = self.stack.append
    return items
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001210
def persistent_load(self, pid):
    """Base implementation: always raise UnpicklingError for *pid*."""
    message = "unsupported persistent id encountered"
    raise UnpicklingError(message)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001213
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001214 dispatch = {}
1215
def load_proto(self):
    """Read the one-byte protocol number and store it in self.proto.

    Raises ValueError if the number is outside 0..HIGHEST_PROTOCOL.
    """
    version = self.read(1)[0]
    if version < 0 or version > HIGHEST_PROTOCOL:
        raise ValueError("unsupported pickle protocol: %d" % version)
    self.proto = version
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001221 dispatch[PROTO[0]] = load_proto
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001222
def load_frame(self):
    """FRAME: read an 8-byte little-endian size and load that frame.

    Raises ValueError when the size exceeds sys.maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > sys.maxsize:
        raise ValueError("frame size > sys.maxsize: %d" % size)
    self._unframer.load_frame(size)
1228 dispatch[FRAME[0]] = load_frame
1229
def load_persid(self):
    """PERSID: read a line as an ASCII persistent ID and push its object.

    Raises UnpicklingError when the line is not ASCII.
    """
    try:
        pid = self.readline()[:-1].decode("ascii")
    except UnicodeDecodeError:
        raise UnpicklingError(
            "persistent IDs in protocol 0 must be ASCII strings")
    push = self.append
    push(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001237 dispatch[PERSID[0]] = load_persid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001238
def load_binpersid(self):
    """BINPERSID: pop a persistent ID and push what persistent_load returns."""
    pid = self.stack.pop()
    push = self.append
    push(self.persistent_load(pid))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001242 dispatch[BINPERSID[0]] = load_binpersid
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001243
def load_none(self):
    """NONE: push None."""
    push = self.append
    push(None)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001246 dispatch[NONE[0]] = load_none
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001247
def load_false(self):
    """NEWFALSE: push False."""
    push = self.append
    push(False)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001250 dispatch[NEWFALSE[0]] = load_false
Guido van Rossum7d97d312003-01-28 04:25:27 +00001251
def load_true(self):
    """NEWTRUE: push True."""
    push = self.append
    push(True)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001254 dispatch[NEWTRUE[0]] = load_true
Guido van Rossum7d97d312003-01-28 04:25:27 +00001255
def load_int(self):
    """INT: push the integer (or bool) spelled on the next line.

    Lines equal to FALSE[1:] and TRUE[1:] produce False and True; any
    other line is parsed with int(line, 0).
    """
    line = self.readline()
    if line == FALSE[1:]:
        self.append(False)
    elif line == TRUE[1:]:
        self.append(True)
    else:
        self.append(int(line, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001265 dispatch[INT[0]] = load_int
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001266
def load_binint(self):
    """BININT: push a 4-byte little-endian signed integer."""
    (value,) = unpack('<i', self.read(4))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001269 dispatch[BININT[0]] = load_binint
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001270
def load_binint1(self):
    """BININT1: push the value of the next byte."""
    octet = self.read(1)
    self.append(octet[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001273 dispatch[BININT1[0]] = load_binint1
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001274
def load_binint2(self):
    """BININT2: push a 2-byte little-endian unsigned integer."""
    (value,) = unpack('<H', self.read(2))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001277 dispatch[BININT2[0]] = load_binint2
Tim Peters2344fae2001-01-15 00:50:52 +00001278
def load_long(self):
    """LONG: push the integer on the next line, ignoring a trailing 'L'."""
    digits = self.readline()[:-1]
    if digits[-1:] == b'L':
        digits = digits[:-1]
    self.append(int(digits, 0))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001284 dispatch[LONG[0]] = load_long
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001285
def load_long1(self):
    """LONG1: read a 1-byte count, then that many bytes for decode_long."""
    size = self.read(1)[0]
    payload = self.read(size)
    number = decode_long(payload)
    self.append(number)
1290 dispatch[LONG1[0]] = load_long1
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001291
def load_long4(self):
    """LONG4: read a 4-byte signed count, then that many bytes for decode_long.

    Raises UnpicklingError when the count is negative.
    """
    (size,) = unpack('<i', self.read(4))
    if size < 0:
        # Corrupt or hostile pickle -- we never write one like this
        raise UnpicklingError("LONG pickle has negative byte count")
    payload = self.read(size)
    number = decode_long(payload)
    self.append(number)
1299 dispatch[LONG4[0]] = load_long4
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001300
def load_float(self):
    """FLOAT: push float() of the next line, newline excluded."""
    text = self.readline()[:-1]
    self.append(float(text))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001303 dispatch[FLOAT[0]] = load_float
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001304
def load_binfloat(self):
    """BINFLOAT: push an 8-byte big-endian double."""
    (value,) = unpack('>d', self.read(8))
    self.append(value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001307 dispatch[BINFLOAT[0]] = load_binfloat
Guido van Rossumd3703791998-10-22 20:15:36 +00001308
def _decode_string(self, value):
    """Return *value* as bytes or decoded text, depending on self.encoding.

    With an encoding of "bytes" the value is returned untouched;
    otherwise it is decoded with self.encoding and self.errors.
    """
    # Lets strings pickled by Python 2 come back either as bytes or as
    # str.  Only the STRING, BINSTRING and SHORT_BINSTRING opcodes
    # should go through here.
    codec = self.encoding
    if codec != "bytes":
        return value.decode(codec, self.errors)
    return value
1317
def load_string(self):
    """STRING: push the quoted, escape-encoded string on the next line.

    Raises UnpicklingError unless the argument is wrapped in a matching
    pair of single or double quotes.
    """
    line = self.readline()[:-1]
    quoted = len(line) >= 2 and line[0] == line[-1] and line[0] in b'"\''
    if not quoted:
        raise UnpicklingError("the STRING opcode argument must be quoted")
    # Strip outermost quotes, then undo the backslash escapes.
    raw = codecs.escape_decode(line[1:-1])[0]
    self.append(self._decode_string(raw))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001326 dispatch[STRING[0]] = load_string
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001327
def load_binstring(self):
    """BINSTRING: read a 4-byte signed length, then push that many bytes.

    The bytes go through _decode_string first.  Raises UnpicklingError
    when the length is negative.
    """
    # Deprecated BINSTRING uses signed 32-bit length
    (size,) = unpack('<i', self.read(4))
    if size < 0:
        raise UnpicklingError("BINSTRING pickle has negative byte count")
    payload = self.read(size)
    self.append(self._decode_string(payload))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001335 dispatch[BINSTRING[0]] = load_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001336
def load_binbytes(self):
    """BINBYTES: read a 4-byte unsigned length, then push that many bytes.

    Raises UnpicklingError when the length exceeds sys.maxsize.
    """
    (size,) = unpack('<I', self.read(4))
    if size > maxsize:
        raise UnpicklingError("BINBYTES exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(payload)
1343 dispatch[BINBYTES[0]] = load_binbytes
1344
def load_unicode(self):
    """UNICODE: push the next line decoded as raw-unicode-escape."""
    line = self.readline()[:-1]
    text = str(line, 'raw-unicode-escape')
    self.append(text)
1347 dispatch[UNICODE[0]] = load_unicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +00001348
def load_binunicode(self):
    """BINUNICODE: read a 4-byte unsigned length, then push UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.  Raises
    UnpicklingError when the length exceeds sys.maxsize.
    """
    (size,) = unpack('<I', self.read(4))
    if size > maxsize:
        raise UnpicklingError("BINUNICODE exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001355 dispatch[BINUNICODE[0]] = load_binunicode
Guido van Rossumb5f2f1b2000-03-10 23:20:09 +00001356
def load_binunicode8(self):
    """BINUNICODE8: read an 8-byte unsigned length, then push UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.  Raises
    UnpicklingError when the length exceeds sys.maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BINUNICODE8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
1363 dispatch[BINUNICODE8[0]] = load_binunicode8
1364
def load_binbytes8(self):
    """BINBYTES8: read an 8-byte unsigned length, then push that many bytes.

    Raises UnpicklingError when the length exceeds sys.maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BINBYTES8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    payload = self.read(size)
    self.append(payload)
1371 dispatch[BINBYTES8[0]] = load_binbytes8
1372
def load_bytearray8(self):
    """BYTEARRAY8: read an 8-byte unsigned length, then push a bytearray.

    The bytearray is allocated at the stated size and filled with
    self.readinto.  Raises UnpicklingError when the length exceeds
    sys.maxsize.
    """
    (size,) = unpack('<Q', self.read(8))
    if size > maxsize:
        raise UnpicklingError("BYTEARRAY8 exceeds system's maximum size "
                              "of %d bytes" % maxsize)
    buf = bytearray(size)
    self.readinto(buf)
    self.append(buf)
1381 dispatch[BYTEARRAY8[0]] = load_bytearray8
1382
def load_next_buffer(self):
    """NEXT_BUFFER: push the next item from the out-of-band buffers iterator.

    Raises UnpicklingError when self._buffers is None or is exhausted.
    """
    source = self._buffers
    if source is None:
        raise UnpicklingError("pickle stream refers to out-of-band data "
                              "but no *buffers* argument was given")
    try:
        item = next(source)
    except StopIteration:
        raise UnpicklingError("not enough out-of-band buffers")
    self.append(item)
1392 dispatch[NEXT_BUFFER[0]] = load_next_buffer
1393
def load_readonly_buffer(self):
    """READONLY_BUFFER: make the buffer on top of the stack read-only.

    A writable buffer is replaced by a read-only memoryview of it; one
    that is already read-only is left in place.
    """
    top = self.stack[-1]
    with memoryview(top) as view:
        if view.readonly:
            return
        self.stack[-1] = view.toreadonly()
1399 dispatch[READONLY_BUFFER[0]] = load_readonly_buffer
1400
def load_short_binstring(self):
    """SHORT_BINSTRING: read a 1-byte length, then push that many bytes.

    The bytes go through _decode_string first.
    """
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(self._decode_string(payload))
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001405 dispatch[SHORT_BINSTRING[0]] = load_short_binstring
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001406
def load_short_binbytes(self):
    """SHORT_BINBYTES: read a 1-byte length, then push that many bytes."""
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(payload)
Guido van Rossumf4169812008-03-17 22:56:06 +00001410 dispatch[SHORT_BINBYTES[0]] = load_short_binbytes
1411
def load_short_binunicode(self):
    """SHORT_BINUNICODE: read a 1-byte length, then push UTF-8 text.

    Decoding uses the 'surrogatepass' error handler.
    """
    size = self.read(1)[0]
    payload = self.read(size)
    self.append(str(payload, 'utf-8', 'surrogatepass'))
1415 dispatch[SHORT_BINUNICODE[0]] = load_short_binunicode
1416
def load_tuple(self):
    """TUPLE: push a tuple of the items collected since the last mark."""
    # pop_mark() must run before self.append is looked up: it rebinds
    # self.append to the restored stack.
    marked = self.pop_mark()
    packed = tuple(marked)
    self.append(packed)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001420 dispatch[TUPLE[0]] = load_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001421
def load_empty_tuple(self):
    """EMPTY_TUPLE: push ()."""
    push = self.append
    push(())
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001424 dispatch[EMPTY_TUPLE[0]] = load_empty_tuple
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001425
def load_tuple1(self):
    """TUPLE1: replace the top stack item with a 1-tuple holding it."""
    stack = self.stack
    single = (stack[-1],)
    stack[-1] = single
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001428 dispatch[TUPLE1[0]] = load_tuple1
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001429
def load_tuple2(self):
    """TUPLE2: replace the top two stack items with a 2-tuple of them."""
    stack = self.stack
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001432 dispatch[TUPLE2[0]] = load_tuple2
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001433
def load_tuple3(self):
    """TUPLE3: replace the top three stack items with a 3-tuple of them."""
    stack = self.stack
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001436 dispatch[TUPLE3[0]] = load_tuple3
Guido van Rossum44f0ea52003-01-28 04:14:51 +00001437
def load_empty_list(self):
    """EMPTY_LIST: push a new empty list."""
    fresh = []
    self.append(fresh)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001440 dispatch[EMPTY_LIST[0]] = load_empty_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001441
def load_empty_dictionary(self):
    """EMPTY_DICT: push a new empty dict."""
    fresh = {}
    self.append(fresh)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001444 dispatch[EMPTY_DICT[0]] = load_empty_dictionary
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001445
def load_empty_set(self):
    """EMPTY_SET: push a new empty set."""
    fresh = set()
    self.append(fresh)
1448 dispatch[EMPTY_SET[0]] = load_empty_set
1449
def load_frozenset(self):
    """FROZENSET: push a frozenset of the items collected since the last mark."""
    # pop_mark() first: it rebinds self.append to the restored stack.
    marked = self.pop_mark()
    frozen = frozenset(marked)
    self.append(frozen)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001453 dispatch[FROZENSET[0]] = load_frozenset
1454
def load_list(self):
    """LIST: push the list of items collected since the last mark."""
    # pop_mark() first: it rebinds self.append to the restored stack.
    marked = self.pop_mark()
    push = self.append
    push(marked)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001458 dispatch[LIST[0]] = load_list
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001459
def load_dict(self):
    """DICT: push a dict built from the key/value items since the last mark.

    Items alternate key, value, key, value, ...
    """
    # pop_mark() first: it rebinds self.append to the restored stack.
    flat = self.pop_mark()
    built = {}
    for pos in range(0, len(flat), 2):
        built[flat[pos]] = flat[pos + 1]
    self.append(built)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001465 dispatch[DICT[0]] = load_dict
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001466
# INST and OBJ differ only in how they get a class object.  It's not
# only sensible to do the rest in a common routine, the two routines
# previously diverged and grew different bugs.
# klass is the class to instantiate, and args is the list of arguments
# collected after the topmost mark, which are passed to klass.
def _instantiate(self, klass, args):
    """Create an instance of *klass* for INST/OBJ and push it.

    klass is called with *args* when there are arguments, when klass is
    not a type, or when it defines __getinitargs__.  Otherwise the
    instance is made with klass.__new__(klass), which skips __init__.

    Raises TypeError, chained to the original error, when calling klass
    fails with a TypeError.
    """
    if (args or not isinstance(klass, type) or
            hasattr(klass, "__getinitargs__")):
        try:
            value = klass(*args)
        except TypeError as err:
            # The message is the only exception argument.  The original
            # error and its traceback stay reachable as __cause__,
            # instead of a traceback object being stuffed into args.
            raise TypeError("in constructor for %s: %s" %
                            (klass.__name__, err)) from err
    else:
        value = klass.__new__(klass)
    self.append(value)
Tim Petersd01c1e92003-01-30 15:41:46 +00001483
def load_inst(self):
    """INST: read module and class-name lines, then instantiate the class.

    The class comes from find_class; the constructor arguments are the
    items collected since the last mark.
    """
    next_line = self.readline
    module = next_line()[:-1].decode("ascii")
    name = next_line()[:-1].decode("ascii")
    cls = self.find_class(module, name)
    ctor_args = self.pop_mark()
    self._instantiate(cls, ctor_args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001489 dispatch[INST[0]] = load_inst
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001490
def load_obj(self):
    """OBJ: instantiate the class found right after the last mark.

    The remaining marked items are the constructor arguments.
    """
    # Stack is ... markobject classobject arg1 arg2 ...
    marked = self.pop_mark()
    klass = marked.pop(0)
    ctor_args = marked
    self._instantiate(klass, ctor_args)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001496 dispatch[OBJ[0]] = load_obj
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001497
def load_newobj(self):
    """NEWOBJ: pop args and cls, then push cls.__new__(cls, *args)."""
    stack = self.stack
    new_args = stack.pop()
    klass = stack.pop()
    instance = klass.__new__(klass, *new_args)
    self.append(instance)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001503 dispatch[NEWOBJ[0]] = load_newobj
Guido van Rossum3a41c612003-01-28 15:10:22 +00001504
def load_newobj_ex(self):
    """NEWOBJ_EX: pop kwargs, args and cls, then push the new instance.

    The instance is cls.__new__(cls, *args, **kwargs).
    """
    stack = self.stack
    new_kwargs = stack.pop()
    new_args = stack.pop()
    klass = stack.pop()
    instance = klass.__new__(klass, *new_args, **new_kwargs)
    self.append(instance)
1511 dispatch[NEWOBJ_EX[0]] = load_newobj_ex
1512
def load_global(self):
    """GLOBAL: read module and name lines (UTF-8) and push find_class's result."""
    next_line = self.readline
    module = next_line()[:-1].decode("utf-8")
    name = next_line()[:-1].decode("utf-8")
    found = self.find_class(module, name)
    self.append(found)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001518 dispatch[GLOBAL[0]] = load_global
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001519
def load_stack_global(self):
    """STACK_GLOBAL: pop name and module, then push find_class's result.

    Raises UnpicklingError unless both popped values are exactly str.
    """
    stack = self.stack
    name = stack.pop()
    module = stack.pop()
    if not (type(name) is str and type(module) is str):
        raise UnpicklingError("STACK_GLOBAL requires str")
    found = self.find_class(module, name)
    self.append(found)
1526 dispatch[STACK_GLOBAL[0]] = load_stack_global
1527
def load_ext1(self):
    """EXT1: read a 1-byte extension code and hand it to get_extension."""
    octet = self.read(1)
    self.get_extension(octet[0])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001531 dispatch[EXT1[0]] = load_ext1
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001532
def load_ext2(self):
    """EXT2: read a 2-byte little-endian unsigned code for get_extension."""
    (ext_code,) = unpack('<H', self.read(2))
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001536 dispatch[EXT2[0]] = load_ext2
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001537
def load_ext4(self):
    """EXT4: read a 4-byte little-endian signed code for get_extension."""
    (ext_code,) = unpack('<i', self.read(4))
    self.get_extension(ext_code)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001541 dispatch[EXT4[0]] = load_ext4
Guido van Rossum255f3ee2003-01-29 06:14:11 +00001542
def get_extension(self, code):
    """Push the object registered for extension *code*.

    A hit in _extension_cache is pushed directly.  Otherwise the
    (module, name) key from _inverted_registry is resolved through
    find_class, cached and pushed.

    Raises UnpicklingError for an unregistered code <= 0 and ValueError
    for any other unregistered code.
    """
    try:
        cached = _extension_cache[code]
    except KeyError:
        pass
    else:
        self.append(cached)
        return
    key = _inverted_registry.get(code)
    if not key:
        if code <= 0:  # note that 0 is forbidden
            # Corrupt or hostile pickle.
            raise UnpicklingError("EXT specifies code <= 0")
        raise ValueError("unregistered extension code %d" % code)
    resolved = self.find_class(*key)
    _extension_cache[code] = resolved
    self.append(resolved)
1558
def find_class(self, module, name):
    """Import *module* and return the attribute *name* from it.

    An audit event 'pickle.find_class' is raised first.  For protocols
    below 3 with fix_imports set, the names are first translated
    through the _compat_pickle mapping tables.  For protocol 4 and up
    the lookup goes through _getattribute; otherwise it is a plain
    getattr.
    """
    # Subclasses may override this.
    sys.audit('pickle.find_class', module, name)
    if self.proto < 3 and self.fix_imports:
        pair = (module, name)
        if pair in _compat_pickle.NAME_MAPPING:
            module, name = _compat_pickle.NAME_MAPPING[pair]
        elif module in _compat_pickle.IMPORT_MAPPING:
            module = _compat_pickle.IMPORT_MAPPING[module]
    __import__(module, level=0)
    target = sys.modules[module]
    if self.proto >= 4:
        return _getattribute(target, name)[0]
    return getattr(target, name)
Guido van Rossumb72cf2d1997-04-09 17:32:51 +00001572
def load_reduce(self):
    """REDUCE: pop args and replace the callable below with callable(*args)."""
    stack = self.stack
    call_args = stack.pop()
    callee = stack[-1]
    result = callee(*call_args)
    stack[-1] = result
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001578 dispatch[REDUCE[0]] = load_reduce
Guido van Rossuma48061a1995-01-10 00:31:14 +00001579
def load_pop(self):
    """POP: drop the top stack item, or the mark frame if the stack is empty."""
    if not self.stack:
        self.pop_mark()
        return
    del self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001585 dispatch[POP[0]] = load_pop
Guido van Rossum7b5430f1995-03-04 22:25:21 +00001586
def load_pop_mark(self):
    """POP_MARK: discard everything collected since the last mark."""
    discard = self.pop_mark
    discard()
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001589 dispatch[POP_MARK[0]] = load_pop_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001590
def load_dup(self):
    """DUP: push the top stack item a second time."""
    push = self.append
    push(self.stack[-1])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001593 dispatch[DUP[0]] = load_dup
Guido van Rossuma48061a1995-01-10 00:31:14 +00001594
def load_get(self):
    """GET: push the memo entry whose index is spelled on the next line."""
    slot = int(self.readline()[:-1])
    push = self.append
    push(self.memo[slot])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001598 dispatch[GET[0]] = load_get
Guido van Rossum78536471996-04-12 13:36:27 +00001599
def load_binget(self):
    """BINGET: push the memo entry indexed by the next byte."""
    slot = self.read(1)[0]
    push = self.append
    push(self.memo[slot])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001603 dispatch[BINGET[0]] = load_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001604
def load_long_binget(self):
    """LONG_BINGET: push the memo entry indexed by a 4-byte unsigned integer."""
    (slot,) = unpack('<I', self.read(4))
    push = self.append
    push(self.memo[slot])
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001608 dispatch[LONG_BINGET[0]] = load_long_binget
Guido van Rossum78536471996-04-12 13:36:27 +00001609
def load_put(self):
    """PUT: store the top stack item in the memo at the index on the next line.

    Raises ValueError when the index is negative.
    """
    slot = int(self.readline()[:-1])
    if slot < 0:
        raise ValueError("negative PUT argument")
    top = self.stack[-1]
    self.memo[slot] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001615 dispatch[PUT[0]] = load_put
Guido van Rossuma48061a1995-01-10 00:31:14 +00001616
def load_binput(self):
    """BINPUT: store the top stack item in the memo at a 1-byte index.

    The index is the value of the next byte, so it is always in
    0..255 and needs no sign check.
    """
    slot = self.read(1)[0]
    self.memo[slot] = self.stack[-1]
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001622 dispatch[BINPUT[0]] = load_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001623
def load_long_binput(self):
    """LONG_BINPUT: store the top stack item in the memo at a 4-byte index.

    The index is read as a little-endian unsigned integer.  Raises
    ValueError when it exceeds sys.maxsize.
    """
    (slot,) = unpack('<I', self.read(4))
    if slot > maxsize:
        raise ValueError("negative LONG_BINPUT argument")
    top = self.stack[-1]
    self.memo[slot] = top
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001629 dispatch[LONG_BINPUT[0]] = load_long_binput
Guido van Rossuma48061a1995-01-10 00:31:14 +00001630
def load_memoize(self):
    """MEMOIZE: store the top stack item in the memo at index len(memo)."""
    top = self.stack[-1]
    table = self.memo
    table[len(table)] = top
1634 dispatch[MEMOIZE[0]] = load_memoize
1635
def load_append(self):
    """APPEND: pop a value and append it to the object now on top."""
    stack = self.stack
    item = stack.pop()
    target = stack[-1]
    target.append(item)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001641 dispatch[APPEND[0]] = load_append
Guido van Rossuma48061a1995-01-10 00:31:14 +00001642
def load_appends(self):
    """APPENDS: add the items since the last mark to the object below them.

    The target's extend() is used when it has one; otherwise each item
    is passed to its append().
    """
    new_items = self.pop_mark()
    target = self.stack[-1]
    try:
        bulk_add = target.extend
    except AttributeError:
        pass
    else:
        bulk_add(new_items)
        return
    # PEP 307 requires both extend() and append(), but objects that
    # only provide append() are still accepted for backward
    # compatibility.
    add_one = target.append
    for entry in new_items:
        add_one(entry)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001659 dispatch[APPENDS[0]] = load_appends
Tim Peters2344fae2001-01-15 00:50:52 +00001660
def load_setitem(self):
    """SETITEM: pop value and key, then store them in the object on top."""
    stack = self.stack
    value = stack.pop()
    key = stack.pop()
    mapping = stack[-1]
    mapping[key] = value
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001667 dispatch[SETITEM[0]] = load_setitem
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001668
def load_setitems(self):
    """SETITEMS: store the key/value items since the last mark.

    Items alternate key, value, key, value, ... and are stored in the
    object just below the mark.
    """
    flat = self.pop_mark()
    mapping = self.stack[-1]
    pos, end = 0, len(flat)
    while pos < end:
        mapping[flat[pos]] = flat[pos + 1]
        pos += 2
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001674 dispatch[SETITEMS[0]] = load_setitems
Guido van Rossuma48061a1995-01-10 00:31:14 +00001675
def load_additems(self):
    """ADDITEMS: add the items since the last mark to the object below them.

    A real set is updated in one call; any other object gets each item
    through its add() method.
    """
    new_items = self.pop_mark()
    target = self.stack[-1]
    if not isinstance(target, set):
        add_one = target.add
        for entry in new_items:
            add_one(entry)
        return
    target.update(new_items)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001685 dispatch[ADDITEMS[0]] = load_additems
1686
def load_build(self):
    """BUILD: pop a state object and apply it to the instance on top.

    If the instance has __setstate__, that method receives the state
    and nothing else happens.  Otherwise a 2-tuple state is split into
    (dict state, slot state).  The dict state is copied into the
    instance __dict__, with str keys interned, and the slot state is
    applied with setattr.
    """
    stack = self.stack
    state = stack.pop()
    obj = stack[-1]
    hook = getattr(obj, "__setstate__", None)
    if hook is not None:
        hook(state)
        return
    if isinstance(state, tuple) and len(state) == 2:
        dict_state, slot_state = state
    else:
        dict_state, slot_state = state, None
    if dict_state:
        attrs = obj.__dict__
        intern = sys.intern
        for key, value in dict_state.items():
            if type(key) is str:
                key = intern(key)
            attrs[key] = value
    if slot_state:
        for key, value in slot_state.items():
            setattr(obj, key, value)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001709 dispatch[BUILD[0]] = load_build
Guido van Rossuma48061a1995-01-10 00:31:14 +00001710
def load_mark(self):
    """MARK: save the current stack on metastack and start an empty one."""
    self.metastack.append(self.stack)
    fresh = []
    self.stack = fresh
    self.append = fresh.append
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001715 dispatch[MARK[0]] = load_mark
Guido van Rossuma48061a1995-01-10 00:31:14 +00001716
def load_stop(self):
    """STOP: pop the top stack item and raise _Stop carrying it."""
    result = self.stack.pop()
    raise _Stop(result)
Guido van Rossum2e6a4b32007-05-04 19:56:22 +00001720 dispatch[STOP[0]] = load_stop
Guido van Rossuma48061a1995-01-10 00:31:14 +00001721
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001722
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001723# Shorthands
1724
def _dump(obj, file, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Pickle *obj* to *file* using a _Pickler built from the given options."""
    pickler = _Pickler(file, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001728
def _dumps(obj, protocol=None, *, fix_imports=True, buffer_callback=None):
    """Return the pickle of *obj*, written by a _Pickler into memory."""
    sink = io.BytesIO()
    pickler = _Pickler(sink, protocol, fix_imports=fix_imports,
                       buffer_callback=buffer_callback)
    pickler.dump(obj)
    data = sink.getvalue()
    assert isinstance(data, bytes_types)
    return data
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001736
def _load(file, *, fix_imports=True, encoding="ASCII", errors="strict",
          buffers=None):
    """Read one pickle from *file* with a _Unpickler and return the object."""
    unpickler = _Unpickler(file, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
Guido van Rossum0c891ce1995-03-14 15:09:05 +00001741
def _loads(s, *, fix_imports=True, encoding="ASCII", errors="strict",
           buffers=None):
    """Unpickle the object stored in the bytes-like *s*.

    Raises TypeError when *s* is a str.
    """
    if isinstance(s, str):
        raise TypeError("Can't load pickle from unicode string")
    stream = io.BytesIO(s)
    unpickler = _Unpickler(stream, fix_imports=fix_imports, buffers=buffers,
                           encoding=encoding, errors=errors)
    return unpickler.load()
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001749
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001750# Use the faster _pickle if possible
try:
    # When _pickle imports, the public names below are bound to the
    # objects it provides.
    from _pickle import (
        PickleError,
        PicklingError,
        UnpicklingError,
        Pickler,
        Unpickler,
        dump,
        dumps,
        load,
        loads
    )
except ImportError:
    # _pickle could not be imported: expose the implementations defined
    # in this module under the public names instead.
    Pickler, Unpickler = _Pickler, _Unpickler
    dump, dumps, load, loads = _dump, _dumps, _load, _loads
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001766
Guido van Rossumd6c9e632003-01-28 03:49:52 +00001767# Doctest
def _test():
    """Run doctest.testmod() with its defaults and return its result."""
    from doctest import testmod
    return testmod()
1771
if __name__ == "__main__":
    # Command-line use: pretty-print the objects stored in the given
    # pickle files, or run the doctest self-test with -t/--test.
    import argparse
    parser = argparse.ArgumentParser(
        description='display contents of the pickle files')
    parser.add_argument(
        'pickle_file', type=argparse.FileType('br'),
        nargs='*', help='the pickle file')
    parser.add_argument(
        '-t', '--test', action='store_true',
        help='run self-test suite')
    parser.add_argument(
        '-v', action='store_true',
        help='run verbosely; only affects self-test run')
    args = parser.parse_args()
    if args.test:
        _test()
    elif not args.pickle_file:
        parser.print_help()
    else:
        import pprint
        for f in args.pickle_file:
            # argparse.FileType opened the file while parsing; close it
            # as soon as its pickle has been read instead of leaking it.
            with f:
                obj = load(f)
            pprint.pprint(obj)
1795 pprint.pprint(obj)