blob: 5a8aad9de7679d3b0848fa4afb7c05f0126e7b19 [file] [log] [blame]
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000011#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020012#include "structmember.h" // PyMemberDef
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000013
/* Docstring text describing the _pickle module.  NOTE(review): presumably
   installed as the module's __doc__ by the module definition, which is
   outside this excerpt -- confirm. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
16
Larry Hastings61272b72014-01-07 12:41:53 -080017/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080018module _pickle
Larry Hastingsc2047262014-01-25 20:43:29 -080019class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
Larry Hastings61272b72014-01-07 12:41:53 -080023[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030024/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080025
/* Pickle protocol version numbers known to this module.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,   /* newest protocol version listed here */
    DEFAULT_PROTOCOL = 4    /* protocol version designated as the default */
};
33
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is spelled as a single character constant.  The groups
   below follow the protocol version that introduced them. */
enum opcode {
    /* Opcodes not tied to a later protocol section below. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
114
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* Framing parameters.  NOTE(review): the code that consumes these is
       outside this excerpt; presumably the minimum and target frame payload
       sizes and the byte length of a FRAME header -- confirm. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
136
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800137/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
/* State of the pickle module, per PEP 3121.

   Every member is a PyObject pointer that _Pickle_InitState() fills in (the
   exception classes excepted: they are not set in the visible init code) and
   that _Pickle_ClearState() releases with Py_CLEAR(). */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
177
178/* Forward declaration of the _pickle module definition. */
179static struct PyModuleDef _picklemodule;
180
181/* Given a module object, get its per-module state. */
182static PickleState *
183_Pickle_GetState(PyObject *module)
184{
185 return (PickleState *)PyModule_GetState(module);
186}
187
188/* Find the module instance imported in the currently running sub-interpreter
189 and get its state. */
190static PickleState *
191_Pickle_GetGlobalState(void)
192{
193 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194}
195
196/* Clear the given pickle module state. */
197static void
198_Pickle_ClearState(PickleState *st)
199{
200 Py_CLEAR(st->PickleError);
201 Py_CLEAR(st->PicklingError);
202 Py_CLEAR(st->UnpicklingError);
203 Py_CLEAR(st->dispatch_table);
204 Py_CLEAR(st->extension_registry);
205 Py_CLEAR(st->extension_cache);
206 Py_CLEAR(st->inverted_registry);
207 Py_CLEAR(st->name_mapping_2to3);
208 Py_CLEAR(st->import_mapping_2to3);
209 Py_CLEAR(st->name_mapping_3to2);
210 Py_CLEAR(st->import_mapping_3to2);
211 Py_CLEAR(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300212 Py_CLEAR(st->getattr);
Victor Stinner9ba97df2015-11-17 12:15:07 +0100213 Py_CLEAR(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800214}
215
216/* Initialize the given pickle module state. */
217static int
218_Pickle_InitState(PickleState *st)
219{
220 PyObject *copyreg = NULL;
221 PyObject *compat_pickle = NULL;
222 PyObject *codecs = NULL;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300223 PyObject *functools = NULL;
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200224 _Py_IDENTIFIER(getattr);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800225
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200226 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300227 if (st->getattr == NULL)
228 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300229
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800230 copyreg = PyImport_ImportModule("copyreg");
231 if (!copyreg)
232 goto error;
233 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234 if (!st->dispatch_table)
235 goto error;
236 if (!PyDict_CheckExact(st->dispatch_table)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg.dispatch_table should be a dict, not %.200s",
239 Py_TYPE(st->dispatch_table)->tp_name);
240 goto error;
241 }
242 st->extension_registry = \
243 PyObject_GetAttrString(copyreg, "_extension_registry");
244 if (!st->extension_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->extension_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._extension_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250 goto error;
251 }
252 st->inverted_registry = \
253 PyObject_GetAttrString(copyreg, "_inverted_registry");
254 if (!st->inverted_registry)
255 goto error;
256 if (!PyDict_CheckExact(st->inverted_registry)) {
257 PyErr_Format(PyExc_RuntimeError,
258 "copyreg._inverted_registry should be a dict, "
259 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260 goto error;
261 }
262 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263 if (!st->extension_cache)
264 goto error;
265 if (!PyDict_CheckExact(st->extension_cache)) {
266 PyErr_Format(PyExc_RuntimeError,
267 "copyreg._extension_cache should be a dict, "
268 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269 goto error;
270 }
271 Py_CLEAR(copyreg);
272
273 /* Load the 2.x -> 3.x stdlib module mapping tables */
274 compat_pickle = PyImport_ImportModule("_compat_pickle");
275 if (!compat_pickle)
276 goto error;
277 st->name_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279 if (!st->name_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284 Py_TYPE(st->name_mapping_2to3)->tp_name);
285 goto error;
286 }
287 st->import_mapping_2to3 = \
288 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289 if (!st->import_mapping_2to3)
290 goto error;
291 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292 PyErr_Format(PyExc_RuntimeError,
293 "_compat_pickle.IMPORT_MAPPING should be a dict, "
294 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295 goto error;
296 }
297 /* ... and the 3.x -> 2.x mapping tables */
298 st->name_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300 if (!st->name_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306 goto error;
307 }
308 st->import_mapping_3to2 = \
309 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310 if (!st->import_mapping_3to2)
311 goto error;
312 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313 PyErr_Format(PyExc_RuntimeError,
314 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316 goto error;
317 }
318 Py_CLEAR(compat_pickle);
319
320 codecs = PyImport_ImportModule("codecs");
321 if (codecs == NULL)
322 goto error;
323 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324 if (st->codecs_encode == NULL) {
325 goto error;
326 }
327 if (!PyCallable_Check(st->codecs_encode)) {
328 PyErr_Format(PyExc_RuntimeError,
329 "codecs.encode should be a callable, not %.200s",
330 Py_TYPE(st->codecs_encode)->tp_name);
331 goto error;
332 }
333 Py_CLEAR(codecs);
334
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300335 functools = PyImport_ImportModule("functools");
336 if (!functools)
337 goto error;
338 st->partial = PyObject_GetAttrString(functools, "partial");
339 if (!st->partial)
340 goto error;
341 Py_CLEAR(functools);
342
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800343 return 0;
344
345 error:
346 Py_CLEAR(copyreg);
347 Py_CLEAR(compat_pickle);
348 Py_CLEAR(codecs);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300349 Py_CLEAR(functools);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800350 _Pickle_ClearState(st);
351 return -1;
352}
353
354/* Helper for calling a function with a single argument quickly.
355
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800356 This function steals the reference of the given argument. */
357static PyObject *
358_Pickle_FastCall(PyObject *func, PyObject *obj)
359{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800360 PyObject *result;
361
Petr Viktorinffd97532020-02-11 17:46:57 +0100362 result = PyObject_CallOneArg(func, obj);
Victor Stinner75210692016-08-19 18:59:15 +0200363 Py_DECREF(obj);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800364 return result;
365}
366
367/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000368
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200369/* Retrieve and deconstruct a method for avoiding a reference cycle
370 (pickler -> bound method of pickler -> pickler) */
371static int
372init_method_ref(PyObject *self, _Py_Identifier *name,
373 PyObject **method_func, PyObject **method_self)
374{
375 PyObject *func, *func2;
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200376 int ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200377
378 /* *method_func and *method_self should be consistent. All refcount decrements
379 should be occurred after setting *method_self and *method_func. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200380 ret = _PyObject_LookupAttrId(self, name, &func);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200381 if (func == NULL) {
382 *method_self = NULL;
383 Py_CLEAR(*method_func);
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200384 return ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200385 }
386
387 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388 /* Deconstruct a bound Python method */
389 func2 = PyMethod_GET_FUNCTION(func);
390 Py_INCREF(func2);
391 *method_self = self; /* borrowed */
392 Py_XSETREF(*method_func, func2);
393 Py_DECREF(func);
394 return 0;
395 }
396 else {
397 *method_self = NULL;
398 Py_XSETREF(*method_func, func);
399 return 0;
400 }
401}
402
403/* Bind a method if it was deconstructed */
404static PyObject *
405reconstruct_method(PyObject *func, PyObject *self)
406{
407 if (self) {
408 return PyMethod_New(func, self);
409 }
410 else {
411 Py_INCREF(func);
412 return func;
413 }
414}
415
416static PyObject *
417call_method(PyObject *func, PyObject *self, PyObject *obj)
418{
419 if (self) {
420 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421 }
422 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100423 return PyObject_CallOneArg(func, obj);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200424 }
425}
426
427/*************************************************************************/
428
/* Internal data type used as the unpickling stack.

   The number of items currently on the stack is kept in the object's
   ob_size (read with Py_SIZE(), written with Py_SET_SIZE()). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;        /* item array, allocated with PyMem_Malloc() */
    int mark_set;           /* is MARK set? */
    Py_ssize_t fence;       /* position of top MARK or 0 */
    Py_ssize_t allocated;   /* number of slots in data allocated */
} Pdata;
437
438static void
439Pdata_dealloc(Pdata *self)
440{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200441 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000442 while (--i >= 0) {
443 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000444 }
Victor Stinner00d7abd2020-12-01 09:56:42 +0100445 PyMem_Free(self->data);
Victor Stinner32bd68c2020-12-01 10:37:39 +0100446 PyObject_Free(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000447}
448
/* Type object for Pdata.  Only the name, the sizes and the deallocator are
   given; all remaining slots are left to static zero-initialization. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    sizeof(PyObject *),           /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
456
457static PyObject *
458Pdata_New(void)
459{
460 Pdata *self;
461
462 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463 return NULL;
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100464 Py_SET_SIZE(self, 0);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200465 self->mark_set = 0;
466 self->fence = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000467 self->allocated = 8;
Victor Stinner00d7abd2020-12-01 09:56:42 +0100468 self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000469 if (self->data)
470 return (PyObject *)self;
471 Py_DECREF(self);
472 return PyErr_NoMemory();
473}
474
475
476/* Retain only the initial clearto items. If clearto >= the current
477 * number of items, this is a (non-erroneous) NOP.
478 */
479static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200480Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000481{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200482 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000483
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200484 assert(clearto >= self->fence);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000485 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000486 return 0;
487
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000488 while (--i >= clearto) {
489 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100491 Py_SET_SIZE(self, clearto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000492 return 0;
493}
494
495static int
496Pdata_grow(Pdata *self)
497{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000498 PyObject **data = self->data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200499 size_t allocated = (size_t)self->allocated;
500 size_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000501
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000502 new_allocated = (allocated >> 3) + 6;
503 /* check for integer overflow */
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200504 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000505 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000506 new_allocated += allocated;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500507 PyMem_RESIZE(data, PyObject *, new_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000508 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000509 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000510
511 self->data = data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200512 self->allocated = (Py_ssize_t)new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000513 return 0;
514
515 nomemory:
516 PyErr_NoMemory();
517 return -1;
518}
519
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200520static int
521Pdata_stack_underflow(Pdata *self)
522{
523 PickleState *st = _Pickle_GetGlobalState();
524 PyErr_SetString(st->UnpicklingError,
525 self->mark_set ?
526 "unexpected MARK found" :
527 "unpickling stack underflow");
528 return -1;
529}
530
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000531/* D is a Pdata*. Pop the topmost element and store it into V, which
532 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
533 * is raised and V is set to NULL.
534 */
535static PyObject *
536Pdata_pop(Pdata *self)
537{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200538 if (Py_SIZE(self) <= self->fence) {
539 Pdata_stack_underflow(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000540 return NULL;
541 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100542 Py_SET_SIZE(self, Py_SIZE(self) - 1);
543 return self->data[Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000544}
545#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
546
547static int
548Pdata_push(Pdata *self, PyObject *obj)
549{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000550 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000551 return -1;
552 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100553 self->data[Py_SIZE(self)] = obj;
554 Py_SET_SIZE(self, Py_SIZE(self) + 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000555 return 0;
556}
557
/* Push an object on stack, transferring its ownership to the stack.
   D is a Pdata*, O the object, ER the value the *enclosing function* returns
   when the push fails.  Note that O is not released on failure. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   Same arguments as PDATA_PUSH.  O is evaluated twice, and the added
   reference is not dropped again when the push fails. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
566
567static PyObject *
568Pdata_poptuple(Pdata *self, Py_ssize_t start)
569{
570 PyObject *tuple;
571 Py_ssize_t len, i, j;
572
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200573 if (start < self->fence) {
574 Pdata_stack_underflow(self);
575 return NULL;
576 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000577 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000578 tuple = PyTuple_New(len);
579 if (tuple == NULL)
580 return NULL;
581 for (i = start, j = 0; j < len; i++, j++)
582 PyTuple_SET_ITEM(tuple, j, self->data[i]);
583
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100584 Py_SET_SIZE(self, start);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000585 return tuple;
586}
587
588static PyObject *
589Pdata_poplist(Pdata *self, Py_ssize_t start)
590{
591 PyObject *list;
592 Py_ssize_t len, i, j;
593
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000594 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000595 list = PyList_New(len);
596 if (list == NULL)
597 return NULL;
598 for (i = start, j = 0; j < len; i++, j++)
599 PyList_SET_ITEM(list, j, self->data[i]);
600
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100601 Py_SET_SIZE(self, start);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000602 return list;
603}
604
/* One slot of the pickler's memo hashtable.  A NULL me_key marks an unused
   slot (tables are zero-filled on creation and on clearing). */
typedef struct {
    PyObject *me_key;       /* key object; the table holds a reference */
    Py_ssize_t me_value;    /* integer value associated with the key */
} PyMemoEntry;
609
/* The pickler's memo hashtable. */
typedef struct {
    size_t mt_mask;         /* initialized to MT_MINSIZE - 1 for a new table */
    size_t mt_used;         /* number of entries in use */
    size_t mt_allocated;    /* number of slots in mt_table */
    PyMemoEntry *mt_table;  /* slot array, allocated with PyMem_* */
} PyMemoTable;
616
/* Instance layout of _pickle.Pickler. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytearray buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* NOTE(review): presumably the current
                                   nesting depth in fast mode, compared
                                   against FAST_NESTING_LIMIT -- confirm. */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;        /* NOTE(review): presumably tracks objects
                                   seen in fast mode once the nesting limit
                                   is reached -- confirm against users. */
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
655
/* Instance layout of _pickle.Unpickler. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    /* Input state.  NOTE(review): the code using these members is outside
       this excerpt; presumably `buffer` is the view on the input data,
       `input_buffer`/`input_len` the bytes currently available,
       `input_line` a scratch line buffer and `next_read_idx` the read
       position -- confirm. */
    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
697
/* Instance layout of _pickle.PicklerMemoProxy: an object that refers to a
   Pickler in order to proxy its memo table. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
702
/* Instance layout of _pickle.UnpicklerMemoProxy, the unpickler-side
   counterpart of PicklerMemoProxyObject. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* NOTE(review): presumably the Unpickler
                                   whose memo is proxied -- confirm. */
} UnpicklerMemoProxyObject;
707
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000708/* Forward declarations */
709static int save(PicklerObject *, PyObject *, int);
710static int save_reduce(PicklerObject *, PyObject *, PyObject *);
711static PyTypeObject Pickler_Type;
712static PyTypeObject Unpickler_Type;
713
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200714#include "clinic/_pickle.c.h"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000715
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created memo table; the initial mask is
   MT_MINSIZE - 1. */
#define MT_MINSIZE 8
/* NOTE(review): used by the lookup code, which lies outside this excerpt;
   presumably the per-probe shift applied to the perturbation value --
   confirm. */
#define PERTURB_SHIFT 5
724
725
726static PyMemoTable *
727PyMemoTable_New(void)
728{
Victor Stinner00d7abd2020-12-01 09:56:42 +0100729 PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000730 if (memo == NULL) {
731 PyErr_NoMemory();
732 return NULL;
733 }
734
735 memo->mt_used = 0;
736 memo->mt_allocated = MT_MINSIZE;
737 memo->mt_mask = MT_MINSIZE - 1;
Victor Stinner00d7abd2020-12-01 09:56:42 +0100738 memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (memo->mt_table == NULL) {
Victor Stinner00d7abd2020-12-01 09:56:42 +0100740 PyMem_Free(memo);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000741 PyErr_NoMemory();
742 return NULL;
743 }
744 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
745
746 return memo;
747}
748
749static PyMemoTable *
750PyMemoTable_Copy(PyMemoTable *self)
751{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000752 PyMemoTable *new = PyMemoTable_New();
753 if (new == NULL)
754 return NULL;
755
756 new->mt_used = self->mt_used;
757 new->mt_allocated = self->mt_allocated;
758 new->mt_mask = self->mt_mask;
759 /* The table we get from _New() is probably smaller than we wanted.
760 Free it and allocate one that's the right size. */
Victor Stinner00d7abd2020-12-01 09:56:42 +0100761 PyMem_Free(new->mt_table);
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500762 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000763 if (new->mt_table == NULL) {
Victor Stinner00d7abd2020-12-01 09:56:42 +0100764 PyMem_Free(new);
Victor Stinner42024562013-07-12 00:53:57 +0200765 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000766 return NULL;
767 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700768 for (size_t i = 0; i < self->mt_allocated; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000769 Py_XINCREF(self->mt_table[i].me_key);
770 }
771 memcpy(new->mt_table, self->mt_table,
772 sizeof(PyMemoEntry) * self->mt_allocated);
773
774 return new;
775}
776
777static Py_ssize_t
778PyMemoTable_Size(PyMemoTable *self)
779{
780 return self->mt_used;
781}
782
783static int
784PyMemoTable_Clear(PyMemoTable *self)
785{
786 Py_ssize_t i = self->mt_allocated;
787
788 while (--i >= 0) {
789 Py_XDECREF(self->mt_table[i].me_key);
790 }
791 self->mt_used = 0;
792 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
793 return 0;
794}
795
796static void
797PyMemoTable_Del(PyMemoTable *self)
798{
799 if (self == NULL)
800 return;
801 PyMemoTable_Clear(self);
802
Victor Stinner00d7abd2020-12-01 09:56:42 +0100803 PyMem_Free(self->mt_table);
804 PyMem_Free(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000805}
806
807/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
808 can be considerably simpler than dictobject.c's lookdict(). */
809static PyMemoEntry *
810_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
811{
812 size_t i;
813 size_t perturb;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700814 size_t mask = self->mt_mask;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000815 PyMemoEntry *table = self->mt_table;
816 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000817 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000818
819 i = hash & mask;
820 entry = &table[i];
821 if (entry->me_key == NULL || entry->me_key == key)
822 return entry;
823
824 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
825 i = (i << 2) + i + perturb + 1;
826 entry = &table[i & mask];
827 if (entry->me_key == NULL || entry->me_key == key)
828 return entry;
829 }
Barry Warsawb2e57942017-09-14 18:13:16 -0700830 Py_UNREACHABLE();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000831}
832
833/* Returns -1 on failure, 0 on success. */
834static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700835_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000836{
837 PyMemoEntry *oldtable = NULL;
838 PyMemoEntry *oldentry, *newentry;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700839 size_t new_size = MT_MINSIZE;
840 size_t to_process;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000841
842 assert(min_size > 0);
843
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700844 if (min_size > PY_SSIZE_T_MAX) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000845 PyErr_NoMemory();
846 return -1;
847 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700848
849 /* Find the smallest valid table size >= min_size. */
850 while (new_size < min_size) {
851 new_size <<= 1;
852 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000853 /* new_size needs to be a power of two. */
854 assert((new_size & (new_size - 1)) == 0);
855
856 /* Allocate new table. */
857 oldtable = self->mt_table;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500858 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000859 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200860 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000861 PyErr_NoMemory();
862 return -1;
863 }
864 self->mt_allocated = new_size;
865 self->mt_mask = new_size - 1;
866 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
867
868 /* Copy entries from the old table. */
869 to_process = self->mt_used;
870 for (oldentry = oldtable; to_process > 0; oldentry++) {
871 if (oldentry->me_key != NULL) {
872 to_process--;
873 /* newentry is a pointer to a chunk of the new
874 mt_table, so we're setting the key:value pair
875 in-place. */
876 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
877 newentry->me_key = oldentry->me_key;
878 newentry->me_value = oldentry->me_value;
879 }
880 }
881
882 /* Deallocate the old table. */
Victor Stinner00d7abd2020-12-01 09:56:42 +0100883 PyMem_Free(oldtable);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000884 return 0;
885}
886
887/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200888static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889PyMemoTable_Get(PyMemoTable *self, PyObject *key)
890{
891 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
892 if (entry->me_key == NULL)
893 return NULL;
894 return &entry->me_value;
895}
896
897/* Returns -1 on failure, 0 on success. */
898static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200899PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000900{
901 PyMemoEntry *entry;
902
903 assert(key != NULL);
904
905 entry = _PyMemoTable_Lookup(self, key);
906 if (entry->me_key != NULL) {
907 entry->me_value = value;
908 return 0;
909 }
910 Py_INCREF(key);
911 entry->me_key = key;
912 entry->me_value = value;
913 self->mt_used++;
914
915 /* If we added a key, we can safely resize. Otherwise just return!
916 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
917 *
918 * Quadrupling the size improves average table sparseness
919 * (reducing collisions) at the cost of some memory. It also halves
920 * the number of expensive resize operations in a growing memo table.
921 *
922 * Very large memo tables (over 50K items) use doubling instead.
923 * This may help applications with severe memory constraints.
924 */
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700925 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000926 return 0;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700927 }
928 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
929 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
930 return _PyMemoTable_ResizeTable(self, desired_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000931}
932
933#undef MT_MINSIZE
934#undef PERTURB_SHIFT
935
936/*************************************************************************/
937
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000938
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000939static int
940_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000941{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300942 Py_XSETREF(self->output_buffer,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200943 PyBytes_FromStringAndSize(NULL, self->max_output_len));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000944 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000945 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000946 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100947 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000948 return 0;
949}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000950
/* Write `value` into out[0..7] as an 8-byte little-endian integer.
   Bytes beyond sizeof(size_t) are zero-filled. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos = 0;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Least significant byte first. */
    for (; pos < sizeof(size_t); pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
    /* Pad up to 64 bits on platforms with a narrower size_t. */
    while (pos < 8) {
        out[pos++] = 0;
    }
}
965
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100966static int
967_Pickler_CommitFrame(PicklerObject *self)
968{
969 size_t frame_len;
970 char *qdata;
971
972 if (!self->framing || self->frame_start == -1)
973 return 0;
974 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
975 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
Serhiy Storchaka1211c9a2018-01-20 16:42:44 +0200976 if (frame_len >= FRAME_SIZE_MIN) {
977 qdata[0] = FRAME;
978 _write_size64(qdata + 1, frame_len);
979 }
980 else {
981 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
982 self->output_len -= FRAME_HEADER_SIZE;
983 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100984 self->frame_start = -1;
985 return 0;
986}
987
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000988static PyObject *
989_Pickler_GetString(PicklerObject *self)
990{
991 PyObject *output_buffer = self->output_buffer;
992
993 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100994
995 if (_Pickler_CommitFrame(self))
996 return NULL;
997
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000998 self->output_buffer = NULL;
999 /* Resize down to exact size */
1000 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1001 return NULL;
1002 return output_buffer;
1003}
1004
1005static int
1006_Pickler_FlushToFile(PicklerObject *self)
1007{
1008 PyObject *output, *result;
1009
1010 assert(self->write != NULL);
1011
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001012 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001013 output = _Pickler_GetString(self);
1014 if (output == NULL)
1015 return -1;
1016
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001017 result = _Pickle_FastCall(self->write, output);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001018 Py_XDECREF(result);
1019 return (result == NULL) ? -1 : 0;
1020}
1021
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001022static int
1023_Pickler_OpcodeBoundary(PicklerObject *self)
1024{
1025 Py_ssize_t frame_len;
1026
1027 if (!self->framing || self->frame_start == -1) {
1028 return 0;
1029 }
1030 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1031 if (frame_len >= FRAME_SIZE_TARGET) {
1032 if(_Pickler_CommitFrame(self)) {
1033 return -1;
1034 }
Leo Ariasc3d95082018-02-03 18:36:10 -06001035 /* Flush the content of the committed frame to the underlying
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001036 * file and reuse the pickler buffer for the next frame so as
1037 * to limit memory usage when dumping large complex objects to
1038 * a file.
1039 *
1040 * self->write is NULL when called via dumps.
1041 */
1042 if (self->write != NULL) {
1043 if (_Pickler_FlushToFile(self) < 0) {
1044 return -1;
1045 }
1046 if (_Pickler_ClearBuffer(self) < 0) {
1047 return -1;
1048 }
1049 }
1050 }
1051 return 0;
1052}
1053
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001054static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001055_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001056{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001057 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001058 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001059 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001060
1061 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001062 need_new_frame = (self->framing && self->frame_start == -1);
1063
1064 if (need_new_frame)
1065 n = data_len + FRAME_HEADER_SIZE;
1066 else
1067 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001068
1069 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001070 if (required > self->max_output_len) {
1071 /* Make place in buffer for the pickle chunk */
1072 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1073 PyErr_NoMemory();
1074 return -1;
1075 }
1076 self->max_output_len = (self->output_len + n) / 2 * 3;
1077 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1078 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001079 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001080 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001081 if (need_new_frame) {
1082 /* Setup new frame */
1083 Py_ssize_t frame_start = self->output_len;
1084 self->frame_start = frame_start;
1085 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1086 /* Write an invalid value, for debugging */
1087 buffer[frame_start + i] = 0xFE;
1088 }
1089 self->output_len += FRAME_HEADER_SIZE;
1090 }
1091 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001092 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001093 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001094 buffer[self->output_len + i] = s[i];
1095 }
1096 }
1097 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001098 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001099 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001100 self->output_len += data_len;
1101 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001102}
1103
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001104static PicklerObject *
1105_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001106{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001107 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001108
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001109 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1110 if (self == NULL)
1111 return NULL;
1112
1113 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01001114 self->dispatch_table = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001115 self->buffer_callback = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001116 self->write = NULL;
1117 self->proto = 0;
1118 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001119 self->framing = 0;
1120 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001121 self->fast = 0;
1122 self->fast_nesting = 0;
1123 self->fix_imports = 0;
1124 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001125 self->max_output_len = WRITE_BUF_SIZE;
1126 self->output_len = 0;
Pierre Glaser289f1f82019-05-08 23:08:25 +02001127 self->reducer_override = NULL;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001128
1129 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001130 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1131 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +02001132
1133 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +02001134 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001135 return NULL;
1136 }
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001137
1138 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001139 return self;
1140}
1141
1142static int
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001143_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001144{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001145 long proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001146
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001147 if (protocol == Py_None) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001148 proto = DEFAULT_PROTOCOL;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001149 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001150 else {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001151 proto = PyLong_AsLong(protocol);
1152 if (proto < 0) {
1153 if (proto == -1 && PyErr_Occurred())
1154 return -1;
1155 proto = HIGHEST_PROTOCOL;
1156 }
1157 else if (proto > HIGHEST_PROTOCOL) {
1158 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1159 HIGHEST_PROTOCOL);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001161 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001162 }
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001163 self->proto = (int)proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001164 self->bin = proto > 0;
1165 self->fix_imports = fix_imports && proto < 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001166 return 0;
1167}
1168
1169/* Returns -1 (with an exception set) on failure, 0 on success. This may
1170 be called once on a freshly created Pickler. */
1171static int
1172_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1173{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001174 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001175 assert(file != NULL);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001176 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1177 return -1;
1178 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001179 if (self->write == NULL) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001180 PyErr_SetString(PyExc_TypeError,
1181 "file must have a 'write' attribute");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001182 return -1;
1183 }
1184
1185 return 0;
1186}
1187
Antoine Pitrou91f43802019-05-26 17:10:09 +02001188static int
1189_Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1190{
1191 if (buffer_callback == Py_None) {
1192 buffer_callback = NULL;
1193 }
1194 if (buffer_callback != NULL && self->proto < 5) {
1195 PyErr_SetString(PyExc_ValueError,
1196 "buffer_callback needs protocol >= 5");
1197 return -1;
1198 }
1199
1200 Py_XINCREF(buffer_callback);
1201 self->buffer_callback = buffer_callback;
1202 return 0;
1203}
1204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001205/* Returns the size of the input on success, -1 on failure. This takes its
1206 own reference to `input`. */
1207static Py_ssize_t
1208_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1209{
1210 if (self->buffer.buf != NULL)
1211 PyBuffer_Release(&self->buffer);
1212 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1213 return -1;
1214 self->input_buffer = self->buffer.buf;
1215 self->input_len = self->buffer.len;
1216 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001217 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001218 return self->input_len;
1219}
1220
Antoine Pitrou04248a82010-10-12 20:51:21 +00001221static int
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001222bad_readline(void)
1223{
1224 PickleState *st = _Pickle_GetGlobalState();
1225 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1226 return -1;
1227}
1228
Antoine Pitrou91f43802019-05-26 17:10:09 +02001229/* Skip any consumed data that was only prefetched using peek() */
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001230static int
Antoine Pitrou04248a82010-10-12 20:51:21 +00001231_Unpickler_SkipConsumed(UnpicklerObject *self)
1232{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001233 Py_ssize_t consumed;
1234 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001235
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001236 consumed = self->next_read_idx - self->prefetched_idx;
1237 if (consumed <= 0)
1238 return 0;
1239
1240 assert(self->peek); /* otherwise we did something wrong */
Martin Panter6245cb32016-04-15 02:14:19 +00001241 /* This makes a useless copy... */
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001242 r = PyObject_CallFunction(self->read, "n", consumed);
1243 if (r == NULL)
1244 return -1;
1245 Py_DECREF(r);
1246
1247 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001248 return 0;
1249}
1250
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251static const Py_ssize_t READ_WHOLE_LINE = -1;
1252
1253/* If reading from a file, we need to only pull the bytes we need, since there
1254 may be multiple pickle objects arranged contiguously in the same input
1255 buffer.
1256
1257 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1258 bytes from the input stream/buffer.
1259
1260 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1261 failure; on success, returns the number of bytes read from the file.
1262
1263 On success, self->input_len will be 0; this is intentional so that when
1264 unpickling from a file, the "we've run out of data" code paths will trigger,
1265 causing the Unpickler to go back to the file for more data. Use the returned
1266 size to tell you how much data you can process. */
1267static Py_ssize_t
1268_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1269{
1270 PyObject *data;
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001271 Py_ssize_t read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001272
1273 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +02001274
Antoine Pitrou04248a82010-10-12 20:51:21 +00001275 if (_Unpickler_SkipConsumed(self) < 0)
1276 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001277
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001278 if (n == READ_WHOLE_LINE) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02001279 data = PyObject_CallNoArgs(self->readline);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001280 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001281 else {
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001282 PyObject *len;
1283 /* Prefetch some data without advancing the file pointer, if possible */
1284 if (self->peek && n < PREFETCH) {
1285 len = PyLong_FromSsize_t(PREFETCH);
1286 if (len == NULL)
1287 return -1;
1288 data = _Pickle_FastCall(self->peek, len);
1289 if (data == NULL) {
1290 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1291 return -1;
1292 /* peek() is probably not supported by the given file object */
1293 PyErr_Clear();
1294 Py_CLEAR(self->peek);
1295 }
1296 else {
1297 read_size = _Unpickler_SetStringInput(self, data);
1298 Py_DECREF(data);
1299 self->prefetched_idx = 0;
1300 if (n <= read_size)
1301 return n;
1302 }
1303 }
1304 len = PyLong_FromSsize_t(n);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001305 if (len == NULL)
1306 return -1;
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001307 data = _Pickle_FastCall(self->read, len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001308 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001309 if (data == NULL)
1310 return -1;
1311
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001312 read_size = _Unpickler_SetStringInput(self, data);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001313 Py_DECREF(data);
1314 return read_size;
1315}
1316
Victor Stinner19ed27e2016-05-20 11:42:37 +02001317/* Don't call it directly: use _Unpickler_Read() */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001318static Py_ssize_t
Victor Stinner19ed27e2016-05-20 11:42:37 +02001319_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001320{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001321 Py_ssize_t num_read;
1322
Benjamin Peterson6aa15642015-09-27 01:16:03 -07001323 *s = NULL;
Benjamin Petersone48cf7e2015-09-26 00:08:34 -07001324 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1325 PickleState *st = _Pickle_GetGlobalState();
1326 PyErr_SetString(st->UnpicklingError,
1327 "read would overflow (invalid bytecode)");
1328 return -1;
1329 }
Victor Stinner19ed27e2016-05-20 11:42:37 +02001330
1331 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1332 assert(self->next_read_idx + n > self->input_len);
1333
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001334 if (!self->read)
1335 return bad_readline();
1336
Antoine Pitrou91f43802019-05-26 17:10:09 +02001337 /* Extend the buffer to satisfy desired size */
Antoine Pitrou04248a82010-10-12 20:51:21 +00001338 num_read = _Unpickler_ReadFromFile(self, n);
1339 if (num_read < 0)
1340 return -1;
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001341 if (num_read < n)
1342 return bad_readline();
Antoine Pitrou04248a82010-10-12 20:51:21 +00001343 *s = self->input_buffer;
1344 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 return n;
1346}
1347
Antoine Pitrou91f43802019-05-26 17:10:09 +02001348/* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1349 *
1350 * This should only be used for non-small data reads where potentially
1351 * avoiding a copy is beneficial. This method does not try to prefetch
1352 * more data into the input buffer.
1353 *
1354 * _Unpickler_Read() is recommended in most cases.
1355 */
1356static Py_ssize_t
1357_Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1358{
1359 assert(n != READ_WHOLE_LINE);
1360
1361 /* Read from available buffer data, if any */
1362 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1363 if (in_buffer > 0) {
1364 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1365 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1366 self->next_read_idx += to_read;
1367 buf += to_read;
1368 n -= to_read;
1369 if (n == 0) {
1370 /* Entire read was satisfied from buffer */
1371 return n;
1372 }
1373 }
1374
1375 /* Read from file */
Antoine Pitrou9f378722020-02-23 23:33:53 +01001376 if (!self->read) {
1377 /* We're unpickling memory, this means the input is truncated */
Antoine Pitrou91f43802019-05-26 17:10:09 +02001378 return bad_readline();
1379 }
1380 if (_Unpickler_SkipConsumed(self) < 0) {
1381 return -1;
1382 }
1383
Antoine Pitrou9f378722020-02-23 23:33:53 +01001384 if (!self->readinto) {
1385 /* readinto() not supported on file-like object, fall back to read()
1386 * and copy into destination buffer (bpo-39681) */
1387 PyObject* len = PyLong_FromSsize_t(n);
1388 if (len == NULL) {
1389 return -1;
1390 }
1391 PyObject* data = _Pickle_FastCall(self->read, len);
1392 if (data == NULL) {
1393 return -1;
1394 }
1395 if (!PyBytes_Check(data)) {
1396 PyErr_Format(PyExc_ValueError,
1397 "read() returned non-bytes object (%R)",
1398 Py_TYPE(data));
1399 Py_DECREF(data);
1400 return -1;
1401 }
1402 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1403 if (read_size < n) {
1404 Py_DECREF(data);
1405 return bad_readline();
1406 }
1407 memcpy(buf, PyBytes_AS_STRING(data), n);
1408 Py_DECREF(data);
1409 return n;
1410 }
1411
Antoine Pitrou91f43802019-05-26 17:10:09 +02001412 /* Call readinto() into user buffer */
1413 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1414 if (buf_obj == NULL) {
1415 return -1;
1416 }
1417 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1418 if (read_size_obj == NULL) {
1419 return -1;
1420 }
1421 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1422 Py_DECREF(read_size_obj);
1423
1424 if (read_size < 0) {
1425 if (!PyErr_Occurred()) {
1426 PyErr_SetString(PyExc_ValueError,
1427 "readinto() returned negative size");
1428 }
1429 return -1;
1430 }
1431 if (read_size < n) {
1432 return bad_readline();
1433 }
1434 return n;
1435}
1436
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This method deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when the input buffer still holds at least `n` unread bytes,
   `*s` is pointed at them and self->next_read_idx advances by `n`.
   Otherwise _Unpickler_ReadImpl() is called to fetch the data.

   This is a macro: `self`, `s` and `n` are evaluated more than once, so
   they must not have side effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n)                                     \
    (((n) <= (self)->input_len - (self)->next_read_idx)                 \
        ? (*(s) = (self)->input_buffer + (self)->next_read_idx,         \
           (self)->next_read_idx += (n),                                \
           (n))                                                         \
        : _Unpickler_ReadImpl(self, (s), (n)))
1456
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001457static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001458_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1459 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001461 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001462 if (input_line == NULL) {
1463 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001464 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001465 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001466
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001467 memcpy(input_line, line, len);
1468 input_line[len] = '\0';
1469 self->input_line = input_line;
1470 *result = self->input_line;
1471 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001472}
1473
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474/* Read a line from the input stream/buffer. If we run off the end of the input
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001475 before hitting \n, raise an error.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001476
1477 Returns the number of chars read, or -1 on failure. */
1478static Py_ssize_t
1479_Unpickler_Readline(UnpicklerObject *self, char **result)
1480{
1481 Py_ssize_t i, num_read;
1482
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001483 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001484 if (self->input_buffer[i] == '\n') {
1485 char *line_start = self->input_buffer + self->next_read_idx;
1486 num_read = i - self->next_read_idx + 1;
1487 self->next_read_idx = i + 1;
1488 return _Unpickler_CopyLine(self, line_start, num_read, result);
1489 }
1490 }
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001491 if (!self->read)
1492 return bad_readline();
Victor Stinner121aab42011-09-29 23:40:53 +02001493
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001494 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1495 if (num_read < 0)
1496 return -1;
1497 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1498 return bad_readline();
1499 self->next_read_idx = num_read;
1500 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001501}
1502
1503/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1504 will be modified in place. */
1505static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001506_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001507{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001508 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001509
1510 assert(new_size > self->memo_size);
1511
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001512 PyObject **memo_new = self->memo;
1513 PyMem_RESIZE(memo_new, PyObject *, new_size);
1514 if (memo_new == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001515 PyErr_NoMemory();
1516 return -1;
1517 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001518 self->memo = memo_new;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001519 for (i = self->memo_size; i < new_size; i++)
1520 self->memo[i] = NULL;
1521 self->memo_size = new_size;
1522 return 0;
1523}
1524
1525/* Returns NULL if idx is out of bounds. */
1526static PyObject *
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001527_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001528{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001529 if (idx >= self->memo_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001530 return NULL;
1531
1532 return self->memo[idx];
1533}
1534
1535/* Returns -1 (with an exception set) on failure, 0 on success.
1536 This takes its own reference to `value`. */
1537static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001538_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001539{
1540 PyObject *old_item;
1541
1542 if (idx >= self->memo_size) {
1543 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1544 return -1;
1545 assert(idx < self->memo_size);
1546 }
1547 Py_INCREF(value);
1548 old_item = self->memo[idx];
1549 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001550 if (old_item != NULL) {
1551 Py_DECREF(old_item);
1552 }
1553 else {
1554 self->memo_len++;
1555 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001556 return 0;
1557}
1558
1559static PyObject **
1560_Unpickler_NewMemo(Py_ssize_t new_size)
1561{
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001562 PyObject **memo = PyMem_NEW(PyObject *, new_size);
Victor Stinner42024562013-07-12 00:53:57 +02001563 if (memo == NULL) {
1564 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001565 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001566 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001567 memset(memo, 0, new_size * sizeof(PyObject *));
1568 return memo;
1569}
1570
1571/* Free the unpickler's memo, taking care to decref any items left in it. */
1572static void
1573_Unpickler_MemoCleanup(UnpicklerObject *self)
1574{
1575 Py_ssize_t i;
1576 PyObject **memo = self->memo;
1577
1578 if (self->memo == NULL)
1579 return;
1580 self->memo = NULL;
1581 i = self->memo_size;
1582 while (--i >= 0) {
1583 Py_XDECREF(memo[i]);
1584 }
Victor Stinner00d7abd2020-12-01 09:56:42 +01001585 PyMem_Free(memo);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001586}
1587
1588static UnpicklerObject *
1589_Unpickler_New(void)
1590{
1591 UnpicklerObject *self;
1592
1593 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1594 if (self == NULL)
1595 return NULL;
1596
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001597 self->pers_func = NULL;
1598 self->input_buffer = NULL;
1599 self->input_line = NULL;
1600 self->input_len = 0;
1601 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001602 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001603 self->read = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001604 self->readinto = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001605 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001606 self->peek = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001607 self->buffers = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001608 self->encoding = NULL;
1609 self->errors = NULL;
1610 self->marks = NULL;
1611 self->num_marks = 0;
1612 self->marks_size = 0;
1613 self->proto = 0;
1614 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001615 memset(&self->buffer, 0, sizeof(Py_buffer));
1616 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001617 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001618 self->memo = _Unpickler_NewMemo(self->memo_size);
1619 self->stack = (Pdata *)Pdata_New();
1620
1621 if (self->memo == NULL || self->stack == NULL) {
1622 Py_DECREF(self);
1623 return NULL;
1624 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001625
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001626 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001627 return self;
1628}
1629
1630/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001631 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001632static int
1633_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1634{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001635 _Py_IDENTIFIER(peek);
1636 _Py_IDENTIFIER(read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001637 _Py_IDENTIFIER(readinto);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001638 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001639
Antoine Pitrou9f378722020-02-23 23:33:53 +01001640 /* Optional file methods */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001641 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1642 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001643 }
Antoine Pitrou9f378722020-02-23 23:33:53 +01001644 if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1645 return -1;
1646 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001647 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1648 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
Antoine Pitrou9f378722020-02-23 23:33:53 +01001649 if (!self->readline || !self->read) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001650 if (!PyErr_Occurred()) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001651 PyErr_SetString(PyExc_TypeError,
Antoine Pitrou9f378722020-02-23 23:33:53 +01001652 "file must have 'read' and 'readline' attributes");
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001653 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001654 Py_CLEAR(self->read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001655 Py_CLEAR(self->readinto);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001656 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001657 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001658 return -1;
1659 }
1660 return 0;
1661}
1662
1663/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001664 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001665static int
1666_Unpickler_SetInputEncoding(UnpicklerObject *self,
1667 const char *encoding,
1668 const char *errors)
1669{
1670 if (encoding == NULL)
1671 encoding = "ASCII";
1672 if (errors == NULL)
1673 errors = "strict";
1674
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001675 self->encoding = _PyMem_Strdup(encoding);
1676 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001677 if (self->encoding == NULL || self->errors == NULL) {
1678 PyErr_NoMemory();
1679 return -1;
1680 }
1681 return 0;
1682}
1683
Antoine Pitrou91f43802019-05-26 17:10:09 +02001684/* Returns -1 (with an exception set) on failure, 0 on success. This may
1685 be called once on a freshly created Unpickler. */
1686static int
1687_Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1688{
Markus Mohrhard898318b2019-07-26 00:00:34 +08001689 if (buffers == NULL || buffers == Py_None) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02001690 self->buffers = NULL;
1691 }
1692 else {
1693 self->buffers = PyObject_GetIter(buffers);
1694 if (self->buffers == NULL) {
1695 return -1;
1696 }
1697 }
1698 return 0;
1699}
1700
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001701/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001702static int
1703memo_get(PicklerObject *self, PyObject *key)
1704{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001705 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001706 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001707 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001708
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001709 value = PyMemoTable_Get(self->memo, key);
1710 if (value == NULL) {
1711 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001712 return -1;
1713 }
1714
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001715 if (!self->bin) {
1716 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001717 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Victor Stinnerd36cf5f2020-06-10 18:38:05 +02001718 "%zd\n", *value);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001719 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001720 }
1721 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001722 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001723 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001724 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001725 len = 2;
1726 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001727 else if ((size_t)*value <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001728 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001729 pdata[1] = (unsigned char)(*value & 0xff);
1730 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1731 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1732 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001733 len = 5;
1734 }
1735 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001736 PickleState *st = _Pickle_GetGlobalState();
1737 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001738 "memo id too large for LONG_BINGET");
1739 return -1;
1740 }
1741 }
1742
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001743 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001744 return -1;
1745
1746 return 0;
1747}
1748
1749/* Store an object in the memo, assign it a new unique ID based on the number
1750 of objects currently stored in the memo and generate a PUT opcode. */
1751static int
1752memo_put(PicklerObject *self, PyObject *obj)
1753{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001754 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001755 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001756 Py_ssize_t idx;
1757
1758 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001759
1760 if (self->fast)
1761 return 0;
1762
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001763 idx = PyMemoTable_Size(self->memo);
1764 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1765 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001766
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001767 if (self->proto >= 4) {
1768 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1769 return -1;
1770 return 0;
1771 }
1772 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001773 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001774 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Victor Stinnerd36cf5f2020-06-10 18:38:05 +02001775 "%zd\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001776 len = strlen(pdata);
1777 }
1778 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001779 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001781 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001782 len = 2;
1783 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001784 else if ((size_t)idx <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001785 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001786 pdata[1] = (unsigned char)(idx & 0xff);
1787 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1788 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1789 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001790 len = 5;
1791 }
1792 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001793 PickleState *st = _Pickle_GetGlobalState();
1794 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001795 "memo id too large for LONG_BINPUT");
1796 return -1;
1797 }
1798 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001799 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001800 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001801
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001802 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001803}
1804
1805static PyObject *
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001806get_dotted_path(PyObject *obj, PyObject *name)
1807{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001808 _Py_static_string(PyId_dot, ".");
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001809 PyObject *dotted_path;
1810 Py_ssize_t i, n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001811
1812 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001813 if (dotted_path == NULL)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001814 return NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001815 n = PyList_GET_SIZE(dotted_path);
1816 assert(n >= 1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001817 for (i = 0; i < n; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001818 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001819 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
Antoine Pitrou6cd5eda2014-12-02 00:20:03 +01001820 if (obj == NULL)
1821 PyErr_Format(PyExc_AttributeError,
1822 "Can't pickle local object %R", name);
1823 else
1824 PyErr_Format(PyExc_AttributeError,
1825 "Can't pickle local attribute %R on %R", name, obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001826 Py_DECREF(dotted_path);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001827 return NULL;
1828 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001829 }
1830 return dotted_path;
1831}
1832
1833static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001834get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001835{
1836 Py_ssize_t i, n;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001837 PyObject *parent = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001838
1839 assert(PyList_CheckExact(names));
1840 Py_INCREF(obj);
1841 n = PyList_GET_SIZE(names);
1842 for (i = 0; i < n; i++) {
1843 PyObject *name = PyList_GET_ITEM(names, i);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001844 Py_XDECREF(parent);
1845 parent = obj;
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001846 (void)_PyObject_LookupAttr(parent, name, &obj);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001847 if (obj == NULL) {
1848 Py_DECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001849 return NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001850 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001851 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001852 if (pparent != NULL)
1853 *pparent = parent;
1854 else
1855 Py_XDECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001856 return obj;
1857}
1858
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001859
1860static PyObject *
1861getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1862{
1863 PyObject *dotted_path, *attr;
1864
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001865 if (allow_qualname) {
1866 dotted_path = get_dotted_path(obj, name);
1867 if (dotted_path == NULL)
1868 return NULL;
1869 attr = get_deep_attribute(obj, dotted_path, NULL);
1870 Py_DECREF(dotted_path);
1871 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001872 else {
1873 (void)_PyObject_LookupAttr(obj, name, &attr);
1874 }
1875 if (attr == NULL && !PyErr_Occurred()) {
1876 PyErr_Format(PyExc_AttributeError,
1877 "Can't get attribute %R on %R", name, obj);
1878 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001879 return attr;
1880}
1881
Eric Snow3f9eee62017-09-15 16:35:20 -06001882static int
1883_checkmodule(PyObject *module_name, PyObject *module,
1884 PyObject *global, PyObject *dotted_path)
1885{
1886 if (module == Py_None) {
1887 return -1;
1888 }
1889 if (PyUnicode_Check(module_name) &&
1890 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1891 return -1;
1892 }
1893
1894 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1895 if (candidate == NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001896 return -1;
1897 }
1898 if (candidate != global) {
1899 Py_DECREF(candidate);
1900 return -1;
1901 }
1902 Py_DECREF(candidate);
1903 return 0;
1904}
1905
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001906static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001907whichmodule(PyObject *global, PyObject *dotted_path)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001908{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001909 PyObject *module_name;
Eric Snow3f9eee62017-09-15 16:35:20 -06001910 PyObject *module = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001911 Py_ssize_t i;
Eric Snow3f9eee62017-09-15 16:35:20 -06001912 PyObject *modules;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001913 _Py_IDENTIFIER(__module__);
1914 _Py_IDENTIFIER(modules);
1915 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001916
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001917 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1918 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001919 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001920 if (module_name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001921 /* In some rare cases (e.g., bound methods of extension types),
1922 __module__ can be None. If it is so, then search sys.modules for
1923 the module of global. */
1924 if (module_name != Py_None)
1925 return module_name;
1926 Py_CLEAR(module_name);
1927 }
1928 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001929
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001930 /* Fallback on walking sys.modules */
Eric Snow3f9eee62017-09-15 16:35:20 -06001931 modules = _PySys_GetObjectId(&PyId_modules);
1932 if (modules == NULL) {
Victor Stinner1e53bba2013-07-16 22:26:05 +02001933 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001934 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001935 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001936 if (PyDict_CheckExact(modules)) {
1937 i = 0;
1938 while (PyDict_Next(modules, &i, &module_name, &module)) {
1939 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1940 Py_INCREF(module_name);
1941 return module_name;
1942 }
1943 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001944 return NULL;
Eric Snow3f9eee62017-09-15 16:35:20 -06001945 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001946 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001947 }
1948 else {
1949 PyObject *iterator = PyObject_GetIter(modules);
1950 if (iterator == NULL) {
1951 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001952 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001953 while ((module_name = PyIter_Next(iterator))) {
1954 module = PyObject_GetItem(modules, module_name);
1955 if (module == NULL) {
1956 Py_DECREF(module_name);
1957 Py_DECREF(iterator);
1958 return NULL;
1959 }
1960 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1961 Py_DECREF(module);
1962 Py_DECREF(iterator);
1963 return module_name;
1964 }
1965 Py_DECREF(module);
1966 Py_DECREF(module_name);
1967 if (PyErr_Occurred()) {
1968 Py_DECREF(iterator);
1969 return NULL;
1970 }
1971 }
1972 Py_DECREF(iterator);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 }
1974
1975 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001976 module_name = _PyUnicode_FromId(&PyId___main__);
Victor Stinneraf46eb82017-09-05 23:30:16 +02001977 Py_XINCREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978 return module_name;
1979}
1980
1981/* fast_save_enter() and fast_save_leave() are guards against recursive
1982 objects when Pickler is used with the "fast mode" (i.e., with object
1983 memoization disabled). If the nesting of a list or dict object exceed
1984 FAST_NESTING_LIMIT, these guards will start keeping an internal
1985 reference to the seen list or dict objects and check whether these objects
1986 are recursive. These are not strictly necessary, since save() has a
1987 hard-coded recursion limit, but they give a nicer error message than the
1988 typical RuntimeError. */
1989static int
1990fast_save_enter(PicklerObject *self, PyObject *obj)
1991{
1992 /* if fast_nesting < 0, we're doing an error exit. */
1993 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1994 PyObject *key = NULL;
1995 if (self->fast_memo == NULL) {
1996 self->fast_memo = PyDict_New();
1997 if (self->fast_memo == NULL) {
1998 self->fast_nesting = -1;
1999 return 0;
2000 }
2001 }
2002 key = PyLong_FromVoidPtr(obj);
Mat Mf76231f2017-11-13 02:50:16 -05002003 if (key == NULL) {
2004 self->fast_nesting = -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005 return 0;
Mat Mf76231f2017-11-13 02:50:16 -05002006 }
Serhiy Storchakab510e102020-10-26 12:47:57 +02002007 int r = PyDict_Contains(self->fast_memo, key);
2008 if (r > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002009 PyErr_Format(PyExc_ValueError,
2010 "fast mode: can't pickle cyclic objects "
2011 "including object type %.200s at %p",
Victor Stinnerdaa97562020-02-07 03:37:06 +01002012 Py_TYPE(obj)->tp_name, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002013 }
Serhiy Storchakab510e102020-10-26 12:47:57 +02002014 else if (r == 0) {
2015 r = PyDict_SetItem(self->fast_memo, key, Py_None);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002016 }
2017 Py_DECREF(key);
Serhiy Storchakab510e102020-10-26 12:47:57 +02002018 if (r != 0) {
2019 self->fast_nesting = -1;
2020 return 0;
2021 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002022 }
2023 return 1;
2024}
2025
2026static int
2027fast_save_leave(PicklerObject *self, PyObject *obj)
2028{
2029 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2030 PyObject *key = PyLong_FromVoidPtr(obj);
2031 if (key == NULL)
2032 return 0;
2033 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2034 Py_DECREF(key);
2035 return 0;
2036 }
2037 Py_DECREF(key);
2038 }
2039 return 1;
2040}
2041
2042static int
2043save_none(PicklerObject *self, PyObject *obj)
2044{
2045 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002046 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 return -1;
2048
2049 return 0;
2050}
2051
2052static int
2053save_bool(PicklerObject *self, PyObject *obj)
2054{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055 if (self->proto >= 2) {
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002056 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002057 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002058 return -1;
2059 }
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002060 else {
2061 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2062 * so that unpicklers written before bools were introduced unpickle them
2063 * as ints, but unpicklers after can recognize that bools were intended.
2064 * Note that protocol 2 added direct ways to pickle bools.
2065 */
2066 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2067 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2068 return -1;
2069 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002070 return 0;
2071}
2072
2073static int
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002074save_long(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002075{
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002076 PyObject *repr = NULL;
2077 Py_ssize_t size;
2078 long val;
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002079 int overflow;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002080 int status = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002081
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002082 val= PyLong_AsLongAndOverflow(obj, &overflow);
2083 if (!overflow && (sizeof(long) <= 4 ||
2084 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2085 {
Larry Hastings61272b72014-01-07 12:41:53 -08002086 /* result fits in a signed 4-byte integer.
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002087
2088 Note: we can't use -0x80000000L in the above condition because some
2089 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2090 before applying the unary minus when sizeof(long) <= 4. The
2091 resulting value stays unsigned which is commonly not what we want,
2092 so MSVC happily warns us about it. However, that result would have
2093 been fine because we guard for sizeof(long) <= 4 which turns the
2094 condition true in that particular case. */
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002095 char pdata[32];
2096 Py_ssize_t len = 0;
2097
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002098 if (self->bin) {
2099 pdata[1] = (unsigned char)(val & 0xff);
2100 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2101 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2102 pdata[4] = (unsigned char)((val >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002103
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002104 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2105 pdata[0] = BININT;
2106 len = 5;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002107 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002108 else if (pdata[2] != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002109 pdata[0] = BININT2;
2110 len = 3;
2111 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002112 else {
2113 pdata[0] = BININT1;
2114 len = 2;
2115 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002116 }
2117 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002118 sprintf(pdata, "%c%ld\n", INT, val);
2119 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002120 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002121 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002122 return -1;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002123
2124 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002125 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002126 assert(!PyErr_Occurred());
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002127
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002128 if (self->proto >= 2) {
2129 /* Linear-time pickling. */
2130 size_t nbits;
2131 size_t nbytes;
2132 unsigned char *pdata;
2133 char header[5];
2134 int i;
2135 int sign = _PyLong_Sign(obj);
2136
2137 if (sign == 0) {
2138 header[0] = LONG1;
2139 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002140 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002141 goto error;
2142 return 0;
2143 }
2144 nbits = _PyLong_NumBits(obj);
2145 if (nbits == (size_t)-1 && PyErr_Occurred())
2146 goto error;
2147 /* How many bytes do we need? There are nbits >> 3 full
2148 * bytes of data, and nbits & 7 leftover bits. If there
2149 * are any leftover bits, then we clearly need another
Min ho Kim96e12d52019-07-22 06:12:33 +10002150 * byte. What's not so obvious is that we *probably*
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002151 * need another byte even if there aren't any leftovers:
2152 * the most-significant bit of the most-significant byte
2153 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03002154 * opposite of the one we need. The exception is ints
2155 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002156 * its own 256's-complement, so has the right sign bit
2157 * even without the extra byte. That's a pain to check
2158 * for in advance, though, so we always grab an extra
2159 * byte at the start, and cut it back later if possible.
2160 */
2161 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01002162 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002163 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03002164 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002165 goto error;
2166 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002167 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002168 if (repr == NULL)
2169 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002170 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002171 i = _PyLong_AsByteArray((PyLongObject *)obj,
2172 pdata, nbytes,
2173 1 /* little endian */ , 1 /* signed */ );
2174 if (i < 0)
2175 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03002176 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002177 * needed. This is so iff the MSB is all redundant sign
2178 * bits.
2179 */
2180 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02002181 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002182 pdata[nbytes - 1] == 0xff &&
2183 (pdata[nbytes - 2] & 0x80) != 0) {
2184 nbytes--;
2185 }
2186
2187 if (nbytes < 256) {
2188 header[0] = LONG1;
2189 header[1] = (unsigned char)nbytes;
2190 size = 2;
2191 }
2192 else {
2193 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002194 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002195 for (i = 1; i < 5; i++) {
2196 header[i] = (unsigned char)(size & 0xff);
2197 size >>= 8;
2198 }
2199 size = 5;
2200 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002201 if (_Pickler_Write(self, header, size) < 0 ||
2202 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002203 goto error;
2204 }
2205 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002206 const char long_op = LONG;
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02002207 const char *string;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002208
Mark Dickinson8dd05142009-01-20 20:43:58 +00002209 /* proto < 2: write the repr and newline. This is quadratic-time (in
2210 the number of digits), in both directions. We add a trailing 'L'
2211 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002212
2213 repr = PyObject_Repr(obj);
2214 if (repr == NULL)
2215 goto error;
2216
Serhiy Storchaka06515832016-11-20 09:13:07 +02002217 string = PyUnicode_AsUTF8AndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002218 if (string == NULL)
2219 goto error;
2220
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002221 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2222 _Pickler_Write(self, string, size) < 0 ||
2223 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002224 goto error;
2225 }
2226
2227 if (0) {
2228 error:
2229 status = -1;
2230 }
2231 Py_XDECREF(repr);
2232
2233 return status;
2234}
2235
2236static int
2237save_float(PicklerObject *self, PyObject *obj)
2238{
2239 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2240
2241 if (self->bin) {
2242 char pdata[9];
2243 pdata[0] = BINFLOAT;
2244 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2245 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002246 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002247 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02002248 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002249 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00002250 int result = -1;
2251 char *buf = NULL;
2252 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002253
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002254 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002255 goto done;
2256
Serhiy Storchakac86ca262015-02-15 14:18:32 +02002257 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00002258 if (!buf) {
2259 PyErr_NoMemory();
2260 goto done;
2261 }
2262
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002263 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002264 goto done;
2265
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002266 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002267 goto done;
2268
2269 result = 0;
2270done:
2271 PyMem_Free(buf);
2272 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002273 }
2274
2275 return 0;
2276}
2277
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002278/* Perform direct write of the header and payload of the binary object.
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002279
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002280 The large contiguous data is written directly into the underlying file
2281 object, bypassing the output_buffer of the Pickler. We intentionally
2282 do not insert a protocol 4 frame opcode to make it possible to optimize
2283 file.read calls in the loader.
2284 */
2285static int
2286_Pickler_write_bytes(PicklerObject *self,
2287 const char *header, Py_ssize_t header_size,
2288 const char *data, Py_ssize_t data_size,
2289 PyObject *payload)
2290{
2291 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2292 int framing = self->framing;
2293
2294 if (bypass_buffer) {
2295 assert(self->output_buffer != NULL);
2296 /* Commit the previous frame. */
2297 if (_Pickler_CommitFrame(self)) {
2298 return -1;
2299 }
2300 /* Disable framing temporarily */
2301 self->framing = 0;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002302 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002303
2304 if (_Pickler_Write(self, header, header_size) < 0) {
2305 return -1;
2306 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002307
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002308 if (bypass_buffer && self->write != NULL) {
2309 /* Bypass the in-memory buffer to directly stream large data
2310 into the underlying file object. */
2311 PyObject *result, *mem = NULL;
2312 /* Dump the output buffer to the file. */
2313 if (_Pickler_FlushToFile(self) < 0) {
2314 return -1;
2315 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002316
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002317 /* Stream write the payload into the file without going through the
2318 output buffer. */
2319 if (payload == NULL) {
Serhiy Storchaka5b76bdb2018-01-13 00:28:31 +02002320 /* TODO: It would be better to use a memoryview with a linked
2321 original string if this is possible. */
2322 payload = mem = PyBytes_FromStringAndSize(data, data_size);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002323 if (payload == NULL) {
2324 return -1;
2325 }
2326 }
Petr Viktorinffd97532020-02-11 17:46:57 +01002327 result = PyObject_CallOneArg(self->write, payload);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002328 Py_XDECREF(mem);
2329 if (result == NULL) {
2330 return -1;
2331 }
2332 Py_DECREF(result);
2333
2334 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2335 if (_Pickler_ClearBuffer(self) < 0) {
2336 return -1;
2337 }
2338 }
2339 else {
2340 if (_Pickler_Write(self, data, data_size) < 0) {
2341 return -1;
2342 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002343 }
2344
2345 /* Re-enable framing for subsequent calls to _Pickler_Write. */
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002346 self->framing = framing;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002347
2348 return 0;
2349}
2350
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002351static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02002352_save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2353 Py_ssize_t size)
2354{
2355 assert(self->proto >= 3);
2356
2357 char header[9];
2358 Py_ssize_t len;
2359
2360 if (size < 0)
2361 return -1;
2362
2363 if (size <= 0xff) {
2364 header[0] = SHORT_BINBYTES;
2365 header[1] = (unsigned char)size;
2366 len = 2;
2367 }
2368 else if ((size_t)size <= 0xffffffffUL) {
2369 header[0] = BINBYTES;
2370 header[1] = (unsigned char)(size & 0xff);
2371 header[2] = (unsigned char)((size >> 8) & 0xff);
2372 header[3] = (unsigned char)((size >> 16) & 0xff);
2373 header[4] = (unsigned char)((size >> 24) & 0xff);
2374 len = 5;
2375 }
2376 else if (self->proto >= 4) {
2377 header[0] = BINBYTES8;
2378 _write_size64(header + 1, size);
2379 len = 9;
2380 }
2381 else {
2382 PyErr_SetString(PyExc_OverflowError,
2383 "serializing a bytes object larger than 4 GiB "
2384 "requires pickle protocol 4 or higher");
2385 return -1;
2386 }
2387
2388 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2389 return -1;
2390 }
2391
2392 if (memo_put(self, obj) < 0) {
2393 return -1;
2394 }
2395
2396 return 0;
2397}
2398
2399static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002400save_bytes(PicklerObject *self, PyObject *obj)
2401{
2402 if (self->proto < 3) {
2403 /* Older pickle protocols do not have an opcode for pickling bytes
2404 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002405 the __reduce__ method) to permit bytes object unpickling.
2406
2407 Here we use a hack to be compatible with Python 2. Since in Python
2408 2 'bytes' is just an alias for 'str' (which has different
2409 parameters than the actual bytes object), we use codecs.encode
2410 to create the appropriate 'str' object when unpickled using
2411 Python 2 *and* the appropriate 'bytes' object when unpickled
2412 using Python 3. Again this is a hack and we don't need to do this
2413 with newer protocols. */
Pierre Glaser289f1f82019-05-08 23:08:25 +02002414 PyObject *reduce_value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002415 int status;
2416
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002417 if (PyBytes_GET_SIZE(obj) == 0) {
2418 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2419 }
2420 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002421 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002422 PyObject *unicode_str =
2423 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2424 PyBytes_GET_SIZE(obj),
2425 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002426 _Py_IDENTIFIER(latin1);
2427
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002428 if (unicode_str == NULL)
2429 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002430 reduce_value = Py_BuildValue("(O(OO))",
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002431 st->codecs_encode, unicode_str,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002432 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002433 Py_DECREF(unicode_str);
2434 }
2435
2436 if (reduce_value == NULL)
2437 return -1;
2438
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002439 /* save_reduce() will memoize the object automatically. */
2440 status = save_reduce(self, reduce_value, obj);
2441 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002442 return status;
2443 }
2444 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002445 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2446 PyBytes_GET_SIZE(obj));
2447 }
2448}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002449
Antoine Pitrou91f43802019-05-26 17:10:09 +02002450static int
2451_save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2452 Py_ssize_t size)
2453{
2454 assert(self->proto >= 5);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002455
Antoine Pitrou91f43802019-05-26 17:10:09 +02002456 char header[9];
2457 Py_ssize_t len;
2458
2459 if (size < 0)
2460 return -1;
2461
2462 header[0] = BYTEARRAY8;
2463 _write_size64(header + 1, size);
2464 len = 9;
2465
2466 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2467 return -1;
2468 }
2469
2470 if (memo_put(self, obj) < 0) {
2471 return -1;
2472 }
2473
2474 return 0;
2475}
2476
2477static int
2478save_bytearray(PicklerObject *self, PyObject *obj)
2479{
2480 if (self->proto < 5) {
2481 /* Older pickle protocols do not have an opcode for pickling
2482 * bytearrays. */
2483 PyObject *reduce_value = NULL;
2484 int status;
2485
2486 if (PyByteArray_GET_SIZE(obj) == 0) {
2487 reduce_value = Py_BuildValue("(O())",
2488 (PyObject *) &PyByteArray_Type);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002489 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002490 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002491 PyObject *bytes_obj = PyBytes_FromObject(obj);
2492 if (bytes_obj != NULL) {
2493 reduce_value = Py_BuildValue("(O(O))",
2494 (PyObject *) &PyByteArray_Type,
2495 bytes_obj);
2496 Py_DECREF(bytes_obj);
2497 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002498 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002499 if (reduce_value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002500 return -1;
2501
Antoine Pitrou91f43802019-05-26 17:10:09 +02002502 /* save_reduce() will memoize the object automatically. */
2503 status = save_reduce(self, reduce_value, obj);
2504 Py_DECREF(reduce_value);
2505 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002506 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002507 else {
2508 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2509 PyByteArray_GET_SIZE(obj));
2510 }
2511}
2512
2513static int
2514save_picklebuffer(PicklerObject *self, PyObject *obj)
2515{
2516 if (self->proto < 5) {
2517 PickleState *st = _Pickle_GetGlobalState();
2518 PyErr_SetString(st->PicklingError,
2519 "PickleBuffer can only pickled with protocol >= 5");
2520 return -1;
2521 }
2522 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2523 if (view == NULL) {
2524 return -1;
2525 }
2526 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2527 PickleState *st = _Pickle_GetGlobalState();
2528 PyErr_SetString(st->PicklingError,
2529 "PickleBuffer can not be pickled when "
2530 "pointing to a non-contiguous buffer");
2531 return -1;
2532 }
2533 int in_band = 1;
2534 if (self->buffer_callback != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002535 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
Antoine Pitrou91f43802019-05-26 17:10:09 +02002536 if (ret == NULL) {
2537 return -1;
2538 }
2539 in_band = PyObject_IsTrue(ret);
2540 Py_DECREF(ret);
2541 if (in_band == -1) {
2542 return -1;
2543 }
2544 }
2545 if (in_band) {
2546 /* Write data in-band */
2547 if (view->readonly) {
2548 return _save_bytes_data(self, obj, (const char*) view->buf,
2549 view->len);
2550 }
2551 else {
2552 return _save_bytearray_data(self, obj, (const char*) view->buf,
2553 view->len);
2554 }
2555 }
2556 else {
2557 /* Write data out-of-band */
2558 const char next_buffer_op = NEXT_BUFFER;
2559 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2560 return -1;
2561 }
2562 if (view->readonly) {
2563 const char readonly_buffer_op = READONLY_BUFFER;
2564 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2565 return -1;
2566 }
2567 }
2568 }
2569 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002570}
2571
2572/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2573 backslash and newline characters to \uXXXX escapes. */
2574static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002575raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002576{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002577 char *p;
Victor Stinner049e5092014-08-17 22:20:00 +02002578 Py_ssize_t i, size;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002579 const void *data;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002580 unsigned int kind;
Victor Stinner358af132015-10-12 22:36:57 +02002581 _PyBytesWriter writer;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002582
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002583 if (PyUnicode_READY(obj))
2584 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002585
Victor Stinner358af132015-10-12 22:36:57 +02002586 _PyBytesWriter_Init(&writer);
2587
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002588 size = PyUnicode_GET_LENGTH(obj);
2589 data = PyUnicode_DATA(obj);
2590 kind = PyUnicode_KIND(obj);
Victor Stinner121aab42011-09-29 23:40:53 +02002591
Victor Stinner358af132015-10-12 22:36:57 +02002592 p = _PyBytesWriter_Alloc(&writer, size);
2593 if (p == NULL)
2594 goto error;
2595 writer.overallocate = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002596
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002597 for (i=0; i < size; i++) {
2598 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002599 /* Map 32-bit characters to '\Uxxxxxxxx' */
2600 if (ch >= 0x10000) {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002601 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002602 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2603 if (p == NULL)
2604 goto error;
2605
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002606 *p++ = '\\';
2607 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002608 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2609 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2610 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2611 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2612 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2613 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2615 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002616 }
Victor Stinner358af132015-10-12 22:36:57 +02002617 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
Serhiy Storchaka38ab7d42019-05-31 11:29:39 +03002618 else if (ch >= 256 ||
2619 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2620 ch == 0x1a)
2621 {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002622 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002623 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2624 if (p == NULL)
2625 goto error;
2626
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002627 *p++ = '\\';
2628 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002629 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2630 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2631 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2632 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002633 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002634 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002635 else
2636 *p++ = (char) ch;
2637 }
Victor Stinner358af132015-10-12 22:36:57 +02002638
2639 return _PyBytesWriter_Finish(&writer, p);
2640
2641error:
2642 _PyBytesWriter_Dealloc(&writer);
2643 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002644}
2645
2646static int
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002647write_unicode_binary(PicklerObject *self, PyObject *obj)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002648{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002649 char header[9];
2650 Py_ssize_t len;
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002651 PyObject *encoded = NULL;
2652 Py_ssize_t size;
2653 const char *data;
2654
2655 if (PyUnicode_READY(obj))
2656 return -1;
2657
2658 data = PyUnicode_AsUTF8AndSize(obj, &size);
2659 if (data == NULL) {
2660 /* Issue #8383: for strings with lone surrogates, fallback on the
2661 "surrogatepass" error handler. */
2662 PyErr_Clear();
2663 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2664 if (encoded == NULL)
2665 return -1;
2666
2667 data = PyBytes_AS_STRING(encoded);
2668 size = PyBytes_GET_SIZE(encoded);
2669 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002670
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002671 assert(size >= 0);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002672 if (size <= 0xff && self->proto >= 4) {
2673 header[0] = SHORT_BINUNICODE;
2674 header[1] = (unsigned char)(size & 0xff);
2675 len = 2;
2676 }
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002677 else if ((size_t)size <= 0xffffffffUL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002678 header[0] = BINUNICODE;
2679 header[1] = (unsigned char)(size & 0xff);
2680 header[2] = (unsigned char)((size >> 8) & 0xff);
2681 header[3] = (unsigned char)((size >> 16) & 0xff);
2682 header[4] = (unsigned char)((size >> 24) & 0xff);
2683 len = 5;
2684 }
2685 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002686 header[0] = BINUNICODE8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002687 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002688 len = 9;
2689 }
2690 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002691 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou91f43802019-05-26 17:10:09 +02002692 "serializing a string larger than 4 GiB "
2693 "requires pickle protocol 4 or higher");
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002694 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002695 return -1;
2696 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002697
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002698 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2699 Py_XDECREF(encoded);
2700 return -1;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002701 }
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002702 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002703 return 0;
2704}
2705
2706static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002707save_unicode(PicklerObject *self, PyObject *obj)
2708{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002709 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002710 if (write_unicode_binary(self, obj) < 0)
2711 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002712 }
2713 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002714 PyObject *encoded;
2715 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002716 const char unicode_op = UNICODE;
2717
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002718 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002719 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002720 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002721
Antoine Pitrou299978d2013-04-07 17:38:11 +02002722 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2723 Py_DECREF(encoded);
2724 return -1;
2725 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002726
2727 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002728 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2729 Py_DECREF(encoded);
2730 return -1;
2731 }
2732 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002733
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002734 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002735 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002736 }
2737 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002738 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002739
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002740 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002741}
2742
2743/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2744static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002745store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002746{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002747 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002748
2749 assert(PyTuple_Size(t) == len);
2750
2751 for (i = 0; i < len; i++) {
2752 PyObject *element = PyTuple_GET_ITEM(t, i);
2753
2754 if (element == NULL)
2755 return -1;
2756 if (save(self, element, 0) < 0)
2757 return -1;
2758 }
2759
2760 return 0;
2761}
2762
2763/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2764 * used across protocols to minimize the space needed to pickle them.
2765 * Tuples are also the only builtin immutable type that can be recursive
2766 * (a tuple can be reached from itself), and that requires some subtle
2767 * magic so that it works in all cases. IOW, this is a long routine.
2768 */
2769static int
2770save_tuple(PicklerObject *self, PyObject *obj)
2771{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002772 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002773
2774 const char mark_op = MARK;
2775 const char tuple_op = TUPLE;
2776 const char pop_op = POP;
2777 const char pop_mark_op = POP_MARK;
2778 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2779
2780 if ((len = PyTuple_Size(obj)) < 0)
2781 return -1;
2782
2783 if (len == 0) {
2784 char pdata[2];
2785
2786 if (self->proto) {
2787 pdata[0] = EMPTY_TUPLE;
2788 len = 1;
2789 }
2790 else {
2791 pdata[0] = MARK;
2792 pdata[1] = TUPLE;
2793 len = 2;
2794 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002795 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002796 return -1;
2797 return 0;
2798 }
2799
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002800 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002801 * saving the tuple elements, the tuple must be recursive, in
2802 * which case we'll pop everything we put on the stack, and fetch
2803 * its value from the memo.
2804 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002805 if (len <= 3 && self->proto >= 2) {
2806 /* Use TUPLE{1,2,3} opcodes. */
2807 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002808 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002809
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002810 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002811 /* pop the len elements */
2812 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002813 if (_Pickler_Write(self, &pop_op, 1) < 0)
2814 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002815 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002816 if (memo_get(self, obj) < 0)
2817 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002818
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002819 return 0;
2820 }
2821 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002822 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2823 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002824 }
2825 goto memoize;
2826 }
2827
2828 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2829 * Generate MARK e1 e2 ... TUPLE
2830 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002831 if (_Pickler_Write(self, &mark_op, 1) < 0)
2832 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002833
2834 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002835 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002836
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002837 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002838 /* pop the stack stuff we pushed */
2839 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002840 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2841 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002842 }
2843 else {
2844 /* Note that we pop one more than len, to remove
2845 * the MARK too.
2846 */
2847 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002848 if (_Pickler_Write(self, &pop_op, 1) < 0)
2849 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002850 }
2851 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002852 if (memo_get(self, obj) < 0)
2853 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002854
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002855 return 0;
2856 }
2857 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002858 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2859 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002860 }
2861
2862 memoize:
2863 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002864 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002865
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002866 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002867}
2868
2869/* iter is an iterator giving items, and we batch up chunks of
2870 * MARK item item ... item APPENDS
2871 * opcode sequences. Calling code should have arranged to first create an
2872 * empty list, or list-like object, for the APPENDS to operate on.
2873 * Returns 0 on success, <0 on error.
2874 */
2875static int
2876batch_list(PicklerObject *self, PyObject *iter)
2877{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002878 PyObject *obj = NULL;
2879 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002880 int i, n;
2881
2882 const char mark_op = MARK;
2883 const char append_op = APPEND;
2884 const char appends_op = APPENDS;
2885
2886 assert(iter != NULL);
2887
2888 /* XXX: I think this function could be made faster by avoiding the
2889 iterator interface and fetching objects directly from list using
2890 PyList_GET_ITEM.
2891 */
2892
2893 if (self->proto == 0) {
2894 /* APPENDS isn't available; do one at a time. */
2895 for (;;) {
2896 obj = PyIter_Next(iter);
2897 if (obj == NULL) {
2898 if (PyErr_Occurred())
2899 return -1;
2900 break;
2901 }
2902 i = save(self, obj, 0);
2903 Py_DECREF(obj);
2904 if (i < 0)
2905 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002906 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002907 return -1;
2908 }
2909 return 0;
2910 }
2911
2912 /* proto > 0: write in batches of BATCHSIZE. */
2913 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002914 /* Get first item */
2915 firstitem = PyIter_Next(iter);
2916 if (firstitem == NULL) {
2917 if (PyErr_Occurred())
2918 goto error;
2919
2920 /* nothing more to add */
2921 break;
2922 }
2923
2924 /* Try to get a second item */
2925 obj = PyIter_Next(iter);
2926 if (obj == NULL) {
2927 if (PyErr_Occurred())
2928 goto error;
2929
2930 /* Only one item to write */
2931 if (save(self, firstitem, 0) < 0)
2932 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002933 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002934 goto error;
2935 Py_CLEAR(firstitem);
2936 break;
2937 }
2938
2939 /* More than one item to write */
2940
2941 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002942 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002943 goto error;
2944
2945 if (save(self, firstitem, 0) < 0)
2946 goto error;
2947 Py_CLEAR(firstitem);
2948 n = 1;
2949
2950 /* Fetch and save up to BATCHSIZE items */
2951 while (obj) {
2952 if (save(self, obj, 0) < 0)
2953 goto error;
2954 Py_CLEAR(obj);
2955 n += 1;
2956
2957 if (n == BATCHSIZE)
2958 break;
2959
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960 obj = PyIter_Next(iter);
2961 if (obj == NULL) {
2962 if (PyErr_Occurred())
2963 goto error;
2964 break;
2965 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002966 }
2967
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002968 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002969 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002970
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002971 } while (n == BATCHSIZE);
2972 return 0;
2973
2974 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002975 Py_XDECREF(firstitem);
2976 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002977 return -1;
2978}
2979
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002980/* This is a variant of batch_list() above, specialized for lists (with no
2981 * support for list subclasses). Like batch_list(), we batch up chunks of
2982 * MARK item item ... item APPENDS
2983 * opcode sequences. Calling code should have arranged to first create an
2984 * empty list, or list-like object, for the APPENDS to operate on.
2985 * Returns 0 on success, -1 on error.
2986 *
2987 * This version is considerably faster than batch_list(), if less general.
2988 *
2989 * Note that this only works for protocols > 0.
2990 */
2991static int
2992batch_list_exact(PicklerObject *self, PyObject *obj)
2993{
2994 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002995 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002996
2997 const char append_op = APPEND;
2998 const char appends_op = APPENDS;
2999 const char mark_op = MARK;
3000
3001 assert(obj != NULL);
3002 assert(self->proto > 0);
3003 assert(PyList_CheckExact(obj));
3004
3005 if (PyList_GET_SIZE(obj) == 1) {
3006 item = PyList_GET_ITEM(obj, 0);
3007 if (save(self, item, 0) < 0)
3008 return -1;
3009 if (_Pickler_Write(self, &append_op, 1) < 0)
3010 return -1;
3011 return 0;
3012 }
3013
3014 /* Write in batches of BATCHSIZE. */
3015 total = 0;
3016 do {
3017 this_batch = 0;
3018 if (_Pickler_Write(self, &mark_op, 1) < 0)
3019 return -1;
3020 while (total < PyList_GET_SIZE(obj)) {
3021 item = PyList_GET_ITEM(obj, total);
3022 if (save(self, item, 0) < 0)
3023 return -1;
3024 total++;
3025 if (++this_batch == BATCHSIZE)
3026 break;
3027 }
3028 if (_Pickler_Write(self, &appends_op, 1) < 0)
3029 return -1;
3030
3031 } while (total < PyList_GET_SIZE(obj));
3032
3033 return 0;
3034}
3035
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003036static int
3037save_list(PicklerObject *self, PyObject *obj)
3038{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003039 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003040 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003041 int status = 0;
3042
3043 if (self->fast && !fast_save_enter(self, obj))
3044 goto error;
3045
3046 /* Create an empty list. */
3047 if (self->bin) {
3048 header[0] = EMPTY_LIST;
3049 len = 1;
3050 }
3051 else {
3052 header[0] = MARK;
3053 header[1] = LIST;
3054 len = 2;
3055 }
3056
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003057 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003058 goto error;
3059
3060 /* Get list length, and bow out early if empty. */
3061 if ((len = PyList_Size(obj)) < 0)
3062 goto error;
3063
3064 if (memo_put(self, obj) < 0)
3065 goto error;
3066
3067 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003068 /* Materialize the list elements. */
3069 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003070 if (Py_EnterRecursiveCall(" while pickling an object"))
3071 goto error;
3072 status = batch_list_exact(self, obj);
3073 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003074 } else {
3075 PyObject *iter = PyObject_GetIter(obj);
3076 if (iter == NULL)
3077 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003078
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003079 if (Py_EnterRecursiveCall(" while pickling an object")) {
3080 Py_DECREF(iter);
3081 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003082 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003083 status = batch_list(self, iter);
3084 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003085 Py_DECREF(iter);
3086 }
3087 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003088 if (0) {
3089 error:
3090 status = -1;
3091 }
3092
3093 if (self->fast && !fast_save_leave(self, obj))
3094 status = -1;
3095
3096 return status;
3097}
3098
3099/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3100 * MARK key value ... key value SETITEMS
3101 * opcode sequences. Calling code should have arranged to first create an
3102 * empty dict, or dict-like object, for the SETITEMS to operate on.
3103 * Returns 0 on success, <0 on error.
3104 *
3105 * This is very much like batch_list(). The difference between saving
3106 * elements directly, and picking apart two-tuples, is so long-winded at
3107 * the C level, though, that attempts to combine these routines were too
3108 * ugly to bear.
3109 */
3110static int
3111batch_dict(PicklerObject *self, PyObject *iter)
3112{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003113 PyObject *obj = NULL;
3114 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003115 int i, n;
3116
3117 const char mark_op = MARK;
3118 const char setitem_op = SETITEM;
3119 const char setitems_op = SETITEMS;
3120
3121 assert(iter != NULL);
3122
3123 if (self->proto == 0) {
3124 /* SETITEMS isn't available; do one at a time. */
3125 for (;;) {
3126 obj = PyIter_Next(iter);
3127 if (obj == NULL) {
3128 if (PyErr_Occurred())
3129 return -1;
3130 break;
3131 }
3132 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3133 PyErr_SetString(PyExc_TypeError, "dict items "
3134 "iterator must return 2-tuples");
3135 return -1;
3136 }
3137 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3138 if (i >= 0)
3139 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3140 Py_DECREF(obj);
3141 if (i < 0)
3142 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003143 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003144 return -1;
3145 }
3146 return 0;
3147 }
3148
3149 /* proto > 0: write in batches of BATCHSIZE. */
3150 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003151 /* Get first item */
3152 firstitem = PyIter_Next(iter);
3153 if (firstitem == NULL) {
3154 if (PyErr_Occurred())
3155 goto error;
3156
3157 /* nothing more to add */
3158 break;
3159 }
3160 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3161 PyErr_SetString(PyExc_TypeError, "dict items "
3162 "iterator must return 2-tuples");
3163 goto error;
3164 }
3165
3166 /* Try to get a second item */
3167 obj = PyIter_Next(iter);
3168 if (obj == NULL) {
3169 if (PyErr_Occurred())
3170 goto error;
3171
3172 /* Only one item to write */
3173 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3174 goto error;
3175 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3176 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003177 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003178 goto error;
3179 Py_CLEAR(firstitem);
3180 break;
3181 }
3182
3183 /* More than one item to write */
3184
3185 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003186 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003187 goto error;
3188
3189 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3190 goto error;
3191 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3192 goto error;
3193 Py_CLEAR(firstitem);
3194 n = 1;
3195
3196 /* Fetch and save up to BATCHSIZE items */
3197 while (obj) {
3198 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3199 PyErr_SetString(PyExc_TypeError, "dict items "
3200 "iterator must return 2-tuples");
3201 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003202 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003203 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3204 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3205 goto error;
3206 Py_CLEAR(obj);
3207 n += 1;
3208
3209 if (n == BATCHSIZE)
3210 break;
3211
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003212 obj = PyIter_Next(iter);
3213 if (obj == NULL) {
3214 if (PyErr_Occurred())
3215 goto error;
3216 break;
3217 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003218 }
3219
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003220 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003221 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003222
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003223 } while (n == BATCHSIZE);
3224 return 0;
3225
3226 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003227 Py_XDECREF(firstitem);
3228 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003229 return -1;
3230}
3231
Collin Winter5c9b02d2009-05-25 05:43:30 +00003232/* This is a variant of batch_dict() above that specializes for dicts, with no
3233 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3234 * MARK key value ... key value SETITEMS
3235 * opcode sequences. Calling code should have arranged to first create an
3236 * empty dict, or dict-like object, for the SETITEMS to operate on.
3237 * Returns 0 on success, -1 on error.
3238 *
3239 * Note that this currently doesn't work for protocol 0.
3240 */
3241static int
3242batch_dict_exact(PicklerObject *self, PyObject *obj)
3243{
3244 PyObject *key = NULL, *value = NULL;
3245 int i;
3246 Py_ssize_t dict_size, ppos = 0;
3247
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00003248 const char mark_op = MARK;
3249 const char setitem_op = SETITEM;
3250 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00003251
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003252 assert(obj != NULL && PyDict_CheckExact(obj));
Collin Winter5c9b02d2009-05-25 05:43:30 +00003253 assert(self->proto > 0);
3254
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003255 dict_size = PyDict_GET_SIZE(obj);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003256
3257 /* Special-case len(d) == 1 to save space. */
3258 if (dict_size == 1) {
3259 PyDict_Next(obj, &ppos, &key, &value);
3260 if (save(self, key, 0) < 0)
3261 return -1;
3262 if (save(self, value, 0) < 0)
3263 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003264 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003265 return -1;
3266 return 0;
3267 }
3268
3269 /* Write in batches of BATCHSIZE. */
3270 do {
3271 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003272 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003273 return -1;
3274 while (PyDict_Next(obj, &ppos, &key, &value)) {
3275 if (save(self, key, 0) < 0)
3276 return -1;
3277 if (save(self, value, 0) < 0)
3278 return -1;
3279 if (++i == BATCHSIZE)
3280 break;
3281 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003282 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003283 return -1;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003284 if (PyDict_GET_SIZE(obj) != dict_size) {
Collin Winter5c9b02d2009-05-25 05:43:30 +00003285 PyErr_Format(
3286 PyExc_RuntimeError,
3287 "dictionary changed size during iteration");
3288 return -1;
3289 }
3290
3291 } while (i == BATCHSIZE);
3292 return 0;
3293}
3294
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003295static int
3296save_dict(PicklerObject *self, PyObject *obj)
3297{
3298 PyObject *items, *iter;
3299 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003300 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003301 int status = 0;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003302 assert(PyDict_Check(obj));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003303
3304 if (self->fast && !fast_save_enter(self, obj))
3305 goto error;
3306
3307 /* Create an empty dict. */
3308 if (self->bin) {
3309 header[0] = EMPTY_DICT;
3310 len = 1;
3311 }
3312 else {
3313 header[0] = MARK;
3314 header[1] = DICT;
3315 len = 2;
3316 }
3317
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003318 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003319 goto error;
3320
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003321 if (memo_put(self, obj) < 0)
3322 goto error;
3323
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003324 if (PyDict_GET_SIZE(obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003325 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00003326 if (PyDict_CheckExact(obj) && self->proto > 0) {
3327 /* We can take certain shortcuts if we know this is a dict and
3328 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003329 if (Py_EnterRecursiveCall(" while pickling an object"))
3330 goto error;
3331 status = batch_dict_exact(self, obj);
3332 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003333 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003334 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003335
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003336 items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003337 if (items == NULL)
3338 goto error;
3339 iter = PyObject_GetIter(items);
3340 Py_DECREF(items);
3341 if (iter == NULL)
3342 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003343 if (Py_EnterRecursiveCall(" while pickling an object")) {
3344 Py_DECREF(iter);
3345 goto error;
3346 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00003347 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003348 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003349 Py_DECREF(iter);
3350 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003351 }
3352
3353 if (0) {
3354 error:
3355 status = -1;
3356 }
3357
3358 if (self->fast && !fast_save_leave(self, obj))
3359 status = -1;
3360
3361 return status;
3362}
3363
3364static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003365save_set(PicklerObject *self, PyObject *obj)
3366{
3367 PyObject *item;
3368 int i;
3369 Py_ssize_t set_size, ppos = 0;
3370 Py_hash_t hash;
3371
3372 const char empty_set_op = EMPTY_SET;
3373 const char mark_op = MARK;
3374 const char additems_op = ADDITEMS;
3375
3376 if (self->proto < 4) {
3377 PyObject *items;
3378 PyObject *reduce_value;
3379 int status;
3380
3381 items = PySequence_List(obj);
3382 if (items == NULL) {
3383 return -1;
3384 }
3385 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3386 Py_DECREF(items);
3387 if (reduce_value == NULL) {
3388 return -1;
3389 }
3390 /* save_reduce() will memoize the object automatically. */
3391 status = save_reduce(self, reduce_value, obj);
3392 Py_DECREF(reduce_value);
3393 return status;
3394 }
3395
3396 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3397 return -1;
3398
3399 if (memo_put(self, obj) < 0)
3400 return -1;
3401
3402 set_size = PySet_GET_SIZE(obj);
3403 if (set_size == 0)
3404 return 0; /* nothing to do */
3405
3406 /* Write in batches of BATCHSIZE. */
3407 do {
3408 i = 0;
3409 if (_Pickler_Write(self, &mark_op, 1) < 0)
3410 return -1;
3411 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3412 if (save(self, item, 0) < 0)
3413 return -1;
3414 if (++i == BATCHSIZE)
3415 break;
3416 }
3417 if (_Pickler_Write(self, &additems_op, 1) < 0)
3418 return -1;
3419 if (PySet_GET_SIZE(obj) != set_size) {
3420 PyErr_Format(
3421 PyExc_RuntimeError,
3422 "set changed size during iteration");
3423 return -1;
3424 }
3425 } while (i == BATCHSIZE);
3426
3427 return 0;
3428}
3429
3430static int
3431save_frozenset(PicklerObject *self, PyObject *obj)
3432{
3433 PyObject *iter;
3434
3435 const char mark_op = MARK;
3436 const char frozenset_op = FROZENSET;
3437
3438 if (self->fast && !fast_save_enter(self, obj))
3439 return -1;
3440
3441 if (self->proto < 4) {
3442 PyObject *items;
3443 PyObject *reduce_value;
3444 int status;
3445
3446 items = PySequence_List(obj);
3447 if (items == NULL) {
3448 return -1;
3449 }
3450 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3451 items);
3452 Py_DECREF(items);
3453 if (reduce_value == NULL) {
3454 return -1;
3455 }
3456 /* save_reduce() will memoize the object automatically. */
3457 status = save_reduce(self, reduce_value, obj);
3458 Py_DECREF(reduce_value);
3459 return status;
3460 }
3461
3462 if (_Pickler_Write(self, &mark_op, 1) < 0)
3463 return -1;
3464
3465 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003466 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01003467 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003468 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003469 for (;;) {
3470 PyObject *item;
3471
3472 item = PyIter_Next(iter);
3473 if (item == NULL) {
3474 if (PyErr_Occurred()) {
3475 Py_DECREF(iter);
3476 return -1;
3477 }
3478 break;
3479 }
3480 if (save(self, item, 0) < 0) {
3481 Py_DECREF(item);
3482 Py_DECREF(iter);
3483 return -1;
3484 }
3485 Py_DECREF(item);
3486 }
3487 Py_DECREF(iter);
3488
3489 /* If the object is already in the memo, this means it is
3490 recursive. In this case, throw away everything we put on the
3491 stack, and fetch the object back from the memo. */
3492 if (PyMemoTable_Get(self->memo, obj)) {
3493 const char pop_mark_op = POP_MARK;
3494
3495 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3496 return -1;
3497 if (memo_get(self, obj) < 0)
3498 return -1;
3499 return 0;
3500 }
3501
3502 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3503 return -1;
3504 if (memo_put(self, obj) < 0)
3505 return -1;
3506
3507 return 0;
3508}
3509
3510static int
3511fix_imports(PyObject **module_name, PyObject **global_name)
3512{
3513 PyObject *key;
3514 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003515 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003516
3517 key = PyTuple_Pack(2, *module_name, *global_name);
3518 if (key == NULL)
3519 return -1;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003520 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003521 Py_DECREF(key);
3522 if (item) {
3523 PyObject *fixed_module_name;
3524 PyObject *fixed_global_name;
3525
3526 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3527 PyErr_Format(PyExc_RuntimeError,
3528 "_compat_pickle.REVERSE_NAME_MAPPING values "
3529 "should be 2-tuples, not %.200s",
3530 Py_TYPE(item)->tp_name);
3531 return -1;
3532 }
3533 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3534 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3535 if (!PyUnicode_Check(fixed_module_name) ||
3536 !PyUnicode_Check(fixed_global_name)) {
3537 PyErr_Format(PyExc_RuntimeError,
3538 "_compat_pickle.REVERSE_NAME_MAPPING values "
3539 "should be pairs of str, not (%.200s, %.200s)",
3540 Py_TYPE(fixed_module_name)->tp_name,
3541 Py_TYPE(fixed_global_name)->tp_name);
3542 return -1;
3543 }
3544
3545 Py_CLEAR(*module_name);
3546 Py_CLEAR(*global_name);
3547 Py_INCREF(fixed_module_name);
3548 Py_INCREF(fixed_global_name);
3549 *module_name = fixed_module_name;
3550 *global_name = fixed_global_name;
Serhiy Storchakabfe18242015-03-31 13:12:37 +03003551 return 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003552 }
3553 else if (PyErr_Occurred()) {
3554 return -1;
3555 }
3556
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003557 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003558 if (item) {
3559 if (!PyUnicode_Check(item)) {
3560 PyErr_Format(PyExc_RuntimeError,
3561 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3562 "should be strings, not %.200s",
3563 Py_TYPE(item)->tp_name);
3564 return -1;
3565 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003566 Py_INCREF(item);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003567 Py_XSETREF(*module_name, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003568 }
3569 else if (PyErr_Occurred()) {
3570 return -1;
3571 }
3572
3573 return 0;
3574}
3575
3576static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003577save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3578{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003579 PyObject *global_name = NULL;
3580 PyObject *module_name = NULL;
3581 PyObject *module = NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003582 PyObject *parent = NULL;
3583 PyObject *dotted_path = NULL;
3584 PyObject *lastname = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003585 PyObject *cls;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003586 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003587 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003588 _Py_IDENTIFIER(__name__);
3589 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003590
3591 const char global_op = GLOBAL;
3592
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003593 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003594 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003595 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003596 }
3597 else {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003598 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3599 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003600 if (global_name == NULL) {
3601 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3602 if (global_name == NULL)
3603 goto error;
3604 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003605 }
3606
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003607 dotted_path = get_dotted_path(module, global_name);
3608 if (dotted_path == NULL)
3609 goto error;
3610 module_name = whichmodule(obj, dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003611 if (module_name == NULL)
3612 goto error;
3613
3614 /* XXX: Change to use the import C API directly with level=0 to disallow
3615 relative imports.
3616
3617 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3618 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3619 custom import functions (IMHO, this would be a nice security
3620 feature). The import C API would need to be extended to support the
3621 extra parameters of __import__ to fix that. */
3622 module = PyImport_Import(module_name);
3623 if (module == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003624 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003625 "Can't pickle %R: import of module %R failed",
3626 obj, module_name);
3627 goto error;
3628 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003629 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3630 Py_INCREF(lastname);
3631 cls = get_deep_attribute(module, dotted_path, &parent);
3632 Py_CLEAR(dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003633 if (cls == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003634 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003635 "Can't pickle %R: attribute lookup %S on %S failed",
3636 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003637 goto error;
3638 }
3639 if (cls != obj) {
3640 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003641 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003642 "Can't pickle %R: it's not the same object as %S.%S",
3643 obj, module_name, global_name);
3644 goto error;
3645 }
3646 Py_DECREF(cls);
3647
3648 if (self->proto >= 2) {
3649 /* See whether this is in the extension registry, and if
3650 * so generate an EXT opcode.
3651 */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003652 PyObject *extension_key;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003653 PyObject *code_obj; /* extension code as Python object */
3654 long code; /* extension code as C value */
3655 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003656 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003657
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003658 extension_key = PyTuple_Pack(2, module_name, global_name);
3659 if (extension_key == NULL) {
3660 goto error;
3661 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003662 code_obj = PyDict_GetItemWithError(st->extension_registry,
3663 extension_key);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003664 Py_DECREF(extension_key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003665 /* The object is not registered in the extension registry.
3666 This is the most likely code path. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003667 if (code_obj == NULL) {
3668 if (PyErr_Occurred()) {
3669 goto error;
3670 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003671 goto gen_global;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003672 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003673
3674 /* XXX: pickle.py doesn't check neither the type, nor the range
3675 of the value returned by the extension_registry. It should for
3676 consistency. */
3677
3678 /* Verify code_obj has the right type and value. */
3679 if (!PyLong_Check(code_obj)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003680 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003681 "Can't pickle %R: extension code %R isn't an integer",
3682 obj, code_obj);
3683 goto error;
3684 }
3685 code = PyLong_AS_LONG(code_obj);
3686 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003687 if (!PyErr_Occurred())
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003688 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3689 "code %ld is out of range", obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003690 goto error;
3691 }
3692
3693 /* Generate an EXT opcode. */
3694 if (code <= 0xff) {
3695 pdata[0] = EXT1;
3696 pdata[1] = (unsigned char)code;
3697 n = 2;
3698 }
3699 else if (code <= 0xffff) {
3700 pdata[0] = EXT2;
3701 pdata[1] = (unsigned char)(code & 0xff);
3702 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3703 n = 3;
3704 }
3705 else {
3706 pdata[0] = EXT4;
3707 pdata[1] = (unsigned char)(code & 0xff);
3708 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3709 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3710 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3711 n = 5;
3712 }
3713
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003714 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003715 goto error;
3716 }
3717 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003718 gen_global:
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003719 if (parent == module) {
3720 Py_INCREF(lastname);
3721 Py_DECREF(global_name);
3722 global_name = lastname;
3723 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003724 if (self->proto >= 4) {
3725 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003726
Christian Heimese8b1ba12013-11-23 21:13:39 +01003727 if (save(self, module_name, 0) < 0)
3728 goto error;
3729 if (save(self, global_name, 0) < 0)
3730 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003731
3732 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3733 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003734 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003735 else if (parent != module) {
3736 PickleState *st = _Pickle_GetGlobalState();
3737 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3738 st->getattr, parent, lastname);
Alexey Izbyshevf8c06b02018-08-22 07:51:25 +03003739 if (reduce_value == NULL)
3740 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003741 status = save_reduce(self, reduce_value, NULL);
3742 Py_DECREF(reduce_value);
3743 if (status < 0)
3744 goto error;
3745 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003746 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003747 /* Generate a normal global opcode if we are using a pickle
3748 protocol < 4, or if the object is not registered in the
3749 extension registry. */
3750 PyObject *encoded;
3751 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003752
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003753 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003754 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003755
3756 /* For protocol < 3 and if the user didn't request against doing
3757 so, we convert module names to the old 2.x module names. */
3758 if (self->proto < 3 && self->fix_imports) {
3759 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003760 goto error;
3761 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003762 }
3763
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003764 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3765 both the module name and the global name using UTF-8. We do so
3766 only when we are using the pickle protocol newer than version
3767 3. This is to ensure compatibility with older Unpickler running
3768 on Python 2.x. */
3769 if (self->proto == 3) {
3770 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003771 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003772 else {
3773 unicode_encoder = PyUnicode_AsASCIIString;
3774 }
3775 encoded = unicode_encoder(module_name);
3776 if (encoded == NULL) {
3777 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003778 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003779 "can't pickle module identifier '%S' using "
3780 "pickle protocol %i",
3781 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003782 goto error;
3783 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003784 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3785 PyBytes_GET_SIZE(encoded)) < 0) {
3786 Py_DECREF(encoded);
3787 goto error;
3788 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003789 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003790 if(_Pickler_Write(self, "\n", 1) < 0)
3791 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003792
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003793 /* Save the name of the module. */
3794 encoded = unicode_encoder(global_name);
3795 if (encoded == NULL) {
3796 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003797 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003798 "can't pickle global identifier '%S' using "
3799 "pickle protocol %i",
3800 global_name, self->proto);
3801 goto error;
3802 }
3803 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3804 PyBytes_GET_SIZE(encoded)) < 0) {
3805 Py_DECREF(encoded);
3806 goto error;
3807 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003808 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003809 if (_Pickler_Write(self, "\n", 1) < 0)
3810 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003811 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003812 /* Memoize the object. */
3813 if (memo_put(self, obj) < 0)
3814 goto error;
3815 }
3816
3817 if (0) {
3818 error:
3819 status = -1;
3820 }
3821 Py_XDECREF(module_name);
3822 Py_XDECREF(global_name);
3823 Py_XDECREF(module);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003824 Py_XDECREF(parent);
3825 Py_XDECREF(dotted_path);
3826 Py_XDECREF(lastname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003827
3828 return status;
3829}
3830
3831static int
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003832save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3833{
3834 PyObject *reduce_value;
3835 int status;
3836
3837 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3838 if (reduce_value == NULL) {
3839 return -1;
3840 }
3841 status = save_reduce(self, reduce_value, obj);
3842 Py_DECREF(reduce_value);
3843 return status;
3844}
3845
3846static int
3847save_type(PicklerObject *self, PyObject *obj)
3848{
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003849 if (obj == (PyObject *)&_PyNone_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003850 return save_singleton_type(self, obj, Py_None);
3851 }
3852 else if (obj == (PyObject *)&PyEllipsis_Type) {
3853 return save_singleton_type(self, obj, Py_Ellipsis);
3854 }
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003855 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003856 return save_singleton_type(self, obj, Py_NotImplemented);
3857 }
3858 return save_global(self, obj, NULL);
3859}
3860
3861static int
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003862save_pers(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003863{
3864 PyObject *pid = NULL;
3865 int status = 0;
3866
3867 const char persid_op = PERSID;
3868 const char binpersid_op = BINPERSID;
3869
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003870 pid = call_method(self->pers_func, self->pers_func_self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003871 if (pid == NULL)
3872 return -1;
3873
3874 if (pid != Py_None) {
3875 if (self->bin) {
3876 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003877 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003878 goto error;
3879 }
3880 else {
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003881 PyObject *pid_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003882
3883 pid_str = PyObject_Str(pid);
3884 if (pid_str == NULL)
3885 goto error;
3886
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003887 /* XXX: Should it check whether the pid contains embedded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003888 newlines? */
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003889 if (!PyUnicode_IS_ASCII(pid_str)) {
3890 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3891 "persistent IDs in protocol 0 must be "
3892 "ASCII strings");
3893 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003894 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003895 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003896
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003897 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003898 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3899 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3900 _Pickler_Write(self, "\n", 1) < 0) {
3901 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003902 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003903 }
3904 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003905 }
3906 status = 1;
3907 }
3908
3909 if (0) {
3910 error:
3911 status = -1;
3912 }
3913 Py_XDECREF(pid);
3914
3915 return status;
3916}
3917
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003918static PyObject *
3919get_class(PyObject *obj)
3920{
3921 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003922 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003923
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003924 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3925 cls = (PyObject *) Py_TYPE(obj);
3926 Py_INCREF(cls);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003927 }
3928 return cls;
3929}
3930
/* We're saving obj, and args is the 2-thru-6 tuple returned by the
 * appropriate __reduce__ method for obj.
 */
3934static int
3935save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3936{
3937 PyObject *callable;
3938 PyObject *argtup;
3939 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003940 PyObject *listitems = Py_None;
3941 PyObject *dictitems = Py_None;
Pierre Glaser65d98d02019-05-08 21:40:25 +02003942 PyObject *state_setter = Py_None;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003943 PickleState *st = _Pickle_GetGlobalState();
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003944 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003945 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003946
3947 const char reduce_op = REDUCE;
3948 const char build_op = BUILD;
3949 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003950 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003951
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003952 size = PyTuple_Size(args);
Pierre Glaser65d98d02019-05-08 21:40:25 +02003953 if (size < 2 || size > 6) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003954 PyErr_SetString(st->PicklingError, "tuple returned by "
Pierre Glaser65d98d02019-05-08 21:40:25 +02003955 "__reduce__ must contain 2 through 6 elements");
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003956 return -1;
3957 }
3958
Pierre Glaser65d98d02019-05-08 21:40:25 +02003959 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3960 &callable, &argtup, &state, &listitems, &dictitems,
3961 &state_setter))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003962 return -1;
3963
3964 if (!PyCallable_Check(callable)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003965 PyErr_SetString(st->PicklingError, "first item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003966 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003967 return -1;
3968 }
3969 if (!PyTuple_Check(argtup)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003970 PyErr_SetString(st->PicklingError, "second item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003971 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003972 return -1;
3973 }
3974
3975 if (state == Py_None)
3976 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003977
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003978 if (listitems == Py_None)
3979 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003980 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003981 PyErr_Format(st->PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003982 "returned by __reduce__ must be an iterator, not %s",
3983 Py_TYPE(listitems)->tp_name);
3984 return -1;
3985 }
3986
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003987 if (dictitems == Py_None)
3988 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003989 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003990 PyErr_Format(st->PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003991 "returned by __reduce__ must be an iterator, not %s",
3992 Py_TYPE(dictitems)->tp_name);
3993 return -1;
3994 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003995
Pierre Glaser65d98d02019-05-08 21:40:25 +02003996 if (state_setter == Py_None)
3997 state_setter = NULL;
3998 else if (!PyCallable_Check(state_setter)) {
3999 PyErr_Format(st->PicklingError, "sixth element of the tuple "
4000 "returned by __reduce__ must be a function, not %s",
4001 Py_TYPE(state_setter)->tp_name);
4002 return -1;
4003 }
4004
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004005 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004006 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004007 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004008
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004009 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4010 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004011 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004012 if (name != NULL && PyUnicode_Check(name)) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004013 _Py_IDENTIFIER(__newobj_ex__);
Serhiy Storchakaf0f35a62017-01-09 10:09:43 +02004014 use_newobj_ex = _PyUnicode_EqualToASCIIId(
4015 name, &PyId___newobj_ex__);
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02004016 if (!use_newobj_ex) {
4017 _Py_IDENTIFIER(__newobj__);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02004018 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02004019 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004020 }
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02004021 Py_XDECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004022 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004023
4024 if (use_newobj_ex) {
4025 PyObject *cls;
4026 PyObject *args;
4027 PyObject *kwargs;
4028
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004029 if (PyTuple_GET_SIZE(argtup) != 3) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004030 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004031 "length of the NEWOBJ_EX argument tuple must be "
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004032 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004033 return -1;
4034 }
4035
4036 cls = PyTuple_GET_ITEM(argtup, 0);
4037 if (!PyType_Check(cls)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004038 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004039 "first item from NEWOBJ_EX argument tuple must "
4040 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4041 return -1;
4042 }
4043 args = PyTuple_GET_ITEM(argtup, 1);
4044 if (!PyTuple_Check(args)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004045 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004046 "second item from NEWOBJ_EX argument tuple must "
4047 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4048 return -1;
4049 }
4050 kwargs = PyTuple_GET_ITEM(argtup, 2);
4051 if (!PyDict_Check(kwargs)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004052 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004053 "third item from NEWOBJ_EX argument tuple must "
4054 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4055 return -1;
4056 }
4057
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004058 if (self->proto >= 4) {
4059 if (save(self, cls, 0) < 0 ||
4060 save(self, args, 0) < 0 ||
4061 save(self, kwargs, 0) < 0 ||
4062 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4063 return -1;
4064 }
4065 }
4066 else {
4067 PyObject *newargs;
4068 PyObject *cls_new;
4069 Py_ssize_t i;
4070 _Py_IDENTIFIER(__new__);
4071
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004072 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004073 if (newargs == NULL)
4074 return -1;
4075
4076 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4077 if (cls_new == NULL) {
4078 Py_DECREF(newargs);
4079 return -1;
4080 }
4081 PyTuple_SET_ITEM(newargs, 0, cls_new);
4082 Py_INCREF(cls);
4083 PyTuple_SET_ITEM(newargs, 1, cls);
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004084 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004085 PyObject *item = PyTuple_GET_ITEM(args, i);
4086 Py_INCREF(item);
4087 PyTuple_SET_ITEM(newargs, i + 2, item);
4088 }
4089
4090 callable = PyObject_Call(st->partial, newargs, kwargs);
4091 Py_DECREF(newargs);
4092 if (callable == NULL)
4093 return -1;
4094
4095 newargs = PyTuple_New(0);
4096 if (newargs == NULL) {
4097 Py_DECREF(callable);
4098 return -1;
4099 }
4100
4101 if (save(self, callable, 0) < 0 ||
4102 save(self, newargs, 0) < 0 ||
4103 _Pickler_Write(self, &reduce_op, 1) < 0) {
4104 Py_DECREF(newargs);
4105 Py_DECREF(callable);
4106 return -1;
4107 }
4108 Py_DECREF(newargs);
4109 Py_DECREF(callable);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004110 }
4111 }
4112 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004113 PyObject *cls;
4114 PyObject *newargtup;
4115 PyObject *obj_class;
4116 int p;
4117
4118 /* Sanity checks. */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004119 if (PyTuple_GET_SIZE(argtup) < 1) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004120 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004121 return -1;
4122 }
4123
4124 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004125 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004126 PyErr_SetString(st->PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004127 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004128 return -1;
4129 }
4130
4131 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004132 obj_class = get_class(obj);
Zackery Spytz25d38972018-12-05 11:29:20 -07004133 if (obj_class == NULL) {
4134 return -1;
4135 }
4136 p = obj_class != cls;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004137 Py_DECREF(obj_class);
4138 if (p) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004139 PyErr_SetString(st->PicklingError, "args[0] from "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004140 "__newobj__ args has the wrong class");
4141 return -1;
4142 }
4143 }
4144 /* XXX: These calls save() are prone to infinite recursion. Imagine
4145 what happen if the value returned by the __reduce__() method of
4146 some extension type contains another object of the same type. Ouch!
4147
4148 Here is a quick example, that I ran into, to illustrate what I
4149 mean:
4150
4151 >>> import pickle, copyreg
4152 >>> copyreg.dispatch_table.pop(complex)
4153 >>> pickle.dumps(1+2j)
4154 Traceback (most recent call last):
4155 ...
Yury Selivanovf488fb42015-07-03 01:04:23 -04004156 RecursionError: maximum recursion depth exceeded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004157
4158 Removing the complex class from copyreg.dispatch_table made the
4159 __reduce_ex__() method emit another complex object:
4160
4161 >>> (1+1j).__reduce_ex__(2)
4162 (<function __newobj__ at 0xb7b71c3c>,
4163 (<class 'complex'>, (1+1j)), None, None, None)
4164
4165 Thus when save() was called on newargstup (the 2nd item) recursion
4166 ensued. Of course, the bug was in the complex class which had a
4167 broken __getnewargs__() that emitted another complex object. But,
4168 the point, here, is it is quite easy to end up with a broken reduce
4169 function. */
4170
4171 /* Save the class and its __new__ arguments. */
4172 if (save(self, cls, 0) < 0)
4173 return -1;
4174
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004175 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004176 if (newargtup == NULL)
4177 return -1;
4178
4179 p = save(self, newargtup, 0);
4180 Py_DECREF(newargtup);
4181 if (p < 0)
4182 return -1;
4183
4184 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004185 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004186 return -1;
4187 }
4188 else { /* Not using NEWOBJ. */
4189 if (save(self, callable, 0) < 0 ||
4190 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004191 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004192 return -1;
4193 }
4194
4195 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4196 the caller do not want to memoize the object. Not particularly useful,
4197 but that is to mimic the behavior save_reduce() in pickle.py when
4198 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004199 if (obj != NULL) {
4200 /* If the object is already in the memo, this means it is
4201 recursive. In this case, throw away everything we put on the
4202 stack, and fetch the object back from the memo. */
4203 if (PyMemoTable_Get(self->memo, obj)) {
4204 const char pop_op = POP;
4205
4206 if (_Pickler_Write(self, &pop_op, 1) < 0)
4207 return -1;
4208 if (memo_get(self, obj) < 0)
4209 return -1;
4210
4211 return 0;
4212 }
4213 else if (memo_put(self, obj) < 0)
4214 return -1;
4215 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004216
4217 if (listitems && batch_list(self, listitems) < 0)
4218 return -1;
4219
4220 if (dictitems && batch_dict(self, dictitems) < 0)
4221 return -1;
4222
4223 if (state) {
Pierre Glaser65d98d02019-05-08 21:40:25 +02004224 if (state_setter == NULL) {
4225 if (save(self, state, 0) < 0 ||
4226 _Pickler_Write(self, &build_op, 1) < 0)
4227 return -1;
4228 }
4229 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004230
Pierre Glaser65d98d02019-05-08 21:40:25 +02004231 /* If a state_setter is specified, call it instead of load_build to
4232 * update obj's with its previous state.
4233 * The first 4 save/write instructions push state_setter and its
4234 * tuple of expected arguments (obj, state) onto the stack. The
4235 * REDUCE opcode triggers the state_setter(obj, state) function
4236 * call. Finally, because state-updating routines only do in-place
4237 * modification, the whole operation has to be stack-transparent.
4238 * Thus, we finally pop the call's output from the stack.*/
4239
4240 const char tupletwo_op = TUPLE2;
4241 const char pop_op = POP;
4242 if (save(self, state_setter, 0) < 0 ||
4243 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4244 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4245 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4246 _Pickler_Write(self, &pop_op, 1) < 0)
4247 return -1;
4248 }
4249 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004250 return 0;
4251}
4252
4253static int
4254save(PicklerObject *self, PyObject *obj, int pers_save)
4255{
4256 PyTypeObject *type;
4257 PyObject *reduce_func = NULL;
4258 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004259 int status = 0;
4260
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004261 if (_Pickler_OpcodeBoundary(self) < 0)
4262 return -1;
4263
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004264 /* The extra pers_save argument is necessary to avoid calling save_pers()
4265 on its returned object. */
4266 if (!pers_save && self->pers_func) {
4267 /* save_pers() returns:
4268 -1 to signal an error;
4269 0 if it did nothing successfully;
4270 1 if a persistent id was saved.
4271 */
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004272 if ((status = save_pers(self, obj)) != 0)
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004273 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004274 }
4275
4276 type = Py_TYPE(obj);
4277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004278 /* The old cPickle had an optimization that used switch-case statement
4279 dispatching on the first letter of the type name. This has was removed
4280 since benchmarks shown that this optimization was actually slowing
4281 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004282
4283 /* Atom types; these aren't memoized, so don't check the memo. */
4284
4285 if (obj == Py_None) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004286 return save_none(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004287 }
4288 else if (obj == Py_False || obj == Py_True) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004289 return save_bool(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004290 }
4291 else if (type == &PyLong_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004292 return save_long(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004293 }
4294 else if (type == &PyFloat_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004295 return save_float(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004296 }
4297
4298 /* Check the memo to see if it has the object. If so, generate
4299 a GET (or BINGET) opcode, instead of pickling the object
4300 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004301 if (PyMemoTable_Get(self->memo, obj)) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004302 return memo_get(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004303 }
4304
4305 if (type == &PyBytes_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004306 return save_bytes(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004307 }
4308 else if (type == &PyUnicode_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004309 return save_unicode(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004310 }
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004311
4312 /* We're only calling Py_EnterRecursiveCall here so that atomic
4313 types above are pickled faster. */
4314 if (Py_EnterRecursiveCall(" while pickling an object")) {
4315 return -1;
4316 }
4317
4318 if (type == &PyDict_Type) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004319 status = save_dict(self, obj);
4320 goto done;
4321 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004322 else if (type == &PySet_Type) {
4323 status = save_set(self, obj);
4324 goto done;
4325 }
4326 else if (type == &PyFrozenSet_Type) {
4327 status = save_frozenset(self, obj);
4328 goto done;
4329 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004330 else if (type == &PyList_Type) {
4331 status = save_list(self, obj);
4332 goto done;
4333 }
4334 else if (type == &PyTuple_Type) {
4335 status = save_tuple(self, obj);
4336 goto done;
4337 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02004338 else if (type == &PyByteArray_Type) {
4339 status = save_bytearray(self, obj);
4340 goto done;
4341 }
4342 else if (type == &PyPickleBuffer_Type) {
4343 status = save_picklebuffer(self, obj);
4344 goto done;
4345 }
Pierre Glaser289f1f82019-05-08 23:08:25 +02004346
4347 /* Now, check reducer_override. If it returns NotImplemented,
4348 * fallback to save_type or save_global, and then perhaps to the
4349 * regular reduction mechanism.
4350 */
4351 if (self->reducer_override != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01004352 reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004353 if (reduce_value == NULL) {
4354 goto error;
4355 }
4356 if (reduce_value != Py_NotImplemented) {
4357 goto reduce;
4358 }
4359 Py_DECREF(reduce_value);
4360 reduce_value = NULL;
4361 }
4362
4363 if (type == &PyType_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08004364 status = save_type(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004365 goto done;
4366 }
4367 else if (type == &PyFunction_Type) {
4368 status = save_global(self, obj, NULL);
Alexandre Vassalottifc912852013-11-24 03:07:35 -08004369 goto done;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004370 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004371
4372 /* XXX: This part needs some unit tests. */
4373
4374 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004375 * self.dispatch_table, copyreg.dispatch_table, the object's
4376 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004377 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004378 if (self->dispatch_table == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004379 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08004380 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4381 (PyObject *)type);
4382 if (reduce_func == NULL) {
4383 if (PyErr_Occurred()) {
4384 goto error;
4385 }
4386 } else {
4387 /* PyDict_GetItemWithError() returns a borrowed reference.
4388 Increase the reference count to be consistent with
4389 PyObject_GetItem and _PyObject_GetAttrId used below. */
4390 Py_INCREF(reduce_func);
4391 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004392 } else {
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08004393 reduce_func = PyObject_GetItem(self->dispatch_table,
4394 (PyObject *)type);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004395 if (reduce_func == NULL) {
4396 if (PyErr_ExceptionMatches(PyExc_KeyError))
4397 PyErr_Clear();
4398 else
4399 goto error;
4400 }
4401 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004402 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004403 Py_INCREF(obj);
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08004404 reduce_value = _Pickle_FastCall(reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004405 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02004406 else if (PyType_IsSubtype(type, &PyType_Type)) {
4407 status = save_global(self, obj, NULL);
4408 goto done;
4409 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004410 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004411 _Py_IDENTIFIER(__reduce__);
4412 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004413
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004414 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4415 automatically defined as __reduce__. While this is convenient, this
4416 make it impossible to know which method was actually called. Of
4417 course, this is not a big deal. But still, it would be nice to let
4418 the user know which method was called when something go
4419 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4420 don't actually have to check for a __reduce__ method. */
4421
4422 /* Check for a __reduce_ex__ method. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004423 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4424 goto error;
4425 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004426 if (reduce_func != NULL) {
4427 PyObject *proto;
4428 proto = PyLong_FromLong(self->proto);
4429 if (proto != NULL) {
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08004430 reduce_value = _Pickle_FastCall(reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004431 }
4432 }
4433 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004434 /* Check for a __reduce__ method. */
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03004435 if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4436 goto error;
4437 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004438 if (reduce_func != NULL) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02004439 reduce_value = PyObject_CallNoArgs(reduce_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004440 }
4441 else {
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03004442 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004443 PyErr_Format(st->PicklingError,
4444 "can't pickle '%.200s' object: %R",
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004445 type->tp_name, obj);
4446 goto error;
4447 }
4448 }
4449 }
4450
4451 if (reduce_value == NULL)
4452 goto error;
4453
Pierre Glaser289f1f82019-05-08 23:08:25 +02004454 reduce:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004455 if (PyUnicode_Check(reduce_value)) {
4456 status = save_global(self, obj, reduce_value);
4457 goto done;
4458 }
4459
4460 if (!PyTuple_Check(reduce_value)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004461 PickleState *st = _Pickle_GetGlobalState();
4462 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004463 "__reduce__ must return a string or tuple");
4464 goto error;
4465 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004466
4467 status = save_reduce(self, reduce_value, obj);
4468
4469 if (0) {
4470 error:
4471 status = -1;
4472 }
4473 done:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004474
Alexandre Vassalottidff18342008-07-13 18:48:30 +00004475 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004476 Py_XDECREF(reduce_func);
4477 Py_XDECREF(reduce_value);
4478
4479 return status;
4480}
4481
4482static int
4483dump(PicklerObject *self, PyObject *obj)
4484{
4485 const char stop_op = STOP;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004486 int status = -1;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004487 PyObject *tmp;
4488 _Py_IDENTIFIER(reducer_override);
4489
4490 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4491 &tmp) < 0) {
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004492 goto error;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004493 }
4494 /* Cache the reducer_override method, if it exists. */
4495 if (tmp != NULL) {
4496 Py_XSETREF(self->reducer_override, tmp);
4497 }
4498 else {
4499 Py_CLEAR(self->reducer_override);
4500 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004501
4502 if (self->proto >= 2) {
4503 char header[2];
4504
4505 header[0] = PROTO;
4506 assert(self->proto >= 0 && self->proto < 256);
4507 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004508 if (_Pickler_Write(self, header, 2) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004509 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004510 if (self->proto >= 4)
4511 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004512 }
4513
4514 if (save(self, obj, 0) < 0 ||
Serhiy Storchakac8695292018-04-04 00:11:27 +03004515 _Pickler_Write(self, &stop_op, 1) < 0 ||
4516 _Pickler_CommitFrame(self) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004517 goto error;
4518
4519 // Success
4520 status = 0;
4521
4522 error:
Serhiy Storchakac8695292018-04-04 00:11:27 +03004523 self->framing = 0;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004524
4525 /* Break the reference cycle we generated at the beginning this function
4526 * call when setting the reducer_override attribute of the Pickler instance
4527 * to a bound method of the same instance. This is important as the Pickler
4528 * instance holds a reference to each object it has pickled (through its
4529 * memo): thus, these objects wont be garbage-collected as long as the
4530 * Pickler itself is not collected. */
4531 Py_CLEAR(self->reducer_override);
4532 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004533}
4534
Larry Hastings61272b72014-01-07 12:41:53 -08004535/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004536
4537_pickle.Pickler.clear_memo
4538
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004539Clears the pickler's "memo".
4540
4541The memo is the data structure that remembers which objects the
4542pickler has already seen, so that shared or recursive objects are
4543pickled by reference and not by value. This method is useful when
4544re-using picklers.
Larry Hastings61272b72014-01-07 12:41:53 -08004545[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004546
Larry Hastings3cceb382014-01-04 11:09:09 -08004547static PyObject *
4548_pickle_Pickler_clear_memo_impl(PicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004549/*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004550{
4551 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004552 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004553
4554 Py_RETURN_NONE;
4555}
4556
Larry Hastings61272b72014-01-07 12:41:53 -08004557/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004558
4559_pickle.Pickler.dump
4560
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004561 obj: object
4562 /
4563
4564Write a pickled representation of the given object to the open file.
Larry Hastings61272b72014-01-07 12:41:53 -08004565[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004566
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004567static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004568_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
Larry Hastings581ee362014-01-28 05:00:08 -08004569/*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004570{
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004571 /* Check whether the Pickler was initialized correctly (issue3664).
4572 Developers often forget to call __init__() in their subclasses, which
4573 would trigger a segfault without this check. */
4574 if (self->write == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004575 PickleState *st = _Pickle_GetGlobalState();
4576 PyErr_Format(st->PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004577 "Pickler.__init__() was not called by %s.__init__()",
4578 Py_TYPE(self)->tp_name);
4579 return NULL;
4580 }
4581
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004582 if (_Pickler_ClearBuffer(self) < 0)
4583 return NULL;
4584
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004585 if (dump(self, obj) < 0)
4586 return NULL;
4587
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004588 if (_Pickler_FlushToFile(self) < 0)
4589 return NULL;
4590
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004591 Py_RETURN_NONE;
4592}
4593
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004594/*[clinic input]
4595
4596_pickle.Pickler.__sizeof__ -> Py_ssize_t
4597
4598Returns size in memory, in bytes.
4599[clinic start generated code]*/
4600
4601static Py_ssize_t
4602_pickle_Pickler___sizeof___impl(PicklerObject *self)
4603/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4604{
4605 Py_ssize_t res, s;
4606
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02004607 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004608 if (self->memo != NULL) {
4609 res += sizeof(PyMemoTable);
4610 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4611 }
4612 if (self->output_buffer != NULL) {
4613 s = _PySys_GetSizeOf(self->output_buffer);
4614 if (s == -1)
4615 return -1;
4616 res += s;
4617 }
4618 return res;
4619}
4620
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004621static struct PyMethodDef Pickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004622 _PICKLE_PICKLER_DUMP_METHODDEF
4623 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004624 _PICKLE_PICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625 {NULL, NULL} /* sentinel */
4626};
4627
4628static void
4629Pickler_dealloc(PicklerObject *self)
4630{
4631 PyObject_GC_UnTrack(self);
4632
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004633 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004634 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004636 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004637 Py_XDECREF(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004638 Py_XDECREF(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004639 Py_XDECREF(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004640
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004641 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004642
4643 Py_TYPE(self)->tp_free((PyObject *)self);
4644}
4645
4646static int
4647Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4648{
4649 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004651 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004652 Py_VISIT(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004653 Py_VISIT(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004654 Py_VISIT(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004655 return 0;
4656}
4657
/* tp_clear for Pickler: drop every object reference held in the struct and
 * destroy the memo table.  __init__() also calls this to reset a pickler
 * that is being initialized a second time.  Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->output_buffer);
    Py_CLEAR(self->write);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->dispatch_table);
    Py_CLEAR(self->fast_memo);
    Py_CLEAR(self->reducer_override);
    Py_CLEAR(self->buffer_callback);

    if (self->memo != NULL) {
        /* Detach the table from the object before destroying it, so the
           pickler never points at a table that is being torn down. */
        PyMemoTable *memo = self->memo;
        self->memo = NULL;
        PyMemoTable_Del(memo);
    }
    return 0;
}
4676
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004677
Larry Hastings61272b72014-01-07 12:41:53 -08004678/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004679
4680_pickle.Pickler.__init__
4681
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004682 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004683 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004684 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004685 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004686
4687This takes a binary file for writing a pickle data stream.
4688
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004689The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00004690protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4691protocol is 4. It was introduced in Python 3.4, and is incompatible
4692with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004693
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004694Specifying a negative protocol version selects the highest protocol
4695version supported. The higher the protocol used, the more recent the
4696version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004697
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004698The *file* argument must have a write() method that accepts a single
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004699bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00004700writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004701this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004702
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004703If *fix_imports* is True and protocol is less than 3, pickle will try
4704to map the new Python 3 names to the old module names used in Python
47052, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02004706
4707If *buffer_callback* is None (the default), buffer views are
4708serialized into *file* as part of the pickle stream.
4709
4710If *buffer_callback* is not None, then it can be called any number
4711of times with a buffer view. If the callback returns a false value
4712(such as None), the given buffer is out-of-band; otherwise the
4713buffer is serialized in-band, i.e. inside the pickle stream.
4714
4715It is an error if *buffer_callback* is not None and *protocol*
4716is None or smaller than 5.
4717
Larry Hastings61272b72014-01-07 12:41:53 -08004718[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004719
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004720static int
Larry Hastings89964c42015-04-14 18:07:59 -04004721_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02004722 PyObject *protocol, int fix_imports,
4723 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00004724/*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004725{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004726 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004727 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729 /* In case of multiple __init__() calls, clear previous content. */
4730 if (self->write != NULL)
4731 (void)Pickler_clear(self);
4732
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004733 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004734 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004735
4736 if (_Pickler_SetOutputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004737 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004738
Antoine Pitrou91f43802019-05-26 17:10:09 +02004739 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4740 return -1;
4741
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004742 /* memo and output_buffer may have already been created in _Pickler_New */
4743 if (self->memo == NULL) {
4744 self->memo = PyMemoTable_New();
4745 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004746 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004747 }
4748 self->output_len = 0;
4749 if (self->output_buffer == NULL) {
4750 self->max_output_len = WRITE_BUF_SIZE;
4751 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4752 self->max_output_len);
4753 if (self->output_buffer == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004754 return -1;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004755 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004756
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004757 self->fast = 0;
4758 self->fast_nesting = 0;
4759 self->fast_memo = NULL;
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004760
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004761 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4762 &self->pers_func, &self->pers_func_self) < 0)
4763 {
4764 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004766
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004767 if (_PyObject_LookupAttrId((PyObject *)self,
4768 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4769 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004770 }
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004771
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004772 return 0;
4773}
4774
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004775
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004776/* Define a proxy object for the Pickler's internal memo object. This is to
4777 * avoid breaking code like:
4778 * pickler.memo.clear()
4779 * and
4780 * pickler.memo = saved_memo
4781 * Is this a good idea? Not really, but we don't want to break code that uses
4782 * it. Note that we don't implement the entire mapping API here. This is
4783 * intentional, as these should be treated as black-box implementation details.
4784 */
4785
Larry Hastings61272b72014-01-07 12:41:53 -08004786/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004787_pickle.PicklerMemoProxy.clear
4788
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004789Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08004790[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004791
Larry Hastings3cceb382014-01-04 11:09:09 -08004792static PyObject *
4793_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004794/*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004795{
4796 if (self->pickler->memo)
4797 PyMemoTable_Clear(self->pickler->memo);
4798 Py_RETURN_NONE;
4799}
4800
Larry Hastings61272b72014-01-07 12:41:53 -08004801/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004802_pickle.PicklerMemoProxy.copy
4803
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004804Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08004805[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004806
Larry Hastings3cceb382014-01-04 11:09:09 -08004807static PyObject *
4808_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004809/*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004810{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004811 PyMemoTable *memo;
4812 PyObject *new_memo = PyDict_New();
4813 if (new_memo == NULL)
4814 return NULL;
4815
4816 memo = self->pickler->memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07004817 for (size_t i = 0; i < memo->mt_allocated; ++i) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004818 PyMemoEntry entry = memo->mt_table[i];
4819 if (entry.me_key != NULL) {
4820 int status;
4821 PyObject *key, *value;
4822
4823 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004824 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004825
4826 if (key == NULL || value == NULL) {
4827 Py_XDECREF(key);
4828 Py_XDECREF(value);
4829 goto error;
4830 }
4831 status = PyDict_SetItem(new_memo, key, value);
4832 Py_DECREF(key);
4833 Py_DECREF(value);
4834 if (status < 0)
4835 goto error;
4836 }
4837 }
4838 return new_memo;
4839
4840 error:
4841 Py_XDECREF(new_memo);
4842 return NULL;
4843}
4844
Larry Hastings61272b72014-01-07 12:41:53 -08004845/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004846_pickle.PicklerMemoProxy.__reduce__
4847
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004848Implement pickle support.
Larry Hastings61272b72014-01-07 12:41:53 -08004849[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004850
Larry Hastings3cceb382014-01-04 11:09:09 -08004851static PyObject *
4852_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004853/*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004854{
4855 PyObject *reduce_value, *dict_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08004856 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004857 if (contents == NULL)
4858 return NULL;
4859
4860 reduce_value = PyTuple_New(2);
4861 if (reduce_value == NULL) {
4862 Py_DECREF(contents);
4863 return NULL;
4864 }
4865 dict_args = PyTuple_New(1);
4866 if (dict_args == NULL) {
4867 Py_DECREF(contents);
4868 Py_DECREF(reduce_value);
4869 return NULL;
4870 }
4871 PyTuple_SET_ITEM(dict_args, 0, contents);
4872 Py_INCREF((PyObject *)&PyDict_Type);
4873 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4874 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4875 return reduce_value;
4876}
4877
/* Method table for PicklerMemoProxy: clear(), copy() and __reduce__().
 * The entries come from the Argument Clinic generated METHODDEF macros. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
4884
/* tp_dealloc for PicklerMemoProxy: release the reference to the wrapped
 * pickler and free the proxy itself. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    /* Untrack before touching the fields so the GC never sees a
       half-destroyed object. */
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4892
/* tp_traverse for PicklerMemoProxy: the wrapped pickler is the only object
 * reference the proxy holds. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4900
/* tp_clear for PicklerMemoProxy: drop the reference to the wrapped pickler.
 * Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4907
/* Type object for the proxy handed out by the Pickler "memo" getter (see
 * PicklerMemoProxy_New).  Instances take part in cyclic GC, are unhashable,
 * and expose only the methods listed in picklerproxy_methods. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_vectorcall_offset */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_as_async */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4938
4939static PyObject *
4940PicklerMemoProxy_New(PicklerObject *pickler)
4941{
4942 PicklerMemoProxyObject *self;
4943
4944 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4945 if (self == NULL)
4946 return NULL;
4947 Py_INCREF(pickler);
4948 self->pickler = pickler;
4949 PyObject_GC_Track(self);
4950 return (PyObject *)self;
4951}
4952
4953/*****************************************************************************/
4954
/* Getter for Pickler.memo: every access returns a freshly created
 * PicklerMemoProxy wrapping this pickler (NULL if that fails). */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4960
4961static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004962Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004963{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004964 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004965
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004966 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004967 PyErr_SetString(PyExc_TypeError,
4968 "attribute deletion is not supported");
4969 return -1;
4970 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004971
Andy Lesterdffe4c02020-03-04 07:15:20 -06004972 if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004973 PicklerObject *pickler =
4974 ((PicklerMemoProxyObject *)obj)->pickler;
4975
4976 new_memo = PyMemoTable_Copy(pickler->memo);
4977 if (new_memo == NULL)
4978 return -1;
4979 }
4980 else if (PyDict_Check(obj)) {
4981 Py_ssize_t i = 0;
4982 PyObject *key, *value;
4983
4984 new_memo = PyMemoTable_New();
4985 if (new_memo == NULL)
4986 return -1;
4987
4988 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004989 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004990 PyObject *memo_obj;
4991
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004992 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993 PyErr_SetString(PyExc_TypeError,
4994 "'memo' values must be 2-item tuples");
4995 goto error;
4996 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004997 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004998 if (memo_id == -1 && PyErr_Occurred())
4999 goto error;
5000 memo_obj = PyTuple_GET_ITEM(value, 1);
5001 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5002 goto error;
5003 }
5004 }
5005 else {
5006 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02005007 "'memo' attribute must be a PicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005008 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005009 return -1;
5010 }
5011
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005012 PyMemoTable_Del(self->memo);
5013 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005014
5015 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005016
5017 error:
5018 if (new_memo)
5019 PyMemoTable_Del(new_memo);
5020 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005021}
5022
5023static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005024Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005025{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005026 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005027 PyErr_SetString(PyExc_AttributeError, "persistent_id");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005028 return NULL;
5029 }
5030 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005031}
5032
5033static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005034Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005035{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005036 if (value == NULL) {
5037 PyErr_SetString(PyExc_TypeError,
5038 "attribute deletion is not supported");
5039 return -1;
5040 }
5041 if (!PyCallable_Check(value)) {
5042 PyErr_SetString(PyExc_TypeError,
5043 "persistent_id must be a callable taking one argument");
5044 return -1;
5045 }
5046
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005047 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005048 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03005049 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005050
5051 return 0;
5052}
5053
/* Struct fields exposed directly as Pickler attributes: `bin` and `fast`
 * as C ints, and `dispatch_table` as an object (T_OBJECT_EX: reading it
 * while the field is NULL raises AttributeError). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}      /* sentinel */
};
5060
/* Computed Pickler attributes: `memo` (exposed through a proxy object) and
 * `persistent_id`, each with a getter and a setter. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}      /* sentinel */
};
5068
/* Type object for _pickle.Pickler.  The type is subclassable and takes part
 * in cyclic GC; its docstring is the Argument Clinic generated __init__
 * docstring. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_vectorcall_offset*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_as_async*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    _pickle_Pickler___init__,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5111
/* Temporary helper for calling self.find_class().

   Looks up "find_class" on the unpickler by name and calls it with
   (module_name, global_name), so a find_class() overridden in a subclass is
   the one that runs.  Returns whatever that call returns.

   XXX: It would be nice to be able to avoid Python function call overhead, by
   using directly the C version of find_class(), when find_class() is not
   overridden by a subclass. Although, this could become rather hackish. A
   simpler optimization would be to call the C function when self is not a
   subclass instance. */
static PyObject *
find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
{
    _Py_IDENTIFIER(find_class);

    return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
                                         module_name, global_name, NULL);
}
5127
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005128static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005129marker(UnpicklerObject *self)
5130{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005131 Py_ssize_t mark;
5132
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005133 if (self->num_marks < 1) {
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005134 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005135 PyErr_SetString(st->UnpicklingError, "could not find MARK");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005136 return -1;
5137 }
5138
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005139 mark = self->marks[--self->num_marks];
5140 self->stack->mark_set = self->num_marks != 0;
5141 self->stack->fence = self->num_marks ?
5142 self->marks[self->num_marks - 1] : 0;
5143 return mark;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005144}
5145
/* Opcode handler: put None on the unpickler's data stack.  Returns 0 on
 * success. */
static int
load_none(UnpicklerObject *self)
{
    /* NOTE(review): PDATA_APPEND is a macro defined elsewhere in this file;
       its last argument (-1) looks like the value it makes this function
       return on failure -- confirm against the macro definition. */
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5152
5153static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005154load_int(UnpicklerObject *self)
5155{
5156 PyObject *value;
5157 char *endptr, *s;
5158 Py_ssize_t len;
5159 long x;
5160
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005161 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005162 return -1;
5163 if (len < 2)
5164 return bad_readline();
5165
5166 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005167 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005168 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005169 x = strtol(s, &endptr, 0);
5170
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005171 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005172 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03005173 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005174 errno = 0;
5175 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005176 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005177 if (value == NULL) {
5178 PyErr_SetString(PyExc_ValueError,
5179 "could not convert string to int");
5180 return -1;
5181 }
5182 }
5183 else {
5184 if (len == 3 && (x == 0 || x == 1)) {
5185 if ((value = PyBool_FromLong(x)) == NULL)
5186 return -1;
5187 }
5188 else {
5189 if ((value = PyLong_FromLong(x)) == NULL)
5190 return -1;
5191 }
5192 }
5193
5194 PDATA_PUSH(self->stack, value, -1);
5195 return 0;
5196}
5197
/* Opcode handler: put the given bool singleton on the unpickler's data
 * stack.  `boolean` must be Py_True or Py_False (checked by an assert in
 * debug builds).  Returns 0 on success. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    /* NOTE(review): the trailing -1 looks like the value PDATA_APPEND makes
       this function return on failure -- confirm against the macro. */
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5205
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005206/* s contains x bytes of an unsigned little-endian integer. Return its value
5207 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5208 */
5209static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005210calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005211{
5212 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005213 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005214 size_t x = 0;
5215
Serhiy Storchakae0606192015-09-29 22:10:07 +03005216 if (nbytes > (int)sizeof(size_t)) {
5217 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5218 * have 64-bit size that can't be represented on 32-bit platform.
5219 */
5220 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5221 if (s[i])
5222 return -1;
5223 }
5224 nbytes = (int)sizeof(size_t);
5225 }
5226 for (i = 0; i < nbytes; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005227 x |= (size_t) s[i] << (8 * i);
5228 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005229
5230 if (x > PY_SSIZE_T_MAX)
5231 return -1;
5232 else
5233 return (Py_ssize_t) x;
5234}
5235
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when nbytes is 1 or 2 the value is unsigned, but
 * when nbytes is 4 it is a signed (two's complement) 32-bit value.  This is
 * a historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long: shifting a byte with its
 * top bit set into bit 31 of a *signed* 32-bit long is undefined behavior.
 * The sign of a 4-byte value is then applied with plain arithmetic, which
 * gives the same result whether long is 32 or 64 bits wide.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed: with bit 31 set the value is x - 2**32.  Computing it as
     * (low 31 bits) - 2**31 keeps every intermediate within the range of
     * a 32-bit long.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)x;
}
5262
5263static int
5264load_binintx(UnpicklerObject *self, char *s, int size)
5265{
5266 PyObject *value;
5267 long x;
5268
5269 x = calc_binint(s, size);
5270
5271 if ((value = PyLong_FromLong(x)) == NULL)
5272 return -1;
5273
5274 PDATA_PUSH(self->stack, value, -1);
5275 return 0;
5276}
5277
5278static int
5279load_binint(UnpicklerObject *self)
5280{
5281 char *s;
5282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005283 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005284 return -1;
5285
5286 return load_binintx(self, s, 4);
5287}
5288
5289static int
5290load_binint1(UnpicklerObject *self)
5291{
5292 char *s;
5293
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005294 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005295 return -1;
5296
5297 return load_binintx(self, s, 1);
5298}
5299
5300static int
5301load_binint2(UnpicklerObject *self)
5302{
5303 char *s;
5304
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005305 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005306 return -1;
5307
5308 return load_binintx(self, s, 2);
5309}
5310
5311static int
5312load_long(UnpicklerObject *self)
5313{
5314 PyObject *value;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005315 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005316 Py_ssize_t len;
5317
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005318 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005319 return -1;
5320 if (len < 2)
5321 return bad_readline();
5322
Mark Dickinson8dd05142009-01-20 20:43:58 +00005323 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5324 the 'L' before calling PyLong_FromString. In order to maintain
5325 compatibility with Python 3.0.0, we don't actually *require*
5326 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005327 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00005328 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00005329 /* XXX: Should the base argument explicitly set to 10? */
5330 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00005331 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005332 return -1;
5333
5334 PDATA_PUSH(self->stack, value, -1);
5335 return 0;
5336}
5337
5338/* 'size' bytes contain the # of bytes of little-endian 256's-complement
5339 * data following.
5340 */
5341static int
5342load_counted_long(UnpicklerObject *self, int size)
5343{
5344 PyObject *value;
5345 char *nbytes;
5346 char *pdata;
5347
5348 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005349 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005350 return -1;
5351
5352 size = calc_binint(nbytes, size);
5353 if (size < 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005354 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005355 /* Corrupt or hostile pickle -- we never write one like this */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005356 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005357 "LONG pickle has negative byte count");
5358 return -1;
5359 }
5360
5361 if (size == 0)
5362 value = PyLong_FromLong(0L);
5363 else {
5364 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005365 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005366 return -1;
5367 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5368 1 /* little endian */ , 1 /* signed */ );
5369 }
5370 if (value == NULL)
5371 return -1;
5372 PDATA_PUSH(self->stack, value, -1);
5373 return 0;
5374}
5375
5376static int
5377load_float(UnpicklerObject *self)
5378{
5379 PyObject *value;
5380 char *endptr, *s;
5381 Py_ssize_t len;
5382 double d;
5383
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005384 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005385 return -1;
5386 if (len < 2)
5387 return bad_readline();
5388
5389 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00005390 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5391 if (d == -1.0 && PyErr_Occurred())
5392 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005393 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005394 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5395 return -1;
5396 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00005397 value = PyFloat_FromDouble(d);
5398 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005399 return -1;
5400
5401 PDATA_PUSH(self->stack, value, -1);
5402 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005403}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005404
5405static int
5406load_binfloat(UnpicklerObject *self)
5407{
5408 PyObject *value;
5409 double x;
5410 char *s;
5411
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005412 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005413 return -1;
5414
5415 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5416 if (x == -1.0 && PyErr_Occurred())
5417 return -1;
5418
5419 if ((value = PyFloat_FromDouble(x)) == NULL)
5420 return -1;
5421
5422 PDATA_PUSH(self->stack, value, -1);
5423 return 0;
5424}
5425
5426static int
5427load_string(UnpicklerObject *self)
5428{
5429 PyObject *bytes;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005430 PyObject *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005431 Py_ssize_t len;
5432 char *s, *p;
5433
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005434 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005435 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005436 /* Strip the newline */
5437 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005438 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005439 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005440 p = s + 1;
5441 len -= 2;
5442 }
5443 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005444 PickleState *st = _Pickle_GetGlobalState();
5445 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005446 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005447 return -1;
5448 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005449 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005450
5451 /* Use the PyBytes API to decode the string, since that is what is used
5452 to encode, and then coerce the result to Unicode. */
5453 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454 if (bytes == NULL)
5455 return -1;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005456
5457 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5458 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5459 if (strcmp(self->encoding, "bytes") == 0) {
5460 obj = bytes;
5461 }
5462 else {
5463 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5464 Py_DECREF(bytes);
5465 if (obj == NULL) {
5466 return -1;
5467 }
5468 }
5469
5470 PDATA_PUSH(self->stack, obj, -1);
5471 return 0;
5472}
5473
5474static int
5475load_counted_binstring(UnpicklerObject *self, int nbytes)
5476{
5477 PyObject *obj;
5478 Py_ssize_t size;
5479 char *s;
5480
5481 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005482 return -1;
5483
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005484 size = calc_binsize(s, nbytes);
5485 if (size < 0) {
5486 PickleState *st = _Pickle_GetGlobalState();
5487 PyErr_Format(st->UnpicklingError,
5488 "BINSTRING exceeds system's maximum size of %zd bytes",
5489 PY_SSIZE_T_MAX);
5490 return -1;
5491 }
5492
5493 if (_Unpickler_Read(self, &s, size) < 0)
5494 return -1;
5495
5496 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5497 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5498 if (strcmp(self->encoding, "bytes") == 0) {
5499 obj = PyBytes_FromStringAndSize(s, size);
5500 }
5501 else {
5502 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5503 }
5504 if (obj == NULL) {
5505 return -1;
5506 }
5507
5508 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509 return 0;
5510}
5511
5512static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005513load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005514{
5515 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005516 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005517 char *s;
5518
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005519 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005520 return -1;
5521
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005522 size = calc_binsize(s, nbytes);
5523 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005524 PyErr_Format(PyExc_OverflowError,
5525 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005526 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005527 return -1;
5528 }
5529
Antoine Pitrou91f43802019-05-26 17:10:09 +02005530 bytes = PyBytes_FromStringAndSize(NULL, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005531 if (bytes == NULL)
5532 return -1;
Antoine Pitrou91f43802019-05-26 17:10:09 +02005533 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5534 Py_DECREF(bytes);
5535 return -1;
5536 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005537
5538 PDATA_PUSH(self->stack, bytes, -1);
5539 return 0;
5540}
5541
5542static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02005543load_counted_bytearray(UnpicklerObject *self)
5544{
5545 PyObject *bytearray;
5546 Py_ssize_t size;
5547 char *s;
5548
5549 if (_Unpickler_Read(self, &s, 8) < 0) {
5550 return -1;
5551 }
5552
5553 size = calc_binsize(s, 8);
5554 if (size < 0) {
5555 PyErr_Format(PyExc_OverflowError,
5556 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5557 PY_SSIZE_T_MAX);
5558 return -1;
5559 }
5560
5561 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5562 if (bytearray == NULL) {
5563 return -1;
5564 }
5565 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5566 Py_DECREF(bytearray);
5567 return -1;
5568 }
5569
5570 PDATA_PUSH(self->stack, bytearray, -1);
5571 return 0;
5572}
5573
5574static int
5575load_next_buffer(UnpicklerObject *self)
5576{
5577 if (self->buffers == NULL) {
5578 PickleState *st = _Pickle_GetGlobalState();
5579 PyErr_SetString(st->UnpicklingError,
5580 "pickle stream refers to out-of-band data "
5581 "but no *buffers* argument was given");
5582 return -1;
5583 }
5584 PyObject *buf = PyIter_Next(self->buffers);
5585 if (buf == NULL) {
5586 if (!PyErr_Occurred()) {
5587 PickleState *st = _Pickle_GetGlobalState();
5588 PyErr_SetString(st->UnpicklingError,
5589 "not enough out-of-band buffers");
5590 }
5591 return -1;
5592 }
5593
5594 PDATA_PUSH(self->stack, buf, -1);
5595 return 0;
5596}
5597
5598static int
5599load_readonly_buffer(UnpicklerObject *self)
5600{
5601 Py_ssize_t len = Py_SIZE(self->stack);
5602 if (len <= self->stack->fence) {
5603 return Pdata_stack_underflow(self->stack);
5604 }
5605
5606 PyObject *obj = self->stack->data[len - 1];
5607 PyObject *view = PyMemoryView_FromObject(obj);
5608 if (view == NULL) {
5609 return -1;
5610 }
5611 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5612 /* Original object is writable */
5613 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5614 self->stack->data[len - 1] = view;
5615 Py_DECREF(obj);
5616 }
5617 else {
5618 /* Original object is read-only, no need to replace it */
5619 Py_DECREF(view);
5620 }
5621 return 0;
5622}
5623
5624static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005625load_unicode(UnpicklerObject *self)
5626{
5627 PyObject *str;
5628 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005629 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005630
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005631 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005632 return -1;
5633 if (len < 1)
5634 return bad_readline();
5635
5636 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5637 if (str == NULL)
5638 return -1;
5639
5640 PDATA_PUSH(self->stack, str, -1);
5641 return 0;
5642}
5643
5644static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005645load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005646{
5647 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005648 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005649 char *s;
5650
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005651 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005652 return -1;
5653
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005654 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005655 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005656 PyErr_Format(PyExc_OverflowError,
5657 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005658 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005659 return -1;
5660 }
5661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005662 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005663 return -1;
5664
Victor Stinner485fb562010-04-13 11:07:24 +00005665 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005666 if (str == NULL)
5667 return -1;
5668
5669 PDATA_PUSH(self->stack, str, -1);
5670 return 0;
5671}
5672
5673static int
Victor Stinner21b47112016-03-14 18:09:39 +01005674load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005675{
5676 PyObject *tuple;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005677
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005678 if (Py_SIZE(self->stack) < len)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005679 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005680
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005681 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005682 if (tuple == NULL)
5683 return -1;
5684 PDATA_PUSH(self->stack, tuple, -1);
5685 return 0;
5686}
5687
5688static int
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005689load_tuple(UnpicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005690{
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005691 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005692
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005693 if ((i = marker(self)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005694 return -1;
5695
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005696 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005697}
5698
5699static int
5700load_empty_list(UnpicklerObject *self)
5701{
5702 PyObject *list;
5703
5704 if ((list = PyList_New(0)) == NULL)
5705 return -1;
5706 PDATA_PUSH(self->stack, list, -1);
5707 return 0;
5708}
5709
5710static int
5711load_empty_dict(UnpicklerObject *self)
5712{
5713 PyObject *dict;
5714
5715 if ((dict = PyDict_New()) == NULL)
5716 return -1;
5717 PDATA_PUSH(self->stack, dict, -1);
5718 return 0;
5719}
5720
5721static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005722load_empty_set(UnpicklerObject *self)
5723{
5724 PyObject *set;
5725
5726 if ((set = PySet_New(NULL)) == NULL)
5727 return -1;
5728 PDATA_PUSH(self->stack, set, -1);
5729 return 0;
5730}
5731
5732static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005733load_list(UnpicklerObject *self)
5734{
5735 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005736 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005737
5738 if ((i = marker(self)) < 0)
5739 return -1;
5740
5741 list = Pdata_poplist(self->stack, i);
5742 if (list == NULL)
5743 return -1;
5744 PDATA_PUSH(self->stack, list, -1);
5745 return 0;
5746}
5747
5748static int
5749load_dict(UnpicklerObject *self)
5750{
5751 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005752 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005753
5754 if ((i = marker(self)) < 0)
5755 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005756 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005757
5758 if ((dict = PyDict_New()) == NULL)
5759 return -1;
5760
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005761 if ((j - i) % 2 != 0) {
5762 PickleState *st = _Pickle_GetGlobalState();
5763 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
Serhiy Storchaka3ac53802015-12-07 11:32:00 +02005764 Py_DECREF(dict);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005765 return -1;
5766 }
5767
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005768 for (k = i + 1; k < j; k += 2) {
5769 key = self->stack->data[k - 1];
5770 value = self->stack->data[k];
5771 if (PyDict_SetItem(dict, key, value) < 0) {
5772 Py_DECREF(dict);
5773 return -1;
5774 }
5775 }
5776 Pdata_clear(self->stack, i);
5777 PDATA_PUSH(self->stack, dict, -1);
5778 return 0;
5779}
5780
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005781static int
5782load_frozenset(UnpicklerObject *self)
5783{
5784 PyObject *items;
5785 PyObject *frozenset;
5786 Py_ssize_t i;
5787
5788 if ((i = marker(self)) < 0)
5789 return -1;
5790
5791 items = Pdata_poptuple(self->stack, i);
5792 if (items == NULL)
5793 return -1;
5794
5795 frozenset = PyFrozenSet_New(items);
5796 Py_DECREF(items);
5797 if (frozenset == NULL)
5798 return -1;
5799
5800 PDATA_PUSH(self->stack, frozenset, -1);
5801 return 0;
5802}
5803
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005804static PyObject *
5805instantiate(PyObject *cls, PyObject *args)
5806{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005807 /* Caller must assure args are a tuple. Normally, args come from
5808 Pdata_poptuple which packs objects from the top of the stack
5809 into a newly created tuple. */
5810 assert(PyTuple_Check(args));
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005811 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5812 _Py_IDENTIFIER(__getinitargs__);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005813 _Py_IDENTIFIER(__new__);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02005814 PyObject *func;
5815 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5816 return NULL;
5817 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005818 if (func == NULL) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02005819 return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005820 }
5821 Py_DECREF(func);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005822 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005823 return PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005824}
5825
5826static int
5827load_obj(UnpicklerObject *self)
5828{
5829 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005830 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005831
5832 if ((i = marker(self)) < 0)
5833 return -1;
5834
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005835 if (Py_SIZE(self->stack) - i < 1)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005836 return Pdata_stack_underflow(self->stack);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005837
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005838 args = Pdata_poptuple(self->stack, i + 1);
5839 if (args == NULL)
5840 return -1;
5841
5842 PDATA_POP(self->stack, cls);
5843 if (cls) {
5844 obj = instantiate(cls, args);
5845 Py_DECREF(cls);
5846 }
5847 Py_DECREF(args);
5848 if (obj == NULL)
5849 return -1;
5850
5851 PDATA_PUSH(self->stack, obj, -1);
5852 return 0;
5853}
5854
5855static int
5856load_inst(UnpicklerObject *self)
5857{
5858 PyObject *cls = NULL;
5859 PyObject *args = NULL;
5860 PyObject *obj = NULL;
5861 PyObject *module_name;
5862 PyObject *class_name;
5863 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005864 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005865 char *s;
5866
5867 if ((i = marker(self)) < 0)
5868 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005869 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005870 return -1;
5871 if (len < 2)
5872 return bad_readline();
5873
5874 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5875 identifiers are permitted in Python 3.0, since the INST opcode is only
5876 supported by older protocols on Python 2.x. */
5877 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5878 if (module_name == NULL)
5879 return -1;
5880
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005881 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005882 if (len < 2) {
5883 Py_DECREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005884 return bad_readline();
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005885 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005886 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005887 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005888 cls = find_class(self, module_name, class_name);
5889 Py_DECREF(class_name);
5890 }
5891 }
5892 Py_DECREF(module_name);
5893
5894 if (cls == NULL)
5895 return -1;
5896
5897 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5898 obj = instantiate(cls, args);
5899 Py_DECREF(args);
5900 }
5901 Py_DECREF(cls);
5902
5903 if (obj == NULL)
5904 return -1;
5905
5906 PDATA_PUSH(self->stack, obj, -1);
5907 return 0;
5908}
5909
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005910static void
5911newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005912{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005913 PickleState *st = _Pickle_GetGlobalState();
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005914 PyErr_Format(st->UnpicklingError, msg,
5915 use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5916 Py_TYPE(arg)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005917}
5918
5919static int
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005920load_newobj(UnpicklerObject *self, int use_kwargs)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005921{
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005922 PyObject *cls, *args, *kwargs = NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005923 PyObject *obj;
5924
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005925 /* Stack is ... cls args [kwargs], and we want to call
5926 * cls.__new__(cls, *args, **kwargs).
5927 */
5928 if (use_kwargs) {
5929 PDATA_POP(self->stack, kwargs);
5930 if (kwargs == NULL) {
5931 return -1;
5932 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005933 }
5934 PDATA_POP(self->stack, args);
5935 if (args == NULL) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005936 Py_XDECREF(kwargs);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005937 return -1;
5938 }
5939 PDATA_POP(self->stack, cls);
5940 if (cls == NULL) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005941 Py_XDECREF(kwargs);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005942 Py_DECREF(args);
5943 return -1;
5944 }
Larry Hastings61272b72014-01-07 12:41:53 -08005945
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005946 if (!PyType_Check(cls)) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005947 newobj_unpickling_error("%s class argument must be a type, not %.200s",
5948 use_kwargs, cls);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005949 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005950 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005951 if (((PyTypeObject *)cls)->tp_new == NULL) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005952 newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5953 use_kwargs, cls);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005954 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005955 }
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005956 if (!PyTuple_Check(args)) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005957 newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5958 use_kwargs, args);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005959 goto error;
5960 }
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005961 if (use_kwargs && !PyDict_Check(kwargs)) {
5962 newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5963 use_kwargs, kwargs);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005964 goto error;
5965 }
5966
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005967 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005968 if (obj == NULL) {
5969 goto error;
5970 }
5971 Py_XDECREF(kwargs);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005972 Py_DECREF(args);
5973 Py_DECREF(cls);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005974 PDATA_PUSH(self->stack, obj, -1);
5975 return 0;
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005976
5977error:
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005978 Py_XDECREF(kwargs);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005979 Py_DECREF(args);
5980 Py_DECREF(cls);
5981 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005982}
5983
5984static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005985load_global(UnpicklerObject *self)
5986{
5987 PyObject *global = NULL;
5988 PyObject *module_name;
5989 PyObject *global_name;
5990 Py_ssize_t len;
5991 char *s;
5992
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005993 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005994 return -1;
5995 if (len < 2)
5996 return bad_readline();
5997 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5998 if (!module_name)
5999 return -1;
6000
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006001 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006002 if (len < 2) {
6003 Py_DECREF(module_name);
6004 return bad_readline();
6005 }
6006 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6007 if (global_name) {
6008 global = find_class(self, module_name, global_name);
6009 Py_DECREF(global_name);
6010 }
6011 }
6012 Py_DECREF(module_name);
6013
6014 if (global == NULL)
6015 return -1;
6016 PDATA_PUSH(self->stack, global, -1);
6017 return 0;
6018}
6019
6020static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006021load_stack_global(UnpicklerObject *self)
6022{
6023 PyObject *global;
6024 PyObject *module_name;
6025 PyObject *global_name;
6026
6027 PDATA_POP(self->stack, global_name);
6028 PDATA_POP(self->stack, module_name);
6029 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6030 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006031 PickleState *st = _Pickle_GetGlobalState();
6032 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006033 Py_XDECREF(global_name);
6034 Py_XDECREF(module_name);
6035 return -1;
6036 }
6037 global = find_class(self, module_name, global_name);
6038 Py_DECREF(global_name);
6039 Py_DECREF(module_name);
6040 if (global == NULL)
6041 return -1;
6042 PDATA_PUSH(self->stack, global, -1);
6043 return 0;
6044}
6045
6046static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006047load_persid(UnpicklerObject *self)
6048{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006049 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006050 Py_ssize_t len;
6051 char *s;
6052
6053 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006054 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006055 return -1;
Alexandre Vassalotti896414f2013-11-30 13:52:35 -08006056 if (len < 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006057 return bad_readline();
6058
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006059 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6060 if (pid == NULL) {
6061 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6062 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6063 "persistent IDs in protocol 0 must be "
6064 "ASCII strings");
6065 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006066 return -1;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006067 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006068
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006069 obj = call_method(self->pers_func, self->pers_func_self, pid);
6070 Py_DECREF(pid);
6071 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006072 return -1;
6073
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006074 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006075 return 0;
6076 }
6077 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006078 PickleState *st = _Pickle_GetGlobalState();
6079 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006080 "A load persistent id instruction was encountered,\n"
6081 "but no persistent_load function was specified.");
6082 return -1;
6083 }
6084}
6085
6086static int
6087load_binpersid(UnpicklerObject *self)
6088{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006089 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006090
6091 if (self->pers_func) {
6092 PDATA_POP(self->stack, pid);
6093 if (pid == NULL)
6094 return -1;
6095
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006096 obj = call_method(self->pers_func, self->pers_func_self, pid);
6097 Py_DECREF(pid);
6098 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006099 return -1;
6100
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006101 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006102 return 0;
6103 }
6104 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006105 PickleState *st = _Pickle_GetGlobalState();
6106 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006107 "A load persistent id instruction was encountered,\n"
6108 "but no persistent_load function was specified.");
6109 return -1;
6110 }
6111}
6112
6113static int
6114load_pop(UnpicklerObject *self)
6115{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006116 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006117
6118 /* Note that we split the (pickle.py) stack into two stacks,
6119 * an object stack and a mark stack. We have to be clever and
6120 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00006121 * mark stack first, and only signalling a stack underflow if
6122 * the object stack is empty and the mark stack doesn't match
6123 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006124 */
Collin Winter8ca69de2009-05-26 16:53:41 +00006125 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006126 self->num_marks--;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006127 self->stack->mark_set = self->num_marks != 0;
6128 self->stack->fence = self->num_marks ?
6129 self->marks[self->num_marks - 1] : 0;
6130 } else if (len <= self->stack->fence)
6131 return Pdata_stack_underflow(self->stack);
6132 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006133 len--;
6134 Py_DECREF(self->stack->data[len]);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006135 Py_SET_SIZE(self->stack, len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006136 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006137 return 0;
6138}
6139
6140static int
6141load_pop_mark(UnpicklerObject *self)
6142{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006143 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006144
6145 if ((i = marker(self)) < 0)
6146 return -1;
6147
6148 Pdata_clear(self->stack, i);
6149
6150 return 0;
6151}
6152
6153static int
6154load_dup(UnpicklerObject *self)
6155{
6156 PyObject *last;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006157 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006158
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006159 if (len <= self->stack->fence)
6160 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006161 last = self->stack->data[len - 1];
6162 PDATA_APPEND(self->stack, last, -1);
6163 return 0;
6164}
6165
6166static int
6167load_get(UnpicklerObject *self)
6168{
6169 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006170 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006171 Py_ssize_t len;
6172 char *s;
6173
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006174 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006175 return -1;
6176 if (len < 2)
6177 return bad_readline();
6178
6179 key = PyLong_FromString(s, NULL, 10);
6180 if (key == NULL)
6181 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006182 idx = PyLong_AsSsize_t(key);
6183 if (idx == -1 && PyErr_Occurred()) {
6184 Py_DECREF(key);
6185 return -1;
6186 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006187
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006188 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006189 if (value == NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006190 if (!PyErr_Occurred()) {
6191 PickleState *st = _Pickle_GetGlobalState();
6192 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6193 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006194 Py_DECREF(key);
6195 return -1;
6196 }
6197 Py_DECREF(key);
6198
6199 PDATA_APPEND(self->stack, value, -1);
6200 return 0;
6201}
6202
6203static int
6204load_binget(UnpicklerObject *self)
6205{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006206 PyObject *value;
6207 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006208 char *s;
6209
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006210 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006211 return -1;
6212
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006213 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006214
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006215 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006216 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006217 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006218 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006219 PickleState *st = _Pickle_GetGlobalState();
6220 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006221 Py_DECREF(key);
6222 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006223 return -1;
6224 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006225
6226 PDATA_APPEND(self->stack, value, -1);
6227 return 0;
6228}
6229
6230static int
6231load_long_binget(UnpicklerObject *self)
6232{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006233 PyObject *value;
6234 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006235 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006236
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006237 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006238 return -1;
6239
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006240 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006241
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006242 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006243 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006244 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006245 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006246 PickleState *st = _Pickle_GetGlobalState();
6247 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006248 Py_DECREF(key);
6249 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006250 return -1;
6251 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006252
6253 PDATA_APPEND(self->stack, value, -1);
6254 return 0;
6255}
6256
6257/* Push an object from the extension registry (EXT[124]). nbytes is
6258 * the number of bytes following the opcode, holding the index (code) value.
6259 */
6260static int
6261load_extension(UnpicklerObject *self, int nbytes)
6262{
6263 char *codebytes; /* the nbytes bytes after the opcode */
6264 long code; /* calc_binint returns long */
6265 PyObject *py_code; /* code as a Python int */
6266 PyObject *obj; /* the object to push */
6267 PyObject *pair; /* (module_name, class_name) */
6268 PyObject *module_name, *class_name;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006269 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006270
6271 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006272 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006273 return -1;
6274 code = calc_binint(codebytes, nbytes);
6275 if (code <= 0) { /* note that 0 is forbidden */
6276 /* Corrupt or hostile pickle. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006277 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006278 return -1;
6279 }
6280
6281 /* Look for the code in the cache. */
6282 py_code = PyLong_FromLong(code);
6283 if (py_code == NULL)
6284 return -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006285 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006286 if (obj != NULL) {
6287 /* Bingo. */
6288 Py_DECREF(py_code);
6289 PDATA_APPEND(self->stack, obj, -1);
6290 return 0;
6291 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006292 if (PyErr_Occurred()) {
6293 Py_DECREF(py_code);
6294 return -1;
6295 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006296
6297 /* Look up the (module_name, class_name) pair. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006298 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006299 if (pair == NULL) {
6300 Py_DECREF(py_code);
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006301 if (!PyErr_Occurred()) {
6302 PyErr_Format(PyExc_ValueError, "unregistered extension "
6303 "code %ld", code);
6304 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006305 return -1;
6306 }
6307 /* Since the extension registry is manipulable via Python code,
6308 * confirm that pair is really a 2-tuple of strings.
6309 */
Victor Stinnerb37672d2018-11-22 03:37:50 +01006310 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6311 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006312 }
Victor Stinnerb37672d2018-11-22 03:37:50 +01006313
6314 module_name = PyTuple_GET_ITEM(pair, 0);
6315 if (!PyUnicode_Check(module_name)) {
6316 goto error;
6317 }
6318
6319 class_name = PyTuple_GET_ITEM(pair, 1);
6320 if (!PyUnicode_Check(class_name)) {
6321 goto error;
6322 }
6323
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006324 /* Load the object. */
6325 obj = find_class(self, module_name, class_name);
6326 if (obj == NULL) {
6327 Py_DECREF(py_code);
6328 return -1;
6329 }
6330 /* Cache code -> obj. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006331 code = PyDict_SetItem(st->extension_cache, py_code, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006332 Py_DECREF(py_code);
6333 if (code < 0) {
6334 Py_DECREF(obj);
6335 return -1;
6336 }
6337 PDATA_PUSH(self->stack, obj, -1);
6338 return 0;
Victor Stinnerb37672d2018-11-22 03:37:50 +01006339
6340error:
6341 Py_DECREF(py_code);
6342 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6343 "isn't a 2-tuple of strings", code);
6344 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006345}
6346
6347static int
6348load_put(UnpicklerObject *self)
6349{
6350 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006351 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006352 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01006353 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006354
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006355 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006356 return -1;
6357 if (len < 2)
6358 return bad_readline();
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006359 if (Py_SIZE(self->stack) <= self->stack->fence)
6360 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006361 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006362
6363 key = PyLong_FromString(s, NULL, 10);
6364 if (key == NULL)
6365 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006366 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006367 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006368 if (idx < 0) {
6369 if (!PyErr_Occurred())
6370 PyErr_SetString(PyExc_ValueError,
6371 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006372 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006373 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006374
6375 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006376}
6377
6378static int
6379load_binput(UnpicklerObject *self)
6380{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006381 PyObject *value;
6382 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006383 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006384
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006385 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006386 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006387
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006388 if (Py_SIZE(self->stack) <= self->stack->fence)
6389 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006390 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006391
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006392 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006394 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006395}
6396
6397static int
6398load_long_binput(UnpicklerObject *self)
6399{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006400 PyObject *value;
6401 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006402 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006403
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006404 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006405 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006406
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006407 if (Py_SIZE(self->stack) <= self->stack->fence)
6408 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006409 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006410
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006411 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006412 if (idx < 0) {
6413 PyErr_SetString(PyExc_ValueError,
6414 "negative LONG_BINPUT argument");
6415 return -1;
6416 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006417
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006418 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006419}
6420
6421static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006422load_memoize(UnpicklerObject *self)
6423{
6424 PyObject *value;
6425
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006426 if (Py_SIZE(self->stack) <= self->stack->fence)
6427 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006428 value = self->stack->data[Py_SIZE(self->stack) - 1];
6429
6430 return _Unpickler_MemoPut(self, self->memo_len, value);
6431}
6432
6433static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006434do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006435{
6436 PyObject *value;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006437 PyObject *slice;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006438 PyObject *list;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006439 PyObject *result;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006440 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006442 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006443 if (x > len || x <= self->stack->fence)
6444 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006445 if (len == x) /* nothing to do */
6446 return 0;
6447
6448 list = self->stack->data[x - 1];
6449
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006450 if (PyList_CheckExact(list)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006451 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006452 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006453
6454 slice = Pdata_poplist(self->stack, x);
6455 if (!slice)
6456 return -1;
6457 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006458 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006459 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006460 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006461 }
6462 else {
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006463 PyObject *extend_func;
6464 _Py_IDENTIFIER(extend);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006465
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03006466 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6467 return -1;
6468 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006469 if (extend_func != NULL) {
6470 slice = Pdata_poplist(self->stack, x);
6471 if (!slice) {
6472 Py_DECREF(extend_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006473 return -1;
6474 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006475 result = _Pickle_FastCall(extend_func, slice);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006476 Py_DECREF(extend_func);
6477 if (result == NULL)
6478 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006479 Py_DECREF(result);
6480 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006481 else {
6482 PyObject *append_func;
6483 _Py_IDENTIFIER(append);
6484
6485 /* Even if the PEP 307 requires extend() and append() methods,
6486 fall back on append() if the object has no extend() method
6487 for backward compatibility. */
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006488 append_func = _PyObject_GetAttrId(list, &PyId_append);
6489 if (append_func == NULL)
6490 return -1;
6491 for (i = x; i < len; i++) {
6492 value = self->stack->data[i];
6493 result = _Pickle_FastCall(append_func, value);
6494 if (result == NULL) {
6495 Pdata_clear(self->stack, i + 1);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006496 Py_SET_SIZE(self->stack, x);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006497 Py_DECREF(append_func);
6498 return -1;
6499 }
6500 Py_DECREF(result);
6501 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006502 Py_SET_SIZE(self->stack, x);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006503 Py_DECREF(append_func);
6504 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006505 }
6506
6507 return 0;
6508}
6509
6510static int
6511load_append(UnpicklerObject *self)
6512{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006513 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6514 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006515 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006516}
6517
6518static int
6519load_appends(UnpicklerObject *self)
6520{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006521 Py_ssize_t i = marker(self);
6522 if (i < 0)
6523 return -1;
6524 return do_append(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006525}
6526
6527static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006528do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006529{
6530 PyObject *value, *key;
6531 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006532 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006533 int status = 0;
6534
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006535 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006536 if (x > len || x <= self->stack->fence)
6537 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006538 if (len == x) /* nothing to do */
6539 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02006540 if ((len - x) % 2 != 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006541 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006542 /* Currupt or hostile pickle -- we never write one like this. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006543 PyErr_SetString(st->UnpicklingError,
6544 "odd number of items for SETITEMS");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006545 return -1;
6546 }
6547
6548 /* Here, dict does not actually need to be a PyDict; it could be anything
6549 that supports the __setitem__ attribute. */
6550 dict = self->stack->data[x - 1];
6551
6552 for (i = x + 1; i < len; i += 2) {
6553 key = self->stack->data[i - 1];
6554 value = self->stack->data[i];
6555 if (PyObject_SetItem(dict, key, value) < 0) {
6556 status = -1;
6557 break;
6558 }
6559 }
6560
6561 Pdata_clear(self->stack, x);
6562 return status;
6563}
6564
6565static int
6566load_setitem(UnpicklerObject *self)
6567{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006568 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006569}
6570
6571static int
6572load_setitems(UnpicklerObject *self)
6573{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006574 Py_ssize_t i = marker(self);
6575 if (i < 0)
6576 return -1;
6577 return do_setitems(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006578}
6579
6580static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006581load_additems(UnpicklerObject *self)
6582{
6583 PyObject *set;
6584 Py_ssize_t mark, len, i;
6585
6586 mark = marker(self);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006587 if (mark < 0)
6588 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006589 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006590 if (mark > len || mark <= self->stack->fence)
6591 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006592 if (len == mark) /* nothing to do */
6593 return 0;
6594
6595 set = self->stack->data[mark - 1];
6596
6597 if (PySet_Check(set)) {
6598 PyObject *items;
6599 int status;
6600
6601 items = Pdata_poptuple(self->stack, mark);
6602 if (items == NULL)
6603 return -1;
6604
6605 status = _PySet_Update(set, items);
6606 Py_DECREF(items);
6607 return status;
6608 }
6609 else {
6610 PyObject *add_func;
6611 _Py_IDENTIFIER(add);
6612
6613 add_func = _PyObject_GetAttrId(set, &PyId_add);
6614 if (add_func == NULL)
6615 return -1;
6616 for (i = mark; i < len; i++) {
6617 PyObject *result;
6618 PyObject *item;
6619
6620 item = self->stack->data[i];
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006621 result = _Pickle_FastCall(add_func, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006622 if (result == NULL) {
6623 Pdata_clear(self->stack, i + 1);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006624 Py_SET_SIZE(self->stack, mark);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006625 return -1;
6626 }
6627 Py_DECREF(result);
6628 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006629 Py_SET_SIZE(self->stack, mark);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006630 }
6631
6632 return 0;
6633}
6634
6635static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006636load_build(UnpicklerObject *self)
6637{
6638 PyObject *state, *inst, *slotstate;
6639 PyObject *setstate;
6640 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006641 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006642
6643 /* Stack is ... instance, state. We want to leave instance at
6644 * the stack top, possibly mutated via instance.__setstate__(state).
6645 */
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006646 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6647 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006648
6649 PDATA_POP(self->stack, state);
6650 if (state == NULL)
6651 return -1;
6652
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006653 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006654
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006655 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6656 Py_DECREF(state);
6657 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006658 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006659 if (setstate != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006660 PyObject *result;
6661
6662 /* The explicit __setstate__ is responsible for everything. */
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006663 result = _Pickle_FastCall(setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006664 Py_DECREF(setstate);
6665 if (result == NULL)
6666 return -1;
6667 Py_DECREF(result);
6668 return 0;
6669 }
6670
6671 /* A default __setstate__. First see whether state embeds a
6672 * slot state dict too (a proto 2 addition).
6673 */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02006674 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006675 PyObject *tmp = state;
6676
6677 state = PyTuple_GET_ITEM(tmp, 0);
6678 slotstate = PyTuple_GET_ITEM(tmp, 1);
6679 Py_INCREF(state);
6680 Py_INCREF(slotstate);
6681 Py_DECREF(tmp);
6682 }
6683 else
6684 slotstate = NULL;
6685
6686 /* Set inst.__dict__ from the state dict (if any). */
6687 if (state != Py_None) {
6688 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006689 PyObject *d_key, *d_value;
6690 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006691 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006692
6693 if (!PyDict_Check(state)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006694 PickleState *st = _Pickle_GetGlobalState();
6695 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006696 goto error;
6697 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006698 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006699 if (dict == NULL)
6700 goto error;
6701
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006702 i = 0;
6703 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6704 /* normally the keys for instance attributes are
6705 interned. we should try to do that here. */
6706 Py_INCREF(d_key);
6707 if (PyUnicode_CheckExact(d_key))
6708 PyUnicode_InternInPlace(&d_key);
6709 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6710 Py_DECREF(d_key);
6711 goto error;
6712 }
6713 Py_DECREF(d_key);
6714 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006715 Py_DECREF(dict);
6716 }
6717
6718 /* Also set instance attributes from the slotstate dict (if any). */
6719 if (slotstate != NULL) {
6720 PyObject *d_key, *d_value;
6721 Py_ssize_t i;
6722
6723 if (!PyDict_Check(slotstate)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006724 PickleState *st = _Pickle_GetGlobalState();
6725 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006726 "slot state is not a dictionary");
6727 goto error;
6728 }
6729 i = 0;
6730 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6731 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6732 goto error;
6733 }
6734 }
6735
6736 if (0) {
6737 error:
6738 status = -1;
6739 }
6740
6741 Py_DECREF(state);
6742 Py_XDECREF(slotstate);
6743 return status;
6744}
6745
6746static int
6747load_mark(UnpicklerObject *self)
6748{
6749
6750 /* Note that we split the (pickle.py) stack into two stacks, an
6751 * object stack and a mark stack. Here we push a mark onto the
6752 * mark stack.
6753 */
6754
Sergey Fedoseev86b89912018-08-25 12:54:40 +05006755 if (self->num_marks >= self->marks_size) {
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006756 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6757 Py_ssize_t *marks_new = self->marks;
6758 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6759 if (marks_new == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006760 PyErr_NoMemory();
6761 return -1;
6762 }
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006763 self->marks = marks_new;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006764 self->marks_size = (Py_ssize_t)alloc;
6765 }
6766
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006767 self->stack->mark_set = 1;
6768 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006769
6770 return 0;
6771}
6772
6773static int
6774load_reduce(UnpicklerObject *self)
6775{
6776 PyObject *callable = NULL;
6777 PyObject *argtup = NULL;
6778 PyObject *obj = NULL;
6779
6780 PDATA_POP(self->stack, argtup);
6781 if (argtup == NULL)
6782 return -1;
6783 PDATA_POP(self->stack, callable);
6784 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00006785 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006786 Py_DECREF(callable);
6787 }
6788 Py_DECREF(argtup);
6789
6790 if (obj == NULL)
6791 return -1;
6792
6793 PDATA_PUSH(self->stack, obj, -1);
6794 return 0;
6795}
6796
6797/* Just raises an error if we don't know the protocol specified. PROTO
6798 * is the first opcode for protocols >= 2.
6799 */
6800static int
6801load_proto(UnpicklerObject *self)
6802{
6803 char *s;
6804 int i;
6805
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006806 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006807 return -1;
6808
6809 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006810 if (i <= HIGHEST_PROTOCOL) {
6811 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006812 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006813 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006814
6815 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6816 return -1;
6817}
6818
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006819static int
6820load_frame(UnpicklerObject *self)
6821{
6822 char *s;
6823 Py_ssize_t frame_len;
6824
6825 if (_Unpickler_Read(self, &s, 8) < 0)
6826 return -1;
6827
6828 frame_len = calc_binsize(s, 8);
6829 if (frame_len < 0) {
6830 PyErr_Format(PyExc_OverflowError,
6831 "FRAME length exceeds system's maximum of %zd bytes",
6832 PY_SSIZE_T_MAX);
6833 return -1;
6834 }
6835
6836 if (_Unpickler_Read(self, &s, frame_len) < 0)
6837 return -1;
6838
6839 /* Rewind to start of frame */
6840 self->next_read_idx -= frame_len;
6841 return 0;
6842}
6843
/* Run the unpickling loop on self and return the resulting object.
 *
 * The mark bookkeeping, the fence, the protocol number and the stack are
 * reset first.  Then opcodes are read one byte at a time and dispatched to
 * the matching load_*() handler until STOP is read or a handler fails.
 *
 * Returns the object popped from the stack with PDATA_POP after the loop
 * ends, or NULL with an exception set if reading, dispatching or a
 * handler failed.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *value = NULL;
    char *s = NULL;

    /* Start from a clean state, whatever a previous load() left behind. */
    self->num_marks = 0;
    self->stack->mark_set = 0;
    self->stack->fence = 0;
    self->proto = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       When the handler fails, `break` leaves the switch and the `break`
       that follows the switch then leaves the loop; when it succeeds,
       `continue` goes on to the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0) {
            PickleState *st = _Pickle_GetGlobalState();
            /* An UnpicklingError raised while reading the opcode byte is
               replaced by EOFError; any other exception is left as is. */
            if (PyErr_ExceptionMatches(st->UnpicklingError)) {
                PyErr_Format(PyExc_EOFError, "Ran out of input");
            }
            return NULL;
        }

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
        OP_ARG(BINBYTES, load_counted_binbytes, 4)
        OP_ARG(BINBYTES8, load_counted_binbytes, 8)
        OP(BYTEARRAY8, load_counted_bytearray)
        OP(NEXT_BUFFER, load_next_buffer)
        OP(READONLY_BUFFER, load_readonly_buffer)
        OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
        OP_ARG(BINSTRING, load_counted_binstring, 4)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
        OP_ARG(BINUNICODE, load_counted_binunicode, 4)
        OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(EMPTY_SET, load_empty_set)
        OP(ADDITEMS, load_additems)
        OP(FROZENSET, load_frozenset)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP_ARG(NEWOBJ, load_newobj, 0)
        OP_ARG(NEWOBJ_EX, load_newobj, 1)
        OP(GLOBAL, load_global)
        OP(STACK_GLOBAL, load_stack_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(MEMOIZE, load_memoize)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP(FRAME, load_frame)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        default:
            {
                PickleState *st = _Pickle_GetGlobalState();
                unsigned char c = (unsigned char) *s;
                /* Show the unknown opcode as a character when it is
                   printable ASCII other than a quote or a backslash, and
                   as a \xNN escape otherwise. */
                if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '%c'.", c);
                }
                else {
                    PyErr_Format(st->UnpicklingError,
                                 "invalid load key, '\\x%02x'.", c);
                }
                return NULL;
            }
        }

        /* Reached on STOP and on handler failure. */
        break;                  /* and we are done! */
    }

    /* Tell a failed handler apart from a regular STOP. */
    if (PyErr_Occurred()) {
        return NULL;
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    PDATA_POP(self->stack, value);
    return value;
}
6974
Larry Hastings61272b72014-01-07 12:41:53 -08006975/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006976
6977_pickle.Unpickler.load
6978
6979Load a pickle.
6980
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006981Read a pickled object representation from the open file object given
6982in the constructor, and return the reconstituted object hierarchy
6983specified therein.
Larry Hastings61272b72014-01-07 12:41:53 -08006984[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006985
Larry Hastings3cceb382014-01-04 11:09:09 -08006986static PyObject *
Larry Hastingsc2047262014-01-25 20:43:29 -08006987_pickle_Unpickler_load_impl(UnpicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006988/*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006989{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006990 UnpicklerObject *unpickler = (UnpicklerObject*)self;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006991
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006992 /* Check whether the Unpickler was initialized correctly. This prevents
6993 segfaulting if a subclass overridden __init__ with a function that does
6994 not call Unpickler.__init__(). Here, we simply ensure that self->read
6995 is not NULL. */
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006996 if (unpickler->read == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006997 PickleState *st = _Pickle_GetGlobalState();
6998 PyErr_Format(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006999 "Unpickler.__init__() was not called by %s.__init__()",
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007000 Py_TYPE(unpickler)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007001 return NULL;
7002 }
7003
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007004 return load(unpickler);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007005}
7006
7007/* The name of find_class() is misleading. In newer pickle protocols, this
7008 function is used for loading any global (i.e., functions), not just
7009 classes. The name is kept only for backward compatibility. */
7010
Larry Hastings61272b72014-01-07 12:41:53 -08007011/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007012
7013_pickle.Unpickler.find_class
7014
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007015 module_name: object
7016 global_name: object
7017 /
7018
7019Return an object from a specified module.
7020
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007021If necessary, the module will be imported. Subclasses may override
7022this method (e.g. to restrict unpickling of arbitrary classes and
7023functions).
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007024
7025This method is called whenever a class or a function object is
7026needed. Both arguments passed are str objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007027[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007028
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007029static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007030_pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7031 PyObject *module_name,
7032 PyObject *global_name)
7033/*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007034{
7035 PyObject *global;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007036 PyObject *module;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007037
Steve Dowerb82e17e2019-05-23 08:45:22 -07007038 if (PySys_Audit("pickle.find_class", "OO",
7039 module_name, global_name) < 0) {
7040 return NULL;
7041 }
7042
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007043 /* Try to map the old names used in Python 2.x to the new ones used in
7044 Python 3.x. We do this only with old pickle protocols and when the
7045 user has not disabled the feature. */
7046 if (self->proto < 3 && self->fix_imports) {
7047 PyObject *key;
7048 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007049 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007050
7051 /* Check if the global (i.e., a function or a class) was renamed
7052 or moved to another module. */
7053 key = PyTuple_Pack(2, module_name, global_name);
7054 if (key == NULL)
7055 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007056 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007057 Py_DECREF(key);
7058 if (item) {
7059 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7060 PyErr_Format(PyExc_RuntimeError,
7061 "_compat_pickle.NAME_MAPPING values should be "
7062 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7063 return NULL;
7064 }
7065 module_name = PyTuple_GET_ITEM(item, 0);
7066 global_name = PyTuple_GET_ITEM(item, 1);
7067 if (!PyUnicode_Check(module_name) ||
7068 !PyUnicode_Check(global_name)) {
7069 PyErr_Format(PyExc_RuntimeError,
7070 "_compat_pickle.NAME_MAPPING values should be "
7071 "pairs of str, not (%.200s, %.200s)",
7072 Py_TYPE(module_name)->tp_name,
7073 Py_TYPE(global_name)->tp_name);
7074 return NULL;
7075 }
7076 }
7077 else if (PyErr_Occurred()) {
7078 return NULL;
7079 }
Serhiy Storchakabfe18242015-03-31 13:12:37 +03007080 else {
7081 /* Check if the module was renamed. */
7082 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7083 if (item) {
7084 if (!PyUnicode_Check(item)) {
7085 PyErr_Format(PyExc_RuntimeError,
7086 "_compat_pickle.IMPORT_MAPPING values should be "
7087 "strings, not %.200s", Py_TYPE(item)->tp_name);
7088 return NULL;
7089 }
7090 module_name = item;
7091 }
7092 else if (PyErr_Occurred()) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007093 return NULL;
7094 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007095 }
7096 }
7097
tjb9004371c0a2019-02-18 23:30:51 +08007098 /*
7099 * we don't use PyImport_GetModule here, because it can return partially-
7100 * initialised modules, which then cause the getattribute to fail.
7101 */
7102 module = PyImport_Import(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007103 if (module == NULL) {
tjb9004371c0a2019-02-18 23:30:51 +08007104 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007105 }
Eric Snow3f9eee62017-09-15 16:35:20 -06007106 global = getattribute(module, global_name, self->proto >= 4);
7107 Py_DECREF(module);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007108 return global;
7109}
7110
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007111/*[clinic input]
7112
7113_pickle.Unpickler.__sizeof__ -> Py_ssize_t
7114
7115Returns size in memory, in bytes.
7116[clinic start generated code]*/
7117
7118static Py_ssize_t
7119_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7120/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7121{
7122 Py_ssize_t res;
7123
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02007124 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007125 if (self->memo != NULL)
7126 res += self->memo_size * sizeof(PyObject *);
7127 if (self->marks != NULL)
7128 res += self->marks_size * sizeof(Py_ssize_t);
7129 if (self->input_line != NULL)
7130 res += strlen(self->input_line) + 1;
7131 if (self->encoding != NULL)
7132 res += strlen(self->encoding) + 1;
7133 if (self->errors != NULL)
7134 res += strlen(self->errors) + 1;
7135 return res;
7136}
7137
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007138static struct PyMethodDef Unpickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007139 _PICKLE_UNPICKLER_LOAD_METHODDEF
7140 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007141 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007142 {NULL, NULL} /* sentinel */
7143};
7144
7145static void
7146Unpickler_dealloc(UnpicklerObject *self)
7147{
7148 PyObject_GC_UnTrack((PyObject *)self);
7149 Py_XDECREF(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007150 Py_XDECREF(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007151 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007152 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007153 Py_XDECREF(self->stack);
7154 Py_XDECREF(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007155 Py_XDECREF(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007156 if (self->buffer.buf != NULL) {
7157 PyBuffer_Release(&self->buffer);
7158 self->buffer.buf = NULL;
7159 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007160
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007161 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007162 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007163 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007164 PyMem_Free(self->encoding);
7165 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007166
7167 Py_TYPE(self)->tp_free((PyObject *)self);
7168}
7169
7170static int
7171Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7172{
7173 Py_VISIT(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007174 Py_VISIT(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007175 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007176 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007177 Py_VISIT(self->stack);
7178 Py_VISIT(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007179 Py_VISIT(self->buffers);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007180 return 0;
7181}
7182
7183static int
7184Unpickler_clear(UnpicklerObject *self)
7185{
7186 Py_CLEAR(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007187 Py_CLEAR(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007188 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007189 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007190 Py_CLEAR(self->stack);
7191 Py_CLEAR(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007192 Py_CLEAR(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007193 if (self->buffer.buf != NULL) {
7194 PyBuffer_Release(&self->buffer);
7195 self->buffer.buf = NULL;
7196 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007197
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007198 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007199 PyMem_Free(self->marks);
7200 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007201 PyMem_Free(self->input_line);
7202 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007203 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007204 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007205 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007206 self->errors = NULL;
7207
7208 return 0;
7209}
7210
Larry Hastings61272b72014-01-07 12:41:53 -08007211/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007212
7213_pickle.Unpickler.__init__
7214
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007215 file: object
7216 *
7217 fix_imports: bool = True
7218 encoding: str = 'ASCII'
7219 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007220 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007221
7222This takes a binary file for reading a pickle data stream.
7223
7224The protocol version of the pickle is detected automatically, so no
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007225protocol argument is needed. Bytes past the pickled object's
7226representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007227
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007228The argument *file* must have two methods, a read() method that takes
7229an integer argument, and a readline() method that requires no
7230arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007231binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007232other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007233
7234Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007235which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007236generated by Python 2. If *fix_imports* is True, pickle will try to
7237map the old Python 2 names to the new names used in Python 3. The
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007238*encoding* and *errors* tell pickle how to decode 8-bit string
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007239instances pickled by Python 2; these default to 'ASCII' and 'strict',
7240respectively. The *encoding* can be 'bytes' to read these 8-bit
7241string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007242[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007243
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007244static int
Larry Hastings89964c42015-04-14 18:07:59 -04007245_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7246 int fix_imports, const char *encoding,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007247 const char *errors, PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007248/*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007249{
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02007250 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007251
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007252 /* In case of multiple __init__() calls, clear previous content. */
7253 if (self->read != NULL)
7254 (void)Unpickler_clear(self);
7255
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007256 if (_Unpickler_SetInputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007257 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007259 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007260 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007261
Antoine Pitrou91f43802019-05-26 17:10:09 +02007262 if (_Unpickler_SetBuffers(self, buffers) < 0)
7263 return -1;
7264
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007265 self->fix_imports = fix_imports;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007266
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007267 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7268 &self->pers_func, &self->pers_func_self) < 0)
7269 {
7270 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007271 }
7272
7273 self->stack = (Pdata *)Pdata_New();
7274 if (self->stack == NULL)
Zackery Spytz4b430e52018-09-28 23:48:46 -06007275 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007277 self->memo_size = 32;
7278 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007279 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007280 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007281
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007282 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00007283
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007284 return 0;
7285}
7286
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007287
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007288/* Define a proxy object for the Unpickler's internal memo object. This is to
7289 * avoid breaking code like:
7290 * unpickler.memo.clear()
7291 * and
7292 * unpickler.memo = saved_memo
7293 * Is this a good idea? Not really, but we don't want to break code that uses
7294 * it. Note that we don't implement the entire mapping API here. This is
7295 * intentional, as these should be treated as black-box implementation details.
7296 *
7297 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02007298 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007299 */
7300
Larry Hastings61272b72014-01-07 12:41:53 -08007301/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007302_pickle.UnpicklerMemoProxy.clear
7303
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007304Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08007305[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007306
Larry Hastings3cceb382014-01-04 11:09:09 -08007307static PyObject *
7308_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007309/*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007310{
7311 _Unpickler_MemoCleanup(self->unpickler);
7312 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7313 if (self->unpickler->memo == NULL)
7314 return NULL;
7315 Py_RETURN_NONE;
7316}
7317
Larry Hastings61272b72014-01-07 12:41:53 -08007318/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007319_pickle.UnpicklerMemoProxy.copy
7320
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007321Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08007322[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007323
Larry Hastings3cceb382014-01-04 11:09:09 -08007324static PyObject *
7325_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007326/*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007327{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007328 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007329 PyObject *new_memo = PyDict_New();
7330 if (new_memo == NULL)
7331 return NULL;
7332
7333 for (i = 0; i < self->unpickler->memo_size; i++) {
7334 int status;
7335 PyObject *key, *value;
7336
7337 value = self->unpickler->memo[i];
7338 if (value == NULL)
7339 continue;
7340
7341 key = PyLong_FromSsize_t(i);
7342 if (key == NULL)
7343 goto error;
7344 status = PyDict_SetItem(new_memo, key, value);
7345 Py_DECREF(key);
7346 if (status < 0)
7347 goto error;
7348 }
7349 return new_memo;
7350
7351error:
7352 Py_DECREF(new_memo);
7353 return NULL;
7354}
7355
Larry Hastings61272b72014-01-07 12:41:53 -08007356/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007357_pickle.UnpicklerMemoProxy.__reduce__
7358
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007359Implement pickling support.
Larry Hastings61272b72014-01-07 12:41:53 -08007360[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007361
Larry Hastings3cceb382014-01-04 11:09:09 -08007362static PyObject *
7363_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007364/*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007365{
7366 PyObject *reduce_value;
7367 PyObject *constructor_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08007368 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007369 if (contents == NULL)
7370 return NULL;
7371
7372 reduce_value = PyTuple_New(2);
7373 if (reduce_value == NULL) {
7374 Py_DECREF(contents);
7375 return NULL;
7376 }
7377 constructor_args = PyTuple_New(1);
7378 if (constructor_args == NULL) {
7379 Py_DECREF(contents);
7380 Py_DECREF(reduce_value);
7381 return NULL;
7382 }
7383 PyTuple_SET_ITEM(constructor_args, 0, contents);
7384 Py_INCREF((PyObject *)&PyDict_Type);
7385 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7386 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7387 return reduce_value;
7388}
7389
7390static PyMethodDef unpicklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007391 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7392 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7393 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007394 {NULL, NULL} /* sentinel */
7395};
7396
7397static void
7398UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7399{
7400 PyObject_GC_UnTrack(self);
7401 Py_XDECREF(self->unpickler);
7402 PyObject_GC_Del((PyObject *)self);
7403}
7404
7405static int
7406UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7407 visitproc visit, void *arg)
7408{
7409 Py_VISIT(self->unpickler);
7410 return 0;
7411}
7412
7413static int
7414UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7415{
7416 Py_CLEAR(self->unpickler);
7417 return 0;
7418}
7419
7420static PyTypeObject UnpicklerMemoProxyType = {
7421 PyVarObject_HEAD_INIT(NULL, 0)
7422 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7423 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7424 0,
7425 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007426 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007427 0, /* tp_getattr */
7428 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007429 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007430 0, /* tp_repr */
7431 0, /* tp_as_number */
7432 0, /* tp_as_sequence */
7433 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00007434 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007435 0, /* tp_call */
7436 0, /* tp_str */
7437 PyObject_GenericGetAttr, /* tp_getattro */
7438 PyObject_GenericSetAttr, /* tp_setattro */
7439 0, /* tp_as_buffer */
7440 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7441 0, /* tp_doc */
7442 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7443 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7444 0, /* tp_richcompare */
7445 0, /* tp_weaklistoffset */
7446 0, /* tp_iter */
7447 0, /* tp_iternext */
7448 unpicklerproxy_methods, /* tp_methods */
7449};
7450
7451static PyObject *
7452UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7453{
7454 UnpicklerMemoProxyObject *self;
7455
7456 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7457 &UnpicklerMemoProxyType);
7458 if (self == NULL)
7459 return NULL;
7460 Py_INCREF(unpickler);
7461 self->unpickler = unpickler;
7462 PyObject_GC_Track(self);
7463 return (PyObject *)self;
7464}
7465
7466/*****************************************************************************/
7467
7468
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007469static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007470Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007471{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007472 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007473}
7474
7475static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007476Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007477{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007478 PyObject **new_memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007479 size_t new_memo_size = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007480
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007481 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007482 PyErr_SetString(PyExc_TypeError,
7483 "attribute deletion is not supported");
7484 return -1;
7485 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007486
Andy Lesterdffe4c02020-03-04 07:15:20 -06007487 if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007488 UnpicklerObject *unpickler =
7489 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7490
7491 new_memo_size = unpickler->memo_size;
7492 new_memo = _Unpickler_NewMemo(new_memo_size);
7493 if (new_memo == NULL)
7494 return -1;
7495
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007496 for (size_t i = 0; i < new_memo_size; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007497 Py_XINCREF(unpickler->memo[i]);
7498 new_memo[i] = unpickler->memo[i];
7499 }
7500 }
7501 else if (PyDict_Check(obj)) {
7502 Py_ssize_t i = 0;
7503 PyObject *key, *value;
7504
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02007505 new_memo_size = PyDict_GET_SIZE(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007506 new_memo = _Unpickler_NewMemo(new_memo_size);
7507 if (new_memo == NULL)
7508 return -1;
7509
7510 while (PyDict_Next(obj, &i, &key, &value)) {
7511 Py_ssize_t idx;
7512 if (!PyLong_Check(key)) {
7513 PyErr_SetString(PyExc_TypeError,
7514 "memo key must be integers");
7515 goto error;
7516 }
7517 idx = PyLong_AsSsize_t(key);
7518 if (idx == -1 && PyErr_Occurred())
7519 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02007520 if (idx < 0) {
7521 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02007522 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02007523 goto error;
7524 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007525 if (_Unpickler_MemoPut(self, idx, value) < 0)
7526 goto error;
7527 }
7528 }
7529 else {
7530 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02007531 "'memo' attribute must be an UnpicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007532 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007533 return -1;
7534 }
7535
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007536 _Unpickler_MemoCleanup(self);
7537 self->memo_size = new_memo_size;
7538 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007539
7540 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007541
7542 error:
7543 if (new_memo_size) {
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007544 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007545 Py_XDECREF(new_memo[i]);
7546 }
Victor Stinner00d7abd2020-12-01 09:56:42 +01007547 PyMem_Free(new_memo);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007548 }
7549 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007550}
7551
7552static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007553Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007554{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007555 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007556 PyErr_SetString(PyExc_AttributeError, "persistent_load");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007557 return NULL;
7558 }
7559 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007560}
7561
7562static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007563Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007564{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007565 if (value == NULL) {
7566 PyErr_SetString(PyExc_TypeError,
7567 "attribute deletion is not supported");
7568 return -1;
7569 }
7570 if (!PyCallable_Check(value)) {
7571 PyErr_SetString(PyExc_TypeError,
7572 "persistent_load must be a callable taking "
7573 "one argument");
7574 return -1;
7575 }
7576
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007577 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007578 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03007579 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007580
7581 return 0;
7582}
7583
7584static PyGetSetDef Unpickler_getsets[] = {
7585 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7586 {"persistent_load", (getter)Unpickler_get_persload,
7587 (setter)Unpickler_set_persload},
7588 {NULL}
7589};
7590
7591static PyTypeObject Unpickler_Type = {
7592 PyVarObject_HEAD_INIT(NULL, 0)
7593 "_pickle.Unpickler", /*tp_name*/
7594 sizeof(UnpicklerObject), /*tp_basicsize*/
7595 0, /*tp_itemsize*/
7596 (destructor)Unpickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007597 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007598 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007599 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007600 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007601 0, /*tp_repr*/
7602 0, /*tp_as_number*/
7603 0, /*tp_as_sequence*/
7604 0, /*tp_as_mapping*/
7605 0, /*tp_hash*/
7606 0, /*tp_call*/
7607 0, /*tp_str*/
7608 0, /*tp_getattro*/
7609 0, /*tp_setattro*/
7610 0, /*tp_as_buffer*/
7611 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007612 _pickle_Unpickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007613 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7614 (inquiry)Unpickler_clear, /*tp_clear*/
7615 0, /*tp_richcompare*/
7616 0, /*tp_weaklistoffset*/
7617 0, /*tp_iter*/
7618 0, /*tp_iternext*/
7619 Unpickler_methods, /*tp_methods*/
7620 0, /*tp_members*/
7621 Unpickler_getsets, /*tp_getset*/
7622 0, /*tp_base*/
7623 0, /*tp_dict*/
7624 0, /*tp_descr_get*/
7625 0, /*tp_descr_set*/
7626 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007627 _pickle_Unpickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007628 PyType_GenericAlloc, /*tp_alloc*/
7629 PyType_GenericNew, /*tp_new*/
7630 PyObject_GC_Del, /*tp_free*/
7631 0, /*tp_is_gc*/
7632};
7633
Larry Hastings61272b72014-01-07 12:41:53 -08007634/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007635
7636_pickle.dump
7637
7638 obj: object
7639 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007640 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007641 *
7642 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007643 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007644
7645Write a pickled representation of obj to the open file object file.
7646
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007647This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7648be more efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007649
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007650The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007651protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7652protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007653with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007654
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007655Specifying a negative protocol version selects the highest protocol
7656version supported. The higher the protocol used, the more recent the
7657version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007658
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007659The *file* argument must have a write() method that accepts a single
7660bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00007661writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007662this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007663
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007664If *fix_imports* is True and protocol is less than 3, pickle will try
7665to map the new Python 3 names to the old module names used in Python
76662, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007667
7668If *buffer_callback* is None (the default), buffer views are serialized
7669into *file* as part of the pickle stream. It is an error if
7670*buffer_callback* is not None and *protocol* is None or smaller than 5.
7671
Larry Hastings61272b72014-01-07 12:41:53 -08007672[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007673
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007674static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007675_pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007676 PyObject *protocol, int fix_imports,
7677 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007678/*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007679{
7680 PicklerObject *pickler = _Pickler_New();
7681
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007682 if (pickler == NULL)
7683 return NULL;
7684
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007685 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007686 goto error;
7687
7688 if (_Pickler_SetOutputStream(pickler, file) < 0)
7689 goto error;
7690
Antoine Pitrou91f43802019-05-26 17:10:09 +02007691 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7692 goto error;
7693
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007694 if (dump(pickler, obj) < 0)
7695 goto error;
7696
7697 if (_Pickler_FlushToFile(pickler) < 0)
7698 goto error;
7699
7700 Py_DECREF(pickler);
7701 Py_RETURN_NONE;
7702
7703 error:
7704 Py_XDECREF(pickler);
7705 return NULL;
7706}
7707
Larry Hastings61272b72014-01-07 12:41:53 -08007708/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007709
7710_pickle.dumps
7711
7712 obj: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007713 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007714 *
7715 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007716 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007717
7718Return the pickled representation of the object as a bytes object.
7719
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007720The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007721protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7722protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007723with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007724
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007725Specifying a negative protocol version selects the highest protocol
7726version supported. The higher the protocol used, the more recent the
7727version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007728
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007729If *fix_imports* is True and *protocol* is less than 3, pickle will
7730try to map the new Python 3 names to the old module names used in
7731Python 2, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007732
7733If *buffer_callback* is None (the default), buffer views are serialized
7734into *file* as part of the pickle stream. It is an error if
7735*buffer_callback* is not None and *protocol* is None or smaller than 5.
7736
Larry Hastings61272b72014-01-07 12:41:53 -08007737[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007738
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007739static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007740_pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007741 int fix_imports, PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007742/*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007743{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007744 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007745 PicklerObject *pickler = _Pickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007746
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007747 if (pickler == NULL)
7748 return NULL;
7749
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007750 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007751 goto error;
7752
Antoine Pitrou91f43802019-05-26 17:10:09 +02007753 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7754 goto error;
7755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007756 if (dump(pickler, obj) < 0)
7757 goto error;
7758
7759 result = _Pickler_GetString(pickler);
7760 Py_DECREF(pickler);
7761 return result;
7762
7763 error:
7764 Py_XDECREF(pickler);
7765 return NULL;
7766}
7767
Larry Hastings61272b72014-01-07 12:41:53 -08007768/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007769
7770_pickle.load
7771
7772 file: object
7773 *
7774 fix_imports: bool = True
7775 encoding: str = 'ASCII'
7776 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007777 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007778
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007779Read and return an object from the pickle data stored in a file.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007780
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007781This is equivalent to ``Unpickler(file).load()``, but may be more
7782efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007783
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007784The protocol version of the pickle is detected automatically, so no
7785protocol argument is needed. Bytes past the pickled object's
7786representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007787
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007788The argument *file* must have two methods, a read() method that takes
7789an integer argument, and a readline() method that requires no
7790arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007791binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007792other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007793
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007794Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007795which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007796generated by Python 2. If *fix_imports* is True, pickle will try to
7797map the old Python 2 names to the new names used in Python 3. The
7798*encoding* and *errors* tell pickle how to decode 8-bit string
7799instances pickled by Python 2; these default to 'ASCII' and 'strict',
7800respectively. The *encoding* can be 'bytes' to read these 8-bit
7801string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007802[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007803
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007804static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007805_pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007806 const char *encoding, const char *errors,
7807 PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007808/*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007809{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007810 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007811 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007812
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007813 if (unpickler == NULL)
7814 return NULL;
7815
7816 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7817 goto error;
7818
7819 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7820 goto error;
7821
Antoine Pitrou91f43802019-05-26 17:10:09 +02007822 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7823 goto error;
7824
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007825 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007826
7827 result = load(unpickler);
7828 Py_DECREF(unpickler);
7829 return result;
7830
7831 error:
7832 Py_XDECREF(unpickler);
7833 return NULL;
7834}
7835
Larry Hastings61272b72014-01-07 12:41:53 -08007836/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007837
7838_pickle.loads
7839
7840 data: object
Serhiy Storchaka531d1e52020-05-02 09:38:01 +03007841 /
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007842 *
7843 fix_imports: bool = True
7844 encoding: str = 'ASCII'
7845 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007846 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007847
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007848Read and return an object from the given pickle data.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007849
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007850The protocol version of the pickle is detected automatically, so no
7851protocol argument is needed. Bytes past the pickled object's
7852representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007853
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007854Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007855which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007856generated by Python 2. If *fix_imports* is True, pickle will try to
7857map the old Python 2 names to the new names used in Python 3. The
7858*encoding* and *errors* tell pickle how to decode 8-bit string
7859instances pickled by Python 2; these default to 'ASCII' and 'strict',
7860respectively. The *encoding* can be 'bytes' to read these 8-bit
7861string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007862[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007863
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007864static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007865_pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007866 const char *encoding, const char *errors,
7867 PyObject *buffers)
Serhiy Storchaka531d1e52020-05-02 09:38:01 +03007868/*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007869{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007870 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007871 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007872
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007873 if (unpickler == NULL)
7874 return NULL;
7875
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007876 if (_Unpickler_SetStringInput(unpickler, data) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007877 goto error;
7878
7879 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7880 goto error;
7881
Antoine Pitrou91f43802019-05-26 17:10:09 +02007882 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7883 goto error;
7884
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007885 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007886
7887 result = load(unpickler);
7888 Py_DECREF(unpickler);
7889 return result;
7890
7891 error:
7892 Py_XDECREF(unpickler);
7893 return NULL;
7894}
7895
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007896static struct PyMethodDef pickle_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007897 _PICKLE_DUMP_METHODDEF
7898 _PICKLE_DUMPS_METHODDEF
7899 _PICKLE_LOAD_METHODDEF
7900 _PICKLE_LOADS_METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007901 {NULL, NULL} /* sentinel */
7902};
7903
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007904static int
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007905pickle_clear(PyObject *m)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007906{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007907 _Pickle_ClearState(_Pickle_GetState(m));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007908 return 0;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007909}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007910
Stefan Krahf483b0f2013-12-14 13:43:10 +01007911static void
7912pickle_free(PyObject *m)
7913{
7914 _Pickle_ClearState(_Pickle_GetState(m));
7915}
7916
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007917static int
7918pickle_traverse(PyObject *m, visitproc visit, void *arg)
7919{
7920 PickleState *st = _Pickle_GetState(m);
7921 Py_VISIT(st->PickleError);
7922 Py_VISIT(st->PicklingError);
7923 Py_VISIT(st->UnpicklingError);
7924 Py_VISIT(st->dispatch_table);
7925 Py_VISIT(st->extension_registry);
7926 Py_VISIT(st->extension_cache);
7927 Py_VISIT(st->inverted_registry);
7928 Py_VISIT(st->name_mapping_2to3);
7929 Py_VISIT(st->import_mapping_2to3);
7930 Py_VISIT(st->name_mapping_3to2);
7931 Py_VISIT(st->import_mapping_3to2);
7932 Py_VISIT(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03007933 Py_VISIT(st->getattr);
Hai Shi1f577ce2020-03-02 14:28:44 +08007934 Py_VISIT(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007935 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007936}
7937
7938static struct PyModuleDef _picklemodule = {
7939 PyModuleDef_HEAD_INIT,
Stefan Krahf483b0f2013-12-14 13:43:10 +01007940 "_pickle", /* m_name */
7941 pickle_module_doc, /* m_doc */
7942 sizeof(PickleState), /* m_size */
7943 pickle_methods, /* m_methods */
7944 NULL, /* m_reload */
7945 pickle_traverse, /* m_traverse */
7946 pickle_clear, /* m_clear */
7947 (freefunc)pickle_free /* m_free */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007948};
7949
7950PyMODINIT_FUNC
7951PyInit__pickle(void)
7952{
7953 PyObject *m;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007954 PickleState *st;
7955
7956 m = PyState_FindModule(&_picklemodule);
7957 if (m) {
7958 Py_INCREF(m);
7959 return m;
7960 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007961
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007962 if (PyType_Ready(&Pdata_Type) < 0)
7963 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007964 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7965 return NULL;
7966 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7967 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007968
7969 /* Create the module and add the functions. */
7970 m = PyModule_Create(&_picklemodule);
7971 if (m == NULL)
7972 return NULL;
7973
Antoine Pitrou91f43802019-05-26 17:10:09 +02007974 /* Add types */
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007975 if (PyModule_AddType(m, &Pickler_Type) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007976 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007977 }
7978 if (PyModule_AddType(m, &Unpickler_Type) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007979 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007980 }
7981 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02007982 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007983 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007984
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007985 st = _Pickle_GetState(m);
7986
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007987 /* Initialize the exceptions. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007988 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7989 if (st->PickleError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007990 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007991 st->PicklingError = \
7992 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7993 if (st->PicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007994 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007995 st->UnpicklingError = \
7996 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7997 if (st->UnpicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007998 return NULL;
7999
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008000 Py_INCREF(st->PickleError);
8001 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008002 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008003 Py_INCREF(st->PicklingError);
8004 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008005 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008006 Py_INCREF(st->UnpicklingError);
8007 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008008 return NULL;
8009
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008010 if (_Pickle_InitState(st) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008011 return NULL;
8012
8013 return m;
8014}