blob: f67fb6a65c38caa010e0e6da88bb8480eede639d [file] [log] [blame]
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000011#include "Python.h"
12#include "structmember.h"
13
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -080014PyDoc_STRVAR(pickle_module_doc,
15"Optimized C implementation for the Python pickle module.");
16
Larry Hastings61272b72014-01-07 12:41:53 -080017/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080018module _pickle
Larry Hastingsc2047262014-01-25 20:43:29 -080019class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
Larry Hastings61272b72014-01-07 12:41:53 -080023[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030024/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080025
/* Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};
33
/* Pickle opcodes. These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each opcode is the single character that identifies it in a pickle
   stream; the opcodes are grouped below by the protocol that introduced
   them. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8      = '\x96',
    NEXT_BUFFER     = '\x97',
    READONLY_BUFFER = '\x98'
};
114
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
136
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800137/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800139/* State of the pickle module, per PEP 3121. */
140typedef struct {
141 /* Exception classes for pickle. */
142 PyObject *PickleError;
143 PyObject *PicklingError;
144 PyObject *UnpicklingError;
Larry Hastings61272b72014-01-07 12:41:53 -0800145
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800146 /* copyreg.dispatch_table, {type_object: pickling_function} */
147 PyObject *dispatch_table;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000148
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800149 /* For the extension opcodes EXT1, EXT2 and EXT4. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000150
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800151 /* copyreg._extension_registry, {(module_name, function_name): code} */
152 PyObject *extension_registry;
153 /* copyreg._extension_cache, {code: object} */
154 PyObject *extension_cache;
155 /* copyreg._inverted_registry, {code: (module_name, function_name)} */
156 PyObject *inverted_registry;
157
158 /* Import mappings for compatibility with Python 2.x */
159
160 /* _compat_pickle.NAME_MAPPING,
161 {(oldmodule, oldname): (newmodule, newname)} */
162 PyObject *name_mapping_2to3;
163 /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
164 PyObject *import_mapping_2to3;
165 /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
166 PyObject *name_mapping_3to2;
167 PyObject *import_mapping_3to2;
168
169 /* codecs.encode, used for saving bytes in older protocols */
170 PyObject *codecs_encode;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300171 /* builtins.getattr, used for saving nested names with protocol < 4 */
172 PyObject *getattr;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300173 /* functools.partial, used for implementing __newobj_ex__ with protocols
174 2 and 3 */
175 PyObject *partial;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800176} PickleState;
177
178/* Forward declaration of the _pickle module definition. */
179static struct PyModuleDef _picklemodule;
180
181/* Given a module object, get its per-module state. */
182static PickleState *
183_Pickle_GetState(PyObject *module)
184{
185 return (PickleState *)PyModule_GetState(module);
186}
187
188/* Find the module instance imported in the currently running sub-interpreter
189 and get its state. */
190static PickleState *
191_Pickle_GetGlobalState(void)
192{
193 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194}
195
196/* Clear the given pickle module state. */
197static void
198_Pickle_ClearState(PickleState *st)
199{
200 Py_CLEAR(st->PickleError);
201 Py_CLEAR(st->PicklingError);
202 Py_CLEAR(st->UnpicklingError);
203 Py_CLEAR(st->dispatch_table);
204 Py_CLEAR(st->extension_registry);
205 Py_CLEAR(st->extension_cache);
206 Py_CLEAR(st->inverted_registry);
207 Py_CLEAR(st->name_mapping_2to3);
208 Py_CLEAR(st->import_mapping_2to3);
209 Py_CLEAR(st->name_mapping_3to2);
210 Py_CLEAR(st->import_mapping_3to2);
211 Py_CLEAR(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300212 Py_CLEAR(st->getattr);
Victor Stinner9ba97df2015-11-17 12:15:07 +0100213 Py_CLEAR(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800214}
215
216/* Initialize the given pickle module state. */
217static int
218_Pickle_InitState(PickleState *st)
219{
220 PyObject *copyreg = NULL;
221 PyObject *compat_pickle = NULL;
222 PyObject *codecs = NULL;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300223 PyObject *functools = NULL;
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200224 _Py_IDENTIFIER(getattr);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800225
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200226 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300227 if (st->getattr == NULL)
228 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300229
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800230 copyreg = PyImport_ImportModule("copyreg");
231 if (!copyreg)
232 goto error;
233 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234 if (!st->dispatch_table)
235 goto error;
236 if (!PyDict_CheckExact(st->dispatch_table)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg.dispatch_table should be a dict, not %.200s",
239 Py_TYPE(st->dispatch_table)->tp_name);
240 goto error;
241 }
242 st->extension_registry = \
243 PyObject_GetAttrString(copyreg, "_extension_registry");
244 if (!st->extension_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->extension_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._extension_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250 goto error;
251 }
252 st->inverted_registry = \
253 PyObject_GetAttrString(copyreg, "_inverted_registry");
254 if (!st->inverted_registry)
255 goto error;
256 if (!PyDict_CheckExact(st->inverted_registry)) {
257 PyErr_Format(PyExc_RuntimeError,
258 "copyreg._inverted_registry should be a dict, "
259 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260 goto error;
261 }
262 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263 if (!st->extension_cache)
264 goto error;
265 if (!PyDict_CheckExact(st->extension_cache)) {
266 PyErr_Format(PyExc_RuntimeError,
267 "copyreg._extension_cache should be a dict, "
268 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269 goto error;
270 }
271 Py_CLEAR(copyreg);
272
273 /* Load the 2.x -> 3.x stdlib module mapping tables */
274 compat_pickle = PyImport_ImportModule("_compat_pickle");
275 if (!compat_pickle)
276 goto error;
277 st->name_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279 if (!st->name_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284 Py_TYPE(st->name_mapping_2to3)->tp_name);
285 goto error;
286 }
287 st->import_mapping_2to3 = \
288 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289 if (!st->import_mapping_2to3)
290 goto error;
291 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292 PyErr_Format(PyExc_RuntimeError,
293 "_compat_pickle.IMPORT_MAPPING should be a dict, "
294 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295 goto error;
296 }
297 /* ... and the 3.x -> 2.x mapping tables */
298 st->name_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300 if (!st->name_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306 goto error;
307 }
308 st->import_mapping_3to2 = \
309 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310 if (!st->import_mapping_3to2)
311 goto error;
312 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313 PyErr_Format(PyExc_RuntimeError,
314 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316 goto error;
317 }
318 Py_CLEAR(compat_pickle);
319
320 codecs = PyImport_ImportModule("codecs");
321 if (codecs == NULL)
322 goto error;
323 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324 if (st->codecs_encode == NULL) {
325 goto error;
326 }
327 if (!PyCallable_Check(st->codecs_encode)) {
328 PyErr_Format(PyExc_RuntimeError,
329 "codecs.encode should be a callable, not %.200s",
330 Py_TYPE(st->codecs_encode)->tp_name);
331 goto error;
332 }
333 Py_CLEAR(codecs);
334
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300335 functools = PyImport_ImportModule("functools");
336 if (!functools)
337 goto error;
338 st->partial = PyObject_GetAttrString(functools, "partial");
339 if (!st->partial)
340 goto error;
341 Py_CLEAR(functools);
342
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800343 return 0;
344
345 error:
346 Py_CLEAR(copyreg);
347 Py_CLEAR(compat_pickle);
348 Py_CLEAR(codecs);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300349 Py_CLEAR(functools);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800350 _Pickle_ClearState(st);
351 return -1;
352}
353
354/* Helper for calling a function with a single argument quickly.
355
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800356 This function steals the reference of the given argument. */
357static PyObject *
358_Pickle_FastCall(PyObject *func, PyObject *obj)
359{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800360 PyObject *result;
361
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200362 result = _PyObject_CallOneArg(func, obj);
Victor Stinner75210692016-08-19 18:59:15 +0200363 Py_DECREF(obj);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800364 return result;
365}
366
367/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000368
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200369/* Retrieve and deconstruct a method for avoiding a reference cycle
370 (pickler -> bound method of pickler -> pickler) */
371static int
372init_method_ref(PyObject *self, _Py_Identifier *name,
373 PyObject **method_func, PyObject **method_self)
374{
375 PyObject *func, *func2;
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200376 int ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200377
378 /* *method_func and *method_self should be consistent. All refcount decrements
379 should be occurred after setting *method_self and *method_func. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200380 ret = _PyObject_LookupAttrId(self, name, &func);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200381 if (func == NULL) {
382 *method_self = NULL;
383 Py_CLEAR(*method_func);
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200384 return ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200385 }
386
387 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388 /* Deconstruct a bound Python method */
389 func2 = PyMethod_GET_FUNCTION(func);
390 Py_INCREF(func2);
391 *method_self = self; /* borrowed */
392 Py_XSETREF(*method_func, func2);
393 Py_DECREF(func);
394 return 0;
395 }
396 else {
397 *method_self = NULL;
398 Py_XSETREF(*method_func, func);
399 return 0;
400 }
401}
402
403/* Bind a method if it was deconstructed */
404static PyObject *
405reconstruct_method(PyObject *func, PyObject *self)
406{
407 if (self) {
408 return PyMethod_New(func, self);
409 }
410 else {
411 Py_INCREF(func);
412 return func;
413 }
414}
415
416static PyObject *
417call_method(PyObject *func, PyObject *self, PyObject *obj)
418{
419 if (self) {
420 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421 }
422 else {
Jeroen Demeyer196a5302019-07-04 12:31:34 +0200423 return _PyObject_CallOneArg(func, obj);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200424 }
425}
426
427/*************************************************************************/
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429/* Internal data type used as the unpickling stack. */
430typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000431 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000432 PyObject **data;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200433 int mark_set; /* is MARK set? */
434 Py_ssize_t fence; /* position of top MARK or 0 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000435 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000436} Pdata;
437
438static void
439Pdata_dealloc(Pdata *self)
440{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200441 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000442 while (--i >= 0) {
443 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000444 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000445 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000446 PyObject_Del(self);
447}
448
449static PyTypeObject Pdata_Type = {
450 PyVarObject_HEAD_INIT(NULL, 0)
451 "_pickle.Pdata", /*tp_name*/
452 sizeof(Pdata), /*tp_basicsize*/
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +0200453 sizeof(PyObject *), /*tp_itemsize*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000454 (destructor)Pdata_dealloc, /*tp_dealloc*/
455};
456
457static PyObject *
458Pdata_New(void)
459{
460 Pdata *self;
461
462 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000464 Py_SIZE(self) = 0;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200465 self->mark_set = 0;
466 self->fence = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000467 self->allocated = 8;
468 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000469 if (self->data)
470 return (PyObject *)self;
471 Py_DECREF(self);
472 return PyErr_NoMemory();
473}
474
475
476/* Retain only the initial clearto items. If clearto >= the current
477 * number of items, this is a (non-erroneous) NOP.
478 */
479static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200480Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000481{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200482 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000483
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200484 assert(clearto >= self->fence);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000485 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000486 return 0;
487
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000488 while (--i >= clearto) {
489 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000491 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000492 return 0;
493}
494
495static int
496Pdata_grow(Pdata *self)
497{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000498 PyObject **data = self->data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200499 size_t allocated = (size_t)self->allocated;
500 size_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000501
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000502 new_allocated = (allocated >> 3) + 6;
503 /* check for integer overflow */
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200504 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000505 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000506 new_allocated += allocated;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500507 PyMem_RESIZE(data, PyObject *, new_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000508 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000509 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000510
511 self->data = data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200512 self->allocated = (Py_ssize_t)new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000513 return 0;
514
515 nomemory:
516 PyErr_NoMemory();
517 return -1;
518}
519
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200520static int
521Pdata_stack_underflow(Pdata *self)
522{
523 PickleState *st = _Pickle_GetGlobalState();
524 PyErr_SetString(st->UnpicklingError,
525 self->mark_set ?
526 "unexpected MARK found" :
527 "unpickling stack underflow");
528 return -1;
529}
530
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000531/* D is a Pdata*. Pop the topmost element and store it into V, which
532 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
533 * is raised and V is set to NULL.
534 */
535static PyObject *
536Pdata_pop(Pdata *self)
537{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200538 if (Py_SIZE(self) <= self->fence) {
539 Pdata_stack_underflow(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000540 return NULL;
541 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000542 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000543}
544#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
545
546static int
547Pdata_push(Pdata *self, PyObject *obj)
548{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000549 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000550 return -1;
551 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000552 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000553 return 0;
554}
555
/* Push an object on stack, transferring its ownership to the stack.
   On failure, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure, the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
564
565static PyObject *
566Pdata_poptuple(Pdata *self, Py_ssize_t start)
567{
568 PyObject *tuple;
569 Py_ssize_t len, i, j;
570
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200571 if (start < self->fence) {
572 Pdata_stack_underflow(self);
573 return NULL;
574 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000575 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000576 tuple = PyTuple_New(len);
577 if (tuple == NULL)
578 return NULL;
579 for (i = start, j = 0; j < len; i++, j++)
580 PyTuple_SET_ITEM(tuple, j, self->data[i]);
581
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000582 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000583 return tuple;
584}
585
586static PyObject *
587Pdata_poplist(Pdata *self, Py_ssize_t start)
588{
589 PyObject *list;
590 Py_ssize_t len, i, j;
591
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000592 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000593 list = PyList_New(len);
594 if (list == NULL)
595 return NULL;
596 for (i = start, j = 0; j < len; i++, j++)
597 PyList_SET_ITEM(list, j, self->data[i]);
598
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000599 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000600 return list;
601}
602
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000603typedef struct {
604 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200605 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000606} PyMemoEntry;
607
608typedef struct {
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700609 size_t mt_mask;
610 size_t mt_used;
611 size_t mt_allocated;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000612 PyMemoEntry *mt_table;
613} PyMemoTable;
614
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000615typedef struct PicklerObject {
616 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000617 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000618 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000619 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000620 PyObject *pers_func; /* persistent_id() method, can be NULL */
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200621 PyObject *pers_func_self; /* borrowed reference to self if pers_func
622 is an unbound method, NULL otherwise */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100623 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Pierre Glaser289f1f82019-05-08 23:08:25 +0200624 PyObject *reducer_override; /* hook for invoking user-defined callbacks
625 instead of save_global when pickling
626 functions and classes*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000627
628 PyObject *write; /* write() method of the output stream. */
629 PyObject *output_buffer; /* Write into a local bytearray buffer before
630 flushing to the stream. */
631 Py_ssize_t output_len; /* Length of output_buffer. */
632 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000633 int proto; /* Pickle protocol number, >= 0 */
634 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100635 int framing; /* True when framing is enabled, proto >= 4 */
636 Py_ssize_t frame_start; /* Position in output_buffer where the
Martin Pantera90a4a92016-05-30 04:04:50 +0000637 current frame begins. -1 if there
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100638 is no frame currently open. */
639
640 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000641 int fast; /* Enable fast mode if set to a true value.
642 The fast mode disable the usage of memo,
643 therefore speeding the pickling process by
644 not generating superfluous PUT opcodes. It
645 should not be used if with self-referential
646 objects. */
647 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000648 int fix_imports; /* Indicate whether Pickler should fix
649 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000650 PyObject *fast_memo;
Antoine Pitrou91f43802019-05-26 17:10:09 +0200651 PyObject *buffer_callback; /* Callback for out-of-band buffers, or NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000652} PicklerObject;
653
654typedef struct UnpicklerObject {
655 PyObject_HEAD
656 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000657
658 /* The unpickler memo is just an array of PyObject *s. Using a dict
659 is unnecessary, since the keys are contiguous ints. */
660 PyObject **memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700661 size_t memo_size; /* Capacity of the memo array */
662 size_t memo_len; /* Number of objects in the memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000663
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200665 PyObject *pers_func_self; /* borrowed reference to self if pers_func
666 is an unbound method, NULL otherwise */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000667
668 Py_buffer buffer;
669 char *input_buffer;
670 char *input_line;
671 Py_ssize_t input_len;
672 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000673 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100674
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000675 PyObject *read; /* read() method of the input stream. */
Antoine Pitrou91f43802019-05-26 17:10:09 +0200676 PyObject *readinto; /* readinto() method of the input stream. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000677 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000678 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrou91f43802019-05-26 17:10:09 +0200679 PyObject *buffers; /* iterable of out-of-band buffers, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000680
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000681 char *encoding; /* Name of the encoding to be used for
682 decoding strings pickled using Python
683 2.x. The default value is "ASCII" */
684 char *errors; /* Name of errors handling scheme to used when
685 decoding strings. The default value is
686 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500687 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000688 objects. */
689 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
690 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000691 int proto; /* Protocol of the pickle loaded. */
692 int fix_imports; /* Indicate whether Unpickler should fix
693 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000694} UnpicklerObject;
695
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200696typedef struct {
697 PyObject_HEAD
698 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
699} PicklerMemoProxyObject;
700
701typedef struct {
702 PyObject_HEAD
703 UnpicklerObject *unpickler;
704} UnpicklerMemoProxyObject;
705
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000706/* Forward declarations */
707static int save(PicklerObject *, PyObject *, int);
708static int save_reduce(PicklerObject *, PyObject *, PyObject *);
709static PyTypeObject Pickler_Type;
710static PyTypeObject Unpickler_Type;
711
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200712#include "clinic/_pickle.c.h"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000713
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created memo table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on each probe of a lookup. */
#define PERTURB_SHIFT 5
722
723
724static PyMemoTable *
725PyMemoTable_New(void)
726{
727 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
728 if (memo == NULL) {
729 PyErr_NoMemory();
730 return NULL;
731 }
732
733 memo->mt_used = 0;
734 memo->mt_allocated = MT_MINSIZE;
735 memo->mt_mask = MT_MINSIZE - 1;
736 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
737 if (memo->mt_table == NULL) {
738 PyMem_FREE(memo);
739 PyErr_NoMemory();
740 return NULL;
741 }
742 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
743
744 return memo;
745}
746
747static PyMemoTable *
748PyMemoTable_Copy(PyMemoTable *self)
749{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000750 PyMemoTable *new = PyMemoTable_New();
751 if (new == NULL)
752 return NULL;
753
754 new->mt_used = self->mt_used;
755 new->mt_allocated = self->mt_allocated;
756 new->mt_mask = self->mt_mask;
757 /* The table we get from _New() is probably smaller than we wanted.
758 Free it and allocate one that's the right size. */
759 PyMem_FREE(new->mt_table);
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500760 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000761 if (new->mt_table == NULL) {
762 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200763 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000764 return NULL;
765 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700766 for (size_t i = 0; i < self->mt_allocated; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000767 Py_XINCREF(self->mt_table[i].me_key);
768 }
769 memcpy(new->mt_table, self->mt_table,
770 sizeof(PyMemoEntry) * self->mt_allocated);
771
772 return new;
773}
774
775static Py_ssize_t
776PyMemoTable_Size(PyMemoTable *self)
777{
778 return self->mt_used;
779}
780
781static int
782PyMemoTable_Clear(PyMemoTable *self)
783{
784 Py_ssize_t i = self->mt_allocated;
785
786 while (--i >= 0) {
787 Py_XDECREF(self->mt_table[i].me_key);
788 }
789 self->mt_used = 0;
790 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
791 return 0;
792}
793
794static void
795PyMemoTable_Del(PyMemoTable *self)
796{
797 if (self == NULL)
798 return;
799 PyMemoTable_Clear(self);
800
801 PyMem_FREE(self->mt_table);
802 PyMem_FREE(self);
803}
804
805/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
806 can be considerably simpler than dictobject.c's lookdict(). */
807static PyMemoEntry *
808_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
809{
810 size_t i;
811 size_t perturb;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700812 size_t mask = self->mt_mask;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000813 PyMemoEntry *table = self->mt_table;
814 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000815 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000816
817 i = hash & mask;
818 entry = &table[i];
819 if (entry->me_key == NULL || entry->me_key == key)
820 return entry;
821
822 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
823 i = (i << 2) + i + perturb + 1;
824 entry = &table[i & mask];
825 if (entry->me_key == NULL || entry->me_key == key)
826 return entry;
827 }
Barry Warsawb2e57942017-09-14 18:13:16 -0700828 Py_UNREACHABLE();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000829}
830
831/* Returns -1 on failure, 0 on success. */
832static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700833_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000834{
835 PyMemoEntry *oldtable = NULL;
836 PyMemoEntry *oldentry, *newentry;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700837 size_t new_size = MT_MINSIZE;
838 size_t to_process;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000839
840 assert(min_size > 0);
841
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700842 if (min_size > PY_SSIZE_T_MAX) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000843 PyErr_NoMemory();
844 return -1;
845 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700846
847 /* Find the smallest valid table size >= min_size. */
848 while (new_size < min_size) {
849 new_size <<= 1;
850 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000851 /* new_size needs to be a power of two. */
852 assert((new_size & (new_size - 1)) == 0);
853
854 /* Allocate new table. */
855 oldtable = self->mt_table;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500856 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000857 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200858 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000859 PyErr_NoMemory();
860 return -1;
861 }
862 self->mt_allocated = new_size;
863 self->mt_mask = new_size - 1;
864 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
865
866 /* Copy entries from the old table. */
867 to_process = self->mt_used;
868 for (oldentry = oldtable; to_process > 0; oldentry++) {
869 if (oldentry->me_key != NULL) {
870 to_process--;
871 /* newentry is a pointer to a chunk of the new
872 mt_table, so we're setting the key:value pair
873 in-place. */
874 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
875 newentry->me_key = oldentry->me_key;
876 newentry->me_value = oldentry->me_value;
877 }
878 }
879
880 /* Deallocate the old table. */
881 PyMem_FREE(oldtable);
882 return 0;
883}
884
885/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200886static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000887PyMemoTable_Get(PyMemoTable *self, PyObject *key)
888{
889 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
890 if (entry->me_key == NULL)
891 return NULL;
892 return &entry->me_value;
893}
894
895/* Returns -1 on failure, 0 on success. */
896static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200897PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000898{
899 PyMemoEntry *entry;
900
901 assert(key != NULL);
902
903 entry = _PyMemoTable_Lookup(self, key);
904 if (entry->me_key != NULL) {
905 entry->me_value = value;
906 return 0;
907 }
908 Py_INCREF(key);
909 entry->me_key = key;
910 entry->me_value = value;
911 self->mt_used++;
912
913 /* If we added a key, we can safely resize. Otherwise just return!
914 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
915 *
916 * Quadrupling the size improves average table sparseness
917 * (reducing collisions) at the cost of some memory. It also halves
918 * the number of expensive resize operations in a growing memo table.
919 *
920 * Very large memo tables (over 50K items) use doubling instead.
921 * This may help applications with severe memory constraints.
922 */
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700923 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000924 return 0;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700925 }
926 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
927 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
928 return _PyMemoTable_ResizeTable(self, desired_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000929}
930
931#undef MT_MINSIZE
932#undef PERTURB_SHIFT
933
934/*************************************************************************/
935
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000936
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000937static int
938_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000939{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300940 Py_XSETREF(self->output_buffer,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200941 PyBytes_FromStringAndSize(NULL, self->max_output_len));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000942 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000943 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000944 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100945 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000946 return 0;
947}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000948
/* Write `value` to out[0..7] as an 8-byte little-endian integer.  When
   size_t is narrower than 8 bytes the remaining high bytes are zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    size_t pos = 0;

    /* Emit the least significant byte first, then shift it away. */
    for (; pos < sizeof(size_t); pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
    /* Zero-pad up to 8 bytes. */
    while (pos < 8) {
        out[pos++] = 0;
    }
}
963
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100964static int
965_Pickler_CommitFrame(PicklerObject *self)
966{
967 size_t frame_len;
968 char *qdata;
969
970 if (!self->framing || self->frame_start == -1)
971 return 0;
972 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
973 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
Serhiy Storchaka1211c9a2018-01-20 16:42:44 +0200974 if (frame_len >= FRAME_SIZE_MIN) {
975 qdata[0] = FRAME;
976 _write_size64(qdata + 1, frame_len);
977 }
978 else {
979 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
980 self->output_len -= FRAME_HEADER_SIZE;
981 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100982 self->frame_start = -1;
983 return 0;
984}
985
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986static PyObject *
987_Pickler_GetString(PicklerObject *self)
988{
989 PyObject *output_buffer = self->output_buffer;
990
991 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100992
993 if (_Pickler_CommitFrame(self))
994 return NULL;
995
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000996 self->output_buffer = NULL;
997 /* Resize down to exact size */
998 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
999 return NULL;
1000 return output_buffer;
1001}
1002
1003static int
1004_Pickler_FlushToFile(PicklerObject *self)
1005{
1006 PyObject *output, *result;
1007
1008 assert(self->write != NULL);
1009
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001010 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001011 output = _Pickler_GetString(self);
1012 if (output == NULL)
1013 return -1;
1014
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001015 result = _Pickle_FastCall(self->write, output);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016 Py_XDECREF(result);
1017 return (result == NULL) ? -1 : 0;
1018}
1019
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001020static int
1021_Pickler_OpcodeBoundary(PicklerObject *self)
1022{
1023 Py_ssize_t frame_len;
1024
1025 if (!self->framing || self->frame_start == -1) {
1026 return 0;
1027 }
1028 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1029 if (frame_len >= FRAME_SIZE_TARGET) {
1030 if(_Pickler_CommitFrame(self)) {
1031 return -1;
1032 }
Leo Ariasc3d95082018-02-03 18:36:10 -06001033 /* Flush the content of the committed frame to the underlying
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001034 * file and reuse the pickler buffer for the next frame so as
1035 * to limit memory usage when dumping large complex objects to
1036 * a file.
1037 *
1038 * self->write is NULL when called via dumps.
1039 */
1040 if (self->write != NULL) {
1041 if (_Pickler_FlushToFile(self) < 0) {
1042 return -1;
1043 }
1044 if (_Pickler_ClearBuffer(self) < 0) {
1045 return -1;
1046 }
1047 }
1048 }
1049 return 0;
1050}
1051
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001052static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001053_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001054{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001055 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001056 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001057 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001058
1059 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001060 need_new_frame = (self->framing && self->frame_start == -1);
1061
1062 if (need_new_frame)
1063 n = data_len + FRAME_HEADER_SIZE;
1064 else
1065 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001066
1067 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001068 if (required > self->max_output_len) {
1069 /* Make place in buffer for the pickle chunk */
1070 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1071 PyErr_NoMemory();
1072 return -1;
1073 }
1074 self->max_output_len = (self->output_len + n) / 2 * 3;
1075 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1076 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001077 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001078 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001079 if (need_new_frame) {
1080 /* Setup new frame */
1081 Py_ssize_t frame_start = self->output_len;
1082 self->frame_start = frame_start;
1083 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1084 /* Write an invalid value, for debugging */
1085 buffer[frame_start + i] = 0xFE;
1086 }
1087 self->output_len += FRAME_HEADER_SIZE;
1088 }
1089 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001090 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001091 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001092 buffer[self->output_len + i] = s[i];
1093 }
1094 }
1095 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001096 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001097 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001098 self->output_len += data_len;
1099 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001100}
1101
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001102static PicklerObject *
1103_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001104{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001105 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001106
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001107 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1108 if (self == NULL)
1109 return NULL;
1110
1111 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01001112 self->dispatch_table = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001113 self->buffer_callback = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001114 self->write = NULL;
1115 self->proto = 0;
1116 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001117 self->framing = 0;
1118 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001119 self->fast = 0;
1120 self->fast_nesting = 0;
1121 self->fix_imports = 0;
1122 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001123 self->max_output_len = WRITE_BUF_SIZE;
1124 self->output_len = 0;
Pierre Glaser289f1f82019-05-08 23:08:25 +02001125 self->reducer_override = NULL;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001126
1127 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001128 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1129 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +02001130
1131 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +02001132 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001133 return NULL;
1134 }
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001135
1136 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001137 return self;
1138}
1139
1140static int
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001141_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001142{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001143 long proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001144
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001145 if (protocol == Py_None) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001146 proto = DEFAULT_PROTOCOL;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001147 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001148 else {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001149 proto = PyLong_AsLong(protocol);
1150 if (proto < 0) {
1151 if (proto == -1 && PyErr_Occurred())
1152 return -1;
1153 proto = HIGHEST_PROTOCOL;
1154 }
1155 else if (proto > HIGHEST_PROTOCOL) {
1156 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1157 HIGHEST_PROTOCOL);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 }
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001161 self->proto = (int)proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001162 self->bin = proto > 0;
1163 self->fix_imports = fix_imports && proto < 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001164 return 0;
1165}
1166
1167/* Returns -1 (with an exception set) on failure, 0 on success. This may
1168 be called once on a freshly created Pickler. */
1169static int
1170_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1171{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001172 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001173 assert(file != NULL);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001174 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1175 return -1;
1176 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001177 if (self->write == NULL) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001178 PyErr_SetString(PyExc_TypeError,
1179 "file must have a 'write' attribute");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001180 return -1;
1181 }
1182
1183 return 0;
1184}
1185
Antoine Pitrou91f43802019-05-26 17:10:09 +02001186static int
1187_Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1188{
1189 if (buffer_callback == Py_None) {
1190 buffer_callback = NULL;
1191 }
1192 if (buffer_callback != NULL && self->proto < 5) {
1193 PyErr_SetString(PyExc_ValueError,
1194 "buffer_callback needs protocol >= 5");
1195 return -1;
1196 }
1197
1198 Py_XINCREF(buffer_callback);
1199 self->buffer_callback = buffer_callback;
1200 return 0;
1201}
1202
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001203/* Returns the size of the input on success, -1 on failure. This takes its
1204 own reference to `input`. */
1205static Py_ssize_t
1206_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1207{
1208 if (self->buffer.buf != NULL)
1209 PyBuffer_Release(&self->buffer);
1210 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1211 return -1;
1212 self->input_buffer = self->buffer.buf;
1213 self->input_len = self->buffer.len;
1214 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001215 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001216 return self->input_len;
1217}
1218
Antoine Pitrou04248a82010-10-12 20:51:21 +00001219static int
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001220bad_readline(void)
1221{
1222 PickleState *st = _Pickle_GetGlobalState();
1223 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1224 return -1;
1225}
1226
Antoine Pitrou91f43802019-05-26 17:10:09 +02001227/* Skip any consumed data that was only prefetched using peek() */
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001228static int
Antoine Pitrou04248a82010-10-12 20:51:21 +00001229_Unpickler_SkipConsumed(UnpicklerObject *self)
1230{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001231 Py_ssize_t consumed;
1232 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001233
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001234 consumed = self->next_read_idx - self->prefetched_idx;
1235 if (consumed <= 0)
1236 return 0;
1237
1238 assert(self->peek); /* otherwise we did something wrong */
Martin Panter6245cb32016-04-15 02:14:19 +00001239 /* This makes a useless copy... */
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001240 r = PyObject_CallFunction(self->read, "n", consumed);
1241 if (r == NULL)
1242 return -1;
1243 Py_DECREF(r);
1244
1245 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001246 return 0;
1247}
1248
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001249static const Py_ssize_t READ_WHOLE_LINE = -1;
1250
1251/* If reading from a file, we need to only pull the bytes we need, since there
1252 may be multiple pickle objects arranged contiguously in the same input
1253 buffer.
1254
1255 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1256 bytes from the input stream/buffer.
1257
1258 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1259 failure; on success, returns the number of bytes read from the file.
1260
1261 On success, self->input_len will be 0; this is intentional so that when
1262 unpickling from a file, the "we've run out of data" code paths will trigger,
1263 causing the Unpickler to go back to the file for more data. Use the returned
1264 size to tell you how much data you can process. */
1265static Py_ssize_t
1266_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1267{
1268 PyObject *data;
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001269 Py_ssize_t read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001270
1271 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +02001272
Antoine Pitrou04248a82010-10-12 20:51:21 +00001273 if (_Unpickler_SkipConsumed(self) < 0)
1274 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001275
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001276 if (n == READ_WHOLE_LINE) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02001277 data = PyObject_CallNoArgs(self->readline);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001278 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001279 else {
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001280 PyObject *len;
1281 /* Prefetch some data without advancing the file pointer, if possible */
1282 if (self->peek && n < PREFETCH) {
1283 len = PyLong_FromSsize_t(PREFETCH);
1284 if (len == NULL)
1285 return -1;
1286 data = _Pickle_FastCall(self->peek, len);
1287 if (data == NULL) {
1288 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1289 return -1;
1290 /* peek() is probably not supported by the given file object */
1291 PyErr_Clear();
1292 Py_CLEAR(self->peek);
1293 }
1294 else {
1295 read_size = _Unpickler_SetStringInput(self, data);
1296 Py_DECREF(data);
1297 self->prefetched_idx = 0;
1298 if (n <= read_size)
1299 return n;
1300 }
1301 }
1302 len = PyLong_FromSsize_t(n);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001303 if (len == NULL)
1304 return -1;
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001305 data = _Pickle_FastCall(self->read, len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001306 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001307 if (data == NULL)
1308 return -1;
1309
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001310 read_size = _Unpickler_SetStringInput(self, data);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001311 Py_DECREF(data);
1312 return read_size;
1313}
1314
Victor Stinner19ed27e2016-05-20 11:42:37 +02001315/* Don't call it directly: use _Unpickler_Read() */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001316static Py_ssize_t
Victor Stinner19ed27e2016-05-20 11:42:37 +02001317_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001318{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001319 Py_ssize_t num_read;
1320
Benjamin Peterson6aa15642015-09-27 01:16:03 -07001321 *s = NULL;
Benjamin Petersone48cf7e2015-09-26 00:08:34 -07001322 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1323 PickleState *st = _Pickle_GetGlobalState();
1324 PyErr_SetString(st->UnpicklingError,
1325 "read would overflow (invalid bytecode)");
1326 return -1;
1327 }
Victor Stinner19ed27e2016-05-20 11:42:37 +02001328
1329 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1330 assert(self->next_read_idx + n > self->input_len);
1331
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001332 if (!self->read)
1333 return bad_readline();
1334
Antoine Pitrou91f43802019-05-26 17:10:09 +02001335 /* Extend the buffer to satisfy desired size */
Antoine Pitrou04248a82010-10-12 20:51:21 +00001336 num_read = _Unpickler_ReadFromFile(self, n);
1337 if (num_read < 0)
1338 return -1;
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001339 if (num_read < n)
1340 return bad_readline();
Antoine Pitrou04248a82010-10-12 20:51:21 +00001341 *s = self->input_buffer;
1342 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001343 return n;
1344}
1345
Antoine Pitrou91f43802019-05-26 17:10:09 +02001346/* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1347 *
1348 * This should only be used for non-small data reads where potentially
1349 * avoiding a copy is beneficial. This method does not try to prefetch
1350 * more data into the input buffer.
1351 *
1352 * _Unpickler_Read() is recommended in most cases.
1353 */
1354static Py_ssize_t
1355_Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1356{
1357 assert(n != READ_WHOLE_LINE);
1358
1359 /* Read from available buffer data, if any */
1360 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1361 if (in_buffer > 0) {
1362 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1363 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1364 self->next_read_idx += to_read;
1365 buf += to_read;
1366 n -= to_read;
1367 if (n == 0) {
1368 /* Entire read was satisfied from buffer */
1369 return n;
1370 }
1371 }
1372
1373 /* Read from file */
1374 if (!self->readinto) {
1375 return bad_readline();
1376 }
1377 if (_Unpickler_SkipConsumed(self) < 0) {
1378 return -1;
1379 }
1380
1381 /* Call readinto() into user buffer */
1382 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1383 if (buf_obj == NULL) {
1384 return -1;
1385 }
1386 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1387 if (read_size_obj == NULL) {
1388 return -1;
1389 }
1390 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1391 Py_DECREF(read_size_obj);
1392
1393 if (read_size < 0) {
1394 if (!PyErr_Occurred()) {
1395 PyErr_SetString(PyExc_ValueError,
1396 "readinto() returned negative size");
1397 }
1398 return -1;
1399 }
1400 if (read_size < n) {
1401 return bad_readline();
1402 }
1403 return n;
1404}
1405
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly: it also copes with input streams, which
   the input buffer alone does not.

   When the input buffer already holds `n` more bytes, `*s` is pointed at
   them and next_read_idx is advanced by `n` without a function call.
   Otherwise the work is delegated to _Unpickler_ReadImpl().

   This is a macro: `self`, `s` and `n` may each be evaluated more than
   once, so the arguments must be free of side effects.

   Returns -1 (with an exception set) on failure.  On success, returns the
   number of chars read, i.e. `n`. */
#define _Unpickler_Read(self, s, n) \
    (((n) > (self)->input_len - (self)->next_read_idx)              \
     ? _Unpickler_ReadImpl(self, (s), (n))                          \
     : (*(s) = (self)->input_buffer + (self)->next_read_idx,        \
        (self)->next_read_idx += (n),                               \
        (n)))
1425
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001426static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001427_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1428 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001429{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001430 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001431 if (input_line == NULL) {
1432 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001433 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001434 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001435
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001436 memcpy(input_line, line, len);
1437 input_line[len] = '\0';
1438 self->input_line = input_line;
1439 *result = self->input_line;
1440 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001441}
1442
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001443/* Read a line from the input stream/buffer. If we run off the end of the input
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001444 before hitting \n, raise an error.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001445
1446 Returns the number of chars read, or -1 on failure. */
1447static Py_ssize_t
1448_Unpickler_Readline(UnpicklerObject *self, char **result)
1449{
1450 Py_ssize_t i, num_read;
1451
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001452 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001453 if (self->input_buffer[i] == '\n') {
1454 char *line_start = self->input_buffer + self->next_read_idx;
1455 num_read = i - self->next_read_idx + 1;
1456 self->next_read_idx = i + 1;
1457 return _Unpickler_CopyLine(self, line_start, num_read, result);
1458 }
1459 }
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001460 if (!self->read)
1461 return bad_readline();
Victor Stinner121aab42011-09-29 23:40:53 +02001462
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001463 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1464 if (num_read < 0)
1465 return -1;
1466 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1467 return bad_readline();
1468 self->next_read_idx = num_read;
1469 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001470}
1471
1472/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1473 will be modified in place. */
1474static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001475_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001476{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001477 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001478
1479 assert(new_size > self->memo_size);
1480
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001481 PyObject **memo_new = self->memo;
1482 PyMem_RESIZE(memo_new, PyObject *, new_size);
1483 if (memo_new == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001484 PyErr_NoMemory();
1485 return -1;
1486 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001487 self->memo = memo_new;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001488 for (i = self->memo_size; i < new_size; i++)
1489 self->memo[i] = NULL;
1490 self->memo_size = new_size;
1491 return 0;
1492}
1493
1494/* Returns NULL if idx is out of bounds. */
1495static PyObject *
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001496_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001497{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001498 if (idx >= self->memo_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 return NULL;
1500
1501 return self->memo[idx];
1502}
1503
1504/* Returns -1 (with an exception set) on failure, 0 on success.
1505 This takes its own reference to `value`. */
1506static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001507_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001508{
1509 PyObject *old_item;
1510
1511 if (idx >= self->memo_size) {
1512 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1513 return -1;
1514 assert(idx < self->memo_size);
1515 }
1516 Py_INCREF(value);
1517 old_item = self->memo[idx];
1518 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001519 if (old_item != NULL) {
1520 Py_DECREF(old_item);
1521 }
1522 else {
1523 self->memo_len++;
1524 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001525 return 0;
1526}
1527
1528static PyObject **
1529_Unpickler_NewMemo(Py_ssize_t new_size)
1530{
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001531 PyObject **memo = PyMem_NEW(PyObject *, new_size);
Victor Stinner42024562013-07-12 00:53:57 +02001532 if (memo == NULL) {
1533 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001534 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001535 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001536 memset(memo, 0, new_size * sizeof(PyObject *));
1537 return memo;
1538}
1539
1540/* Free the unpickler's memo, taking care to decref any items left in it. */
1541static void
1542_Unpickler_MemoCleanup(UnpicklerObject *self)
1543{
1544 Py_ssize_t i;
1545 PyObject **memo = self->memo;
1546
1547 if (self->memo == NULL)
1548 return;
1549 self->memo = NULL;
1550 i = self->memo_size;
1551 while (--i >= 0) {
1552 Py_XDECREF(memo[i]);
1553 }
1554 PyMem_FREE(memo);
1555}
1556
1557static UnpicklerObject *
1558_Unpickler_New(void)
1559{
1560 UnpicklerObject *self;
1561
1562 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1563 if (self == NULL)
1564 return NULL;
1565
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001566 self->pers_func = NULL;
1567 self->input_buffer = NULL;
1568 self->input_line = NULL;
1569 self->input_len = 0;
1570 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001571 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001572 self->read = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001573 self->readinto = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001574 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001575 self->peek = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001576 self->buffers = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001577 self->encoding = NULL;
1578 self->errors = NULL;
1579 self->marks = NULL;
1580 self->num_marks = 0;
1581 self->marks_size = 0;
1582 self->proto = 0;
1583 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001584 memset(&self->buffer, 0, sizeof(Py_buffer));
1585 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001586 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001587 self->memo = _Unpickler_NewMemo(self->memo_size);
1588 self->stack = (Pdata *)Pdata_New();
1589
1590 if (self->memo == NULL || self->stack == NULL) {
1591 Py_DECREF(self);
1592 return NULL;
1593 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001594
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001595 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001596 return self;
1597}
1598
1599/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001600 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001601static int
1602_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1603{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001604 _Py_IDENTIFIER(peek);
1605 _Py_IDENTIFIER(read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001606 _Py_IDENTIFIER(readinto);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001607 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001608
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001609 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1610 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001611 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001612 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001613 (void)_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001614 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001615 if (!self->readline || !self->readinto || !self->read) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001616 if (!PyErr_Occurred()) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001617 PyErr_SetString(PyExc_TypeError,
Antoine Pitrou91f43802019-05-26 17:10:09 +02001618 "file must have 'read', 'readinto' and "
1619 "'readline' attributes");
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001620 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001621 Py_CLEAR(self->read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001622 Py_CLEAR(self->readinto);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001623 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001624 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001625 return -1;
1626 }
1627 return 0;
1628}
1629
1630/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001631 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001632static int
1633_Unpickler_SetInputEncoding(UnpicklerObject *self,
1634 const char *encoding,
1635 const char *errors)
1636{
1637 if (encoding == NULL)
1638 encoding = "ASCII";
1639 if (errors == NULL)
1640 errors = "strict";
1641
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001642 self->encoding = _PyMem_Strdup(encoding);
1643 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001644 if (self->encoding == NULL || self->errors == NULL) {
1645 PyErr_NoMemory();
1646 return -1;
1647 }
1648 return 0;
1649}
1650
Antoine Pitrou91f43802019-05-26 17:10:09 +02001651/* Returns -1 (with an exception set) on failure, 0 on success. This may
1652 be called once on a freshly created Unpickler. */
1653static int
1654_Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1655{
Markus Mohrhard898318b2019-07-26 00:00:34 +08001656 if (buffers == NULL || buffers == Py_None) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02001657 self->buffers = NULL;
1658 }
1659 else {
1660 self->buffers = PyObject_GetIter(buffers);
1661 if (self->buffers == NULL) {
1662 return -1;
1663 }
1664 }
1665 return 0;
1666}
1667
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001668/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001669static int
1670memo_get(PicklerObject *self, PyObject *key)
1671{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001672 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001673 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001674 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001676 value = PyMemoTable_Get(self->memo, key);
1677 if (value == NULL) {
1678 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001679 return -1;
1680 }
1681
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001682 if (!self->bin) {
1683 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001684 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1685 "%" PY_FORMAT_SIZE_T "d\n", *value);
1686 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001687 }
1688 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001689 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001690 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001691 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001692 len = 2;
1693 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001694 else if ((size_t)*value <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001695 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001696 pdata[1] = (unsigned char)(*value & 0xff);
1697 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1698 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1699 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001700 len = 5;
1701 }
1702 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001703 PickleState *st = _Pickle_GetGlobalState();
1704 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001705 "memo id too large for LONG_BINGET");
1706 return -1;
1707 }
1708 }
1709
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001710 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001711 return -1;
1712
1713 return 0;
1714}
1715
1716/* Store an object in the memo, assign it a new unique ID based on the number
1717 of objects currently stored in the memo and generate a PUT opcode. */
1718static int
1719memo_put(PicklerObject *self, PyObject *obj)
1720{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001721 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001722 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001723 Py_ssize_t idx;
1724
1725 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001726
1727 if (self->fast)
1728 return 0;
1729
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001730 idx = PyMemoTable_Size(self->memo);
1731 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1732 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001733
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001734 if (self->proto >= 4) {
1735 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1736 return -1;
1737 return 0;
1738 }
1739 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001740 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001741 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001742 "%" PY_FORMAT_SIZE_T "d\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001743 len = strlen(pdata);
1744 }
1745 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001746 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001747 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001748 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001749 len = 2;
1750 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001751 else if ((size_t)idx <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001752 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001753 pdata[1] = (unsigned char)(idx & 0xff);
1754 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1755 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1756 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001757 len = 5;
1758 }
1759 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001760 PickleState *st = _Pickle_GetGlobalState();
1761 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001762 "memo id too large for LONG_BINPUT");
1763 return -1;
1764 }
1765 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001766 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001767 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001768
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001769 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001770}
1771
1772static PyObject *
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001773get_dotted_path(PyObject *obj, PyObject *name)
1774{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001775 _Py_static_string(PyId_dot, ".");
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001776 PyObject *dotted_path;
1777 Py_ssize_t i, n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001778
1779 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001780 if (dotted_path == NULL)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001781 return NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001782 n = PyList_GET_SIZE(dotted_path);
1783 assert(n >= 1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001784 for (i = 0; i < n; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001785 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001786 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
Antoine Pitrou6cd5eda2014-12-02 00:20:03 +01001787 if (obj == NULL)
1788 PyErr_Format(PyExc_AttributeError,
1789 "Can't pickle local object %R", name);
1790 else
1791 PyErr_Format(PyExc_AttributeError,
1792 "Can't pickle local attribute %R on %R", name, obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001793 Py_DECREF(dotted_path);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001794 return NULL;
1795 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001796 }
1797 return dotted_path;
1798}
1799
1800static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001801get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001802{
1803 Py_ssize_t i, n;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001804 PyObject *parent = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001805
1806 assert(PyList_CheckExact(names));
1807 Py_INCREF(obj);
1808 n = PyList_GET_SIZE(names);
1809 for (i = 0; i < n; i++) {
1810 PyObject *name = PyList_GET_ITEM(names, i);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001811 Py_XDECREF(parent);
1812 parent = obj;
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001813 (void)_PyObject_LookupAttr(parent, name, &obj);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001814 if (obj == NULL) {
1815 Py_DECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001816 return NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001817 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001818 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001819 if (pparent != NULL)
1820 *pparent = parent;
1821 else
1822 Py_XDECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001823 return obj;
1824}
1825
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001826
1827static PyObject *
1828getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1829{
1830 PyObject *dotted_path, *attr;
1831
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001832 if (allow_qualname) {
1833 dotted_path = get_dotted_path(obj, name);
1834 if (dotted_path == NULL)
1835 return NULL;
1836 attr = get_deep_attribute(obj, dotted_path, NULL);
1837 Py_DECREF(dotted_path);
1838 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001839 else {
1840 (void)_PyObject_LookupAttr(obj, name, &attr);
1841 }
1842 if (attr == NULL && !PyErr_Occurred()) {
1843 PyErr_Format(PyExc_AttributeError,
1844 "Can't get attribute %R on %R", name, obj);
1845 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001846 return attr;
1847}
1848
Eric Snow3f9eee62017-09-15 16:35:20 -06001849static int
1850_checkmodule(PyObject *module_name, PyObject *module,
1851 PyObject *global, PyObject *dotted_path)
1852{
1853 if (module == Py_None) {
1854 return -1;
1855 }
1856 if (PyUnicode_Check(module_name) &&
1857 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1858 return -1;
1859 }
1860
1861 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1862 if (candidate == NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001863 return -1;
1864 }
1865 if (candidate != global) {
1866 Py_DECREF(candidate);
1867 return -1;
1868 }
1869 Py_DECREF(candidate);
1870 return 0;
1871}
1872
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001873static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001874whichmodule(PyObject *global, PyObject *dotted_path)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001875{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001876 PyObject *module_name;
Eric Snow3f9eee62017-09-15 16:35:20 -06001877 PyObject *module = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001878 Py_ssize_t i;
Eric Snow3f9eee62017-09-15 16:35:20 -06001879 PyObject *modules;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001880 _Py_IDENTIFIER(__module__);
1881 _Py_IDENTIFIER(modules);
1882 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001883
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001884 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1885 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001886 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001887 if (module_name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001888 /* In some rare cases (e.g., bound methods of extension types),
1889 __module__ can be None. If it is so, then search sys.modules for
1890 the module of global. */
1891 if (module_name != Py_None)
1892 return module_name;
1893 Py_CLEAR(module_name);
1894 }
1895 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001896
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001897 /* Fallback on walking sys.modules */
Eric Snow3f9eee62017-09-15 16:35:20 -06001898 modules = _PySys_GetObjectId(&PyId_modules);
1899 if (modules == NULL) {
Victor Stinner1e53bba2013-07-16 22:26:05 +02001900 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001901 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001902 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001903 if (PyDict_CheckExact(modules)) {
1904 i = 0;
1905 while (PyDict_Next(modules, &i, &module_name, &module)) {
1906 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1907 Py_INCREF(module_name);
1908 return module_name;
1909 }
1910 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001911 return NULL;
Eric Snow3f9eee62017-09-15 16:35:20 -06001912 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001913 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001914 }
1915 else {
1916 PyObject *iterator = PyObject_GetIter(modules);
1917 if (iterator == NULL) {
1918 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001919 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001920 while ((module_name = PyIter_Next(iterator))) {
1921 module = PyObject_GetItem(modules, module_name);
1922 if (module == NULL) {
1923 Py_DECREF(module_name);
1924 Py_DECREF(iterator);
1925 return NULL;
1926 }
1927 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1928 Py_DECREF(module);
1929 Py_DECREF(iterator);
1930 return module_name;
1931 }
1932 Py_DECREF(module);
1933 Py_DECREF(module_name);
1934 if (PyErr_Occurred()) {
1935 Py_DECREF(iterator);
1936 return NULL;
1937 }
1938 }
1939 Py_DECREF(iterator);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001940 }
1941
1942 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001943 module_name = _PyUnicode_FromId(&PyId___main__);
Victor Stinneraf46eb82017-09-05 23:30:16 +02001944 Py_XINCREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001945 return module_name;
1946}
1947
1948/* fast_save_enter() and fast_save_leave() are guards against recursive
1949 objects when Pickler is used with the "fast mode" (i.e., with object
1950 memoization disabled). If the nesting of a list or dict object exceed
1951 FAST_NESTING_LIMIT, these guards will start keeping an internal
1952 reference to the seen list or dict objects and check whether these objects
1953 are recursive. These are not strictly necessary, since save() has a
1954 hard-coded recursion limit, but they give a nicer error message than the
1955 typical RuntimeError. */
1956static int
1957fast_save_enter(PicklerObject *self, PyObject *obj)
1958{
1959 /* if fast_nesting < 0, we're doing an error exit. */
1960 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1961 PyObject *key = NULL;
1962 if (self->fast_memo == NULL) {
1963 self->fast_memo = PyDict_New();
1964 if (self->fast_memo == NULL) {
1965 self->fast_nesting = -1;
1966 return 0;
1967 }
1968 }
1969 key = PyLong_FromVoidPtr(obj);
Mat Mf76231f2017-11-13 02:50:16 -05001970 if (key == NULL) {
1971 self->fast_nesting = -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972 return 0;
Mat Mf76231f2017-11-13 02:50:16 -05001973 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08001974 if (PyDict_GetItemWithError(self->fast_memo, key)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001975 Py_DECREF(key);
1976 PyErr_Format(PyExc_ValueError,
1977 "fast mode: can't pickle cyclic objects "
1978 "including object type %.200s at %p",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001979 Py_TYPE(obj)->tp_name, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001980 self->fast_nesting = -1;
1981 return 0;
1982 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08001983 if (PyErr_Occurred()) {
Mat Mf76231f2017-11-13 02:50:16 -05001984 Py_DECREF(key);
1985 self->fast_nesting = -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08001986 return 0;
1987 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001988 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1989 Py_DECREF(key);
1990 self->fast_nesting = -1;
1991 return 0;
1992 }
1993 Py_DECREF(key);
1994 }
1995 return 1;
1996}
1997
1998static int
1999fast_save_leave(PicklerObject *self, PyObject *obj)
2000{
2001 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2002 PyObject *key = PyLong_FromVoidPtr(obj);
2003 if (key == NULL)
2004 return 0;
2005 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2006 Py_DECREF(key);
2007 return 0;
2008 }
2009 Py_DECREF(key);
2010 }
2011 return 1;
2012}
2013
2014static int
2015save_none(PicklerObject *self, PyObject *obj)
2016{
2017 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019 return -1;
2020
2021 return 0;
2022}
2023
2024static int
2025save_bool(PicklerObject *self, PyObject *obj)
2026{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 if (self->proto >= 2) {
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002028 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002029 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002030 return -1;
2031 }
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002032 else {
2033 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2034 * so that unpicklers written before bools were introduced unpickle them
2035 * as ints, but unpicklers after can recognize that bools were intended.
2036 * Note that protocol 2 added direct ways to pickle bools.
2037 */
2038 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2039 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2040 return -1;
2041 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002042 return 0;
2043}
2044
2045static int
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002046save_long(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047{
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002048 PyObject *repr = NULL;
2049 Py_ssize_t size;
2050 long val;
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002051 int overflow;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002052 int status = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002053
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002054 val= PyLong_AsLongAndOverflow(obj, &overflow);
2055 if (!overflow && (sizeof(long) <= 4 ||
2056 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2057 {
Larry Hastings61272b72014-01-07 12:41:53 -08002058 /* result fits in a signed 4-byte integer.
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002059
2060 Note: we can't use -0x80000000L in the above condition because some
2061 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2062 before applying the unary minus when sizeof(long) <= 4. The
2063 resulting value stays unsigned which is commonly not what we want,
2064 so MSVC happily warns us about it. However, that result would have
2065 been fine because we guard for sizeof(long) <= 4 which turns the
2066 condition true in that particular case. */
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002067 char pdata[32];
2068 Py_ssize_t len = 0;
2069
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002070 if (self->bin) {
2071 pdata[1] = (unsigned char)(val & 0xff);
2072 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2073 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2074 pdata[4] = (unsigned char)((val >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002075
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002076 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2077 pdata[0] = BININT;
2078 len = 5;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002079 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002080 else if (pdata[2] != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002081 pdata[0] = BININT2;
2082 len = 3;
2083 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002084 else {
2085 pdata[0] = BININT1;
2086 len = 2;
2087 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002088 }
2089 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002090 sprintf(pdata, "%c%ld\n", INT, val);
2091 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002092 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002093 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002094 return -1;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002095
2096 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002097 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002098 assert(!PyErr_Occurred());
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002099
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002100 if (self->proto >= 2) {
2101 /* Linear-time pickling. */
2102 size_t nbits;
2103 size_t nbytes;
2104 unsigned char *pdata;
2105 char header[5];
2106 int i;
2107 int sign = _PyLong_Sign(obj);
2108
2109 if (sign == 0) {
2110 header[0] = LONG1;
2111 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002112 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002113 goto error;
2114 return 0;
2115 }
2116 nbits = _PyLong_NumBits(obj);
2117 if (nbits == (size_t)-1 && PyErr_Occurred())
2118 goto error;
2119 /* How many bytes do we need? There are nbits >> 3 full
2120 * bytes of data, and nbits & 7 leftover bits. If there
2121 * are any leftover bits, then we clearly need another
Min ho Kim96e12d52019-07-22 06:12:33 +10002122 * byte. What's not so obvious is that we *probably*
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002123 * need another byte even if there aren't any leftovers:
2124 * the most-significant bit of the most-significant byte
2125 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03002126 * opposite of the one we need. The exception is ints
2127 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002128 * its own 256's-complement, so has the right sign bit
2129 * even without the extra byte. That's a pain to check
2130 * for in advance, though, so we always grab an extra
2131 * byte at the start, and cut it back later if possible.
2132 */
2133 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01002134 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002135 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03002136 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002137 goto error;
2138 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002139 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002140 if (repr == NULL)
2141 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002142 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002143 i = _PyLong_AsByteArray((PyLongObject *)obj,
2144 pdata, nbytes,
2145 1 /* little endian */ , 1 /* signed */ );
2146 if (i < 0)
2147 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03002148 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002149 * needed. This is so iff the MSB is all redundant sign
2150 * bits.
2151 */
2152 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02002153 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002154 pdata[nbytes - 1] == 0xff &&
2155 (pdata[nbytes - 2] & 0x80) != 0) {
2156 nbytes--;
2157 }
2158
2159 if (nbytes < 256) {
2160 header[0] = LONG1;
2161 header[1] = (unsigned char)nbytes;
2162 size = 2;
2163 }
2164 else {
2165 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002166 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002167 for (i = 1; i < 5; i++) {
2168 header[i] = (unsigned char)(size & 0xff);
2169 size >>= 8;
2170 }
2171 size = 5;
2172 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002173 if (_Pickler_Write(self, header, size) < 0 ||
2174 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002175 goto error;
2176 }
2177 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002178 const char long_op = LONG;
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02002179 const char *string;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002180
Mark Dickinson8dd05142009-01-20 20:43:58 +00002181 /* proto < 2: write the repr and newline. This is quadratic-time (in
2182 the number of digits), in both directions. We add a trailing 'L'
2183 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002184
2185 repr = PyObject_Repr(obj);
2186 if (repr == NULL)
2187 goto error;
2188
Serhiy Storchaka06515832016-11-20 09:13:07 +02002189 string = PyUnicode_AsUTF8AndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002190 if (string == NULL)
2191 goto error;
2192
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002193 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2194 _Pickler_Write(self, string, size) < 0 ||
2195 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002196 goto error;
2197 }
2198
2199 if (0) {
2200 error:
2201 status = -1;
2202 }
2203 Py_XDECREF(repr);
2204
2205 return status;
2206}
2207
2208static int
2209save_float(PicklerObject *self, PyObject *obj)
2210{
2211 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2212
2213 if (self->bin) {
2214 char pdata[9];
2215 pdata[0] = BINFLOAT;
2216 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2217 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002218 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002219 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02002220 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002221 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00002222 int result = -1;
2223 char *buf = NULL;
2224 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002226 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002227 goto done;
2228
Serhiy Storchakac86ca262015-02-15 14:18:32 +02002229 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00002230 if (!buf) {
2231 PyErr_NoMemory();
2232 goto done;
2233 }
2234
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002235 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002236 goto done;
2237
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002238 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002239 goto done;
2240
2241 result = 0;
2242done:
2243 PyMem_Free(buf);
2244 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002245 }
2246
2247 return 0;
2248}
2249
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002250/* Perform direct write of the header and payload of the binary object.
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002251
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002252 The large contiguous data is written directly into the underlying file
2253 object, bypassing the output_buffer of the Pickler. We intentionally
2254 do not insert a protocol 4 frame opcode to make it possible to optimize
2255 file.read calls in the loader.
2256 */
2257static int
2258_Pickler_write_bytes(PicklerObject *self,
2259 const char *header, Py_ssize_t header_size,
2260 const char *data, Py_ssize_t data_size,
2261 PyObject *payload)
2262{
2263 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2264 int framing = self->framing;
2265
2266 if (bypass_buffer) {
2267 assert(self->output_buffer != NULL);
2268 /* Commit the previous frame. */
2269 if (_Pickler_CommitFrame(self)) {
2270 return -1;
2271 }
2272 /* Disable framing temporarily */
2273 self->framing = 0;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002274 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002275
2276 if (_Pickler_Write(self, header, header_size) < 0) {
2277 return -1;
2278 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002279
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002280 if (bypass_buffer && self->write != NULL) {
2281 /* Bypass the in-memory buffer to directly stream large data
2282 into the underlying file object. */
2283 PyObject *result, *mem = NULL;
2284 /* Dump the output buffer to the file. */
2285 if (_Pickler_FlushToFile(self) < 0) {
2286 return -1;
2287 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002288
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002289 /* Stream write the payload into the file without going through the
2290 output buffer. */
2291 if (payload == NULL) {
Serhiy Storchaka5b76bdb2018-01-13 00:28:31 +02002292 /* TODO: It would be better to use a memoryview with a linked
2293 original string if this is possible. */
2294 payload = mem = PyBytes_FromStringAndSize(data, data_size);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002295 if (payload == NULL) {
2296 return -1;
2297 }
2298 }
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002299 result = _PyObject_CallOneArg(self->write, payload);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002300 Py_XDECREF(mem);
2301 if (result == NULL) {
2302 return -1;
2303 }
2304 Py_DECREF(result);
2305
2306 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2307 if (_Pickler_ClearBuffer(self) < 0) {
2308 return -1;
2309 }
2310 }
2311 else {
2312 if (_Pickler_Write(self, data, data_size) < 0) {
2313 return -1;
2314 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002315 }
2316
2317 /* Re-enable framing for subsequent calls to _Pickler_Write. */
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002318 self->framing = framing;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002319
2320 return 0;
2321}
2322
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002323static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02002324_save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2325 Py_ssize_t size)
2326{
2327 assert(self->proto >= 3);
2328
2329 char header[9];
2330 Py_ssize_t len;
2331
2332 if (size < 0)
2333 return -1;
2334
2335 if (size <= 0xff) {
2336 header[0] = SHORT_BINBYTES;
2337 header[1] = (unsigned char)size;
2338 len = 2;
2339 }
2340 else if ((size_t)size <= 0xffffffffUL) {
2341 header[0] = BINBYTES;
2342 header[1] = (unsigned char)(size & 0xff);
2343 header[2] = (unsigned char)((size >> 8) & 0xff);
2344 header[3] = (unsigned char)((size >> 16) & 0xff);
2345 header[4] = (unsigned char)((size >> 24) & 0xff);
2346 len = 5;
2347 }
2348 else if (self->proto >= 4) {
2349 header[0] = BINBYTES8;
2350 _write_size64(header + 1, size);
2351 len = 9;
2352 }
2353 else {
2354 PyErr_SetString(PyExc_OverflowError,
2355 "serializing a bytes object larger than 4 GiB "
2356 "requires pickle protocol 4 or higher");
2357 return -1;
2358 }
2359
2360 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2361 return -1;
2362 }
2363
2364 if (memo_put(self, obj) < 0) {
2365 return -1;
2366 }
2367
2368 return 0;
2369}
2370
2371static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002372save_bytes(PicklerObject *self, PyObject *obj)
2373{
2374 if (self->proto < 3) {
2375 /* Older pickle protocols do not have an opcode for pickling bytes
2376 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002377 the __reduce__ method) to permit bytes object unpickling.
2378
2379 Here we use a hack to be compatible with Python 2. Since in Python
2380 2 'bytes' is just an alias for 'str' (which has different
2381 parameters than the actual bytes object), we use codecs.encode
2382 to create the appropriate 'str' object when unpickled using
2383 Python 2 *and* the appropriate 'bytes' object when unpickled
2384 using Python 3. Again this is a hack and we don't need to do this
2385 with newer protocols. */
Pierre Glaser289f1f82019-05-08 23:08:25 +02002386 PyObject *reduce_value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002387 int status;
2388
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002389 if (PyBytes_GET_SIZE(obj) == 0) {
2390 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2391 }
2392 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002393 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002394 PyObject *unicode_str =
2395 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2396 PyBytes_GET_SIZE(obj),
2397 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002398 _Py_IDENTIFIER(latin1);
2399
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002400 if (unicode_str == NULL)
2401 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002402 reduce_value = Py_BuildValue("(O(OO))",
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002403 st->codecs_encode, unicode_str,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002404 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002405 Py_DECREF(unicode_str);
2406 }
2407
2408 if (reduce_value == NULL)
2409 return -1;
2410
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002411 /* save_reduce() will memoize the object automatically. */
2412 status = save_reduce(self, reduce_value, obj);
2413 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002414 return status;
2415 }
2416 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002417 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2418 PyBytes_GET_SIZE(obj));
2419 }
2420}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002421
Antoine Pitrou91f43802019-05-26 17:10:09 +02002422static int
2423_save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2424 Py_ssize_t size)
2425{
2426 assert(self->proto >= 5);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002427
Antoine Pitrou91f43802019-05-26 17:10:09 +02002428 char header[9];
2429 Py_ssize_t len;
2430
2431 if (size < 0)
2432 return -1;
2433
2434 header[0] = BYTEARRAY8;
2435 _write_size64(header + 1, size);
2436 len = 9;
2437
2438 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2439 return -1;
2440 }
2441
2442 if (memo_put(self, obj) < 0) {
2443 return -1;
2444 }
2445
2446 return 0;
2447}
2448
2449static int
2450save_bytearray(PicklerObject *self, PyObject *obj)
2451{
2452 if (self->proto < 5) {
2453 /* Older pickle protocols do not have an opcode for pickling
2454 * bytearrays. */
2455 PyObject *reduce_value = NULL;
2456 int status;
2457
2458 if (PyByteArray_GET_SIZE(obj) == 0) {
2459 reduce_value = Py_BuildValue("(O())",
2460 (PyObject *) &PyByteArray_Type);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002461 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002462 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002463 PyObject *bytes_obj = PyBytes_FromObject(obj);
2464 if (bytes_obj != NULL) {
2465 reduce_value = Py_BuildValue("(O(O))",
2466 (PyObject *) &PyByteArray_Type,
2467 bytes_obj);
2468 Py_DECREF(bytes_obj);
2469 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002470 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002471 if (reduce_value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002472 return -1;
2473
Antoine Pitrou91f43802019-05-26 17:10:09 +02002474 /* save_reduce() will memoize the object automatically. */
2475 status = save_reduce(self, reduce_value, obj);
2476 Py_DECREF(reduce_value);
2477 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002478 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002479 else {
2480 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2481 PyByteArray_GET_SIZE(obj));
2482 }
2483}
2484
2485static int
2486save_picklebuffer(PicklerObject *self, PyObject *obj)
2487{
2488 if (self->proto < 5) {
2489 PickleState *st = _Pickle_GetGlobalState();
2490 PyErr_SetString(st->PicklingError,
2491 "PickleBuffer can only pickled with protocol >= 5");
2492 return -1;
2493 }
2494 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2495 if (view == NULL) {
2496 return -1;
2497 }
2498 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2499 PickleState *st = _Pickle_GetGlobalState();
2500 PyErr_SetString(st->PicklingError,
2501 "PickleBuffer can not be pickled when "
2502 "pointing to a non-contiguous buffer");
2503 return -1;
2504 }
2505 int in_band = 1;
2506 if (self->buffer_callback != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02002507 PyObject *ret = _PyObject_CallOneArg(self->buffer_callback, obj);
Antoine Pitrou91f43802019-05-26 17:10:09 +02002508 if (ret == NULL) {
2509 return -1;
2510 }
2511 in_band = PyObject_IsTrue(ret);
2512 Py_DECREF(ret);
2513 if (in_band == -1) {
2514 return -1;
2515 }
2516 }
2517 if (in_band) {
2518 /* Write data in-band */
2519 if (view->readonly) {
2520 return _save_bytes_data(self, obj, (const char*) view->buf,
2521 view->len);
2522 }
2523 else {
2524 return _save_bytearray_data(self, obj, (const char*) view->buf,
2525 view->len);
2526 }
2527 }
2528 else {
2529 /* Write data out-of-band */
2530 const char next_buffer_op = NEXT_BUFFER;
2531 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2532 return -1;
2533 }
2534 if (view->readonly) {
2535 const char readonly_buffer_op = READONLY_BUFFER;
2536 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2537 return -1;
2538 }
2539 }
2540 }
2541 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002542}
2543
2544/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2545 backslash and newline characters to \uXXXX escapes. */
2546static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002547raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002548{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002549 char *p;
Victor Stinner049e5092014-08-17 22:20:00 +02002550 Py_ssize_t i, size;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002551 void *data;
2552 unsigned int kind;
Victor Stinner358af132015-10-12 22:36:57 +02002553 _PyBytesWriter writer;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002554
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002555 if (PyUnicode_READY(obj))
2556 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002557
Victor Stinner358af132015-10-12 22:36:57 +02002558 _PyBytesWriter_Init(&writer);
2559
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002560 size = PyUnicode_GET_LENGTH(obj);
2561 data = PyUnicode_DATA(obj);
2562 kind = PyUnicode_KIND(obj);
Victor Stinner121aab42011-09-29 23:40:53 +02002563
Victor Stinner358af132015-10-12 22:36:57 +02002564 p = _PyBytesWriter_Alloc(&writer, size);
2565 if (p == NULL)
2566 goto error;
2567 writer.overallocate = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002568
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002569 for (i=0; i < size; i++) {
2570 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002571 /* Map 32-bit characters to '\Uxxxxxxxx' */
2572 if (ch >= 0x10000) {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002573 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002574 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2575 if (p == NULL)
2576 goto error;
2577
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002578 *p++ = '\\';
2579 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002580 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2581 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2582 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2583 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2584 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2585 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2586 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2587 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002588 }
Victor Stinner358af132015-10-12 22:36:57 +02002589 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
Serhiy Storchaka38ab7d42019-05-31 11:29:39 +03002590 else if (ch >= 256 ||
2591 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2592 ch == 0x1a)
2593 {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002594 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002595 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2596 if (p == NULL)
2597 goto error;
2598
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002599 *p++ = '\\';
2600 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002601 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2602 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2603 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2604 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002605 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002606 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002607 else
2608 *p++ = (char) ch;
2609 }
Victor Stinner358af132015-10-12 22:36:57 +02002610
2611 return _PyBytesWriter_Finish(&writer, p);
2612
2613error:
2614 _PyBytesWriter_Dealloc(&writer);
2615 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002616}
2617
2618static int
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002619write_unicode_binary(PicklerObject *self, PyObject *obj)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002620{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002621 char header[9];
2622 Py_ssize_t len;
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002623 PyObject *encoded = NULL;
2624 Py_ssize_t size;
2625 const char *data;
2626
2627 if (PyUnicode_READY(obj))
2628 return -1;
2629
2630 data = PyUnicode_AsUTF8AndSize(obj, &size);
2631 if (data == NULL) {
2632 /* Issue #8383: for strings with lone surrogates, fallback on the
2633 "surrogatepass" error handler. */
2634 PyErr_Clear();
2635 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2636 if (encoded == NULL)
2637 return -1;
2638
2639 data = PyBytes_AS_STRING(encoded);
2640 size = PyBytes_GET_SIZE(encoded);
2641 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002642
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002643 assert(size >= 0);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002644 if (size <= 0xff && self->proto >= 4) {
2645 header[0] = SHORT_BINUNICODE;
2646 header[1] = (unsigned char)(size & 0xff);
2647 len = 2;
2648 }
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002649 else if ((size_t)size <= 0xffffffffUL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002650 header[0] = BINUNICODE;
2651 header[1] = (unsigned char)(size & 0xff);
2652 header[2] = (unsigned char)((size >> 8) & 0xff);
2653 header[3] = (unsigned char)((size >> 16) & 0xff);
2654 header[4] = (unsigned char)((size >> 24) & 0xff);
2655 len = 5;
2656 }
2657 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002658 header[0] = BINUNICODE8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002659 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002660 len = 9;
2661 }
2662 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002663 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou91f43802019-05-26 17:10:09 +02002664 "serializing a string larger than 4 GiB "
2665 "requires pickle protocol 4 or higher");
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002666 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002667 return -1;
2668 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002669
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002670 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2671 Py_XDECREF(encoded);
2672 return -1;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002673 }
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002674 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002675 return 0;
2676}
2677
2678static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002679save_unicode(PicklerObject *self, PyObject *obj)
2680{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002681 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002682 if (write_unicode_binary(self, obj) < 0)
2683 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002684 }
2685 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002686 PyObject *encoded;
2687 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002688 const char unicode_op = UNICODE;
2689
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002690 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002691 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002692 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002693
Antoine Pitrou299978d2013-04-07 17:38:11 +02002694 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2695 Py_DECREF(encoded);
2696 return -1;
2697 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002698
2699 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002700 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2701 Py_DECREF(encoded);
2702 return -1;
2703 }
2704 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002705
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002706 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002707 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002708 }
2709 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002710 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002711
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002712 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002713}
2714
2715/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2716static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002717store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002718{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002719 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002720
2721 assert(PyTuple_Size(t) == len);
2722
2723 for (i = 0; i < len; i++) {
2724 PyObject *element = PyTuple_GET_ITEM(t, i);
2725
2726 if (element == NULL)
2727 return -1;
2728 if (save(self, element, 0) < 0)
2729 return -1;
2730 }
2731
2732 return 0;
2733}
2734
2735/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2736 * used across protocols to minimize the space needed to pickle them.
2737 * Tuples are also the only builtin immutable type that can be recursive
2738 * (a tuple can be reached from itself), and that requires some subtle
2739 * magic so that it works in all cases. IOW, this is a long routine.
2740 */
2741static int
2742save_tuple(PicklerObject *self, PyObject *obj)
2743{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002744 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002745
2746 const char mark_op = MARK;
2747 const char tuple_op = TUPLE;
2748 const char pop_op = POP;
2749 const char pop_mark_op = POP_MARK;
2750 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2751
2752 if ((len = PyTuple_Size(obj)) < 0)
2753 return -1;
2754
2755 if (len == 0) {
2756 char pdata[2];
2757
2758 if (self->proto) {
2759 pdata[0] = EMPTY_TUPLE;
2760 len = 1;
2761 }
2762 else {
2763 pdata[0] = MARK;
2764 pdata[1] = TUPLE;
2765 len = 2;
2766 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002767 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002768 return -1;
2769 return 0;
2770 }
2771
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002772 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002773 * saving the tuple elements, the tuple must be recursive, in
2774 * which case we'll pop everything we put on the stack, and fetch
2775 * its value from the memo.
2776 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002777 if (len <= 3 && self->proto >= 2) {
2778 /* Use TUPLE{1,2,3} opcodes. */
2779 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002780 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002781
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002782 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002783 /* pop the len elements */
2784 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002785 if (_Pickler_Write(self, &pop_op, 1) < 0)
2786 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002787 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002788 if (memo_get(self, obj) < 0)
2789 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002790
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002791 return 0;
2792 }
2793 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002794 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2795 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002796 }
2797 goto memoize;
2798 }
2799
2800 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2801 * Generate MARK e1 e2 ... TUPLE
2802 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002803 if (_Pickler_Write(self, &mark_op, 1) < 0)
2804 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002805
2806 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002807 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002808
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002809 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002810 /* pop the stack stuff we pushed */
2811 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002812 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2813 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002814 }
2815 else {
2816 /* Note that we pop one more than len, to remove
2817 * the MARK too.
2818 */
2819 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002820 if (_Pickler_Write(self, &pop_op, 1) < 0)
2821 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002822 }
2823 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002824 if (memo_get(self, obj) < 0)
2825 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002826
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002827 return 0;
2828 }
2829 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002830 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2831 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002832 }
2833
2834 memoize:
2835 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002836 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002837
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002838 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002839}
2840
2841/* iter is an iterator giving items, and we batch up chunks of
2842 * MARK item item ... item APPENDS
2843 * opcode sequences. Calling code should have arranged to first create an
2844 * empty list, or list-like object, for the APPENDS to operate on.
2845 * Returns 0 on success, <0 on error.
2846 */
2847static int
2848batch_list(PicklerObject *self, PyObject *iter)
2849{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002850 PyObject *obj = NULL;
2851 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002852 int i, n;
2853
2854 const char mark_op = MARK;
2855 const char append_op = APPEND;
2856 const char appends_op = APPENDS;
2857
2858 assert(iter != NULL);
2859
2860 /* XXX: I think this function could be made faster by avoiding the
2861 iterator interface and fetching objects directly from list using
2862 PyList_GET_ITEM.
2863 */
2864
2865 if (self->proto == 0) {
2866 /* APPENDS isn't available; do one at a time. */
2867 for (;;) {
2868 obj = PyIter_Next(iter);
2869 if (obj == NULL) {
2870 if (PyErr_Occurred())
2871 return -1;
2872 break;
2873 }
2874 i = save(self, obj, 0);
2875 Py_DECREF(obj);
2876 if (i < 0)
2877 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002878 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002879 return -1;
2880 }
2881 return 0;
2882 }
2883
2884 /* proto > 0: write in batches of BATCHSIZE. */
2885 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002886 /* Get first item */
2887 firstitem = PyIter_Next(iter);
2888 if (firstitem == NULL) {
2889 if (PyErr_Occurred())
2890 goto error;
2891
2892 /* nothing more to add */
2893 break;
2894 }
2895
2896 /* Try to get a second item */
2897 obj = PyIter_Next(iter);
2898 if (obj == NULL) {
2899 if (PyErr_Occurred())
2900 goto error;
2901
2902 /* Only one item to write */
2903 if (save(self, firstitem, 0) < 0)
2904 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002905 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002906 goto error;
2907 Py_CLEAR(firstitem);
2908 break;
2909 }
2910
2911 /* More than one item to write */
2912
2913 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002914 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002915 goto error;
2916
2917 if (save(self, firstitem, 0) < 0)
2918 goto error;
2919 Py_CLEAR(firstitem);
2920 n = 1;
2921
2922 /* Fetch and save up to BATCHSIZE items */
2923 while (obj) {
2924 if (save(self, obj, 0) < 0)
2925 goto error;
2926 Py_CLEAR(obj);
2927 n += 1;
2928
2929 if (n == BATCHSIZE)
2930 break;
2931
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002932 obj = PyIter_Next(iter);
2933 if (obj == NULL) {
2934 if (PyErr_Occurred())
2935 goto error;
2936 break;
2937 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002938 }
2939
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002940 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002941 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002942
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002943 } while (n == BATCHSIZE);
2944 return 0;
2945
2946 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002947 Py_XDECREF(firstitem);
2948 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002949 return -1;
2950}
2951
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002952/* This is a variant of batch_list() above, specialized for lists (with no
2953 * support for list subclasses). Like batch_list(), we batch up chunks of
2954 * MARK item item ... item APPENDS
2955 * opcode sequences. Calling code should have arranged to first create an
2956 * empty list, or list-like object, for the APPENDS to operate on.
2957 * Returns 0 on success, -1 on error.
2958 *
2959 * This version is considerably faster than batch_list(), if less general.
2960 *
2961 * Note that this only works for protocols > 0.
2962 */
2963static int
2964batch_list_exact(PicklerObject *self, PyObject *obj)
2965{
2966 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002967 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002968
2969 const char append_op = APPEND;
2970 const char appends_op = APPENDS;
2971 const char mark_op = MARK;
2972
2973 assert(obj != NULL);
2974 assert(self->proto > 0);
2975 assert(PyList_CheckExact(obj));
2976
2977 if (PyList_GET_SIZE(obj) == 1) {
2978 item = PyList_GET_ITEM(obj, 0);
2979 if (save(self, item, 0) < 0)
2980 return -1;
2981 if (_Pickler_Write(self, &append_op, 1) < 0)
2982 return -1;
2983 return 0;
2984 }
2985
2986 /* Write in batches of BATCHSIZE. */
2987 total = 0;
2988 do {
2989 this_batch = 0;
2990 if (_Pickler_Write(self, &mark_op, 1) < 0)
2991 return -1;
2992 while (total < PyList_GET_SIZE(obj)) {
2993 item = PyList_GET_ITEM(obj, total);
2994 if (save(self, item, 0) < 0)
2995 return -1;
2996 total++;
2997 if (++this_batch == BATCHSIZE)
2998 break;
2999 }
3000 if (_Pickler_Write(self, &appends_op, 1) < 0)
3001 return -1;
3002
3003 } while (total < PyList_GET_SIZE(obj));
3004
3005 return 0;
3006}
3007
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003008static int
3009save_list(PicklerObject *self, PyObject *obj)
3010{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003011 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003012 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003013 int status = 0;
3014
3015 if (self->fast && !fast_save_enter(self, obj))
3016 goto error;
3017
3018 /* Create an empty list. */
3019 if (self->bin) {
3020 header[0] = EMPTY_LIST;
3021 len = 1;
3022 }
3023 else {
3024 header[0] = MARK;
3025 header[1] = LIST;
3026 len = 2;
3027 }
3028
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003029 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003030 goto error;
3031
3032 /* Get list length, and bow out early if empty. */
3033 if ((len = PyList_Size(obj)) < 0)
3034 goto error;
3035
3036 if (memo_put(self, obj) < 0)
3037 goto error;
3038
3039 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003040 /* Materialize the list elements. */
3041 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003042 if (Py_EnterRecursiveCall(" while pickling an object"))
3043 goto error;
3044 status = batch_list_exact(self, obj);
3045 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003046 } else {
3047 PyObject *iter = PyObject_GetIter(obj);
3048 if (iter == NULL)
3049 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003050
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003051 if (Py_EnterRecursiveCall(" while pickling an object")) {
3052 Py_DECREF(iter);
3053 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003054 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003055 status = batch_list(self, iter);
3056 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003057 Py_DECREF(iter);
3058 }
3059 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003060 if (0) {
3061 error:
3062 status = -1;
3063 }
3064
3065 if (self->fast && !fast_save_leave(self, obj))
3066 status = -1;
3067
3068 return status;
3069}
3070
3071/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3072 * MARK key value ... key value SETITEMS
3073 * opcode sequences. Calling code should have arranged to first create an
3074 * empty dict, or dict-like object, for the SETITEMS to operate on.
3075 * Returns 0 on success, <0 on error.
3076 *
3077 * This is very much like batch_list(). The difference between saving
3078 * elements directly, and picking apart two-tuples, is so long-winded at
3079 * the C level, though, that attempts to combine these routines were too
3080 * ugly to bear.
3081 */
3082static int
3083batch_dict(PicklerObject *self, PyObject *iter)
3084{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003085 PyObject *obj = NULL;
3086 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003087 int i, n;
3088
3089 const char mark_op = MARK;
3090 const char setitem_op = SETITEM;
3091 const char setitems_op = SETITEMS;
3092
3093 assert(iter != NULL);
3094
3095 if (self->proto == 0) {
3096 /* SETITEMS isn't available; do one at a time. */
3097 for (;;) {
3098 obj = PyIter_Next(iter);
3099 if (obj == NULL) {
3100 if (PyErr_Occurred())
3101 return -1;
3102 break;
3103 }
3104 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3105 PyErr_SetString(PyExc_TypeError, "dict items "
3106 "iterator must return 2-tuples");
3107 return -1;
3108 }
3109 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3110 if (i >= 0)
3111 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3112 Py_DECREF(obj);
3113 if (i < 0)
3114 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003115 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003116 return -1;
3117 }
3118 return 0;
3119 }
3120
3121 /* proto > 0: write in batches of BATCHSIZE. */
3122 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003123 /* Get first item */
3124 firstitem = PyIter_Next(iter);
3125 if (firstitem == NULL) {
3126 if (PyErr_Occurred())
3127 goto error;
3128
3129 /* nothing more to add */
3130 break;
3131 }
3132 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3133 PyErr_SetString(PyExc_TypeError, "dict items "
3134 "iterator must return 2-tuples");
3135 goto error;
3136 }
3137
3138 /* Try to get a second item */
3139 obj = PyIter_Next(iter);
3140 if (obj == NULL) {
3141 if (PyErr_Occurred())
3142 goto error;
3143
3144 /* Only one item to write */
3145 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3146 goto error;
3147 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3148 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003149 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003150 goto error;
3151 Py_CLEAR(firstitem);
3152 break;
3153 }
3154
3155 /* More than one item to write */
3156
3157 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003158 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003159 goto error;
3160
3161 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3162 goto error;
3163 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3164 goto error;
3165 Py_CLEAR(firstitem);
3166 n = 1;
3167
3168 /* Fetch and save up to BATCHSIZE items */
3169 while (obj) {
3170 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3171 PyErr_SetString(PyExc_TypeError, "dict items "
3172 "iterator must return 2-tuples");
3173 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003174 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003175 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3176 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3177 goto error;
3178 Py_CLEAR(obj);
3179 n += 1;
3180
3181 if (n == BATCHSIZE)
3182 break;
3183
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003184 obj = PyIter_Next(iter);
3185 if (obj == NULL) {
3186 if (PyErr_Occurred())
3187 goto error;
3188 break;
3189 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003190 }
3191
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003192 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003193 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003194
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003195 } while (n == BATCHSIZE);
3196 return 0;
3197
3198 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003199 Py_XDECREF(firstitem);
3200 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003201 return -1;
3202}
3203
Collin Winter5c9b02d2009-05-25 05:43:30 +00003204/* This is a variant of batch_dict() above that specializes for dicts, with no
3205 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3206 * MARK key value ... key value SETITEMS
3207 * opcode sequences. Calling code should have arranged to first create an
3208 * empty dict, or dict-like object, for the SETITEMS to operate on.
3209 * Returns 0 on success, -1 on error.
3210 *
3211 * Note that this currently doesn't work for protocol 0.
3212 */
3213static int
3214batch_dict_exact(PicklerObject *self, PyObject *obj)
3215{
3216 PyObject *key = NULL, *value = NULL;
3217 int i;
3218 Py_ssize_t dict_size, ppos = 0;
3219
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00003220 const char mark_op = MARK;
3221 const char setitem_op = SETITEM;
3222 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00003223
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003224 assert(obj != NULL && PyDict_CheckExact(obj));
Collin Winter5c9b02d2009-05-25 05:43:30 +00003225 assert(self->proto > 0);
3226
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003227 dict_size = PyDict_GET_SIZE(obj);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003228
3229 /* Special-case len(d) == 1 to save space. */
3230 if (dict_size == 1) {
3231 PyDict_Next(obj, &ppos, &key, &value);
3232 if (save(self, key, 0) < 0)
3233 return -1;
3234 if (save(self, value, 0) < 0)
3235 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003236 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003237 return -1;
3238 return 0;
3239 }
3240
3241 /* Write in batches of BATCHSIZE. */
3242 do {
3243 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003244 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003245 return -1;
3246 while (PyDict_Next(obj, &ppos, &key, &value)) {
3247 if (save(self, key, 0) < 0)
3248 return -1;
3249 if (save(self, value, 0) < 0)
3250 return -1;
3251 if (++i == BATCHSIZE)
3252 break;
3253 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003254 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003255 return -1;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003256 if (PyDict_GET_SIZE(obj) != dict_size) {
Collin Winter5c9b02d2009-05-25 05:43:30 +00003257 PyErr_Format(
3258 PyExc_RuntimeError,
3259 "dictionary changed size during iteration");
3260 return -1;
3261 }
3262
3263 } while (i == BATCHSIZE);
3264 return 0;
3265}
3266
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003267static int
3268save_dict(PicklerObject *self, PyObject *obj)
3269{
3270 PyObject *items, *iter;
3271 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003273 int status = 0;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003274 assert(PyDict_Check(obj));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003275
3276 if (self->fast && !fast_save_enter(self, obj))
3277 goto error;
3278
3279 /* Create an empty dict. */
3280 if (self->bin) {
3281 header[0] = EMPTY_DICT;
3282 len = 1;
3283 }
3284 else {
3285 header[0] = MARK;
3286 header[1] = DICT;
3287 len = 2;
3288 }
3289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003290 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003291 goto error;
3292
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003293 if (memo_put(self, obj) < 0)
3294 goto error;
3295
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003296 if (PyDict_GET_SIZE(obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003297 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00003298 if (PyDict_CheckExact(obj) && self->proto > 0) {
3299 /* We can take certain shortcuts if we know this is a dict and
3300 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003301 if (Py_EnterRecursiveCall(" while pickling an object"))
3302 goto error;
3303 status = batch_dict_exact(self, obj);
3304 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003305 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003306 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003307
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003308 items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003309 if (items == NULL)
3310 goto error;
3311 iter = PyObject_GetIter(items);
3312 Py_DECREF(items);
3313 if (iter == NULL)
3314 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003315 if (Py_EnterRecursiveCall(" while pickling an object")) {
3316 Py_DECREF(iter);
3317 goto error;
3318 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00003319 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003320 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003321 Py_DECREF(iter);
3322 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003323 }
3324
3325 if (0) {
3326 error:
3327 status = -1;
3328 }
3329
3330 if (self->fast && !fast_save_leave(self, obj))
3331 status = -1;
3332
3333 return status;
3334}
3335
3336static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003337save_set(PicklerObject *self, PyObject *obj)
3338{
3339 PyObject *item;
3340 int i;
3341 Py_ssize_t set_size, ppos = 0;
3342 Py_hash_t hash;
3343
3344 const char empty_set_op = EMPTY_SET;
3345 const char mark_op = MARK;
3346 const char additems_op = ADDITEMS;
3347
3348 if (self->proto < 4) {
3349 PyObject *items;
3350 PyObject *reduce_value;
3351 int status;
3352
3353 items = PySequence_List(obj);
3354 if (items == NULL) {
3355 return -1;
3356 }
3357 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3358 Py_DECREF(items);
3359 if (reduce_value == NULL) {
3360 return -1;
3361 }
3362 /* save_reduce() will memoize the object automatically. */
3363 status = save_reduce(self, reduce_value, obj);
3364 Py_DECREF(reduce_value);
3365 return status;
3366 }
3367
3368 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3369 return -1;
3370
3371 if (memo_put(self, obj) < 0)
3372 return -1;
3373
3374 set_size = PySet_GET_SIZE(obj);
3375 if (set_size == 0)
3376 return 0; /* nothing to do */
3377
3378 /* Write in batches of BATCHSIZE. */
3379 do {
3380 i = 0;
3381 if (_Pickler_Write(self, &mark_op, 1) < 0)
3382 return -1;
3383 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3384 if (save(self, item, 0) < 0)
3385 return -1;
3386 if (++i == BATCHSIZE)
3387 break;
3388 }
3389 if (_Pickler_Write(self, &additems_op, 1) < 0)
3390 return -1;
3391 if (PySet_GET_SIZE(obj) != set_size) {
3392 PyErr_Format(
3393 PyExc_RuntimeError,
3394 "set changed size during iteration");
3395 return -1;
3396 }
3397 } while (i == BATCHSIZE);
3398
3399 return 0;
3400}
3401
3402static int
3403save_frozenset(PicklerObject *self, PyObject *obj)
3404{
3405 PyObject *iter;
3406
3407 const char mark_op = MARK;
3408 const char frozenset_op = FROZENSET;
3409
3410 if (self->fast && !fast_save_enter(self, obj))
3411 return -1;
3412
3413 if (self->proto < 4) {
3414 PyObject *items;
3415 PyObject *reduce_value;
3416 int status;
3417
3418 items = PySequence_List(obj);
3419 if (items == NULL) {
3420 return -1;
3421 }
3422 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3423 items);
3424 Py_DECREF(items);
3425 if (reduce_value == NULL) {
3426 return -1;
3427 }
3428 /* save_reduce() will memoize the object automatically. */
3429 status = save_reduce(self, reduce_value, obj);
3430 Py_DECREF(reduce_value);
3431 return status;
3432 }
3433
3434 if (_Pickler_Write(self, &mark_op, 1) < 0)
3435 return -1;
3436
3437 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003438 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01003439 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003440 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003441 for (;;) {
3442 PyObject *item;
3443
3444 item = PyIter_Next(iter);
3445 if (item == NULL) {
3446 if (PyErr_Occurred()) {
3447 Py_DECREF(iter);
3448 return -1;
3449 }
3450 break;
3451 }
3452 if (save(self, item, 0) < 0) {
3453 Py_DECREF(item);
3454 Py_DECREF(iter);
3455 return -1;
3456 }
3457 Py_DECREF(item);
3458 }
3459 Py_DECREF(iter);
3460
3461 /* If the object is already in the memo, this means it is
3462 recursive. In this case, throw away everything we put on the
3463 stack, and fetch the object back from the memo. */
3464 if (PyMemoTable_Get(self->memo, obj)) {
3465 const char pop_mark_op = POP_MARK;
3466
3467 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3468 return -1;
3469 if (memo_get(self, obj) < 0)
3470 return -1;
3471 return 0;
3472 }
3473
3474 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3475 return -1;
3476 if (memo_put(self, obj) < 0)
3477 return -1;
3478
3479 return 0;
3480}
3481
3482static int
3483fix_imports(PyObject **module_name, PyObject **global_name)
3484{
3485 PyObject *key;
3486 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003487 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003488
3489 key = PyTuple_Pack(2, *module_name, *global_name);
3490 if (key == NULL)
3491 return -1;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003492 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003493 Py_DECREF(key);
3494 if (item) {
3495 PyObject *fixed_module_name;
3496 PyObject *fixed_global_name;
3497
3498 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3499 PyErr_Format(PyExc_RuntimeError,
3500 "_compat_pickle.REVERSE_NAME_MAPPING values "
3501 "should be 2-tuples, not %.200s",
3502 Py_TYPE(item)->tp_name);
3503 return -1;
3504 }
3505 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3506 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3507 if (!PyUnicode_Check(fixed_module_name) ||
3508 !PyUnicode_Check(fixed_global_name)) {
3509 PyErr_Format(PyExc_RuntimeError,
3510 "_compat_pickle.REVERSE_NAME_MAPPING values "
3511 "should be pairs of str, not (%.200s, %.200s)",
3512 Py_TYPE(fixed_module_name)->tp_name,
3513 Py_TYPE(fixed_global_name)->tp_name);
3514 return -1;
3515 }
3516
3517 Py_CLEAR(*module_name);
3518 Py_CLEAR(*global_name);
3519 Py_INCREF(fixed_module_name);
3520 Py_INCREF(fixed_global_name);
3521 *module_name = fixed_module_name;
3522 *global_name = fixed_global_name;
Serhiy Storchakabfe18242015-03-31 13:12:37 +03003523 return 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003524 }
3525 else if (PyErr_Occurred()) {
3526 return -1;
3527 }
3528
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003529 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003530 if (item) {
3531 if (!PyUnicode_Check(item)) {
3532 PyErr_Format(PyExc_RuntimeError,
3533 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3534 "should be strings, not %.200s",
3535 Py_TYPE(item)->tp_name);
3536 return -1;
3537 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003538 Py_INCREF(item);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003539 Py_XSETREF(*module_name, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003540 }
3541 else if (PyErr_Occurred()) {
3542 return -1;
3543 }
3544
3545 return 0;
3546}
3547
3548static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003549save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3550{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003551 PyObject *global_name = NULL;
3552 PyObject *module_name = NULL;
3553 PyObject *module = NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003554 PyObject *parent = NULL;
3555 PyObject *dotted_path = NULL;
3556 PyObject *lastname = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003557 PyObject *cls;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003558 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003559 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003560 _Py_IDENTIFIER(__name__);
3561 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003562
3563 const char global_op = GLOBAL;
3564
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003565 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003566 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003567 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003568 }
3569 else {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003570 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3571 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003572 if (global_name == NULL) {
3573 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3574 if (global_name == NULL)
3575 goto error;
3576 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003577 }
3578
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003579 dotted_path = get_dotted_path(module, global_name);
3580 if (dotted_path == NULL)
3581 goto error;
3582 module_name = whichmodule(obj, dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003583 if (module_name == NULL)
3584 goto error;
3585
3586 /* XXX: Change to use the import C API directly with level=0 to disallow
3587 relative imports.
3588
3589 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3590 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3591 custom import functions (IMHO, this would be a nice security
3592 feature). The import C API would need to be extended to support the
3593 extra parameters of __import__ to fix that. */
3594 module = PyImport_Import(module_name);
3595 if (module == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003596 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003597 "Can't pickle %R: import of module %R failed",
3598 obj, module_name);
3599 goto error;
3600 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003601 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3602 Py_INCREF(lastname);
3603 cls = get_deep_attribute(module, dotted_path, &parent);
3604 Py_CLEAR(dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003605 if (cls == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003606 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003607 "Can't pickle %R: attribute lookup %S on %S failed",
3608 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003609 goto error;
3610 }
3611 if (cls != obj) {
3612 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003613 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003614 "Can't pickle %R: it's not the same object as %S.%S",
3615 obj, module_name, global_name);
3616 goto error;
3617 }
3618 Py_DECREF(cls);
3619
3620 if (self->proto >= 2) {
3621 /* See whether this is in the extension registry, and if
3622 * so generate an EXT opcode.
3623 */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003624 PyObject *extension_key;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003625 PyObject *code_obj; /* extension code as Python object */
3626 long code; /* extension code as C value */
3627 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003628 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003629
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003630 extension_key = PyTuple_Pack(2, module_name, global_name);
3631 if (extension_key == NULL) {
3632 goto error;
3633 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003634 code_obj = PyDict_GetItemWithError(st->extension_registry,
3635 extension_key);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003636 Py_DECREF(extension_key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003637 /* The object is not registered in the extension registry.
3638 This is the most likely code path. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003639 if (code_obj == NULL) {
3640 if (PyErr_Occurred()) {
3641 goto error;
3642 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003643 goto gen_global;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003644 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003645
3646 /* XXX: pickle.py doesn't check neither the type, nor the range
3647 of the value returned by the extension_registry. It should for
3648 consistency. */
3649
3650 /* Verify code_obj has the right type and value. */
3651 if (!PyLong_Check(code_obj)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003652 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003653 "Can't pickle %R: extension code %R isn't an integer",
3654 obj, code_obj);
3655 goto error;
3656 }
3657 code = PyLong_AS_LONG(code_obj);
3658 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003659 if (!PyErr_Occurred())
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003660 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3661 "code %ld is out of range", obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003662 goto error;
3663 }
3664
3665 /* Generate an EXT opcode. */
3666 if (code <= 0xff) {
3667 pdata[0] = EXT1;
3668 pdata[1] = (unsigned char)code;
3669 n = 2;
3670 }
3671 else if (code <= 0xffff) {
3672 pdata[0] = EXT2;
3673 pdata[1] = (unsigned char)(code & 0xff);
3674 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3675 n = 3;
3676 }
3677 else {
3678 pdata[0] = EXT4;
3679 pdata[1] = (unsigned char)(code & 0xff);
3680 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3681 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3682 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3683 n = 5;
3684 }
3685
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003686 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003687 goto error;
3688 }
3689 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003690 gen_global:
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003691 if (parent == module) {
3692 Py_INCREF(lastname);
3693 Py_DECREF(global_name);
3694 global_name = lastname;
3695 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003696 if (self->proto >= 4) {
3697 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003698
Christian Heimese8b1ba12013-11-23 21:13:39 +01003699 if (save(self, module_name, 0) < 0)
3700 goto error;
3701 if (save(self, global_name, 0) < 0)
3702 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003703
3704 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3705 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003706 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003707 else if (parent != module) {
3708 PickleState *st = _Pickle_GetGlobalState();
3709 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3710 st->getattr, parent, lastname);
Alexey Izbyshevf8c06b02018-08-22 07:51:25 +03003711 if (reduce_value == NULL)
3712 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003713 status = save_reduce(self, reduce_value, NULL);
3714 Py_DECREF(reduce_value);
3715 if (status < 0)
3716 goto error;
3717 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003718 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003719 /* Generate a normal global opcode if we are using a pickle
3720 protocol < 4, or if the object is not registered in the
3721 extension registry. */
3722 PyObject *encoded;
3723 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003724
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003725 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003726 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003727
3728 /* For protocol < 3 and if the user didn't request against doing
3729 so, we convert module names to the old 2.x module names. */
3730 if (self->proto < 3 && self->fix_imports) {
3731 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003732 goto error;
3733 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003734 }
3735
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003736 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3737 both the module name and the global name using UTF-8. We do so
3738 only when we are using the pickle protocol newer than version
3739 3. This is to ensure compatibility with older Unpickler running
3740 on Python 2.x. */
3741 if (self->proto == 3) {
3742 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003743 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003744 else {
3745 unicode_encoder = PyUnicode_AsASCIIString;
3746 }
3747 encoded = unicode_encoder(module_name);
3748 if (encoded == NULL) {
3749 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003750 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003751 "can't pickle module identifier '%S' using "
3752 "pickle protocol %i",
3753 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003754 goto error;
3755 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003756 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3757 PyBytes_GET_SIZE(encoded)) < 0) {
3758 Py_DECREF(encoded);
3759 goto error;
3760 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003761 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003762 if(_Pickler_Write(self, "\n", 1) < 0)
3763 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003764
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003765 /* Save the name of the module. */
3766 encoded = unicode_encoder(global_name);
3767 if (encoded == NULL) {
3768 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003769 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003770 "can't pickle global identifier '%S' using "
3771 "pickle protocol %i",
3772 global_name, self->proto);
3773 goto error;
3774 }
3775 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3776 PyBytes_GET_SIZE(encoded)) < 0) {
3777 Py_DECREF(encoded);
3778 goto error;
3779 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003780 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003781 if (_Pickler_Write(self, "\n", 1) < 0)
3782 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003783 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003784 /* Memoize the object. */
3785 if (memo_put(self, obj) < 0)
3786 goto error;
3787 }
3788
3789 if (0) {
3790 error:
3791 status = -1;
3792 }
3793 Py_XDECREF(module_name);
3794 Py_XDECREF(global_name);
3795 Py_XDECREF(module);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003796 Py_XDECREF(parent);
3797 Py_XDECREF(dotted_path);
3798 Py_XDECREF(lastname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003799
3800 return status;
3801}
3802
3803static int
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003804save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3805{
3806 PyObject *reduce_value;
3807 int status;
3808
3809 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3810 if (reduce_value == NULL) {
3811 return -1;
3812 }
3813 status = save_reduce(self, reduce_value, obj);
3814 Py_DECREF(reduce_value);
3815 return status;
3816}
3817
3818static int
3819save_type(PicklerObject *self, PyObject *obj)
3820{
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003821 if (obj == (PyObject *)&_PyNone_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003822 return save_singleton_type(self, obj, Py_None);
3823 }
3824 else if (obj == (PyObject *)&PyEllipsis_Type) {
3825 return save_singleton_type(self, obj, Py_Ellipsis);
3826 }
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003827 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003828 return save_singleton_type(self, obj, Py_NotImplemented);
3829 }
3830 return save_global(self, obj, NULL);
3831}
3832
3833static int
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003834save_pers(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003835{
3836 PyObject *pid = NULL;
3837 int status = 0;
3838
3839 const char persid_op = PERSID;
3840 const char binpersid_op = BINPERSID;
3841
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003842 pid = call_method(self->pers_func, self->pers_func_self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003843 if (pid == NULL)
3844 return -1;
3845
3846 if (pid != Py_None) {
3847 if (self->bin) {
3848 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003849 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003850 goto error;
3851 }
3852 else {
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003853 PyObject *pid_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003854
3855 pid_str = PyObject_Str(pid);
3856 if (pid_str == NULL)
3857 goto error;
3858
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003859 /* XXX: Should it check whether the pid contains embedded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003860 newlines? */
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003861 if (!PyUnicode_IS_ASCII(pid_str)) {
3862 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3863 "persistent IDs in protocol 0 must be "
3864 "ASCII strings");
3865 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003866 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003867 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003868
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003869 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003870 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3871 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3872 _Pickler_Write(self, "\n", 1) < 0) {
3873 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003874 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003875 }
3876 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003877 }
3878 status = 1;
3879 }
3880
3881 if (0) {
3882 error:
3883 status = -1;
3884 }
3885 Py_XDECREF(pid);
3886
3887 return status;
3888}
3889
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003890static PyObject *
3891get_class(PyObject *obj)
3892{
3893 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003894 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003895
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003896 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3897 cls = (PyObject *) Py_TYPE(obj);
3898 Py_INCREF(cls);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003899 }
3900 return cls;
3901}
3902
/* We're saving obj, and args is the 2-thru-6 tuple returned by the
 * appropriate __reduce__ method for obj.
 */
3906static int
3907save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3908{
3909 PyObject *callable;
3910 PyObject *argtup;
3911 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003912 PyObject *listitems = Py_None;
3913 PyObject *dictitems = Py_None;
Pierre Glaser65d98d02019-05-08 21:40:25 +02003914 PyObject *state_setter = Py_None;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003915 PickleState *st = _Pickle_GetGlobalState();
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003916 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003917 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003918
3919 const char reduce_op = REDUCE;
3920 const char build_op = BUILD;
3921 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003922 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003923
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003924 size = PyTuple_Size(args);
Pierre Glaser65d98d02019-05-08 21:40:25 +02003925 if (size < 2 || size > 6) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003926 PyErr_SetString(st->PicklingError, "tuple returned by "
Pierre Glaser65d98d02019-05-08 21:40:25 +02003927 "__reduce__ must contain 2 through 6 elements");
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003928 return -1;
3929 }
3930
Pierre Glaser65d98d02019-05-08 21:40:25 +02003931 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3932 &callable, &argtup, &state, &listitems, &dictitems,
3933 &state_setter))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003934 return -1;
3935
3936 if (!PyCallable_Check(callable)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003937 PyErr_SetString(st->PicklingError, "first item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003938 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003939 return -1;
3940 }
3941 if (!PyTuple_Check(argtup)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003942 PyErr_SetString(st->PicklingError, "second item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003943 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003944 return -1;
3945 }
3946
3947 if (state == Py_None)
3948 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003949
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003950 if (listitems == Py_None)
3951 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003952 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003953 PyErr_Format(st->PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003954 "returned by __reduce__ must be an iterator, not %s",
3955 Py_TYPE(listitems)->tp_name);
3956 return -1;
3957 }
3958
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003959 if (dictitems == Py_None)
3960 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003961 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003962 PyErr_Format(st->PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003963 "returned by __reduce__ must be an iterator, not %s",
3964 Py_TYPE(dictitems)->tp_name);
3965 return -1;
3966 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003967
Pierre Glaser65d98d02019-05-08 21:40:25 +02003968 if (state_setter == Py_None)
3969 state_setter = NULL;
3970 else if (!PyCallable_Check(state_setter)) {
3971 PyErr_Format(st->PicklingError, "sixth element of the tuple "
3972 "returned by __reduce__ must be a function, not %s",
3973 Py_TYPE(state_setter)->tp_name);
3974 return -1;
3975 }
3976
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003977 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003978 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003979 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003980
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003981 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
3982 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003983 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003984 if (name != NULL && PyUnicode_Check(name)) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03003985 _Py_IDENTIFIER(__newobj_ex__);
Serhiy Storchakaf0f35a62017-01-09 10:09:43 +02003986 use_newobj_ex = _PyUnicode_EqualToASCIIId(
3987 name, &PyId___newobj_ex__);
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02003988 if (!use_newobj_ex) {
3989 _Py_IDENTIFIER(__newobj__);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02003990 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02003991 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003992 }
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02003993 Py_XDECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003994 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003995
3996 if (use_newobj_ex) {
3997 PyObject *cls;
3998 PyObject *args;
3999 PyObject *kwargs;
4000
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004001 if (PyTuple_GET_SIZE(argtup) != 3) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004002 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004003 "length of the NEWOBJ_EX argument tuple must be "
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004004 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004005 return -1;
4006 }
4007
4008 cls = PyTuple_GET_ITEM(argtup, 0);
4009 if (!PyType_Check(cls)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004010 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004011 "first item from NEWOBJ_EX argument tuple must "
4012 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4013 return -1;
4014 }
4015 args = PyTuple_GET_ITEM(argtup, 1);
4016 if (!PyTuple_Check(args)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004017 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004018 "second item from NEWOBJ_EX argument tuple must "
4019 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4020 return -1;
4021 }
4022 kwargs = PyTuple_GET_ITEM(argtup, 2);
4023 if (!PyDict_Check(kwargs)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004024 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004025 "third item from NEWOBJ_EX argument tuple must "
4026 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4027 return -1;
4028 }
4029
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004030 if (self->proto >= 4) {
4031 if (save(self, cls, 0) < 0 ||
4032 save(self, args, 0) < 0 ||
4033 save(self, kwargs, 0) < 0 ||
4034 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4035 return -1;
4036 }
4037 }
4038 else {
4039 PyObject *newargs;
4040 PyObject *cls_new;
4041 Py_ssize_t i;
4042 _Py_IDENTIFIER(__new__);
4043
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004044 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004045 if (newargs == NULL)
4046 return -1;
4047
4048 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4049 if (cls_new == NULL) {
4050 Py_DECREF(newargs);
4051 return -1;
4052 }
4053 PyTuple_SET_ITEM(newargs, 0, cls_new);
4054 Py_INCREF(cls);
4055 PyTuple_SET_ITEM(newargs, 1, cls);
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004056 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004057 PyObject *item = PyTuple_GET_ITEM(args, i);
4058 Py_INCREF(item);
4059 PyTuple_SET_ITEM(newargs, i + 2, item);
4060 }
4061
4062 callable = PyObject_Call(st->partial, newargs, kwargs);
4063 Py_DECREF(newargs);
4064 if (callable == NULL)
4065 return -1;
4066
4067 newargs = PyTuple_New(0);
4068 if (newargs == NULL) {
4069 Py_DECREF(callable);
4070 return -1;
4071 }
4072
4073 if (save(self, callable, 0) < 0 ||
4074 save(self, newargs, 0) < 0 ||
4075 _Pickler_Write(self, &reduce_op, 1) < 0) {
4076 Py_DECREF(newargs);
4077 Py_DECREF(callable);
4078 return -1;
4079 }
4080 Py_DECREF(newargs);
4081 Py_DECREF(callable);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004082 }
4083 }
4084 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004085 PyObject *cls;
4086 PyObject *newargtup;
4087 PyObject *obj_class;
4088 int p;
4089
4090 /* Sanity checks. */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004091 if (PyTuple_GET_SIZE(argtup) < 1) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004092 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004093 return -1;
4094 }
4095
4096 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004097 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004098 PyErr_SetString(st->PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004099 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004100 return -1;
4101 }
4102
4103 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004104 obj_class = get_class(obj);
Zackery Spytz25d38972018-12-05 11:29:20 -07004105 if (obj_class == NULL) {
4106 return -1;
4107 }
4108 p = obj_class != cls;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004109 Py_DECREF(obj_class);
4110 if (p) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004111 PyErr_SetString(st->PicklingError, "args[0] from "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004112 "__newobj__ args has the wrong class");
4113 return -1;
4114 }
4115 }
4116 /* XXX: These calls save() are prone to infinite recursion. Imagine
4117 what happen if the value returned by the __reduce__() method of
4118 some extension type contains another object of the same type. Ouch!
4119
4120 Here is a quick example, that I ran into, to illustrate what I
4121 mean:
4122
4123 >>> import pickle, copyreg
4124 >>> copyreg.dispatch_table.pop(complex)
4125 >>> pickle.dumps(1+2j)
4126 Traceback (most recent call last):
4127 ...
Yury Selivanovf488fb42015-07-03 01:04:23 -04004128 RecursionError: maximum recursion depth exceeded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004129
4130 Removing the complex class from copyreg.dispatch_table made the
4131 __reduce_ex__() method emit another complex object:
4132
4133 >>> (1+1j).__reduce_ex__(2)
4134 (<function __newobj__ at 0xb7b71c3c>,
4135 (<class 'complex'>, (1+1j)), None, None, None)
4136
4137 Thus when save() was called on newargstup (the 2nd item) recursion
4138 ensued. Of course, the bug was in the complex class which had a
4139 broken __getnewargs__() that emitted another complex object. But,
4140 the point, here, is it is quite easy to end up with a broken reduce
4141 function. */
4142
4143 /* Save the class and its __new__ arguments. */
4144 if (save(self, cls, 0) < 0)
4145 return -1;
4146
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004147 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004148 if (newargtup == NULL)
4149 return -1;
4150
4151 p = save(self, newargtup, 0);
4152 Py_DECREF(newargtup);
4153 if (p < 0)
4154 return -1;
4155
4156 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004157 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004158 return -1;
4159 }
4160 else { /* Not using NEWOBJ. */
4161 if (save(self, callable, 0) < 0 ||
4162 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004163 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004164 return -1;
4165 }
4166
4167 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4168 the caller do not want to memoize the object. Not particularly useful,
4169 but that is to mimic the behavior save_reduce() in pickle.py when
4170 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004171 if (obj != NULL) {
4172 /* If the object is already in the memo, this means it is
4173 recursive. In this case, throw away everything we put on the
4174 stack, and fetch the object back from the memo. */
4175 if (PyMemoTable_Get(self->memo, obj)) {
4176 const char pop_op = POP;
4177
4178 if (_Pickler_Write(self, &pop_op, 1) < 0)
4179 return -1;
4180 if (memo_get(self, obj) < 0)
4181 return -1;
4182
4183 return 0;
4184 }
4185 else if (memo_put(self, obj) < 0)
4186 return -1;
4187 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004188
4189 if (listitems && batch_list(self, listitems) < 0)
4190 return -1;
4191
4192 if (dictitems && batch_dict(self, dictitems) < 0)
4193 return -1;
4194
4195 if (state) {
Pierre Glaser65d98d02019-05-08 21:40:25 +02004196 if (state_setter == NULL) {
4197 if (save(self, state, 0) < 0 ||
4198 _Pickler_Write(self, &build_op, 1) < 0)
4199 return -1;
4200 }
4201 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004202
Pierre Glaser65d98d02019-05-08 21:40:25 +02004203 /* If a state_setter is specified, call it instead of load_build to
4204 * update obj's with its previous state.
4205 * The first 4 save/write instructions push state_setter and its
4206 * tuple of expected arguments (obj, state) onto the stack. The
4207 * REDUCE opcode triggers the state_setter(obj, state) function
4208 * call. Finally, because state-updating routines only do in-place
4209 * modification, the whole operation has to be stack-transparent.
4210 * Thus, we finally pop the call's output from the stack.*/
4211
4212 const char tupletwo_op = TUPLE2;
4213 const char pop_op = POP;
4214 if (save(self, state_setter, 0) < 0 ||
4215 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4216 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4217 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4218 _Pickler_Write(self, &pop_op, 1) < 0)
4219 return -1;
4220 }
4221 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004222 return 0;
4223}
4224
4225static int
4226save(PicklerObject *self, PyObject *obj, int pers_save)
4227{
4228 PyTypeObject *type;
4229 PyObject *reduce_func = NULL;
4230 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004231 int status = 0;
4232
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004233 if (_Pickler_OpcodeBoundary(self) < 0)
4234 return -1;
4235
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004236 /* The extra pers_save argument is necessary to avoid calling save_pers()
4237 on its returned object. */
4238 if (!pers_save && self->pers_func) {
4239 /* save_pers() returns:
4240 -1 to signal an error;
4241 0 if it did nothing successfully;
4242 1 if a persistent id was saved.
4243 */
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004244 if ((status = save_pers(self, obj)) != 0)
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004245 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004246 }
4247
4248 type = Py_TYPE(obj);
4249
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004250 /* The old cPickle had an optimization that used switch-case statement
4251 dispatching on the first letter of the type name. This has was removed
4252 since benchmarks shown that this optimization was actually slowing
4253 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004254
4255 /* Atom types; these aren't memoized, so don't check the memo. */
4256
4257 if (obj == Py_None) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004258 return save_none(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004259 }
4260 else if (obj == Py_False || obj == Py_True) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004261 return save_bool(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004262 }
4263 else if (type == &PyLong_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004264 return save_long(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004265 }
4266 else if (type == &PyFloat_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004267 return save_float(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004268 }
4269
4270 /* Check the memo to see if it has the object. If so, generate
4271 a GET (or BINGET) opcode, instead of pickling the object
4272 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004273 if (PyMemoTable_Get(self->memo, obj)) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004274 return memo_get(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004275 }
4276
4277 if (type == &PyBytes_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004278 return save_bytes(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004279 }
4280 else if (type == &PyUnicode_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004281 return save_unicode(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004282 }
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004283
4284 /* We're only calling Py_EnterRecursiveCall here so that atomic
4285 types above are pickled faster. */
4286 if (Py_EnterRecursiveCall(" while pickling an object")) {
4287 return -1;
4288 }
4289
4290 if (type == &PyDict_Type) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004291 status = save_dict(self, obj);
4292 goto done;
4293 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004294 else if (type == &PySet_Type) {
4295 status = save_set(self, obj);
4296 goto done;
4297 }
4298 else if (type == &PyFrozenSet_Type) {
4299 status = save_frozenset(self, obj);
4300 goto done;
4301 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004302 else if (type == &PyList_Type) {
4303 status = save_list(self, obj);
4304 goto done;
4305 }
4306 else if (type == &PyTuple_Type) {
4307 status = save_tuple(self, obj);
4308 goto done;
4309 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02004310 else if (type == &PyByteArray_Type) {
4311 status = save_bytearray(self, obj);
4312 goto done;
4313 }
4314 else if (type == &PyPickleBuffer_Type) {
4315 status = save_picklebuffer(self, obj);
4316 goto done;
4317 }
Pierre Glaser289f1f82019-05-08 23:08:25 +02004318
4319 /* Now, check reducer_override. If it returns NotImplemented,
4320 * fallback to save_type or save_global, and then perhaps to the
4321 * regular reduction mechanism.
4322 */
4323 if (self->reducer_override != NULL) {
Jeroen Demeyer196a5302019-07-04 12:31:34 +02004324 reduce_value = _PyObject_CallOneArg(self->reducer_override, obj);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004325 if (reduce_value == NULL) {
4326 goto error;
4327 }
4328 if (reduce_value != Py_NotImplemented) {
4329 goto reduce;
4330 }
4331 Py_DECREF(reduce_value);
4332 reduce_value = NULL;
4333 }
4334
4335 if (type == &PyType_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08004336 status = save_type(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004337 goto done;
4338 }
4339 else if (type == &PyFunction_Type) {
4340 status = save_global(self, obj, NULL);
Alexandre Vassalottifc912852013-11-24 03:07:35 -08004341 goto done;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004342 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004343
4344 /* XXX: This part needs some unit tests. */
4345
4346 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004347 * self.dispatch_table, copyreg.dispatch_table, the object's
4348 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004349 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004350 if (self->dispatch_table == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004351 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08004352 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4353 (PyObject *)type);
4354 if (reduce_func == NULL) {
4355 if (PyErr_Occurred()) {
4356 goto error;
4357 }
4358 } else {
4359 /* PyDict_GetItemWithError() returns a borrowed reference.
4360 Increase the reference count to be consistent with
4361 PyObject_GetItem and _PyObject_GetAttrId used below. */
4362 Py_INCREF(reduce_func);
4363 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004364 } else {
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08004365 reduce_func = PyObject_GetItem(self->dispatch_table,
4366 (PyObject *)type);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004367 if (reduce_func == NULL) {
4368 if (PyErr_ExceptionMatches(PyExc_KeyError))
4369 PyErr_Clear();
4370 else
4371 goto error;
4372 }
4373 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004374 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004375 Py_INCREF(obj);
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08004376 reduce_value = _Pickle_FastCall(reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004377 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02004378 else if (PyType_IsSubtype(type, &PyType_Type)) {
4379 status = save_global(self, obj, NULL);
4380 goto done;
4381 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004382 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004383 _Py_IDENTIFIER(__reduce__);
4384 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004385
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004386 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4387 automatically defined as __reduce__. While this is convenient, this
4388 make it impossible to know which method was actually called. Of
4389 course, this is not a big deal. But still, it would be nice to let
4390 the user know which method was called when something go
4391 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4392 don't actually have to check for a __reduce__ method. */
4393
4394 /* Check for a __reduce_ex__ method. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004395 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4396 goto error;
4397 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004398 if (reduce_func != NULL) {
4399 PyObject *proto;
4400 proto = PyLong_FromLong(self->proto);
4401 if (proto != NULL) {
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08004402 reduce_value = _Pickle_FastCall(reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004403 }
4404 }
4405 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004406 /* Check for a __reduce__ method. */
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03004407 if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4408 goto error;
4409 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004410 if (reduce_func != NULL) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02004411 reduce_value = PyObject_CallNoArgs(reduce_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004412 }
4413 else {
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03004414 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004415 PyErr_Format(st->PicklingError,
4416 "can't pickle '%.200s' object: %R",
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004417 type->tp_name, obj);
4418 goto error;
4419 }
4420 }
4421 }
4422
4423 if (reduce_value == NULL)
4424 goto error;
4425
Pierre Glaser289f1f82019-05-08 23:08:25 +02004426 reduce:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004427 if (PyUnicode_Check(reduce_value)) {
4428 status = save_global(self, obj, reduce_value);
4429 goto done;
4430 }
4431
4432 if (!PyTuple_Check(reduce_value)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004433 PickleState *st = _Pickle_GetGlobalState();
4434 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004435 "__reduce__ must return a string or tuple");
4436 goto error;
4437 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004438
4439 status = save_reduce(self, reduce_value, obj);
4440
4441 if (0) {
4442 error:
4443 status = -1;
4444 }
4445 done:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004446
Alexandre Vassalottidff18342008-07-13 18:48:30 +00004447 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004448 Py_XDECREF(reduce_func);
4449 Py_XDECREF(reduce_value);
4450
4451 return status;
4452}
4453
4454static int
4455dump(PicklerObject *self, PyObject *obj)
4456{
4457 const char stop_op = STOP;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004458 int status = -1;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004459 PyObject *tmp;
4460 _Py_IDENTIFIER(reducer_override);
4461
4462 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4463 &tmp) < 0) {
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004464 goto error;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004465 }
4466 /* Cache the reducer_override method, if it exists. */
4467 if (tmp != NULL) {
4468 Py_XSETREF(self->reducer_override, tmp);
4469 }
4470 else {
4471 Py_CLEAR(self->reducer_override);
4472 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004473
4474 if (self->proto >= 2) {
4475 char header[2];
4476
4477 header[0] = PROTO;
4478 assert(self->proto >= 0 && self->proto < 256);
4479 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004480 if (_Pickler_Write(self, header, 2) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004481 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004482 if (self->proto >= 4)
4483 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004484 }
4485
4486 if (save(self, obj, 0) < 0 ||
Serhiy Storchakac8695292018-04-04 00:11:27 +03004487 _Pickler_Write(self, &stop_op, 1) < 0 ||
4488 _Pickler_CommitFrame(self) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004489 goto error;
4490
4491 // Success
4492 status = 0;
4493
4494 error:
Serhiy Storchakac8695292018-04-04 00:11:27 +03004495 self->framing = 0;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004496
4497 /* Break the reference cycle we generated at the beginning this function
4498 * call when setting the reducer_override attribute of the Pickler instance
4499 * to a bound method of the same instance. This is important as the Pickler
4500 * instance holds a reference to each object it has pickled (through its
4501 * memo): thus, these objects wont be garbage-collected as long as the
4502 * Pickler itself is not collected. */
4503 Py_CLEAR(self->reducer_override);
4504 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004505}
4506
Larry Hastings61272b72014-01-07 12:41:53 -08004507/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004508
4509_pickle.Pickler.clear_memo
4510
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004511Clears the pickler's "memo".
4512
4513The memo is the data structure that remembers which objects the
4514pickler has already seen, so that shared or recursive objects are
4515pickled by reference and not by value. This method is useful when
4516re-using picklers.
Larry Hastings61272b72014-01-07 12:41:53 -08004517[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004518
Larry Hastings3cceb382014-01-04 11:09:09 -08004519static PyObject *
4520_pickle_Pickler_clear_memo_impl(PicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004521/*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004522{
4523 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004524 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004525
4526 Py_RETURN_NONE;
4527}
4528
Larry Hastings61272b72014-01-07 12:41:53 -08004529/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004530
4531_pickle.Pickler.dump
4532
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004533 obj: object
4534 /
4535
4536Write a pickled representation of the given object to the open file.
Larry Hastings61272b72014-01-07 12:41:53 -08004537[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004538
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004539static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004540_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
Larry Hastings581ee362014-01-28 05:00:08 -08004541/*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004542{
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004543 /* Check whether the Pickler was initialized correctly (issue3664).
4544 Developers often forget to call __init__() in their subclasses, which
4545 would trigger a segfault without this check. */
4546 if (self->write == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004547 PickleState *st = _Pickle_GetGlobalState();
4548 PyErr_Format(st->PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004549 "Pickler.__init__() was not called by %s.__init__()",
4550 Py_TYPE(self)->tp_name);
4551 return NULL;
4552 }
4553
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004554 if (_Pickler_ClearBuffer(self) < 0)
4555 return NULL;
4556
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004557 if (dump(self, obj) < 0)
4558 return NULL;
4559
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004560 if (_Pickler_FlushToFile(self) < 0)
4561 return NULL;
4562
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004563 Py_RETURN_NONE;
4564}
4565
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004566/*[clinic input]
4567
4568_pickle.Pickler.__sizeof__ -> Py_ssize_t
4569
4570Returns size in memory, in bytes.
4571[clinic start generated code]*/
4572
4573static Py_ssize_t
4574_pickle_Pickler___sizeof___impl(PicklerObject *self)
4575/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4576{
4577 Py_ssize_t res, s;
4578
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02004579 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004580 if (self->memo != NULL) {
4581 res += sizeof(PyMemoTable);
4582 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4583 }
4584 if (self->output_buffer != NULL) {
4585 s = _PySys_GetSizeOf(self->output_buffer);
4586 if (s == -1)
4587 return -1;
4588 res += s;
4589 }
4590 return res;
4591}
4592
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004593static struct PyMethodDef Pickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004594 _PICKLE_PICKLER_DUMP_METHODDEF
4595 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004596 _PICKLE_PICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004597 {NULL, NULL} /* sentinel */
4598};
4599
4600static void
4601Pickler_dealloc(PicklerObject *self)
4602{
4603 PyObject_GC_UnTrack(self);
4604
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004605 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004606 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004607 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004608 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004609 Py_XDECREF(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004610 Py_XDECREF(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004611 Py_XDECREF(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004612
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004613 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004614
4615 Py_TYPE(self)->tp_free((PyObject *)self);
4616}
4617
4618static int
4619Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4620{
4621 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004622 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004623 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004624 Py_VISIT(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004625 Py_VISIT(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004626 Py_VISIT(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004627 return 0;
4628}
4629
4630static int
4631Pickler_clear(PicklerObject *self)
4632{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004633 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004634 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004636 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004637 Py_CLEAR(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004638 Py_CLEAR(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004639 Py_CLEAR(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004640
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004641 if (self->memo != NULL) {
4642 PyMemoTable *memo = self->memo;
4643 self->memo = NULL;
4644 PyMemoTable_Del(memo);
4645 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004646 return 0;
4647}
4648
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004649
Larry Hastings61272b72014-01-07 12:41:53 -08004650/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004651
4652_pickle.Pickler.__init__
4653
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004654 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004655 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004656 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004657 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004658
4659This takes a binary file for writing a pickle data stream.
4660
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004661The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00004662protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4663protocol is 4. It was introduced in Python 3.4, and is incompatible
4664with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004665
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004666Specifying a negative protocol version selects the highest protocol
4667version supported. The higher the protocol used, the more recent the
4668version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004669
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004670The *file* argument must have a write() method that accepts a single
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004671bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00004672writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004673this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004674
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004675If *fix_imports* is True and protocol is less than 3, pickle will try
4676to map the new Python 3 names to the old module names used in Python
46772, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02004678
4679If *buffer_callback* is None (the default), buffer views are
4680serialized into *file* as part of the pickle stream.
4681
4682If *buffer_callback* is not None, then it can be called any number
4683of times with a buffer view. If the callback returns a false value
4684(such as None), the given buffer is out-of-band; otherwise the
4685buffer is serialized in-band, i.e. inside the pickle stream.
4686
4687It is an error if *buffer_callback* is not None and *protocol*
4688is None or smaller than 5.
4689
Larry Hastings61272b72014-01-07 12:41:53 -08004690[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004691
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004692static int
Larry Hastings89964c42015-04-14 18:07:59 -04004693_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02004694 PyObject *protocol, int fix_imports,
4695 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00004696/*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004697{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004698 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004699 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004700
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004701 /* In case of multiple __init__() calls, clear previous content. */
4702 if (self->write != NULL)
4703 (void)Pickler_clear(self);
4704
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004705 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004706 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004707
4708 if (_Pickler_SetOutputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004709 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004710
Antoine Pitrou91f43802019-05-26 17:10:09 +02004711 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4712 return -1;
4713
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004714 /* memo and output_buffer may have already been created in _Pickler_New */
4715 if (self->memo == NULL) {
4716 self->memo = PyMemoTable_New();
4717 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004718 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004719 }
4720 self->output_len = 0;
4721 if (self->output_buffer == NULL) {
4722 self->max_output_len = WRITE_BUF_SIZE;
4723 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4724 self->max_output_len);
4725 if (self->output_buffer == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004726 return -1;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004727 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004729 self->fast = 0;
4730 self->fast_nesting = 0;
4731 self->fast_memo = NULL;
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004732
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004733 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4734 &self->pers_func, &self->pers_func_self) < 0)
4735 {
4736 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004737 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004738
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004739 if (_PyObject_LookupAttrId((PyObject *)self,
4740 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4741 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004742 }
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004743
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744 return 0;
4745}
4746
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004747
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004748/* Define a proxy object for the Pickler's internal memo object. This is to
4749 * avoid breaking code like:
4750 * pickler.memo.clear()
4751 * and
4752 * pickler.memo = saved_memo
4753 * Is this a good idea? Not really, but we don't want to break code that uses
4754 * it. Note that we don't implement the entire mapping API here. This is
4755 * intentional, as these should be treated as black-box implementation details.
4756 */
4757
Larry Hastings61272b72014-01-07 12:41:53 -08004758/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004759_pickle.PicklerMemoProxy.clear
4760
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004761Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08004762[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004763
Larry Hastings3cceb382014-01-04 11:09:09 -08004764static PyObject *
4765_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004766/*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004767{
4768 if (self->pickler->memo)
4769 PyMemoTable_Clear(self->pickler->memo);
4770 Py_RETURN_NONE;
4771}
4772
Larry Hastings61272b72014-01-07 12:41:53 -08004773/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004774_pickle.PicklerMemoProxy.copy
4775
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004776Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08004777[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004778
Larry Hastings3cceb382014-01-04 11:09:09 -08004779static PyObject *
4780_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004781/*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004782{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004783 PyMemoTable *memo;
4784 PyObject *new_memo = PyDict_New();
4785 if (new_memo == NULL)
4786 return NULL;
4787
4788 memo = self->pickler->memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07004789 for (size_t i = 0; i < memo->mt_allocated; ++i) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004790 PyMemoEntry entry = memo->mt_table[i];
4791 if (entry.me_key != NULL) {
4792 int status;
4793 PyObject *key, *value;
4794
4795 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004796 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004797
4798 if (key == NULL || value == NULL) {
4799 Py_XDECREF(key);
4800 Py_XDECREF(value);
4801 goto error;
4802 }
4803 status = PyDict_SetItem(new_memo, key, value);
4804 Py_DECREF(key);
4805 Py_DECREF(value);
4806 if (status < 0)
4807 goto error;
4808 }
4809 }
4810 return new_memo;
4811
4812 error:
4813 Py_XDECREF(new_memo);
4814 return NULL;
4815}
4816
Larry Hastings61272b72014-01-07 12:41:53 -08004817/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004818_pickle.PicklerMemoProxy.__reduce__
4819
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004820Implement pickle support.
Larry Hastings61272b72014-01-07 12:41:53 -08004821[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004822
Larry Hastings3cceb382014-01-04 11:09:09 -08004823static PyObject *
4824_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004825/*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004826{
4827 PyObject *reduce_value, *dict_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08004828 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004829 if (contents == NULL)
4830 return NULL;
4831
4832 reduce_value = PyTuple_New(2);
4833 if (reduce_value == NULL) {
4834 Py_DECREF(contents);
4835 return NULL;
4836 }
4837 dict_args = PyTuple_New(1);
4838 if (dict_args == NULL) {
4839 Py_DECREF(contents);
4840 Py_DECREF(reduce_value);
4841 return NULL;
4842 }
4843 PyTuple_SET_ITEM(dict_args, 0, contents);
4844 Py_INCREF((PyObject *)&PyDict_Type);
4845 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4846 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4847 return reduce_value;
4848}
4849
4850static PyMethodDef picklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004851 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4852 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4853 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004854 {NULL, NULL} /* sentinel */
4855};
4856
4857static void
4858PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4859{
4860 PyObject_GC_UnTrack(self);
4861 Py_XDECREF(self->pickler);
4862 PyObject_GC_Del((PyObject *)self);
4863}
4864
4865static int
4866PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4867 visitproc visit, void *arg)
4868{
4869 Py_VISIT(self->pickler);
4870 return 0;
4871}
4872
4873static int
4874PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4875{
4876 Py_CLEAR(self->pickler);
4877 return 0;
4878}
4879
4880static PyTypeObject PicklerMemoProxyType = {
4881 PyVarObject_HEAD_INIT(NULL, 0)
4882 "_pickle.PicklerMemoProxy", /*tp_name*/
4883 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4884 0,
4885 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004886 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004887 0, /* tp_getattr */
4888 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004889 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004890 0, /* tp_repr */
4891 0, /* tp_as_number */
4892 0, /* tp_as_sequence */
4893 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00004894 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004895 0, /* tp_call */
4896 0, /* tp_str */
4897 PyObject_GenericGetAttr, /* tp_getattro */
4898 PyObject_GenericSetAttr, /* tp_setattro */
4899 0, /* tp_as_buffer */
4900 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4901 0, /* tp_doc */
4902 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4903 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4904 0, /* tp_richcompare */
4905 0, /* tp_weaklistoffset */
4906 0, /* tp_iter */
4907 0, /* tp_iternext */
4908 picklerproxy_methods, /* tp_methods */
4909};
4910
4911static PyObject *
4912PicklerMemoProxy_New(PicklerObject *pickler)
4913{
4914 PicklerMemoProxyObject *self;
4915
4916 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4917 if (self == NULL)
4918 return NULL;
4919 Py_INCREF(pickler);
4920 self->pickler = pickler;
4921 PyObject_GC_Track(self);
4922 return (PyObject *)self;
4923}
4924
4925/*****************************************************************************/
4926
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004928Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004930 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004931}
4932
4933static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004934Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004935{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004936 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004937
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004938 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004939 PyErr_SetString(PyExc_TypeError,
4940 "attribute deletion is not supported");
4941 return -1;
4942 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004943
4944 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4945 PicklerObject *pickler =
4946 ((PicklerMemoProxyObject *)obj)->pickler;
4947
4948 new_memo = PyMemoTable_Copy(pickler->memo);
4949 if (new_memo == NULL)
4950 return -1;
4951 }
4952 else if (PyDict_Check(obj)) {
4953 Py_ssize_t i = 0;
4954 PyObject *key, *value;
4955
4956 new_memo = PyMemoTable_New();
4957 if (new_memo == NULL)
4958 return -1;
4959
4960 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004961 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004962 PyObject *memo_obj;
4963
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004964 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004965 PyErr_SetString(PyExc_TypeError,
4966 "'memo' values must be 2-item tuples");
4967 goto error;
4968 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004969 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004970 if (memo_id == -1 && PyErr_Occurred())
4971 goto error;
4972 memo_obj = PyTuple_GET_ITEM(value, 1);
4973 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4974 goto error;
4975 }
4976 }
4977 else {
4978 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02004979 "'memo' attribute must be a PicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004980 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004981 return -1;
4982 }
4983
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004984 PyMemoTable_Del(self->memo);
4985 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004986
4987 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004988
4989 error:
4990 if (new_memo)
4991 PyMemoTable_Del(new_memo);
4992 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004993}
4994
4995static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004996Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004997{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004998 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004999 PyErr_SetString(PyExc_AttributeError, "persistent_id");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005000 return NULL;
5001 }
5002 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005003}
5004
5005static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005006Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005007{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005008 if (value == NULL) {
5009 PyErr_SetString(PyExc_TypeError,
5010 "attribute deletion is not supported");
5011 return -1;
5012 }
5013 if (!PyCallable_Check(value)) {
5014 PyErr_SetString(PyExc_TypeError,
5015 "persistent_id must be a callable taking one argument");
5016 return -1;
5017 }
5018
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005019 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005020 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03005021 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005022
5023 return 0;
5024}
5025
5026static PyMemberDef Pickler_members[] = {
5027 {"bin", T_INT, offsetof(PicklerObject, bin)},
5028 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01005029 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005030 {NULL}
5031};
5032
5033static PyGetSetDef Pickler_getsets[] = {
5034 {"memo", (getter)Pickler_get_memo,
5035 (setter)Pickler_set_memo},
5036 {"persistent_id", (getter)Pickler_get_persid,
5037 (setter)Pickler_set_persid},
5038 {NULL}
5039};
5040
5041static PyTypeObject Pickler_Type = {
5042 PyVarObject_HEAD_INIT(NULL, 0)
5043 "_pickle.Pickler" , /*tp_name*/
5044 sizeof(PicklerObject), /*tp_basicsize*/
5045 0, /*tp_itemsize*/
5046 (destructor)Pickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02005047 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005048 0, /*tp_getattr*/
5049 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02005050 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005051 0, /*tp_repr*/
5052 0, /*tp_as_number*/
5053 0, /*tp_as_sequence*/
5054 0, /*tp_as_mapping*/
5055 0, /*tp_hash*/
5056 0, /*tp_call*/
5057 0, /*tp_str*/
5058 0, /*tp_getattro*/
5059 0, /*tp_setattro*/
5060 0, /*tp_as_buffer*/
5061 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08005062 _pickle_Pickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005063 (traverseproc)Pickler_traverse, /*tp_traverse*/
5064 (inquiry)Pickler_clear, /*tp_clear*/
5065 0, /*tp_richcompare*/
5066 0, /*tp_weaklistoffset*/
5067 0, /*tp_iter*/
5068 0, /*tp_iternext*/
5069 Pickler_methods, /*tp_methods*/
5070 Pickler_members, /*tp_members*/
5071 Pickler_getsets, /*tp_getset*/
5072 0, /*tp_base*/
5073 0, /*tp_dict*/
5074 0, /*tp_descr_get*/
5075 0, /*tp_descr_set*/
5076 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08005077 _pickle_Pickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005078 PyType_GenericAlloc, /*tp_alloc*/
5079 PyType_GenericNew, /*tp_new*/
5080 PyObject_GC_Del, /*tp_free*/
5081 0, /*tp_is_gc*/
5082};
5083
Victor Stinner121aab42011-09-29 23:40:53 +02005084/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005085
5086 XXX: It would be nice to able to avoid Python function call overhead, by
5087 using directly the C version of find_class(), when find_class() is not
5088 overridden by a subclass. Although, this could become rather hackish. A
5089 simpler optimization would be to call the C function when self is not a
5090 subclass instance. */
5091static PyObject *
5092find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5093{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005094 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02005095
Victor Stinner55ba38a2016-12-09 16:09:30 +01005096 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5097 module_name, global_name, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005098}
5099
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005100static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005101marker(UnpicklerObject *self)
5102{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005103 Py_ssize_t mark;
5104
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005105 if (self->num_marks < 1) {
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005106 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005107 PyErr_SetString(st->UnpicklingError, "could not find MARK");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005108 return -1;
5109 }
5110
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005111 mark = self->marks[--self->num_marks];
5112 self->stack->mark_set = self->num_marks != 0;
5113 self->stack->fence = self->num_marks ?
5114 self->marks[self->num_marks - 1] : 0;
5115 return mark;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005116}
5117
5118static int
5119load_none(UnpicklerObject *self)
5120{
5121 PDATA_APPEND(self->stack, Py_None, -1);
5122 return 0;
5123}
5124
5125static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005126load_int(UnpicklerObject *self)
5127{
5128 PyObject *value;
5129 char *endptr, *s;
5130 Py_ssize_t len;
5131 long x;
5132
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005133 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005134 return -1;
5135 if (len < 2)
5136 return bad_readline();
5137
5138 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005139 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005140 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005141 x = strtol(s, &endptr, 0);
5142
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005143 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005144 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03005145 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005146 errno = 0;
5147 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005148 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005149 if (value == NULL) {
5150 PyErr_SetString(PyExc_ValueError,
5151 "could not convert string to int");
5152 return -1;
5153 }
5154 }
5155 else {
5156 if (len == 3 && (x == 0 || x == 1)) {
5157 if ((value = PyBool_FromLong(x)) == NULL)
5158 return -1;
5159 }
5160 else {
5161 if ((value = PyLong_FromLong(x)) == NULL)
5162 return -1;
5163 }
5164 }
5165
5166 PDATA_PUSH(self->stack, value, -1);
5167 return 0;
5168}
5169
5170static int
5171load_bool(UnpicklerObject *self, PyObject *boolean)
5172{
5173 assert(boolean == Py_True || boolean == Py_False);
5174 PDATA_APPEND(self->stack, boolean, -1);
5175 return 0;
5176}
5177
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005178/* s contains x bytes of an unsigned little-endian integer. Return its value
5179 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5180 */
5181static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005182calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005183{
5184 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005185 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005186 size_t x = 0;
5187
Serhiy Storchakae0606192015-09-29 22:10:07 +03005188 if (nbytes > (int)sizeof(size_t)) {
5189 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5190 * have 64-bit size that can't be represented on 32-bit platform.
5191 */
5192 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5193 if (s[i])
5194 return -1;
5195 }
5196 nbytes = (int)sizeof(size_t);
5197 }
5198 for (i = 0; i < nbytes; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005199 x |= (size_t) s[i] << (8 * i);
5200 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005201
5202 if (x > PY_SSIZE_T_MAX)
5203 return -1;
5204 else
5205 return (Py_ssize_t) x;
5206}
5207
/* bytes points at nbytes bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long: shifting a byte with its
 * high bit set into bit 31 of a signed long is undefined behaviour where
 * long is only 32 bits wide.  nbytes must not exceed sizeof(long).
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is ux - 2**32, computed
     * here without ever leaving the range of a 32-bit long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
5234
5235static int
5236load_binintx(UnpicklerObject *self, char *s, int size)
5237{
5238 PyObject *value;
5239 long x;
5240
5241 x = calc_binint(s, size);
5242
5243 if ((value = PyLong_FromLong(x)) == NULL)
5244 return -1;
5245
5246 PDATA_PUSH(self->stack, value, -1);
5247 return 0;
5248}
5249
5250static int
5251load_binint(UnpicklerObject *self)
5252{
5253 char *s;
5254
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005255 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005256 return -1;
5257
5258 return load_binintx(self, s, 4);
5259}
5260
5261static int
5262load_binint1(UnpicklerObject *self)
5263{
5264 char *s;
5265
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005266 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005267 return -1;
5268
5269 return load_binintx(self, s, 1);
5270}
5271
5272static int
5273load_binint2(UnpicklerObject *self)
5274{
5275 char *s;
5276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005277 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005278 return -1;
5279
5280 return load_binintx(self, s, 2);
5281}
5282
5283static int
5284load_long(UnpicklerObject *self)
5285{
5286 PyObject *value;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005287 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005288 Py_ssize_t len;
5289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005290 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005291 return -1;
5292 if (len < 2)
5293 return bad_readline();
5294
Mark Dickinson8dd05142009-01-20 20:43:58 +00005295 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5296 the 'L' before calling PyLong_FromString. In order to maintain
5297 compatibility with Python 3.0.0, we don't actually *require*
5298 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005299 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00005300 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00005301 /* XXX: Should the base argument explicitly set to 10? */
5302 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00005303 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005304 return -1;
5305
5306 PDATA_PUSH(self->stack, value, -1);
5307 return 0;
5308}
5309
5310/* 'size' bytes contain the # of bytes of little-endian 256's-complement
5311 * data following.
5312 */
5313static int
5314load_counted_long(UnpicklerObject *self, int size)
5315{
5316 PyObject *value;
5317 char *nbytes;
5318 char *pdata;
5319
5320 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005321 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005322 return -1;
5323
5324 size = calc_binint(nbytes, size);
5325 if (size < 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005326 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005327 /* Corrupt or hostile pickle -- we never write one like this */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005328 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005329 "LONG pickle has negative byte count");
5330 return -1;
5331 }
5332
5333 if (size == 0)
5334 value = PyLong_FromLong(0L);
5335 else {
5336 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005337 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005338 return -1;
5339 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5340 1 /* little endian */ , 1 /* signed */ );
5341 }
5342 if (value == NULL)
5343 return -1;
5344 PDATA_PUSH(self->stack, value, -1);
5345 return 0;
5346}
5347
5348static int
5349load_float(UnpicklerObject *self)
5350{
5351 PyObject *value;
5352 char *endptr, *s;
5353 Py_ssize_t len;
5354 double d;
5355
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005356 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005357 return -1;
5358 if (len < 2)
5359 return bad_readline();
5360
5361 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00005362 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5363 if (d == -1.0 && PyErr_Occurred())
5364 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005365 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005366 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5367 return -1;
5368 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00005369 value = PyFloat_FromDouble(d);
5370 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005371 return -1;
5372
5373 PDATA_PUSH(self->stack, value, -1);
5374 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005375}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005376
5377static int
5378load_binfloat(UnpicklerObject *self)
5379{
5380 PyObject *value;
5381 double x;
5382 char *s;
5383
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005384 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005385 return -1;
5386
5387 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5388 if (x == -1.0 && PyErr_Occurred())
5389 return -1;
5390
5391 if ((value = PyFloat_FromDouble(x)) == NULL)
5392 return -1;
5393
5394 PDATA_PUSH(self->stack, value, -1);
5395 return 0;
5396}
5397
5398static int
5399load_string(UnpicklerObject *self)
5400{
5401 PyObject *bytes;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005402 PyObject *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005403 Py_ssize_t len;
5404 char *s, *p;
5405
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005406 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005407 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005408 /* Strip the newline */
5409 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005410 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005411 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005412 p = s + 1;
5413 len -= 2;
5414 }
5415 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005416 PickleState *st = _Pickle_GetGlobalState();
5417 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005418 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005419 return -1;
5420 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005421 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005422
5423 /* Use the PyBytes API to decode the string, since that is what is used
5424 to encode, and then coerce the result to Unicode. */
5425 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005426 if (bytes == NULL)
5427 return -1;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005428
5429 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5430 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5431 if (strcmp(self->encoding, "bytes") == 0) {
5432 obj = bytes;
5433 }
5434 else {
5435 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5436 Py_DECREF(bytes);
5437 if (obj == NULL) {
5438 return -1;
5439 }
5440 }
5441
5442 PDATA_PUSH(self->stack, obj, -1);
5443 return 0;
5444}
5445
5446static int
5447load_counted_binstring(UnpicklerObject *self, int nbytes)
5448{
5449 PyObject *obj;
5450 Py_ssize_t size;
5451 char *s;
5452
5453 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454 return -1;
5455
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005456 size = calc_binsize(s, nbytes);
5457 if (size < 0) {
5458 PickleState *st = _Pickle_GetGlobalState();
5459 PyErr_Format(st->UnpicklingError,
5460 "BINSTRING exceeds system's maximum size of %zd bytes",
5461 PY_SSIZE_T_MAX);
5462 return -1;
5463 }
5464
5465 if (_Unpickler_Read(self, &s, size) < 0)
5466 return -1;
5467
5468 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5469 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5470 if (strcmp(self->encoding, "bytes") == 0) {
5471 obj = PyBytes_FromStringAndSize(s, size);
5472 }
5473 else {
5474 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5475 }
5476 if (obj == NULL) {
5477 return -1;
5478 }
5479
5480 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005481 return 0;
5482}
5483
5484static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005485load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005486{
5487 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005488 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005489 char *s;
5490
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005491 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005492 return -1;
5493
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005494 size = calc_binsize(s, nbytes);
5495 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005496 PyErr_Format(PyExc_OverflowError,
5497 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005498 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005499 return -1;
5500 }
5501
Antoine Pitrou91f43802019-05-26 17:10:09 +02005502 bytes = PyBytes_FromStringAndSize(NULL, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005503 if (bytes == NULL)
5504 return -1;
Antoine Pitrou91f43802019-05-26 17:10:09 +02005505 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5506 Py_DECREF(bytes);
5507 return -1;
5508 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509
5510 PDATA_PUSH(self->stack, bytes, -1);
5511 return 0;
5512}
5513
5514static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02005515load_counted_bytearray(UnpicklerObject *self)
5516{
5517 PyObject *bytearray;
5518 Py_ssize_t size;
5519 char *s;
5520
5521 if (_Unpickler_Read(self, &s, 8) < 0) {
5522 return -1;
5523 }
5524
5525 size = calc_binsize(s, 8);
5526 if (size < 0) {
5527 PyErr_Format(PyExc_OverflowError,
5528 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5529 PY_SSIZE_T_MAX);
5530 return -1;
5531 }
5532
5533 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5534 if (bytearray == NULL) {
5535 return -1;
5536 }
5537 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5538 Py_DECREF(bytearray);
5539 return -1;
5540 }
5541
5542 PDATA_PUSH(self->stack, bytearray, -1);
5543 return 0;
5544}
5545
5546static int
5547load_next_buffer(UnpicklerObject *self)
5548{
5549 if (self->buffers == NULL) {
5550 PickleState *st = _Pickle_GetGlobalState();
5551 PyErr_SetString(st->UnpicklingError,
5552 "pickle stream refers to out-of-band data "
5553 "but no *buffers* argument was given");
5554 return -1;
5555 }
5556 PyObject *buf = PyIter_Next(self->buffers);
5557 if (buf == NULL) {
5558 if (!PyErr_Occurred()) {
5559 PickleState *st = _Pickle_GetGlobalState();
5560 PyErr_SetString(st->UnpicklingError,
5561 "not enough out-of-band buffers");
5562 }
5563 return -1;
5564 }
5565
5566 PDATA_PUSH(self->stack, buf, -1);
5567 return 0;
5568}
5569
5570static int
5571load_readonly_buffer(UnpicklerObject *self)
5572{
5573 Py_ssize_t len = Py_SIZE(self->stack);
5574 if (len <= self->stack->fence) {
5575 return Pdata_stack_underflow(self->stack);
5576 }
5577
5578 PyObject *obj = self->stack->data[len - 1];
5579 PyObject *view = PyMemoryView_FromObject(obj);
5580 if (view == NULL) {
5581 return -1;
5582 }
5583 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5584 /* Original object is writable */
5585 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5586 self->stack->data[len - 1] = view;
5587 Py_DECREF(obj);
5588 }
5589 else {
5590 /* Original object is read-only, no need to replace it */
5591 Py_DECREF(view);
5592 }
5593 return 0;
5594}
5595
5596static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005597load_unicode(UnpicklerObject *self)
5598{
5599 PyObject *str;
5600 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005601 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005602
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005603 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005604 return -1;
5605 if (len < 1)
5606 return bad_readline();
5607
5608 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5609 if (str == NULL)
5610 return -1;
5611
5612 PDATA_PUSH(self->stack, str, -1);
5613 return 0;
5614}
5615
5616static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005617load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005618{
5619 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005620 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005621 char *s;
5622
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005623 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005624 return -1;
5625
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005626 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005627 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005628 PyErr_Format(PyExc_OverflowError,
5629 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005630 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005631 return -1;
5632 }
5633
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005634 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005635 return -1;
5636
Victor Stinner485fb562010-04-13 11:07:24 +00005637 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005638 if (str == NULL)
5639 return -1;
5640
5641 PDATA_PUSH(self->stack, str, -1);
5642 return 0;
5643}
5644
5645static int
Victor Stinner21b47112016-03-14 18:09:39 +01005646load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005647{
5648 PyObject *tuple;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005649
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005650 if (Py_SIZE(self->stack) < len)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005651 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005652
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005653 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005654 if (tuple == NULL)
5655 return -1;
5656 PDATA_PUSH(self->stack, tuple, -1);
5657 return 0;
5658}
5659
5660static int
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005661load_tuple(UnpicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005662{
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005663 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005664
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005665 if ((i = marker(self)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005666 return -1;
5667
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005668 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005669}
5670
5671static int
5672load_empty_list(UnpicklerObject *self)
5673{
5674 PyObject *list;
5675
5676 if ((list = PyList_New(0)) == NULL)
5677 return -1;
5678 PDATA_PUSH(self->stack, list, -1);
5679 return 0;
5680}
5681
5682static int
5683load_empty_dict(UnpicklerObject *self)
5684{
5685 PyObject *dict;
5686
5687 if ((dict = PyDict_New()) == NULL)
5688 return -1;
5689 PDATA_PUSH(self->stack, dict, -1);
5690 return 0;
5691}
5692
5693static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005694load_empty_set(UnpicklerObject *self)
5695{
5696 PyObject *set;
5697
5698 if ((set = PySet_New(NULL)) == NULL)
5699 return -1;
5700 PDATA_PUSH(self->stack, set, -1);
5701 return 0;
5702}
5703
5704static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005705load_list(UnpicklerObject *self)
5706{
5707 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005708 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005709
5710 if ((i = marker(self)) < 0)
5711 return -1;
5712
5713 list = Pdata_poplist(self->stack, i);
5714 if (list == NULL)
5715 return -1;
5716 PDATA_PUSH(self->stack, list, -1);
5717 return 0;
5718}
5719
5720static int
5721load_dict(UnpicklerObject *self)
5722{
5723 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005724 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005725
5726 if ((i = marker(self)) < 0)
5727 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005728 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005729
5730 if ((dict = PyDict_New()) == NULL)
5731 return -1;
5732
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005733 if ((j - i) % 2 != 0) {
5734 PickleState *st = _Pickle_GetGlobalState();
5735 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
Serhiy Storchaka3ac53802015-12-07 11:32:00 +02005736 Py_DECREF(dict);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005737 return -1;
5738 }
5739
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005740 for (k = i + 1; k < j; k += 2) {
5741 key = self->stack->data[k - 1];
5742 value = self->stack->data[k];
5743 if (PyDict_SetItem(dict, key, value) < 0) {
5744 Py_DECREF(dict);
5745 return -1;
5746 }
5747 }
5748 Pdata_clear(self->stack, i);
5749 PDATA_PUSH(self->stack, dict, -1);
5750 return 0;
5751}
5752
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005753static int
5754load_frozenset(UnpicklerObject *self)
5755{
5756 PyObject *items;
5757 PyObject *frozenset;
5758 Py_ssize_t i;
5759
5760 if ((i = marker(self)) < 0)
5761 return -1;
5762
5763 items = Pdata_poptuple(self->stack, i);
5764 if (items == NULL)
5765 return -1;
5766
5767 frozenset = PyFrozenSet_New(items);
5768 Py_DECREF(items);
5769 if (frozenset == NULL)
5770 return -1;
5771
5772 PDATA_PUSH(self->stack, frozenset, -1);
5773 return 0;
5774}
5775
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005776static PyObject *
5777instantiate(PyObject *cls, PyObject *args)
5778{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005779 /* Caller must assure args are a tuple. Normally, args come from
5780 Pdata_poptuple which packs objects from the top of the stack
5781 into a newly created tuple. */
5782 assert(PyTuple_Check(args));
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005783 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5784 _Py_IDENTIFIER(__getinitargs__);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005785 _Py_IDENTIFIER(__new__);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02005786 PyObject *func;
5787 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5788 return NULL;
5789 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005790 if (func == NULL) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02005791 return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005792 }
5793 Py_DECREF(func);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005794 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005795 return PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005796}
5797
5798static int
5799load_obj(UnpicklerObject *self)
5800{
5801 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005802 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005803
5804 if ((i = marker(self)) < 0)
5805 return -1;
5806
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005807 if (Py_SIZE(self->stack) - i < 1)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005808 return Pdata_stack_underflow(self->stack);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005809
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005810 args = Pdata_poptuple(self->stack, i + 1);
5811 if (args == NULL)
5812 return -1;
5813
5814 PDATA_POP(self->stack, cls);
5815 if (cls) {
5816 obj = instantiate(cls, args);
5817 Py_DECREF(cls);
5818 }
5819 Py_DECREF(args);
5820 if (obj == NULL)
5821 return -1;
5822
5823 PDATA_PUSH(self->stack, obj, -1);
5824 return 0;
5825}
5826
5827static int
5828load_inst(UnpicklerObject *self)
5829{
5830 PyObject *cls = NULL;
5831 PyObject *args = NULL;
5832 PyObject *obj = NULL;
5833 PyObject *module_name;
5834 PyObject *class_name;
5835 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005836 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005837 char *s;
5838
5839 if ((i = marker(self)) < 0)
5840 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005841 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005842 return -1;
5843 if (len < 2)
5844 return bad_readline();
5845
5846 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5847 identifiers are permitted in Python 3.0, since the INST opcode is only
5848 supported by older protocols on Python 2.x. */
5849 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5850 if (module_name == NULL)
5851 return -1;
5852
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005853 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005854 if (len < 2) {
5855 Py_DECREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005856 return bad_readline();
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005857 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005858 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005859 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005860 cls = find_class(self, module_name, class_name);
5861 Py_DECREF(class_name);
5862 }
5863 }
5864 Py_DECREF(module_name);
5865
5866 if (cls == NULL)
5867 return -1;
5868
5869 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5870 obj = instantiate(cls, args);
5871 Py_DECREF(args);
5872 }
5873 Py_DECREF(cls);
5874
5875 if (obj == NULL)
5876 return -1;
5877
5878 PDATA_PUSH(self->stack, obj, -1);
5879 return 0;
5880}
5881
5882static int
5883load_newobj(UnpicklerObject *self)
5884{
5885 PyObject *args = NULL;
5886 PyObject *clsraw = NULL;
5887 PyTypeObject *cls; /* clsraw cast to its true type */
5888 PyObject *obj;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005889 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005890
5891 /* Stack is ... cls argtuple, and we want to call
5892 * cls.__new__(cls, *argtuple).
5893 */
5894 PDATA_POP(self->stack, args);
5895 if (args == NULL)
5896 goto error;
5897 if (!PyTuple_Check(args)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005898 PyErr_SetString(st->UnpicklingError,
5899 "NEWOBJ expected an arg " "tuple.");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005900 goto error;
5901 }
5902
5903 PDATA_POP(self->stack, clsraw);
5904 cls = (PyTypeObject *)clsraw;
5905 if (cls == NULL)
5906 goto error;
5907 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005908 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005909 "isn't a type object");
5910 goto error;
5911 }
5912 if (cls->tp_new == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005913 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005914 "has NULL tp_new");
5915 goto error;
5916 }
5917
5918 /* Call __new__. */
5919 obj = cls->tp_new(cls, args, NULL);
5920 if (obj == NULL)
5921 goto error;
5922
5923 Py_DECREF(args);
5924 Py_DECREF(clsraw);
5925 PDATA_PUSH(self->stack, obj, -1);
5926 return 0;
5927
5928 error:
5929 Py_XDECREF(args);
5930 Py_XDECREF(clsraw);
5931 return -1;
5932}
5933
5934static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005935load_newobj_ex(UnpicklerObject *self)
5936{
5937 PyObject *cls, *args, *kwargs;
5938 PyObject *obj;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005939 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005940
5941 PDATA_POP(self->stack, kwargs);
5942 if (kwargs == NULL) {
5943 return -1;
5944 }
5945 PDATA_POP(self->stack, args);
5946 if (args == NULL) {
5947 Py_DECREF(kwargs);
5948 return -1;
5949 }
5950 PDATA_POP(self->stack, cls);
5951 if (cls == NULL) {
5952 Py_DECREF(kwargs);
5953 Py_DECREF(args);
5954 return -1;
5955 }
Larry Hastings61272b72014-01-07 12:41:53 -08005956
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005957 if (!PyType_Check(cls)) {
5958 Py_DECREF(kwargs);
5959 Py_DECREF(args);
Larry Hastings61272b72014-01-07 12:41:53 -08005960 PyErr_Format(st->UnpicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005961 "NEWOBJ_EX class argument must be a type, not %.200s",
5962 Py_TYPE(cls)->tp_name);
Benjamin Peterson80f78a32015-07-02 16:18:38 -05005963 Py_DECREF(cls);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005964 return -1;
5965 }
5966
5967 if (((PyTypeObject *)cls)->tp_new == NULL) {
5968 Py_DECREF(kwargs);
5969 Py_DECREF(args);
5970 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005971 PyErr_SetString(st->UnpicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005972 "NEWOBJ_EX class argument doesn't have __new__");
5973 return -1;
5974 }
5975 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5976 Py_DECREF(kwargs);
5977 Py_DECREF(args);
5978 Py_DECREF(cls);
5979 if (obj == NULL) {
5980 return -1;
5981 }
5982 PDATA_PUSH(self->stack, obj, -1);
5983 return 0;
5984}
5985
5986static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005987load_global(UnpicklerObject *self)
5988{
5989 PyObject *global = NULL;
5990 PyObject *module_name;
5991 PyObject *global_name;
5992 Py_ssize_t len;
5993 char *s;
5994
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005995 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005996 return -1;
5997 if (len < 2)
5998 return bad_readline();
5999 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6000 if (!module_name)
6001 return -1;
6002
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006003 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006004 if (len < 2) {
6005 Py_DECREF(module_name);
6006 return bad_readline();
6007 }
6008 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6009 if (global_name) {
6010 global = find_class(self, module_name, global_name);
6011 Py_DECREF(global_name);
6012 }
6013 }
6014 Py_DECREF(module_name);
6015
6016 if (global == NULL)
6017 return -1;
6018 PDATA_PUSH(self->stack, global, -1);
6019 return 0;
6020}
6021
6022static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006023load_stack_global(UnpicklerObject *self)
6024{
6025 PyObject *global;
6026 PyObject *module_name;
6027 PyObject *global_name;
6028
6029 PDATA_POP(self->stack, global_name);
6030 PDATA_POP(self->stack, module_name);
6031 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6032 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006033 PickleState *st = _Pickle_GetGlobalState();
6034 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006035 Py_XDECREF(global_name);
6036 Py_XDECREF(module_name);
6037 return -1;
6038 }
6039 global = find_class(self, module_name, global_name);
6040 Py_DECREF(global_name);
6041 Py_DECREF(module_name);
6042 if (global == NULL)
6043 return -1;
6044 PDATA_PUSH(self->stack, global, -1);
6045 return 0;
6046}
6047
6048static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006049load_persid(UnpicklerObject *self)
6050{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006051 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006052 Py_ssize_t len;
6053 char *s;
6054
6055 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006056 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006057 return -1;
Alexandre Vassalotti896414f2013-11-30 13:52:35 -08006058 if (len < 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006059 return bad_readline();
6060
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006061 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6062 if (pid == NULL) {
6063 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6064 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6065 "persistent IDs in protocol 0 must be "
6066 "ASCII strings");
6067 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006068 return -1;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006069 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006070
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006071 obj = call_method(self->pers_func, self->pers_func_self, pid);
6072 Py_DECREF(pid);
6073 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006074 return -1;
6075
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006076 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006077 return 0;
6078 }
6079 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006080 PickleState *st = _Pickle_GetGlobalState();
6081 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006082 "A load persistent id instruction was encountered,\n"
6083 "but no persistent_load function was specified.");
6084 return -1;
6085 }
6086}
6087
6088static int
6089load_binpersid(UnpicklerObject *self)
6090{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006091 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006092
6093 if (self->pers_func) {
6094 PDATA_POP(self->stack, pid);
6095 if (pid == NULL)
6096 return -1;
6097
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006098 obj = call_method(self->pers_func, self->pers_func_self, pid);
6099 Py_DECREF(pid);
6100 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006101 return -1;
6102
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006103 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006104 return 0;
6105 }
6106 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006107 PickleState *st = _Pickle_GetGlobalState();
6108 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006109 "A load persistent id instruction was encountered,\n"
6110 "but no persistent_load function was specified.");
6111 return -1;
6112 }
6113}
6114
6115static int
6116load_pop(UnpicklerObject *self)
6117{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006118 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006119
6120 /* Note that we split the (pickle.py) stack into two stacks,
6121 * an object stack and a mark stack. We have to be clever and
6122 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00006123 * mark stack first, and only signalling a stack underflow if
6124 * the object stack is empty and the mark stack doesn't match
6125 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006126 */
Collin Winter8ca69de2009-05-26 16:53:41 +00006127 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006128 self->num_marks--;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006129 self->stack->mark_set = self->num_marks != 0;
6130 self->stack->fence = self->num_marks ?
6131 self->marks[self->num_marks - 1] : 0;
6132 } else if (len <= self->stack->fence)
6133 return Pdata_stack_underflow(self->stack);
6134 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006135 len--;
6136 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006137 Py_SIZE(self->stack) = len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006138 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006139 return 0;
6140}
6141
6142static int
6143load_pop_mark(UnpicklerObject *self)
6144{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006145 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006146
6147 if ((i = marker(self)) < 0)
6148 return -1;
6149
6150 Pdata_clear(self->stack, i);
6151
6152 return 0;
6153}
6154
6155static int
6156load_dup(UnpicklerObject *self)
6157{
6158 PyObject *last;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006159 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006160
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006161 if (len <= self->stack->fence)
6162 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006163 last = self->stack->data[len - 1];
6164 PDATA_APPEND(self->stack, last, -1);
6165 return 0;
6166}
6167
6168static int
6169load_get(UnpicklerObject *self)
6170{
6171 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006172 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006173 Py_ssize_t len;
6174 char *s;
6175
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006176 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006177 return -1;
6178 if (len < 2)
6179 return bad_readline();
6180
6181 key = PyLong_FromString(s, NULL, 10);
6182 if (key == NULL)
6183 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006184 idx = PyLong_AsSsize_t(key);
6185 if (idx == -1 && PyErr_Occurred()) {
6186 Py_DECREF(key);
6187 return -1;
6188 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006189
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006190 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006191 if (value == NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006192 if (!PyErr_Occurred()) {
6193 PickleState *st = _Pickle_GetGlobalState();
6194 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6195 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006196 Py_DECREF(key);
6197 return -1;
6198 }
6199 Py_DECREF(key);
6200
6201 PDATA_APPEND(self->stack, value, -1);
6202 return 0;
6203}
6204
6205static int
6206load_binget(UnpicklerObject *self)
6207{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006208 PyObject *value;
6209 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006210 char *s;
6211
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006212 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006213 return -1;
6214
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006215 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006216
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006217 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006218 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006219 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006220 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006221 PickleState *st = _Pickle_GetGlobalState();
6222 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006223 Py_DECREF(key);
6224 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006225 return -1;
6226 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006227
6228 PDATA_APPEND(self->stack, value, -1);
6229 return 0;
6230}
6231
6232static int
6233load_long_binget(UnpicklerObject *self)
6234{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006235 PyObject *value;
6236 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006237 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006238
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006239 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006240 return -1;
6241
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006242 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006243
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006244 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006245 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006246 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006247 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006248 PickleState *st = _Pickle_GetGlobalState();
6249 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006250 Py_DECREF(key);
6251 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006252 return -1;
6253 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006254
6255 PDATA_APPEND(self->stack, value, -1);
6256 return 0;
6257}
6258
6259/* Push an object from the extension registry (EXT[124]). nbytes is
6260 * the number of bytes following the opcode, holding the index (code) value.
6261 */
6262static int
6263load_extension(UnpicklerObject *self, int nbytes)
6264{
6265 char *codebytes; /* the nbytes bytes after the opcode */
6266 long code; /* calc_binint returns long */
6267 PyObject *py_code; /* code as a Python int */
6268 PyObject *obj; /* the object to push */
6269 PyObject *pair; /* (module_name, class_name) */
6270 PyObject *module_name, *class_name;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006271 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006272
6273 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006274 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006275 return -1;
6276 code = calc_binint(codebytes, nbytes);
6277 if (code <= 0) { /* note that 0 is forbidden */
6278 /* Corrupt or hostile pickle. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006279 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006280 return -1;
6281 }
6282
6283 /* Look for the code in the cache. */
6284 py_code = PyLong_FromLong(code);
6285 if (py_code == NULL)
6286 return -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006287 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006288 if (obj != NULL) {
6289 /* Bingo. */
6290 Py_DECREF(py_code);
6291 PDATA_APPEND(self->stack, obj, -1);
6292 return 0;
6293 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006294 if (PyErr_Occurred()) {
6295 Py_DECREF(py_code);
6296 return -1;
6297 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006298
6299 /* Look up the (module_name, class_name) pair. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006300 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006301 if (pair == NULL) {
6302 Py_DECREF(py_code);
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006303 if (!PyErr_Occurred()) {
6304 PyErr_Format(PyExc_ValueError, "unregistered extension "
6305 "code %ld", code);
6306 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006307 return -1;
6308 }
6309 /* Since the extension registry is manipulable via Python code,
6310 * confirm that pair is really a 2-tuple of strings.
6311 */
Victor Stinnerb37672d2018-11-22 03:37:50 +01006312 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6313 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006314 }
Victor Stinnerb37672d2018-11-22 03:37:50 +01006315
6316 module_name = PyTuple_GET_ITEM(pair, 0);
6317 if (!PyUnicode_Check(module_name)) {
6318 goto error;
6319 }
6320
6321 class_name = PyTuple_GET_ITEM(pair, 1);
6322 if (!PyUnicode_Check(class_name)) {
6323 goto error;
6324 }
6325
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006326 /* Load the object. */
6327 obj = find_class(self, module_name, class_name);
6328 if (obj == NULL) {
6329 Py_DECREF(py_code);
6330 return -1;
6331 }
6332 /* Cache code -> obj. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006333 code = PyDict_SetItem(st->extension_cache, py_code, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006334 Py_DECREF(py_code);
6335 if (code < 0) {
6336 Py_DECREF(obj);
6337 return -1;
6338 }
6339 PDATA_PUSH(self->stack, obj, -1);
6340 return 0;
Victor Stinnerb37672d2018-11-22 03:37:50 +01006341
6342error:
6343 Py_DECREF(py_code);
6344 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6345 "isn't a 2-tuple of strings", code);
6346 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006347}
6348
6349static int
6350load_put(UnpicklerObject *self)
6351{
6352 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006353 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006354 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01006355 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006356
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006357 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006358 return -1;
6359 if (len < 2)
6360 return bad_readline();
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006361 if (Py_SIZE(self->stack) <= self->stack->fence)
6362 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006363 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006364
6365 key = PyLong_FromString(s, NULL, 10);
6366 if (key == NULL)
6367 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006368 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006369 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006370 if (idx < 0) {
6371 if (!PyErr_Occurred())
6372 PyErr_SetString(PyExc_ValueError,
6373 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006374 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006375 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006376
6377 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006378}
6379
6380static int
6381load_binput(UnpicklerObject *self)
6382{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006383 PyObject *value;
6384 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006385 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006386
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006387 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006388 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006389
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006390 if (Py_SIZE(self->stack) <= self->stack->fence)
6391 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006392 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006394 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006395
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006396 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006397}
6398
6399static int
6400load_long_binput(UnpicklerObject *self)
6401{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006402 PyObject *value;
6403 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006404 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006405
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006406 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006407 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006408
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006409 if (Py_SIZE(self->stack) <= self->stack->fence)
6410 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006411 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006412
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006413 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006414 if (idx < 0) {
6415 PyErr_SetString(PyExc_ValueError,
6416 "negative LONG_BINPUT argument");
6417 return -1;
6418 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006419
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006420 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006421}
6422
6423static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006424load_memoize(UnpicklerObject *self)
6425{
6426 PyObject *value;
6427
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006428 if (Py_SIZE(self->stack) <= self->stack->fence)
6429 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006430 value = self->stack->data[Py_SIZE(self->stack) - 1];
6431
6432 return _Unpickler_MemoPut(self, self->memo_len, value);
6433}
6434
6435static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006436do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006437{
6438 PyObject *value;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006439 PyObject *slice;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006440 PyObject *list;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006441 PyObject *result;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006442 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006443
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006444 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006445 if (x > len || x <= self->stack->fence)
6446 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006447 if (len == x) /* nothing to do */
6448 return 0;
6449
6450 list = self->stack->data[x - 1];
6451
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006452 if (PyList_CheckExact(list)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006453 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006454 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006455
6456 slice = Pdata_poplist(self->stack, x);
6457 if (!slice)
6458 return -1;
6459 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006460 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006461 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006462 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006463 }
6464 else {
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006465 PyObject *extend_func;
6466 _Py_IDENTIFIER(extend);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006467
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03006468 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6469 return -1;
6470 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006471 if (extend_func != NULL) {
6472 slice = Pdata_poplist(self->stack, x);
6473 if (!slice) {
6474 Py_DECREF(extend_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006475 return -1;
6476 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006477 result = _Pickle_FastCall(extend_func, slice);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006478 Py_DECREF(extend_func);
6479 if (result == NULL)
6480 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006481 Py_DECREF(result);
6482 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006483 else {
6484 PyObject *append_func;
6485 _Py_IDENTIFIER(append);
6486
6487 /* Even if the PEP 307 requires extend() and append() methods,
6488 fall back on append() if the object has no extend() method
6489 for backward compatibility. */
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006490 append_func = _PyObject_GetAttrId(list, &PyId_append);
6491 if (append_func == NULL)
6492 return -1;
6493 for (i = x; i < len; i++) {
6494 value = self->stack->data[i];
6495 result = _Pickle_FastCall(append_func, value);
6496 if (result == NULL) {
6497 Pdata_clear(self->stack, i + 1);
6498 Py_SIZE(self->stack) = x;
6499 Py_DECREF(append_func);
6500 return -1;
6501 }
6502 Py_DECREF(result);
6503 }
6504 Py_SIZE(self->stack) = x;
6505 Py_DECREF(append_func);
6506 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006507 }
6508
6509 return 0;
6510}
6511
6512static int
6513load_append(UnpicklerObject *self)
6514{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006515 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6516 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006517 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006518}
6519
6520static int
6521load_appends(UnpicklerObject *self)
6522{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006523 Py_ssize_t i = marker(self);
6524 if (i < 0)
6525 return -1;
6526 return do_append(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006527}
6528
6529static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006530do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006531{
6532 PyObject *value, *key;
6533 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006534 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006535 int status = 0;
6536
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006537 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006538 if (x > len || x <= self->stack->fence)
6539 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006540 if (len == x) /* nothing to do */
6541 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02006542 if ((len - x) % 2 != 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006543 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006544 /* Currupt or hostile pickle -- we never write one like this. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006545 PyErr_SetString(st->UnpicklingError,
6546 "odd number of items for SETITEMS");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006547 return -1;
6548 }
6549
6550 /* Here, dict does not actually need to be a PyDict; it could be anything
6551 that supports the __setitem__ attribute. */
6552 dict = self->stack->data[x - 1];
6553
6554 for (i = x + 1; i < len; i += 2) {
6555 key = self->stack->data[i - 1];
6556 value = self->stack->data[i];
6557 if (PyObject_SetItem(dict, key, value) < 0) {
6558 status = -1;
6559 break;
6560 }
6561 }
6562
6563 Pdata_clear(self->stack, x);
6564 return status;
6565}
6566
6567static int
6568load_setitem(UnpicklerObject *self)
6569{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006570 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006571}
6572
6573static int
6574load_setitems(UnpicklerObject *self)
6575{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006576 Py_ssize_t i = marker(self);
6577 if (i < 0)
6578 return -1;
6579 return do_setitems(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006580}
6581
6582static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006583load_additems(UnpicklerObject *self)
6584{
6585 PyObject *set;
6586 Py_ssize_t mark, len, i;
6587
6588 mark = marker(self);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006589 if (mark < 0)
6590 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006591 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006592 if (mark > len || mark <= self->stack->fence)
6593 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006594 if (len == mark) /* nothing to do */
6595 return 0;
6596
6597 set = self->stack->data[mark - 1];
6598
6599 if (PySet_Check(set)) {
6600 PyObject *items;
6601 int status;
6602
6603 items = Pdata_poptuple(self->stack, mark);
6604 if (items == NULL)
6605 return -1;
6606
6607 status = _PySet_Update(set, items);
6608 Py_DECREF(items);
6609 return status;
6610 }
6611 else {
6612 PyObject *add_func;
6613 _Py_IDENTIFIER(add);
6614
6615 add_func = _PyObject_GetAttrId(set, &PyId_add);
6616 if (add_func == NULL)
6617 return -1;
6618 for (i = mark; i < len; i++) {
6619 PyObject *result;
6620 PyObject *item;
6621
6622 item = self->stack->data[i];
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006623 result = _Pickle_FastCall(add_func, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006624 if (result == NULL) {
6625 Pdata_clear(self->stack, i + 1);
6626 Py_SIZE(self->stack) = mark;
6627 return -1;
6628 }
6629 Py_DECREF(result);
6630 }
6631 Py_SIZE(self->stack) = mark;
6632 }
6633
6634 return 0;
6635}
6636
6637static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006638load_build(UnpicklerObject *self)
6639{
6640 PyObject *state, *inst, *slotstate;
6641 PyObject *setstate;
6642 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006643 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006644
6645 /* Stack is ... instance, state. We want to leave instance at
6646 * the stack top, possibly mutated via instance.__setstate__(state).
6647 */
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006648 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6649 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006650
6651 PDATA_POP(self->stack, state);
6652 if (state == NULL)
6653 return -1;
6654
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006655 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006656
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006657 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6658 Py_DECREF(state);
6659 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006660 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006661 if (setstate != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006662 PyObject *result;
6663
6664 /* The explicit __setstate__ is responsible for everything. */
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006665 result = _Pickle_FastCall(setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006666 Py_DECREF(setstate);
6667 if (result == NULL)
6668 return -1;
6669 Py_DECREF(result);
6670 return 0;
6671 }
6672
6673 /* A default __setstate__. First see whether state embeds a
6674 * slot state dict too (a proto 2 addition).
6675 */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02006676 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006677 PyObject *tmp = state;
6678
6679 state = PyTuple_GET_ITEM(tmp, 0);
6680 slotstate = PyTuple_GET_ITEM(tmp, 1);
6681 Py_INCREF(state);
6682 Py_INCREF(slotstate);
6683 Py_DECREF(tmp);
6684 }
6685 else
6686 slotstate = NULL;
6687
6688 /* Set inst.__dict__ from the state dict (if any). */
6689 if (state != Py_None) {
6690 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006691 PyObject *d_key, *d_value;
6692 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006693 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006694
6695 if (!PyDict_Check(state)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006696 PickleState *st = _Pickle_GetGlobalState();
6697 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006698 goto error;
6699 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006700 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006701 if (dict == NULL)
6702 goto error;
6703
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006704 i = 0;
6705 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6706 /* normally the keys for instance attributes are
6707 interned. we should try to do that here. */
6708 Py_INCREF(d_key);
6709 if (PyUnicode_CheckExact(d_key))
6710 PyUnicode_InternInPlace(&d_key);
6711 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6712 Py_DECREF(d_key);
6713 goto error;
6714 }
6715 Py_DECREF(d_key);
6716 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006717 Py_DECREF(dict);
6718 }
6719
6720 /* Also set instance attributes from the slotstate dict (if any). */
6721 if (slotstate != NULL) {
6722 PyObject *d_key, *d_value;
6723 Py_ssize_t i;
6724
6725 if (!PyDict_Check(slotstate)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006726 PickleState *st = _Pickle_GetGlobalState();
6727 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006728 "slot state is not a dictionary");
6729 goto error;
6730 }
6731 i = 0;
6732 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6733 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6734 goto error;
6735 }
6736 }
6737
6738 if (0) {
6739 error:
6740 status = -1;
6741 }
6742
6743 Py_DECREF(state);
6744 Py_XDECREF(slotstate);
6745 return status;
6746}
6747
6748static int
6749load_mark(UnpicklerObject *self)
6750{
6751
6752 /* Note that we split the (pickle.py) stack into two stacks, an
6753 * object stack and a mark stack. Here we push a mark onto the
6754 * mark stack.
6755 */
6756
Sergey Fedoseev86b89912018-08-25 12:54:40 +05006757 if (self->num_marks >= self->marks_size) {
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006758 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6759 Py_ssize_t *marks_new = self->marks;
6760 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6761 if (marks_new == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006762 PyErr_NoMemory();
6763 return -1;
6764 }
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006765 self->marks = marks_new;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006766 self->marks_size = (Py_ssize_t)alloc;
6767 }
6768
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006769 self->stack->mark_set = 1;
6770 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006771
6772 return 0;
6773}
6774
6775static int
6776load_reduce(UnpicklerObject *self)
6777{
6778 PyObject *callable = NULL;
6779 PyObject *argtup = NULL;
6780 PyObject *obj = NULL;
6781
6782 PDATA_POP(self->stack, argtup);
6783 if (argtup == NULL)
6784 return -1;
6785 PDATA_POP(self->stack, callable);
6786 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00006787 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006788 Py_DECREF(callable);
6789 }
6790 Py_DECREF(argtup);
6791
6792 if (obj == NULL)
6793 return -1;
6794
6795 PDATA_PUSH(self->stack, obj, -1);
6796 return 0;
6797}
6798
6799/* Just raises an error if we don't know the protocol specified. PROTO
6800 * is the first opcode for protocols >= 2.
6801 */
6802static int
6803load_proto(UnpicklerObject *self)
6804{
6805 char *s;
6806 int i;
6807
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006808 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006809 return -1;
6810
6811 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006812 if (i <= HIGHEST_PROTOCOL) {
6813 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006814 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006815 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006816
6817 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6818 return -1;
6819}
6820
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006821static int
6822load_frame(UnpicklerObject *self)
6823{
6824 char *s;
6825 Py_ssize_t frame_len;
6826
6827 if (_Unpickler_Read(self, &s, 8) < 0)
6828 return -1;
6829
6830 frame_len = calc_binsize(s, 8);
6831 if (frame_len < 0) {
6832 PyErr_Format(PyExc_OverflowError,
6833 "FRAME length exceeds system's maximum of %zd bytes",
6834 PY_SSIZE_T_MAX);
6835 return -1;
6836 }
6837
6838 if (_Unpickler_Read(self, &s, frame_len) < 0)
6839 return -1;
6840
6841 /* Rewind to start of frame */
6842 self->next_read_idx -= frame_len;
6843 return 0;
6844}
6845
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006846static PyObject *
6847load(UnpicklerObject *self)
6848{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006849 PyObject *value = NULL;
Christian Heimes27ea78b2014-01-27 01:03:53 +01006850 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006851
6852 self->num_marks = 0;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006853 self->stack->mark_set = 0;
6854 self->stack->fence = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006855 self->proto = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006856 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006857 Pdata_clear(self->stack, 0);
6858
6859 /* Convenient macros for the dispatch while-switch loop just below. */
6860#define OP(opcode, load_func) \
6861 case opcode: if (load_func(self) < 0) break; continue;
6862
6863#define OP_ARG(opcode, load_func, arg) \
6864 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6865
6866 while (1) {
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006867 if (_Unpickler_Read(self, &s, 1) < 0) {
6868 PickleState *st = _Pickle_GetGlobalState();
6869 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6870 PyErr_Format(PyExc_EOFError, "Ran out of input");
6871 }
6872 return NULL;
6873 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006874
6875 switch ((enum opcode)s[0]) {
6876 OP(NONE, load_none)
6877 OP(BININT, load_binint)
6878 OP(BININT1, load_binint1)
6879 OP(BININT2, load_binint2)
6880 OP(INT, load_int)
6881 OP(LONG, load_long)
6882 OP_ARG(LONG1, load_counted_long, 1)
6883 OP_ARG(LONG4, load_counted_long, 4)
6884 OP(FLOAT, load_float)
6885 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006886 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6887 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6888 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
Antoine Pitrou91f43802019-05-26 17:10:09 +02006889 OP(BYTEARRAY8, load_counted_bytearray)
6890 OP(NEXT_BUFFER, load_next_buffer)
6891 OP(READONLY_BUFFER, load_readonly_buffer)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006892 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6893 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006894 OP(STRING, load_string)
6895 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006896 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6897 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6898 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006899 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6900 OP_ARG(TUPLE1, load_counted_tuple, 1)
6901 OP_ARG(TUPLE2, load_counted_tuple, 2)
6902 OP_ARG(TUPLE3, load_counted_tuple, 3)
6903 OP(TUPLE, load_tuple)
6904 OP(EMPTY_LIST, load_empty_list)
6905 OP(LIST, load_list)
6906 OP(EMPTY_DICT, load_empty_dict)
6907 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006908 OP(EMPTY_SET, load_empty_set)
6909 OP(ADDITEMS, load_additems)
6910 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006911 OP(OBJ, load_obj)
6912 OP(INST, load_inst)
6913 OP(NEWOBJ, load_newobj)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006914 OP(NEWOBJ_EX, load_newobj_ex)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006915 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006916 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006917 OP(APPEND, load_append)
6918 OP(APPENDS, load_appends)
6919 OP(BUILD, load_build)
6920 OP(DUP, load_dup)
6921 OP(BINGET, load_binget)
6922 OP(LONG_BINGET, load_long_binget)
6923 OP(GET, load_get)
6924 OP(MARK, load_mark)
6925 OP(BINPUT, load_binput)
6926 OP(LONG_BINPUT, load_long_binput)
6927 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006928 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006929 OP(POP, load_pop)
6930 OP(POP_MARK, load_pop_mark)
6931 OP(SETITEM, load_setitem)
6932 OP(SETITEMS, load_setitems)
6933 OP(PERSID, load_persid)
6934 OP(BINPERSID, load_binpersid)
6935 OP(REDUCE, load_reduce)
6936 OP(PROTO, load_proto)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006937 OP(FRAME, load_frame)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006938 OP_ARG(EXT1, load_extension, 1)
6939 OP_ARG(EXT2, load_extension, 2)
6940 OP_ARG(EXT4, load_extension, 4)
6941 OP_ARG(NEWTRUE, load_bool, Py_True)
6942 OP_ARG(NEWFALSE, load_bool, Py_False)
6943
6944 case STOP:
6945 break;
6946
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006947 default:
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006948 {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006949 PickleState *st = _Pickle_GetGlobalState();
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006950 unsigned char c = (unsigned char) *s;
6951 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6952 PyErr_Format(st->UnpicklingError,
6953 "invalid load key, '%c'.", c);
6954 }
6955 else {
6956 PyErr_Format(st->UnpicklingError,
6957 "invalid load key, '\\x%02x'.", c);
6958 }
6959 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006960 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006961 }
6962
6963 break; /* and we are done! */
6964 }
6965
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006966 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006967 return NULL;
6968 }
6969
Victor Stinner2ae57e32013-10-31 13:39:23 +01006970 if (_Unpickler_SkipConsumed(self) < 0)
6971 return NULL;
6972
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006973 PDATA_POP(self->stack, value);
6974 return value;
6975}
6976
Larry Hastings61272b72014-01-07 12:41:53 -08006977/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006978
6979_pickle.Unpickler.load
6980
6981Load a pickle.
6982
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006983Read a pickled object representation from the open file object given
6984in the constructor, and return the reconstituted object hierarchy
6985specified therein.
Larry Hastings61272b72014-01-07 12:41:53 -08006986[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006987
Larry Hastings3cceb382014-01-04 11:09:09 -08006988static PyObject *
Larry Hastingsc2047262014-01-25 20:43:29 -08006989_pickle_Unpickler_load_impl(UnpicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006990/*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006991{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006992 UnpicklerObject *unpickler = (UnpicklerObject*)self;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006993
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006994 /* Check whether the Unpickler was initialized correctly. This prevents
6995 segfaulting if a subclass overridden __init__ with a function that does
6996 not call Unpickler.__init__(). Here, we simply ensure that self->read
6997 is not NULL. */
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006998 if (unpickler->read == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006999 PickleState *st = _Pickle_GetGlobalState();
7000 PyErr_Format(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007001 "Unpickler.__init__() was not called by %s.__init__()",
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007002 Py_TYPE(unpickler)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007003 return NULL;
7004 }
7005
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007006 return load(unpickler);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007007}
7008
7009/* The name of find_class() is misleading. In newer pickle protocols, this
7010 function is used for loading any global (i.e., functions), not just
7011 classes. The name is kept only for backward compatibility. */
7012
Larry Hastings61272b72014-01-07 12:41:53 -08007013/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007014
7015_pickle.Unpickler.find_class
7016
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007017 module_name: object
7018 global_name: object
7019 /
7020
7021Return an object from a specified module.
7022
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007023If necessary, the module will be imported. Subclasses may override
7024this method (e.g. to restrict unpickling of arbitrary classes and
7025functions).
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007026
7027This method is called whenever a class or a function object is
7028needed. Both arguments passed are str objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007029[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007030
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007031static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007032_pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7033 PyObject *module_name,
7034 PyObject *global_name)
7035/*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007036{
7037 PyObject *global;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007038 PyObject *module;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007039
Steve Dowerb82e17e2019-05-23 08:45:22 -07007040 if (PySys_Audit("pickle.find_class", "OO",
7041 module_name, global_name) < 0) {
7042 return NULL;
7043 }
7044
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007045 /* Try to map the old names used in Python 2.x to the new ones used in
7046 Python 3.x. We do this only with old pickle protocols and when the
7047 user has not disabled the feature. */
7048 if (self->proto < 3 && self->fix_imports) {
7049 PyObject *key;
7050 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007051 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007052
7053 /* Check if the global (i.e., a function or a class) was renamed
7054 or moved to another module. */
7055 key = PyTuple_Pack(2, module_name, global_name);
7056 if (key == NULL)
7057 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007058 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007059 Py_DECREF(key);
7060 if (item) {
7061 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7062 PyErr_Format(PyExc_RuntimeError,
7063 "_compat_pickle.NAME_MAPPING values should be "
7064 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7065 return NULL;
7066 }
7067 module_name = PyTuple_GET_ITEM(item, 0);
7068 global_name = PyTuple_GET_ITEM(item, 1);
7069 if (!PyUnicode_Check(module_name) ||
7070 !PyUnicode_Check(global_name)) {
7071 PyErr_Format(PyExc_RuntimeError,
7072 "_compat_pickle.NAME_MAPPING values should be "
7073 "pairs of str, not (%.200s, %.200s)",
7074 Py_TYPE(module_name)->tp_name,
7075 Py_TYPE(global_name)->tp_name);
7076 return NULL;
7077 }
7078 }
7079 else if (PyErr_Occurred()) {
7080 return NULL;
7081 }
Serhiy Storchakabfe18242015-03-31 13:12:37 +03007082 else {
7083 /* Check if the module was renamed. */
7084 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7085 if (item) {
7086 if (!PyUnicode_Check(item)) {
7087 PyErr_Format(PyExc_RuntimeError,
7088 "_compat_pickle.IMPORT_MAPPING values should be "
7089 "strings, not %.200s", Py_TYPE(item)->tp_name);
7090 return NULL;
7091 }
7092 module_name = item;
7093 }
7094 else if (PyErr_Occurred()) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007095 return NULL;
7096 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007097 }
7098 }
7099
tjb9004371c0a2019-02-18 23:30:51 +08007100 /*
7101 * we don't use PyImport_GetModule here, because it can return partially-
7102 * initialised modules, which then cause the getattribute to fail.
7103 */
7104 module = PyImport_Import(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007105 if (module == NULL) {
tjb9004371c0a2019-02-18 23:30:51 +08007106 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007107 }
Eric Snow3f9eee62017-09-15 16:35:20 -06007108 global = getattribute(module, global_name, self->proto >= 4);
7109 Py_DECREF(module);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007110 return global;
7111}
7112
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007113/*[clinic input]
7114
7115_pickle.Unpickler.__sizeof__ -> Py_ssize_t
7116
7117Returns size in memory, in bytes.
7118[clinic start generated code]*/
7119
7120static Py_ssize_t
7121_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7122/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7123{
7124 Py_ssize_t res;
7125
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02007126 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007127 if (self->memo != NULL)
7128 res += self->memo_size * sizeof(PyObject *);
7129 if (self->marks != NULL)
7130 res += self->marks_size * sizeof(Py_ssize_t);
7131 if (self->input_line != NULL)
7132 res += strlen(self->input_line) + 1;
7133 if (self->encoding != NULL)
7134 res += strlen(self->encoding) + 1;
7135 if (self->errors != NULL)
7136 res += strlen(self->errors) + 1;
7137 return res;
7138}
7139
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007140static struct PyMethodDef Unpickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007141 _PICKLE_UNPICKLER_LOAD_METHODDEF
7142 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007143 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007144 {NULL, NULL} /* sentinel */
7145};
7146
7147static void
7148Unpickler_dealloc(UnpicklerObject *self)
7149{
7150 PyObject_GC_UnTrack((PyObject *)self);
7151 Py_XDECREF(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007152 Py_XDECREF(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007153 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007154 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007155 Py_XDECREF(self->stack);
7156 Py_XDECREF(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007157 Py_XDECREF(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007158 if (self->buffer.buf != NULL) {
7159 PyBuffer_Release(&self->buffer);
7160 self->buffer.buf = NULL;
7161 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007162
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007163 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007164 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007165 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007166 PyMem_Free(self->encoding);
7167 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007168
7169 Py_TYPE(self)->tp_free((PyObject *)self);
7170}
7171
7172static int
7173Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7174{
7175 Py_VISIT(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007176 Py_VISIT(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007177 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007178 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007179 Py_VISIT(self->stack);
7180 Py_VISIT(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007181 Py_VISIT(self->buffers);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007182 return 0;
7183}
7184
7185static int
7186Unpickler_clear(UnpicklerObject *self)
7187{
7188 Py_CLEAR(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007189 Py_CLEAR(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007190 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007191 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007192 Py_CLEAR(self->stack);
7193 Py_CLEAR(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007194 Py_CLEAR(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007195 if (self->buffer.buf != NULL) {
7196 PyBuffer_Release(&self->buffer);
7197 self->buffer.buf = NULL;
7198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007200 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007201 PyMem_Free(self->marks);
7202 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007203 PyMem_Free(self->input_line);
7204 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007205 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007206 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007207 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007208 self->errors = NULL;
7209
7210 return 0;
7211}
7212
Larry Hastings61272b72014-01-07 12:41:53 -08007213/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007214
7215_pickle.Unpickler.__init__
7216
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007217 file: object
7218 *
7219 fix_imports: bool = True
7220 encoding: str = 'ASCII'
7221 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007222 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007223
7224This takes a binary file for reading a pickle data stream.
7225
7226The protocol version of the pickle is detected automatically, so no
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007227protocol argument is needed. Bytes past the pickled object's
7228representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007229
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007230The argument *file* must have two methods, a read() method that takes
7231an integer argument, and a readline() method that requires no
7232arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007233binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007234other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007235
7236Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007237which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007238generated by Python 2. If *fix_imports* is True, pickle will try to
7239map the old Python 2 names to the new names used in Python 3. The
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007240*encoding* and *errors* tell pickle how to decode 8-bit string
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007241instances pickled by Python 2; these default to 'ASCII' and 'strict',
7242respectively. The *encoding* can be 'bytes' to read these 8-bit
7243string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007244[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007245
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007246static int
Larry Hastings89964c42015-04-14 18:07:59 -04007247_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7248 int fix_imports, const char *encoding,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007249 const char *errors, PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007250/*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007251{
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02007252 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007253
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007254 /* In case of multiple __init__() calls, clear previous content. */
7255 if (self->read != NULL)
7256 (void)Unpickler_clear(self);
7257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007258 if (_Unpickler_SetInputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007259 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007260
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007261 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007262 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007263
Antoine Pitrou91f43802019-05-26 17:10:09 +02007264 if (_Unpickler_SetBuffers(self, buffers) < 0)
7265 return -1;
7266
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007267 self->fix_imports = fix_imports;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007268
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007269 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7270 &self->pers_func, &self->pers_func_self) < 0)
7271 {
7272 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007273 }
7274
7275 self->stack = (Pdata *)Pdata_New();
7276 if (self->stack == NULL)
Zackery Spytz4b430e52018-09-28 23:48:46 -06007277 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007278
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007279 self->memo_size = 32;
7280 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007281 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007282 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007283
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007284 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00007285
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007286 return 0;
7287}
7288
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007290/* Define a proxy object for the Unpickler's internal memo object. This is to
7291 * avoid breaking code like:
7292 * unpickler.memo.clear()
7293 * and
7294 * unpickler.memo = saved_memo
7295 * Is this a good idea? Not really, but we don't want to break code that uses
7296 * it. Note that we don't implement the entire mapping API here. This is
7297 * intentional, as these should be treated as black-box implementation details.
7298 *
7299 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02007300 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007301 */
7302
Larry Hastings61272b72014-01-07 12:41:53 -08007303/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007304_pickle.UnpicklerMemoProxy.clear
7305
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007306Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08007307[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007308
Larry Hastings3cceb382014-01-04 11:09:09 -08007309static PyObject *
7310_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007311/*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007312{
7313 _Unpickler_MemoCleanup(self->unpickler);
7314 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7315 if (self->unpickler->memo == NULL)
7316 return NULL;
7317 Py_RETURN_NONE;
7318}
7319
Larry Hastings61272b72014-01-07 12:41:53 -08007320/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007321_pickle.UnpicklerMemoProxy.copy
7322
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007323Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08007324[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007325
Larry Hastings3cceb382014-01-04 11:09:09 -08007326static PyObject *
7327_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007328/*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007329{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007330 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007331 PyObject *new_memo = PyDict_New();
7332 if (new_memo == NULL)
7333 return NULL;
7334
7335 for (i = 0; i < self->unpickler->memo_size; i++) {
7336 int status;
7337 PyObject *key, *value;
7338
7339 value = self->unpickler->memo[i];
7340 if (value == NULL)
7341 continue;
7342
7343 key = PyLong_FromSsize_t(i);
7344 if (key == NULL)
7345 goto error;
7346 status = PyDict_SetItem(new_memo, key, value);
7347 Py_DECREF(key);
7348 if (status < 0)
7349 goto error;
7350 }
7351 return new_memo;
7352
7353error:
7354 Py_DECREF(new_memo);
7355 return NULL;
7356}
7357
Larry Hastings61272b72014-01-07 12:41:53 -08007358/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007359_pickle.UnpicklerMemoProxy.__reduce__
7360
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007361Implement pickling support.
Larry Hastings61272b72014-01-07 12:41:53 -08007362[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007363
Larry Hastings3cceb382014-01-04 11:09:09 -08007364static PyObject *
7365_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007366/*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007367{
7368 PyObject *reduce_value;
7369 PyObject *constructor_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08007370 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007371 if (contents == NULL)
7372 return NULL;
7373
7374 reduce_value = PyTuple_New(2);
7375 if (reduce_value == NULL) {
7376 Py_DECREF(contents);
7377 return NULL;
7378 }
7379 constructor_args = PyTuple_New(1);
7380 if (constructor_args == NULL) {
7381 Py_DECREF(contents);
7382 Py_DECREF(reduce_value);
7383 return NULL;
7384 }
7385 PyTuple_SET_ITEM(constructor_args, 0, contents);
7386 Py_INCREF((PyObject *)&PyDict_Type);
7387 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7388 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7389 return reduce_value;
7390}
7391
7392static PyMethodDef unpicklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007393 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7394 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7395 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007396 {NULL, NULL} /* sentinel */
7397};
7398
7399static void
7400UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7401{
7402 PyObject_GC_UnTrack(self);
7403 Py_XDECREF(self->unpickler);
7404 PyObject_GC_Del((PyObject *)self);
7405}
7406
7407static int
7408UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7409 visitproc visit, void *arg)
7410{
7411 Py_VISIT(self->unpickler);
7412 return 0;
7413}
7414
7415static int
7416UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7417{
7418 Py_CLEAR(self->unpickler);
7419 return 0;
7420}
7421
7422static PyTypeObject UnpicklerMemoProxyType = {
7423 PyVarObject_HEAD_INIT(NULL, 0)
7424 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7425 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7426 0,
7427 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007428 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007429 0, /* tp_getattr */
7430 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007431 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007432 0, /* tp_repr */
7433 0, /* tp_as_number */
7434 0, /* tp_as_sequence */
7435 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00007436 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007437 0, /* tp_call */
7438 0, /* tp_str */
7439 PyObject_GenericGetAttr, /* tp_getattro */
7440 PyObject_GenericSetAttr, /* tp_setattro */
7441 0, /* tp_as_buffer */
7442 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7443 0, /* tp_doc */
7444 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7445 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7446 0, /* tp_richcompare */
7447 0, /* tp_weaklistoffset */
7448 0, /* tp_iter */
7449 0, /* tp_iternext */
7450 unpicklerproxy_methods, /* tp_methods */
7451};
7452
7453static PyObject *
7454UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7455{
7456 UnpicklerMemoProxyObject *self;
7457
7458 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7459 &UnpicklerMemoProxyType);
7460 if (self == NULL)
7461 return NULL;
7462 Py_INCREF(unpickler);
7463 self->unpickler = unpickler;
7464 PyObject_GC_Track(self);
7465 return (PyObject *)self;
7466}
7467
7468/*****************************************************************************/
7469
7470
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007471static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007472Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007473{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007474 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007475}
7476
7477static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007478Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007479{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007480 PyObject **new_memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007481 size_t new_memo_size = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007482
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007483 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007484 PyErr_SetString(PyExc_TypeError,
7485 "attribute deletion is not supported");
7486 return -1;
7487 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007488
7489 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
7490 UnpicklerObject *unpickler =
7491 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7492
7493 new_memo_size = unpickler->memo_size;
7494 new_memo = _Unpickler_NewMemo(new_memo_size);
7495 if (new_memo == NULL)
7496 return -1;
7497
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007498 for (size_t i = 0; i < new_memo_size; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007499 Py_XINCREF(unpickler->memo[i]);
7500 new_memo[i] = unpickler->memo[i];
7501 }
7502 }
7503 else if (PyDict_Check(obj)) {
7504 Py_ssize_t i = 0;
7505 PyObject *key, *value;
7506
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02007507 new_memo_size = PyDict_GET_SIZE(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007508 new_memo = _Unpickler_NewMemo(new_memo_size);
7509 if (new_memo == NULL)
7510 return -1;
7511
7512 while (PyDict_Next(obj, &i, &key, &value)) {
7513 Py_ssize_t idx;
7514 if (!PyLong_Check(key)) {
7515 PyErr_SetString(PyExc_TypeError,
7516 "memo key must be integers");
7517 goto error;
7518 }
7519 idx = PyLong_AsSsize_t(key);
7520 if (idx == -1 && PyErr_Occurred())
7521 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02007522 if (idx < 0) {
7523 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02007524 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02007525 goto error;
7526 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007527 if (_Unpickler_MemoPut(self, idx, value) < 0)
7528 goto error;
7529 }
7530 }
7531 else {
7532 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02007533 "'memo' attribute must be an UnpicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007534 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007535 return -1;
7536 }
7537
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007538 _Unpickler_MemoCleanup(self);
7539 self->memo_size = new_memo_size;
7540 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007541
7542 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007543
7544 error:
7545 if (new_memo_size) {
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007546 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007547 Py_XDECREF(new_memo[i]);
7548 }
7549 PyMem_FREE(new_memo);
7550 }
7551 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007552}
7553
7554static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007555Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007556{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007557 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007558 PyErr_SetString(PyExc_AttributeError, "persistent_load");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007559 return NULL;
7560 }
7561 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007562}
7563
7564static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007565Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007566{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007567 if (value == NULL) {
7568 PyErr_SetString(PyExc_TypeError,
7569 "attribute deletion is not supported");
7570 return -1;
7571 }
7572 if (!PyCallable_Check(value)) {
7573 PyErr_SetString(PyExc_TypeError,
7574 "persistent_load must be a callable taking "
7575 "one argument");
7576 return -1;
7577 }
7578
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007579 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007580 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03007581 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007582
7583 return 0;
7584}
7585
7586static PyGetSetDef Unpickler_getsets[] = {
7587 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7588 {"persistent_load", (getter)Unpickler_get_persload,
7589 (setter)Unpickler_set_persload},
7590 {NULL}
7591};
7592
7593static PyTypeObject Unpickler_Type = {
7594 PyVarObject_HEAD_INIT(NULL, 0)
7595 "_pickle.Unpickler", /*tp_name*/
7596 sizeof(UnpicklerObject), /*tp_basicsize*/
7597 0, /*tp_itemsize*/
7598 (destructor)Unpickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007599 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007600 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007601 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007602 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007603 0, /*tp_repr*/
7604 0, /*tp_as_number*/
7605 0, /*tp_as_sequence*/
7606 0, /*tp_as_mapping*/
7607 0, /*tp_hash*/
7608 0, /*tp_call*/
7609 0, /*tp_str*/
7610 0, /*tp_getattro*/
7611 0, /*tp_setattro*/
7612 0, /*tp_as_buffer*/
7613 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007614 _pickle_Unpickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007615 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7616 (inquiry)Unpickler_clear, /*tp_clear*/
7617 0, /*tp_richcompare*/
7618 0, /*tp_weaklistoffset*/
7619 0, /*tp_iter*/
7620 0, /*tp_iternext*/
7621 Unpickler_methods, /*tp_methods*/
7622 0, /*tp_members*/
7623 Unpickler_getsets, /*tp_getset*/
7624 0, /*tp_base*/
7625 0, /*tp_dict*/
7626 0, /*tp_descr_get*/
7627 0, /*tp_descr_set*/
7628 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007629 _pickle_Unpickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007630 PyType_GenericAlloc, /*tp_alloc*/
7631 PyType_GenericNew, /*tp_new*/
7632 PyObject_GC_Del, /*tp_free*/
7633 0, /*tp_is_gc*/
7634};
7635
Larry Hastings61272b72014-01-07 12:41:53 -08007636/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007637
7638_pickle.dump
7639
7640 obj: object
7641 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007642 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007643 *
7644 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007645 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007646
7647Write a pickled representation of obj to the open file object file.
7648
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007649This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7650be more efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007651
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007652The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007653protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7654protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007655with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007656
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007657Specifying a negative protocol version selects the highest protocol
7658version supported. The higher the protocol used, the more recent the
7659version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007660
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007661The *file* argument must have a write() method that accepts a single
7662bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00007663writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007664this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007665
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007666If *fix_imports* is True and protocol is less than 3, pickle will try
7667to map the new Python 3 names to the old module names used in Python
76682, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007669
7670If *buffer_callback* is None (the default), buffer views are serialized
7671into *file* as part of the pickle stream. It is an error if
7672*buffer_callback* is not None and *protocol* is None or smaller than 5.
7673
Larry Hastings61272b72014-01-07 12:41:53 -08007674[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007675
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007676static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007677_pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007678 PyObject *protocol, int fix_imports,
7679 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007680/*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007681{
7682 PicklerObject *pickler = _Pickler_New();
7683
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007684 if (pickler == NULL)
7685 return NULL;
7686
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007687 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007688 goto error;
7689
7690 if (_Pickler_SetOutputStream(pickler, file) < 0)
7691 goto error;
7692
Antoine Pitrou91f43802019-05-26 17:10:09 +02007693 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7694 goto error;
7695
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007696 if (dump(pickler, obj) < 0)
7697 goto error;
7698
7699 if (_Pickler_FlushToFile(pickler) < 0)
7700 goto error;
7701
7702 Py_DECREF(pickler);
7703 Py_RETURN_NONE;
7704
7705 error:
7706 Py_XDECREF(pickler);
7707 return NULL;
7708}
7709
Larry Hastings61272b72014-01-07 12:41:53 -08007710/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007711
7712_pickle.dumps
7713
7714 obj: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007715 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007716 *
7717 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007718 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007719
7720Return the pickled representation of the object as a bytes object.
7721
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007722The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007723protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7724protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007725with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007726
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007727Specifying a negative protocol version selects the highest protocol
7728version supported. The higher the protocol used, the more recent the
7729version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007730
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007731If *fix_imports* is True and *protocol* is less than 3, pickle will
7732try to map the new Python 3 names to the old module names used in
7733Python 2, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007734
7735If *buffer_callback* is None (the default), buffer views are serialized
7736into *file* as part of the pickle stream. It is an error if
7737*buffer_callback* is not None and *protocol* is None or smaller than 5.
7738
Larry Hastings61272b72014-01-07 12:41:53 -08007739[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007740
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007741static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007742_pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007743 int fix_imports, PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007744/*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007745{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007746 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007747 PicklerObject *pickler = _Pickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007748
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007749 if (pickler == NULL)
7750 return NULL;
7751
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007752 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007753 goto error;
7754
Antoine Pitrou91f43802019-05-26 17:10:09 +02007755 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7756 goto error;
7757
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007758 if (dump(pickler, obj) < 0)
7759 goto error;
7760
7761 result = _Pickler_GetString(pickler);
7762 Py_DECREF(pickler);
7763 return result;
7764
7765 error:
7766 Py_XDECREF(pickler);
7767 return NULL;
7768}
7769
Larry Hastings61272b72014-01-07 12:41:53 -08007770/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007771
7772_pickle.load
7773
7774 file: object
7775 *
7776 fix_imports: bool = True
7777 encoding: str = 'ASCII'
7778 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007779 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007780
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007781Read and return an object from the pickle data stored in a file.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007782
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007783This is equivalent to ``Unpickler(file).load()``, but may be more
7784efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007785
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007786The protocol version of the pickle is detected automatically, so no
7787protocol argument is needed. Bytes past the pickled object's
7788representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007789
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007790The argument *file* must have two methods, a read() method that takes
7791an integer argument, and a readline() method that requires no
7792arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007793binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007794other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007795
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007796Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007797which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007798generated by Python 2. If *fix_imports* is True, pickle will try to
7799map the old Python 2 names to the new names used in Python 3. The
7800*encoding* and *errors* tell pickle how to decode 8-bit string
7801instances pickled by Python 2; these default to 'ASCII' and 'strict',
7802respectively. The *encoding* can be 'bytes' to read these 8-bit
7803string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007804[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007805
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007806static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007807_pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007808 const char *encoding, const char *errors,
7809 PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007810/*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007811{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007812 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007813 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007814
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007815 if (unpickler == NULL)
7816 return NULL;
7817
7818 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7819 goto error;
7820
7821 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7822 goto error;
7823
Antoine Pitrou91f43802019-05-26 17:10:09 +02007824 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7825 goto error;
7826
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007827 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007828
7829 result = load(unpickler);
7830 Py_DECREF(unpickler);
7831 return result;
7832
7833 error:
7834 Py_XDECREF(unpickler);
7835 return NULL;
7836}
7837
Larry Hastings61272b72014-01-07 12:41:53 -08007838/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007839
7840_pickle.loads
7841
7842 data: object
7843 *
7844 fix_imports: bool = True
7845 encoding: str = 'ASCII'
7846 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007847 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007848
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007849Read and return an object from the given pickle data.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007850
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007851The protocol version of the pickle is detected automatically, so no
7852protocol argument is needed. Bytes past the pickled object's
7853representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007854
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007855Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007856which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007857generated by Python 2. If *fix_imports* is True, pickle will try to
7858map the old Python 2 names to the new names used in Python 3. The
7859*encoding* and *errors* tell pickle how to decode 8-bit string
7860instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861respectively. The *encoding* can be 'bytes' to read these 8-bit
7862string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007863[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007864
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007865static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007866_pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007867 const char *encoding, const char *errors,
7868 PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007869/*[clinic end generated code: output=82ac1e6b588e6d02 input=9c2ab6a0960185ea]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007870{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007871 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007872 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007873
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007874 if (unpickler == NULL)
7875 return NULL;
7876
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007877 if (_Unpickler_SetStringInput(unpickler, data) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007878 goto error;
7879
7880 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881 goto error;
7882
Antoine Pitrou91f43802019-05-26 17:10:09 +02007883 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884 goto error;
7885
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007886 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007887
7888 result = load(unpickler);
7889 Py_DECREF(unpickler);
7890 return result;
7891
7892 error:
7893 Py_XDECREF(unpickler);
7894 return NULL;
7895}
7896
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007897static struct PyMethodDef pickle_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007898 _PICKLE_DUMP_METHODDEF
7899 _PICKLE_DUMPS_METHODDEF
7900 _PICKLE_LOAD_METHODDEF
7901 _PICKLE_LOADS_METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007902 {NULL, NULL} /* sentinel */
7903};
7904
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007905static int
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007906pickle_clear(PyObject *m)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007907{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007908 _Pickle_ClearState(_Pickle_GetState(m));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007909 return 0;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007910}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007911
Stefan Krahf483b0f2013-12-14 13:43:10 +01007912static void
7913pickle_free(PyObject *m)
7914{
7915 _Pickle_ClearState(_Pickle_GetState(m));
7916}
7917
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007918static int
7919pickle_traverse(PyObject *m, visitproc visit, void *arg)
7920{
7921 PickleState *st = _Pickle_GetState(m);
7922 Py_VISIT(st->PickleError);
7923 Py_VISIT(st->PicklingError);
7924 Py_VISIT(st->UnpicklingError);
7925 Py_VISIT(st->dispatch_table);
7926 Py_VISIT(st->extension_registry);
7927 Py_VISIT(st->extension_cache);
7928 Py_VISIT(st->inverted_registry);
7929 Py_VISIT(st->name_mapping_2to3);
7930 Py_VISIT(st->import_mapping_2to3);
7931 Py_VISIT(st->name_mapping_3to2);
7932 Py_VISIT(st->import_mapping_3to2);
7933 Py_VISIT(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03007934 Py_VISIT(st->getattr);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007935 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007936}
7937
7938static struct PyModuleDef _picklemodule = {
7939 PyModuleDef_HEAD_INIT,
Stefan Krahf483b0f2013-12-14 13:43:10 +01007940 "_pickle", /* m_name */
7941 pickle_module_doc, /* m_doc */
7942 sizeof(PickleState), /* m_size */
7943 pickle_methods, /* m_methods */
7944 NULL, /* m_reload */
7945 pickle_traverse, /* m_traverse */
7946 pickle_clear, /* m_clear */
7947 (freefunc)pickle_free /* m_free */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007948};
7949
7950PyMODINIT_FUNC
7951PyInit__pickle(void)
7952{
7953 PyObject *m;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007954 PickleState *st;
7955
7956 m = PyState_FindModule(&_picklemodule);
7957 if (m) {
7958 Py_INCREF(m);
7959 return m;
7960 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007961
7962 if (PyType_Ready(&Unpickler_Type) < 0)
7963 return NULL;
7964 if (PyType_Ready(&Pickler_Type) < 0)
7965 return NULL;
7966 if (PyType_Ready(&Pdata_Type) < 0)
7967 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007968 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7969 return NULL;
7970 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7971 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007972
7973 /* Create the module and add the functions. */
7974 m = PyModule_Create(&_picklemodule);
7975 if (m == NULL)
7976 return NULL;
7977
Antoine Pitrou91f43802019-05-26 17:10:09 +02007978 /* Add types */
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007979 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007980 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7981 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007982 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007983 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7984 return NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02007985 Py_INCREF(&PyPickleBuffer_Type);
7986 if (PyModule_AddObject(m, "PickleBuffer",
7987 (PyObject *)&PyPickleBuffer_Type) < 0)
7988 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007989
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007990 st = _Pickle_GetState(m);
7991
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007992 /* Initialize the exceptions. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007993 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7994 if (st->PickleError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007995 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007996 st->PicklingError = \
7997 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7998 if (st->PicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007999 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008000 st->UnpicklingError = \
8001 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
8002 if (st->UnpicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008003 return NULL;
8004
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008005 Py_INCREF(st->PickleError);
8006 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008007 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008008 Py_INCREF(st->PicklingError);
8009 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008010 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008011 Py_INCREF(st->UnpicklingError);
8012 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008013 return NULL;
8014
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008015 if (_Pickle_InitState(st) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008016 return NULL;
8017
8018 return m;
8019}