blob: 691d4a293e8498045bc512539f3b569a70e81c9b [file] [log] [blame]
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000011#include "Python.h"
Victor Stinnercdad2722021-04-22 00:52:52 +020012#include "pycore_moduleobject.h" // _PyModule_GetState()
Victor Stinner4a21e572020-04-15 02:35:41 +020013#include "structmember.h" // PyMemberDef
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000014
/* Docstring text for the _pickle module. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
17
Larry Hastings61272b72014-01-07 12:41:53 -080018/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080019module _pickle
Larry Hastingsc2047262014-01-25 20:43:29 -080020class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
21class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
22class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
23class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
Larry Hastings61272b72014-01-07 12:41:53 -080024[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030025/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080026
/* Pickle protocol version numbers known to this implementation.

   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol.
   Bump DEFAULT_PROTOCOL only when the oldest still supported version of Python
   already includes it. */
enum {
    HIGHEST_PROTOCOL = 5,
    DEFAULT_PROTOCOL = 4
};
34
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each opcode is a single character constant.
   NOTE(review): the constants from '\x80' upward are plain character
   literals, so their numeric value depends on whether `char` is signed on
   the target; presumably they are only ever compared against `char` data
   read from a pickle -- confirm before adding casts. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5 */
    BYTEARRAY8       = '\x96',
    NEXT_BUFFER      = '\x97',
    READONLY_BUFFER  = '\x98'
};
115
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* Framing parameters.  NOTE(review): the users of these values are not
       visible in this part of the file; presumably MIN/TARGET are payload
       sizes in bytes and HEADER_SIZE is the FRAME opcode byte plus an 8-byte
       length -- confirm against the framing code. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
137
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800138/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000139
/* State of the pickle module, per PEP 3121.

   Every member is an owned PyObject reference: _Pickle_InitState() fills
   them in and _Pickle_ClearState() releases them. */
typedef struct {
    /* Exception classes for pickle. */
    PyObject *PickleError;
    PyObject *PicklingError;
    PyObject *UnpicklingError;

    /* copyreg.dispatch_table, {type_object: pickling_function} */
    PyObject *dispatch_table;

    /* For the extension opcodes EXT1, EXT2 and EXT4. */

    /* copyreg._extension_registry, {(module_name, function_name): code} */
    PyObject *extension_registry;
    /* copyreg._extension_cache, {code: object} */
    PyObject *extension_cache;
    /* copyreg._inverted_registry, {code: (module_name, function_name)} */
    PyObject *inverted_registry;

    /* Import mappings for compatibility with Python 2.x */

    /* _compat_pickle.NAME_MAPPING,
       {(oldmodule, oldname): (newmodule, newname)} */
    PyObject *name_mapping_2to3;
    /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
    PyObject *import_mapping_2to3;
    /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
    PyObject *name_mapping_3to2;
    PyObject *import_mapping_3to2;

    /* codecs.encode, used for saving bytes in older protocols */
    PyObject *codecs_encode;
    /* builtins.getattr, used for saving nested names with protocol < 4 */
    PyObject *getattr;
    /* functools.partial, used for implementing __newobj_ex__ with protocols
       2 and 3 */
    PyObject *partial;
} PickleState;
178
/* Forward declaration of the _pickle module definition; it is needed by
   _Pickle_GetGlobalState() before the definition itself appears. */
static struct PyModuleDef _picklemodule;
181
182/* Given a module object, get its per-module state. */
183static PickleState *
184_Pickle_GetState(PyObject *module)
185{
Victor Stinnercdad2722021-04-22 00:52:52 +0200186 return (PickleState *)_PyModule_GetState(module);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800187}
188
189/* Find the module instance imported in the currently running sub-interpreter
190 and get its state. */
191static PickleState *
192_Pickle_GetGlobalState(void)
193{
194 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
195}
196
197/* Clear the given pickle module state. */
198static void
199_Pickle_ClearState(PickleState *st)
200{
201 Py_CLEAR(st->PickleError);
202 Py_CLEAR(st->PicklingError);
203 Py_CLEAR(st->UnpicklingError);
204 Py_CLEAR(st->dispatch_table);
205 Py_CLEAR(st->extension_registry);
206 Py_CLEAR(st->extension_cache);
207 Py_CLEAR(st->inverted_registry);
208 Py_CLEAR(st->name_mapping_2to3);
209 Py_CLEAR(st->import_mapping_2to3);
210 Py_CLEAR(st->name_mapping_3to2);
211 Py_CLEAR(st->import_mapping_3to2);
212 Py_CLEAR(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300213 Py_CLEAR(st->getattr);
Victor Stinner9ba97df2015-11-17 12:15:07 +0100214 Py_CLEAR(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800215}
216
217/* Initialize the given pickle module state. */
218static int
219_Pickle_InitState(PickleState *st)
220{
221 PyObject *copyreg = NULL;
222 PyObject *compat_pickle = NULL;
223 PyObject *codecs = NULL;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300224 PyObject *functools = NULL;
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200225 _Py_IDENTIFIER(getattr);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800226
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200227 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300228 if (st->getattr == NULL)
229 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300230
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800231 copyreg = PyImport_ImportModule("copyreg");
232 if (!copyreg)
233 goto error;
234 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
235 if (!st->dispatch_table)
236 goto error;
237 if (!PyDict_CheckExact(st->dispatch_table)) {
238 PyErr_Format(PyExc_RuntimeError,
239 "copyreg.dispatch_table should be a dict, not %.200s",
240 Py_TYPE(st->dispatch_table)->tp_name);
241 goto error;
242 }
243 st->extension_registry = \
244 PyObject_GetAttrString(copyreg, "_extension_registry");
245 if (!st->extension_registry)
246 goto error;
247 if (!PyDict_CheckExact(st->extension_registry)) {
248 PyErr_Format(PyExc_RuntimeError,
249 "copyreg._extension_registry should be a dict, "
250 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
251 goto error;
252 }
253 st->inverted_registry = \
254 PyObject_GetAttrString(copyreg, "_inverted_registry");
255 if (!st->inverted_registry)
256 goto error;
257 if (!PyDict_CheckExact(st->inverted_registry)) {
258 PyErr_Format(PyExc_RuntimeError,
259 "copyreg._inverted_registry should be a dict, "
260 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
261 goto error;
262 }
263 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
264 if (!st->extension_cache)
265 goto error;
266 if (!PyDict_CheckExact(st->extension_cache)) {
267 PyErr_Format(PyExc_RuntimeError,
268 "copyreg._extension_cache should be a dict, "
269 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
270 goto error;
271 }
272 Py_CLEAR(copyreg);
273
274 /* Load the 2.x -> 3.x stdlib module mapping tables */
275 compat_pickle = PyImport_ImportModule("_compat_pickle");
276 if (!compat_pickle)
277 goto error;
278 st->name_mapping_2to3 = \
279 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
280 if (!st->name_mapping_2to3)
281 goto error;
282 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
283 PyErr_Format(PyExc_RuntimeError,
284 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
285 Py_TYPE(st->name_mapping_2to3)->tp_name);
286 goto error;
287 }
288 st->import_mapping_2to3 = \
289 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
290 if (!st->import_mapping_2to3)
291 goto error;
292 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
293 PyErr_Format(PyExc_RuntimeError,
294 "_compat_pickle.IMPORT_MAPPING should be a dict, "
295 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
296 goto error;
297 }
298 /* ... and the 3.x -> 2.x mapping tables */
299 st->name_mapping_3to2 = \
300 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
301 if (!st->name_mapping_3to2)
302 goto error;
303 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
304 PyErr_Format(PyExc_RuntimeError,
305 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
306 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
307 goto error;
308 }
309 st->import_mapping_3to2 = \
310 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
311 if (!st->import_mapping_3to2)
312 goto error;
313 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
314 PyErr_Format(PyExc_RuntimeError,
315 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
316 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
317 goto error;
318 }
319 Py_CLEAR(compat_pickle);
320
321 codecs = PyImport_ImportModule("codecs");
322 if (codecs == NULL)
323 goto error;
324 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
325 if (st->codecs_encode == NULL) {
326 goto error;
327 }
328 if (!PyCallable_Check(st->codecs_encode)) {
329 PyErr_Format(PyExc_RuntimeError,
330 "codecs.encode should be a callable, not %.200s",
331 Py_TYPE(st->codecs_encode)->tp_name);
332 goto error;
333 }
334 Py_CLEAR(codecs);
335
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300336 functools = PyImport_ImportModule("functools");
337 if (!functools)
338 goto error;
339 st->partial = PyObject_GetAttrString(functools, "partial");
340 if (!st->partial)
341 goto error;
342 Py_CLEAR(functools);
343
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800344 return 0;
345
346 error:
347 Py_CLEAR(copyreg);
348 Py_CLEAR(compat_pickle);
349 Py_CLEAR(codecs);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300350 Py_CLEAR(functools);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800351 _Pickle_ClearState(st);
352 return -1;
353}
354
355/* Helper for calling a function with a single argument quickly.
356
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800357 This function steals the reference of the given argument. */
358static PyObject *
359_Pickle_FastCall(PyObject *func, PyObject *obj)
360{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800361 PyObject *result;
362
Petr Viktorinffd97532020-02-11 17:46:57 +0100363 result = PyObject_CallOneArg(func, obj);
Victor Stinner75210692016-08-19 18:59:15 +0200364 Py_DECREF(obj);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800365 return result;
366}
367
368/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000369
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200370/* Retrieve and deconstruct a method for avoiding a reference cycle
371 (pickler -> bound method of pickler -> pickler) */
372static int
373init_method_ref(PyObject *self, _Py_Identifier *name,
374 PyObject **method_func, PyObject **method_self)
375{
376 PyObject *func, *func2;
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200377 int ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200378
379 /* *method_func and *method_self should be consistent. All refcount decrements
380 should be occurred after setting *method_self and *method_func. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200381 ret = _PyObject_LookupAttrId(self, name, &func);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200382 if (func == NULL) {
383 *method_self = NULL;
384 Py_CLEAR(*method_func);
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200385 return ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200386 }
387
388 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
389 /* Deconstruct a bound Python method */
390 func2 = PyMethod_GET_FUNCTION(func);
391 Py_INCREF(func2);
392 *method_self = self; /* borrowed */
393 Py_XSETREF(*method_func, func2);
394 Py_DECREF(func);
395 return 0;
396 }
397 else {
398 *method_self = NULL;
399 Py_XSETREF(*method_func, func);
400 return 0;
401 }
402}
403
404/* Bind a method if it was deconstructed */
405static PyObject *
406reconstruct_method(PyObject *func, PyObject *self)
407{
408 if (self) {
409 return PyMethod_New(func, self);
410 }
411 else {
412 Py_INCREF(func);
413 return func;
414 }
415}
416
417static PyObject *
418call_method(PyObject *func, PyObject *self, PyObject *obj)
419{
420 if (self) {
421 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
422 }
423 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100424 return PyObject_CallOneArg(func, obj);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200425 }
426}
427
428/*************************************************************************/
429
/* Internal data type used as the unpickling stack.

   The number of live items is kept in the variable-size header (Py_SIZE);
   the items themselves live in the separately allocated `data` array. */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;
    int mark_set;          /* is MARK set? */
    Py_ssize_t fence;      /* position of top MARK or 0 */
    Py_ssize_t allocated;  /* number of slots in data allocated */
} Pdata;
438
439static void
440Pdata_dealloc(Pdata *self)
441{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200442 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000443 while (--i >= 0) {
444 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000445 }
Victor Stinner00d7abd2020-12-01 09:56:42 +0100446 PyMem_Free(self->data);
Victor Stinner32bd68c2020-12-01 10:37:39 +0100447 PyObject_Free(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000448}
449
450static PyTypeObject Pdata_Type = {
451 PyVarObject_HEAD_INIT(NULL, 0)
452 "_pickle.Pdata", /*tp_name*/
453 sizeof(Pdata), /*tp_basicsize*/
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +0200454 sizeof(PyObject *), /*tp_itemsize*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000455 (destructor)Pdata_dealloc, /*tp_dealloc*/
456};
457
458static PyObject *
459Pdata_New(void)
460{
461 Pdata *self;
462
463 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
464 return NULL;
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100465 Py_SET_SIZE(self, 0);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200466 self->mark_set = 0;
467 self->fence = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000468 self->allocated = 8;
Victor Stinner00d7abd2020-12-01 09:56:42 +0100469 self->data = PyMem_Malloc(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000470 if (self->data)
471 return (PyObject *)self;
472 Py_DECREF(self);
473 return PyErr_NoMemory();
474}
475
476
477/* Retain only the initial clearto items. If clearto >= the current
478 * number of items, this is a (non-erroneous) NOP.
479 */
480static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200481Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000482{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200483 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000484
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200485 assert(clearto >= self->fence);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000486 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000487 return 0;
488
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000489 while (--i >= clearto) {
490 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000491 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100492 Py_SET_SIZE(self, clearto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000493 return 0;
494}
495
496static int
497Pdata_grow(Pdata *self)
498{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000499 PyObject **data = self->data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200500 size_t allocated = (size_t)self->allocated;
501 size_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000502
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000503 new_allocated = (allocated >> 3) + 6;
504 /* check for integer overflow */
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200505 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000506 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000507 new_allocated += allocated;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500508 PyMem_RESIZE(data, PyObject *, new_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000509 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000510 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000511
512 self->data = data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200513 self->allocated = (Py_ssize_t)new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000514 return 0;
515
516 nomemory:
517 PyErr_NoMemory();
518 return -1;
519}
520
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200521static int
522Pdata_stack_underflow(Pdata *self)
523{
524 PickleState *st = _Pickle_GetGlobalState();
525 PyErr_SetString(st->UnpicklingError,
526 self->mark_set ?
527 "unexpected MARK found" :
528 "unpickling stack underflow");
529 return -1;
530}
531
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000532/* D is a Pdata*. Pop the topmost element and store it into V, which
533 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
534 * is raised and V is set to NULL.
535 */
536static PyObject *
537Pdata_pop(Pdata *self)
538{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200539 if (Py_SIZE(self) <= self->fence) {
540 Pdata_stack_underflow(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000541 return NULL;
542 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100543 Py_SET_SIZE(self, Py_SIZE(self) - 1);
544 return self->data[Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000545}
546#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
547
548static int
549Pdata_push(Pdata *self, PyObject *obj)
550{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000551 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000552 return -1;
553 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100554 self->data[Py_SIZE(self)] = obj;
555 Py_SET_SIZE(self, Py_SIZE(self) + 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000556 return 0;
557}
558
/* Push an object on stack, transferring its ownership to the stack.
   When the push fails, the enclosing function returns ER immediately.
   NOTE(review): on that failure path O is not released here. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) {                         \
            return (ER);                                        \
        }                                                       \
    } while(0)
562
/* Push an object on stack, adding a new reference to the object.
   When the push fails, the reference taken by this macro is dropped again
   (the stack never received it) and the enclosing function returns ER.
   O is evaluated more than once, so it must be free of side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
567
568static PyObject *
569Pdata_poptuple(Pdata *self, Py_ssize_t start)
570{
571 PyObject *tuple;
572 Py_ssize_t len, i, j;
573
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200574 if (start < self->fence) {
575 Pdata_stack_underflow(self);
576 return NULL;
577 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000578 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000579 tuple = PyTuple_New(len);
580 if (tuple == NULL)
581 return NULL;
582 for (i = start, j = 0; j < len; i++, j++)
583 PyTuple_SET_ITEM(tuple, j, self->data[i]);
584
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100585 Py_SET_SIZE(self, start);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000586 return tuple;
587}
588
589static PyObject *
590Pdata_poplist(Pdata *self, Py_ssize_t start)
591{
592 PyObject *list;
593 Py_ssize_t len, i, j;
594
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000595 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000596 list = PyList_New(len);
597 if (list == NULL)
598 return NULL;
599 for (i = start, j = 0; j < len; i++, j++)
600 PyList_SET_ITEM(list, j, self->data[i]);
601
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100602 Py_SET_SIZE(self, start);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000603 return list;
604}
605
/* One slot of the pickler memo table.  A NULL me_key marks a slot with no
   object in it (the table is zero-filled on creation and on clear). */
typedef struct {
    PyObject *me_key;     /* owned reference, or NULL */
    Py_ssize_t me_value;
} PyMemoEntry;
610
/* Hash table used by the pickler for memoization. */
typedef struct {
    size_t mt_mask;         /* initialised to mt_allocated - 1 */
    size_t mt_used;         /* value reported by PyMemoTable_Size() */
    size_t mt_allocated;    /* number of slots in mt_table */
    PyMemoEntry *mt_table;  /* array of mt_allocated slots */
} PyMemoTable;
617
/* Instance layout of _pickle.Pickler. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *reducer_override; /* hook for invoking user-defined callbacks
                                   instead of save_global when pickling
                                   functions and classes */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytearray buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   Fast mode disables the use of the memo,
                                   thereby speeding up the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
    PyObject *buffer_callback;  /* Callback for out-of-band buffers, or NULL */
} PicklerObject;
656
/* Instance layout of _pickle.Unpickler. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, stores unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    size_t memo_size;           /* Capacity of the memo array */
    size_t memo_len;            /* Number of objects in the memo */

    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *pers_func_self;   /* borrowed reference to self if pers_func
                                   is an unbound method, NULL otherwise */

    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readinto;         /* readinto() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */
    PyObject *buffers;          /* iterable of out-of-band buffers, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
698
/* Instance layout of the proxy object that exposes a Pickler's memo. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
703
/* Instance layout of the proxy object that exposes an Unpickler's memo. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler whose memo we're proxying. */
} UnpicklerMemoProxyObject;
708
/* Forward declarations of functions and type objects that are defined
   further down in this file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
714
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200715#include "clinic/_pickle.c.h"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000716
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000717/*************************************************************************
Serhiy Storchaka95949422013-08-27 19:40:23 +0300718 A custom hashtable mapping void* to Python ints. This is used by the pickler
719 for memoization. Using a custom hashtable rather than PyDict allows us to skip
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000720 a bunch of unnecessary object creation. This makes a huge performance
721 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000722
/* Number of slots in a freshly created memo table.  PyMemoTable_New() uses
   MT_MINSIZE - 1 as the initial mask. */
#define MT_MINSIZE 8
/* NOTE(review): used by the lookup routine, which is not fully visible
   here; presumably the probe perturbation shift as in dictobject.c. */
#define PERTURB_SHIFT 5
725
726
727static PyMemoTable *
728PyMemoTable_New(void)
729{
Victor Stinner00d7abd2020-12-01 09:56:42 +0100730 PyMemoTable *memo = PyMem_Malloc(sizeof(PyMemoTable));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000731 if (memo == NULL) {
732 PyErr_NoMemory();
733 return NULL;
734 }
735
736 memo->mt_used = 0;
737 memo->mt_allocated = MT_MINSIZE;
738 memo->mt_mask = MT_MINSIZE - 1;
Victor Stinner00d7abd2020-12-01 09:56:42 +0100739 memo->mt_table = PyMem_Malloc(MT_MINSIZE * sizeof(PyMemoEntry));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000740 if (memo->mt_table == NULL) {
Victor Stinner00d7abd2020-12-01 09:56:42 +0100741 PyMem_Free(memo);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000742 PyErr_NoMemory();
743 return NULL;
744 }
745 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
746
747 return memo;
748}
749
750static PyMemoTable *
751PyMemoTable_Copy(PyMemoTable *self)
752{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000753 PyMemoTable *new = PyMemoTable_New();
754 if (new == NULL)
755 return NULL;
756
757 new->mt_used = self->mt_used;
758 new->mt_allocated = self->mt_allocated;
759 new->mt_mask = self->mt_mask;
760 /* The table we get from _New() is probably smaller than we wanted.
761 Free it and allocate one that's the right size. */
Victor Stinner00d7abd2020-12-01 09:56:42 +0100762 PyMem_Free(new->mt_table);
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500763 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000764 if (new->mt_table == NULL) {
Victor Stinner00d7abd2020-12-01 09:56:42 +0100765 PyMem_Free(new);
Victor Stinner42024562013-07-12 00:53:57 +0200766 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000767 return NULL;
768 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700769 for (size_t i = 0; i < self->mt_allocated; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000770 Py_XINCREF(self->mt_table[i].me_key);
771 }
772 memcpy(new->mt_table, self->mt_table,
773 sizeof(PyMemoEntry) * self->mt_allocated);
774
775 return new;
776}
777
778static Py_ssize_t
779PyMemoTable_Size(PyMemoTable *self)
780{
781 return self->mt_used;
782}
783
784static int
785PyMemoTable_Clear(PyMemoTable *self)
786{
787 Py_ssize_t i = self->mt_allocated;
788
789 while (--i >= 0) {
790 Py_XDECREF(self->mt_table[i].me_key);
791 }
792 self->mt_used = 0;
793 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
794 return 0;
795}
796
797static void
798PyMemoTable_Del(PyMemoTable *self)
799{
800 if (self == NULL)
801 return;
802 PyMemoTable_Clear(self);
803
Victor Stinner00d7abd2020-12-01 09:56:42 +0100804 PyMem_Free(self->mt_table);
805 PyMem_Free(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000806}
807
808/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
809 can be considerably simpler than dictobject.c's lookdict(). */
810static PyMemoEntry *
811_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
812{
813 size_t i;
814 size_t perturb;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700815 size_t mask = self->mt_mask;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000816 PyMemoEntry *table = self->mt_table;
817 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000818 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000819
820 i = hash & mask;
821 entry = &table[i];
822 if (entry->me_key == NULL || entry->me_key == key)
823 return entry;
824
825 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
826 i = (i << 2) + i + perturb + 1;
827 entry = &table[i & mask];
828 if (entry->me_key == NULL || entry->me_key == key)
829 return entry;
830 }
Barry Warsawb2e57942017-09-14 18:13:16 -0700831 Py_UNREACHABLE();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832}
833
834/* Returns -1 on failure, 0 on success. */
835static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700836_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000837{
838 PyMemoEntry *oldtable = NULL;
839 PyMemoEntry *oldentry, *newentry;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700840 size_t new_size = MT_MINSIZE;
841 size_t to_process;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000842
843 assert(min_size > 0);
844
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700845 if (min_size > PY_SSIZE_T_MAX) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000846 PyErr_NoMemory();
847 return -1;
848 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700849
850 /* Find the smallest valid table size >= min_size. */
851 while (new_size < min_size) {
852 new_size <<= 1;
853 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000854 /* new_size needs to be a power of two. */
855 assert((new_size & (new_size - 1)) == 0);
856
857 /* Allocate new table. */
858 oldtable = self->mt_table;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500859 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000860 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200861 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000862 PyErr_NoMemory();
863 return -1;
864 }
865 self->mt_allocated = new_size;
866 self->mt_mask = new_size - 1;
867 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
868
869 /* Copy entries from the old table. */
870 to_process = self->mt_used;
871 for (oldentry = oldtable; to_process > 0; oldentry++) {
872 if (oldentry->me_key != NULL) {
873 to_process--;
874 /* newentry is a pointer to a chunk of the new
875 mt_table, so we're setting the key:value pair
876 in-place. */
877 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
878 newentry->me_key = oldentry->me_key;
879 newentry->me_value = oldentry->me_value;
880 }
881 }
882
883 /* Deallocate the old table. */
Victor Stinner00d7abd2020-12-01 09:56:42 +0100884 PyMem_Free(oldtable);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000885 return 0;
886}
887
888/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200889static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890PyMemoTable_Get(PyMemoTable *self, PyObject *key)
891{
892 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
893 if (entry->me_key == NULL)
894 return NULL;
895 return &entry->me_value;
896}
897
898/* Returns -1 on failure, 0 on success. */
899static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200900PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000901{
902 PyMemoEntry *entry;
903
904 assert(key != NULL);
905
906 entry = _PyMemoTable_Lookup(self, key);
907 if (entry->me_key != NULL) {
908 entry->me_value = value;
909 return 0;
910 }
911 Py_INCREF(key);
912 entry->me_key = key;
913 entry->me_value = value;
914 self->mt_used++;
915
916 /* If we added a key, we can safely resize. Otherwise just return!
917 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
918 *
919 * Quadrupling the size improves average table sparseness
920 * (reducing collisions) at the cost of some memory. It also halves
921 * the number of expensive resize operations in a growing memo table.
922 *
923 * Very large memo tables (over 50K items) use doubling instead.
924 * This may help applications with severe memory constraints.
925 */
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700926 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000927 return 0;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700928 }
929 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
930 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
931 return _PyMemoTable_ResizeTable(self, desired_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000932}
933
934#undef MT_MINSIZE
935#undef PERTURB_SHIFT
936
937/*************************************************************************/
938
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000939
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000940static int
941_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000942{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300943 Py_XSETREF(self->output_buffer,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200944 PyBytes_FromStringAndSize(NULL, self->max_output_len));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000945 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000946 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000947 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100948 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000949 return 0;
950}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000951
/* Store `value` at `out` as an 8-byte little-endian integer.

   `out` must have room for 8 bytes.  When size_t is narrower than 8 bytes
   the high-order output bytes are written as zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Emit the least significant byte first.  Once every byte of `value`
       has been shifted out, the remaining positions receive 0. */
    for (int pos = 0; pos < 8; pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
}
966
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100967static int
968_Pickler_CommitFrame(PicklerObject *self)
969{
970 size_t frame_len;
971 char *qdata;
972
973 if (!self->framing || self->frame_start == -1)
974 return 0;
975 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
976 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
Serhiy Storchaka1211c9a2018-01-20 16:42:44 +0200977 if (frame_len >= FRAME_SIZE_MIN) {
978 qdata[0] = FRAME;
979 _write_size64(qdata + 1, frame_len);
980 }
981 else {
982 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
983 self->output_len -= FRAME_HEADER_SIZE;
984 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100985 self->frame_start = -1;
986 return 0;
987}
988
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000989static PyObject *
990_Pickler_GetString(PicklerObject *self)
991{
992 PyObject *output_buffer = self->output_buffer;
993
994 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100995
996 if (_Pickler_CommitFrame(self))
997 return NULL;
998
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000999 self->output_buffer = NULL;
1000 /* Resize down to exact size */
1001 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1002 return NULL;
1003 return output_buffer;
1004}
1005
1006static int
1007_Pickler_FlushToFile(PicklerObject *self)
1008{
1009 PyObject *output, *result;
1010
1011 assert(self->write != NULL);
1012
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001013 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001014 output = _Pickler_GetString(self);
1015 if (output == NULL)
1016 return -1;
1017
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001018 result = _Pickle_FastCall(self->write, output);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001019 Py_XDECREF(result);
1020 return (result == NULL) ? -1 : 0;
1021}
1022
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001023static int
1024_Pickler_OpcodeBoundary(PicklerObject *self)
1025{
1026 Py_ssize_t frame_len;
1027
1028 if (!self->framing || self->frame_start == -1) {
1029 return 0;
1030 }
1031 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1032 if (frame_len >= FRAME_SIZE_TARGET) {
1033 if(_Pickler_CommitFrame(self)) {
1034 return -1;
1035 }
Leo Ariasc3d95082018-02-03 18:36:10 -06001036 /* Flush the content of the committed frame to the underlying
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001037 * file and reuse the pickler buffer for the next frame so as
1038 * to limit memory usage when dumping large complex objects to
1039 * a file.
1040 *
1041 * self->write is NULL when called via dumps.
1042 */
1043 if (self->write != NULL) {
1044 if (_Pickler_FlushToFile(self) < 0) {
1045 return -1;
1046 }
1047 if (_Pickler_ClearBuffer(self) < 0) {
1048 return -1;
1049 }
1050 }
1051 }
1052 return 0;
1053}
1054
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001055static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001056_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001057{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001058 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001059 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001060 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001061
1062 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001063 need_new_frame = (self->framing && self->frame_start == -1);
1064
1065 if (need_new_frame)
1066 n = data_len + FRAME_HEADER_SIZE;
1067 else
1068 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001069
1070 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001071 if (required > self->max_output_len) {
1072 /* Make place in buffer for the pickle chunk */
1073 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1074 PyErr_NoMemory();
1075 return -1;
1076 }
1077 self->max_output_len = (self->output_len + n) / 2 * 3;
1078 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1079 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001080 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001081 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001082 if (need_new_frame) {
1083 /* Setup new frame */
1084 Py_ssize_t frame_start = self->output_len;
1085 self->frame_start = frame_start;
1086 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1087 /* Write an invalid value, for debugging */
1088 buffer[frame_start + i] = 0xFE;
1089 }
1090 self->output_len += FRAME_HEADER_SIZE;
1091 }
1092 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001093 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001094 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001095 buffer[self->output_len + i] = s[i];
1096 }
1097 }
1098 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001099 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001100 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001101 self->output_len += data_len;
1102 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001103}
1104
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001105static PicklerObject *
1106_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001107{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001108 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001109
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001110 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1111 if (self == NULL)
1112 return NULL;
1113
1114 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01001115 self->dispatch_table = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001116 self->buffer_callback = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001117 self->write = NULL;
1118 self->proto = 0;
1119 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001120 self->framing = 0;
1121 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001122 self->fast = 0;
1123 self->fast_nesting = 0;
1124 self->fix_imports = 0;
1125 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001126 self->max_output_len = WRITE_BUF_SIZE;
1127 self->output_len = 0;
Pierre Glaser289f1f82019-05-08 23:08:25 +02001128 self->reducer_override = NULL;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001129
1130 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001131 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1132 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +02001133
1134 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +02001135 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001136 return NULL;
1137 }
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001138
1139 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001140 return self;
1141}
1142
1143static int
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001144_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001145{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001146 long proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001147
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001148 if (protocol == Py_None) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001149 proto = DEFAULT_PROTOCOL;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001150 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001151 else {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001152 proto = PyLong_AsLong(protocol);
1153 if (proto < 0) {
1154 if (proto == -1 && PyErr_Occurred())
1155 return -1;
1156 proto = HIGHEST_PROTOCOL;
1157 }
1158 else if (proto > HIGHEST_PROTOCOL) {
1159 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1160 HIGHEST_PROTOCOL);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001161 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001162 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001163 }
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001164 self->proto = (int)proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001165 self->bin = proto > 0;
1166 self->fix_imports = fix_imports && proto < 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001167 return 0;
1168}
1169
1170/* Returns -1 (with an exception set) on failure, 0 on success. This may
1171 be called once on a freshly created Pickler. */
1172static int
1173_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1174{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001176 assert(file != NULL);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001177 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1178 return -1;
1179 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001180 if (self->write == NULL) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001181 PyErr_SetString(PyExc_TypeError,
1182 "file must have a 'write' attribute");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 return -1;
1184 }
1185
1186 return 0;
1187}
1188
Antoine Pitrou91f43802019-05-26 17:10:09 +02001189static int
1190_Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1191{
1192 if (buffer_callback == Py_None) {
1193 buffer_callback = NULL;
1194 }
1195 if (buffer_callback != NULL && self->proto < 5) {
1196 PyErr_SetString(PyExc_ValueError,
1197 "buffer_callback needs protocol >= 5");
1198 return -1;
1199 }
1200
1201 Py_XINCREF(buffer_callback);
1202 self->buffer_callback = buffer_callback;
1203 return 0;
1204}
1205
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001206/* Returns the size of the input on success, -1 on failure. This takes its
1207 own reference to `input`. */
1208static Py_ssize_t
1209_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1210{
1211 if (self->buffer.buf != NULL)
1212 PyBuffer_Release(&self->buffer);
1213 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1214 return -1;
1215 self->input_buffer = self->buffer.buf;
1216 self->input_len = self->buffer.len;
1217 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001218 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001219 return self->input_len;
1220}
1221
Antoine Pitrou04248a82010-10-12 20:51:21 +00001222static int
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001223bad_readline(void)
1224{
1225 PickleState *st = _Pickle_GetGlobalState();
1226 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1227 return -1;
1228}
1229
Antoine Pitrou91f43802019-05-26 17:10:09 +02001230/* Skip any consumed data that was only prefetched using peek() */
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001231static int
Antoine Pitrou04248a82010-10-12 20:51:21 +00001232_Unpickler_SkipConsumed(UnpicklerObject *self)
1233{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001234 Py_ssize_t consumed;
1235 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001236
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001237 consumed = self->next_read_idx - self->prefetched_idx;
1238 if (consumed <= 0)
1239 return 0;
1240
1241 assert(self->peek); /* otherwise we did something wrong */
Martin Panter6245cb32016-04-15 02:14:19 +00001242 /* This makes a useless copy... */
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001243 r = PyObject_CallFunction(self->read, "n", consumed);
1244 if (r == NULL)
1245 return -1;
1246 Py_DECREF(r);
1247
1248 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001249 return 0;
1250}
1251
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001252static const Py_ssize_t READ_WHOLE_LINE = -1;
1253
1254/* If reading from a file, we need to only pull the bytes we need, since there
1255 may be multiple pickle objects arranged contiguously in the same input
1256 buffer.
1257
1258 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1259 bytes from the input stream/buffer.
1260
1261 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1262 failure; on success, returns the number of bytes read from the file.
1263
1264 On success, self->input_len will be 0; this is intentional so that when
1265 unpickling from a file, the "we've run out of data" code paths will trigger,
1266 causing the Unpickler to go back to the file for more data. Use the returned
1267 size to tell you how much data you can process. */
1268static Py_ssize_t
1269_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1270{
1271 PyObject *data;
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001272 Py_ssize_t read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001273
1274 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +02001275
Antoine Pitrou04248a82010-10-12 20:51:21 +00001276 if (_Unpickler_SkipConsumed(self) < 0)
1277 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001279 if (n == READ_WHOLE_LINE) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02001280 data = PyObject_CallNoArgs(self->readline);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001281 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001282 else {
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001283 PyObject *len;
1284 /* Prefetch some data without advancing the file pointer, if possible */
1285 if (self->peek && n < PREFETCH) {
1286 len = PyLong_FromSsize_t(PREFETCH);
1287 if (len == NULL)
1288 return -1;
1289 data = _Pickle_FastCall(self->peek, len);
1290 if (data == NULL) {
1291 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1292 return -1;
1293 /* peek() is probably not supported by the given file object */
1294 PyErr_Clear();
1295 Py_CLEAR(self->peek);
1296 }
1297 else {
1298 read_size = _Unpickler_SetStringInput(self, data);
1299 Py_DECREF(data);
1300 self->prefetched_idx = 0;
1301 if (n <= read_size)
1302 return n;
1303 }
1304 }
1305 len = PyLong_FromSsize_t(n);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001306 if (len == NULL)
1307 return -1;
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001308 data = _Pickle_FastCall(self->read, len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 if (data == NULL)
1311 return -1;
1312
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001313 read_size = _Unpickler_SetStringInput(self, data);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001314 Py_DECREF(data);
1315 return read_size;
1316}
1317
Victor Stinner19ed27e2016-05-20 11:42:37 +02001318/* Don't call it directly: use _Unpickler_Read() */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001319static Py_ssize_t
Victor Stinner19ed27e2016-05-20 11:42:37 +02001320_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001321{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001322 Py_ssize_t num_read;
1323
Benjamin Peterson6aa15642015-09-27 01:16:03 -07001324 *s = NULL;
Benjamin Petersone48cf7e2015-09-26 00:08:34 -07001325 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1326 PickleState *st = _Pickle_GetGlobalState();
1327 PyErr_SetString(st->UnpicklingError,
1328 "read would overflow (invalid bytecode)");
1329 return -1;
1330 }
Victor Stinner19ed27e2016-05-20 11:42:37 +02001331
1332 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1333 assert(self->next_read_idx + n > self->input_len);
1334
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001335 if (!self->read)
1336 return bad_readline();
1337
Antoine Pitrou91f43802019-05-26 17:10:09 +02001338 /* Extend the buffer to satisfy desired size */
Antoine Pitrou04248a82010-10-12 20:51:21 +00001339 num_read = _Unpickler_ReadFromFile(self, n);
1340 if (num_read < 0)
1341 return -1;
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001342 if (num_read < n)
1343 return bad_readline();
Antoine Pitrou04248a82010-10-12 20:51:21 +00001344 *s = self->input_buffer;
1345 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001346 return n;
1347}
1348
Antoine Pitrou91f43802019-05-26 17:10:09 +02001349/* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1350 *
1351 * This should only be used for non-small data reads where potentially
1352 * avoiding a copy is beneficial. This method does not try to prefetch
1353 * more data into the input buffer.
1354 *
1355 * _Unpickler_Read() is recommended in most cases.
1356 */
1357static Py_ssize_t
1358_Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1359{
1360 assert(n != READ_WHOLE_LINE);
1361
1362 /* Read from available buffer data, if any */
1363 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1364 if (in_buffer > 0) {
1365 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1366 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1367 self->next_read_idx += to_read;
1368 buf += to_read;
1369 n -= to_read;
1370 if (n == 0) {
1371 /* Entire read was satisfied from buffer */
1372 return n;
1373 }
1374 }
1375
1376 /* Read from file */
Antoine Pitrou9f378722020-02-23 23:33:53 +01001377 if (!self->read) {
1378 /* We're unpickling memory, this means the input is truncated */
Antoine Pitrou91f43802019-05-26 17:10:09 +02001379 return bad_readline();
1380 }
1381 if (_Unpickler_SkipConsumed(self) < 0) {
1382 return -1;
1383 }
1384
Antoine Pitrou9f378722020-02-23 23:33:53 +01001385 if (!self->readinto) {
1386 /* readinto() not supported on file-like object, fall back to read()
1387 * and copy into destination buffer (bpo-39681) */
1388 PyObject* len = PyLong_FromSsize_t(n);
1389 if (len == NULL) {
1390 return -1;
1391 }
1392 PyObject* data = _Pickle_FastCall(self->read, len);
1393 if (data == NULL) {
1394 return -1;
1395 }
1396 if (!PyBytes_Check(data)) {
1397 PyErr_Format(PyExc_ValueError,
1398 "read() returned non-bytes object (%R)",
1399 Py_TYPE(data));
1400 Py_DECREF(data);
1401 return -1;
1402 }
1403 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1404 if (read_size < n) {
1405 Py_DECREF(data);
1406 return bad_readline();
1407 }
1408 memcpy(buf, PyBytes_AS_STRING(data), n);
1409 Py_DECREF(data);
1410 return n;
1411 }
1412
Antoine Pitrou91f43802019-05-26 17:10:09 +02001413 /* Call readinto() into user buffer */
1414 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1415 if (buf_obj == NULL) {
1416 return -1;
1417 }
1418 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1419 if (read_size_obj == NULL) {
1420 return -1;
1421 }
1422 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1423 Py_DECREF(read_size_obj);
1424
1425 if (read_size < 0) {
1426 if (!PyErr_Occurred()) {
1427 PyErr_SetString(PyExc_ValueError,
1428 "readinto() returned negative size");
1429 }
1430 return -1;
1431 }
1432 if (read_size < n) {
1433 return bad_readline();
1434 }
1435 return n;
1436}
1437
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly, because it also handles input that
   comes from a stream.

   Fast path: when the input buffer already holds `n` unread bytes, `*s` is
   pointed at them and next_read_idx advances by `n`.  Otherwise the work is
   delegated to _Unpickler_ReadImpl().

   This is a macro: `self`, `s` and `n` are evaluated more than once, so the
   arguments must not have side effects.

   Returns -1 (with an exception set) on failure.  On success, returns the
   number of chars read. */
#define _Unpickler_Read(self, s, n)                             \
    (((n) <= (self)->input_len - (self)->next_read_idx)         \
     ? (*(s) = (self)->input_buffer + (self)->next_read_idx,    \
        (self)->next_read_idx += (n),                           \
        (n))                                                    \
     : _Unpickler_ReadImpl(self, (s), (n)))
1457
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001458static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1460 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001461{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001462 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001463 if (input_line == NULL) {
1464 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001465 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001466 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001467
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001468 memcpy(input_line, line, len);
1469 input_line[len] = '\0';
1470 self->input_line = input_line;
1471 *result = self->input_line;
1472 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001473}
1474
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001475/* Read a line from the input stream/buffer. If we run off the end of the input
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001476 before hitting \n, raise an error.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477
1478 Returns the number of chars read, or -1 on failure. */
1479static Py_ssize_t
1480_Unpickler_Readline(UnpicklerObject *self, char **result)
1481{
1482 Py_ssize_t i, num_read;
1483
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001484 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001485 if (self->input_buffer[i] == '\n') {
1486 char *line_start = self->input_buffer + self->next_read_idx;
1487 num_read = i - self->next_read_idx + 1;
1488 self->next_read_idx = i + 1;
1489 return _Unpickler_CopyLine(self, line_start, num_read, result);
1490 }
1491 }
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001492 if (!self->read)
1493 return bad_readline();
Victor Stinner121aab42011-09-29 23:40:53 +02001494
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001495 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1496 if (num_read < 0)
1497 return -1;
1498 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1499 return bad_readline();
1500 self->next_read_idx = num_read;
1501 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001502}
1503
1504/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1505 will be modified in place. */
1506static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001507_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001508{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001509 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001510
1511 assert(new_size > self->memo_size);
1512
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001513 PyObject **memo_new = self->memo;
1514 PyMem_RESIZE(memo_new, PyObject *, new_size);
1515 if (memo_new == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001516 PyErr_NoMemory();
1517 return -1;
1518 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001519 self->memo = memo_new;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001520 for (i = self->memo_size; i < new_size; i++)
1521 self->memo[i] = NULL;
1522 self->memo_size = new_size;
1523 return 0;
1524}
1525
1526/* Returns NULL if idx is out of bounds. */
1527static PyObject *
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001528_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001529{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001530 if (idx >= self->memo_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001531 return NULL;
1532
1533 return self->memo[idx];
1534}
1535
1536/* Returns -1 (with an exception set) on failure, 0 on success.
1537 This takes its own reference to `value`. */
1538static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001539_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001540{
1541 PyObject *old_item;
1542
1543 if (idx >= self->memo_size) {
1544 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1545 return -1;
1546 assert(idx < self->memo_size);
1547 }
1548 Py_INCREF(value);
1549 old_item = self->memo[idx];
1550 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001551 if (old_item != NULL) {
1552 Py_DECREF(old_item);
1553 }
1554 else {
1555 self->memo_len++;
1556 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001557 return 0;
1558}
1559
1560static PyObject **
1561_Unpickler_NewMemo(Py_ssize_t new_size)
1562{
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001563 PyObject **memo = PyMem_NEW(PyObject *, new_size);
Victor Stinner42024562013-07-12 00:53:57 +02001564 if (memo == NULL) {
1565 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001566 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001567 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001568 memset(memo, 0, new_size * sizeof(PyObject *));
1569 return memo;
1570}
1571
1572/* Free the unpickler's memo, taking care to decref any items left in it. */
1573static void
1574_Unpickler_MemoCleanup(UnpicklerObject *self)
1575{
1576 Py_ssize_t i;
1577 PyObject **memo = self->memo;
1578
1579 if (self->memo == NULL)
1580 return;
1581 self->memo = NULL;
1582 i = self->memo_size;
1583 while (--i >= 0) {
1584 Py_XDECREF(memo[i]);
1585 }
Victor Stinner00d7abd2020-12-01 09:56:42 +01001586 PyMem_Free(memo);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001587}
1588
1589static UnpicklerObject *
1590_Unpickler_New(void)
1591{
1592 UnpicklerObject *self;
1593
1594 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1595 if (self == NULL)
1596 return NULL;
1597
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001598 self->pers_func = NULL;
1599 self->input_buffer = NULL;
1600 self->input_line = NULL;
1601 self->input_len = 0;
1602 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001603 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001604 self->read = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001605 self->readinto = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001606 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001607 self->peek = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001608 self->buffers = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001609 self->encoding = NULL;
1610 self->errors = NULL;
1611 self->marks = NULL;
1612 self->num_marks = 0;
1613 self->marks_size = 0;
1614 self->proto = 0;
1615 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001616 memset(&self->buffer, 0, sizeof(Py_buffer));
1617 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001618 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001619 self->memo = _Unpickler_NewMemo(self->memo_size);
1620 self->stack = (Pdata *)Pdata_New();
1621
1622 if (self->memo == NULL || self->stack == NULL) {
1623 Py_DECREF(self);
1624 return NULL;
1625 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001626
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001627 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001628 return self;
1629}
1630
1631/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001632 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001633static int
1634_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1635{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001636 _Py_IDENTIFIER(peek);
1637 _Py_IDENTIFIER(read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001638 _Py_IDENTIFIER(readinto);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001639 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001640
Antoine Pitrou9f378722020-02-23 23:33:53 +01001641 /* Optional file methods */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001642 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1643 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001644 }
Antoine Pitrou9f378722020-02-23 23:33:53 +01001645 if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1646 return -1;
1647 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001648 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1649 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
Antoine Pitrou9f378722020-02-23 23:33:53 +01001650 if (!self->readline || !self->read) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001651 if (!PyErr_Occurred()) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001652 PyErr_SetString(PyExc_TypeError,
Antoine Pitrou9f378722020-02-23 23:33:53 +01001653 "file must have 'read' and 'readline' attributes");
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001654 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001655 Py_CLEAR(self->read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001656 Py_CLEAR(self->readinto);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001657 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001658 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001659 return -1;
1660 }
1661 return 0;
1662}
1663
1664/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001665 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001666static int
1667_Unpickler_SetInputEncoding(UnpicklerObject *self,
1668 const char *encoding,
1669 const char *errors)
1670{
1671 if (encoding == NULL)
1672 encoding = "ASCII";
1673 if (errors == NULL)
1674 errors = "strict";
1675
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001676 self->encoding = _PyMem_Strdup(encoding);
1677 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001678 if (self->encoding == NULL || self->errors == NULL) {
1679 PyErr_NoMemory();
1680 return -1;
1681 }
1682 return 0;
1683}
1684
Antoine Pitrou91f43802019-05-26 17:10:09 +02001685/* Returns -1 (with an exception set) on failure, 0 on success. This may
1686 be called once on a freshly created Unpickler. */
1687static int
1688_Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1689{
Markus Mohrhard898318b2019-07-26 00:00:34 +08001690 if (buffers == NULL || buffers == Py_None) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02001691 self->buffers = NULL;
1692 }
1693 else {
1694 self->buffers = PyObject_GetIter(buffers);
1695 if (self->buffers == NULL) {
1696 return -1;
1697 }
1698 }
1699 return 0;
1700}
1701
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001702/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001703static int
1704memo_get(PicklerObject *self, PyObject *key)
1705{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001706 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001707 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001708 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001709
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001710 value = PyMemoTable_Get(self->memo, key);
1711 if (value == NULL) {
1712 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001713 return -1;
1714 }
1715
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001716 if (!self->bin) {
1717 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001718 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Victor Stinnerd36cf5f2020-06-10 18:38:05 +02001719 "%zd\n", *value);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001720 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001721 }
1722 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001723 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001724 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001725 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001726 len = 2;
1727 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001728 else if ((size_t)*value <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001729 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001730 pdata[1] = (unsigned char)(*value & 0xff);
1731 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1732 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1733 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001734 len = 5;
1735 }
1736 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001737 PickleState *st = _Pickle_GetGlobalState();
1738 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001739 "memo id too large for LONG_BINGET");
1740 return -1;
1741 }
1742 }
1743
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001744 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001745 return -1;
1746
1747 return 0;
1748}
1749
1750/* Store an object in the memo, assign it a new unique ID based on the number
1751 of objects currently stored in the memo and generate a PUT opcode. */
1752static int
1753memo_put(PicklerObject *self, PyObject *obj)
1754{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001755 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001756 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001757 Py_ssize_t idx;
1758
1759 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760
1761 if (self->fast)
1762 return 0;
1763
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001764 idx = PyMemoTable_Size(self->memo);
1765 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1766 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001767
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001768 if (self->proto >= 4) {
1769 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1770 return -1;
1771 return 0;
1772 }
1773 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001774 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001775 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Victor Stinnerd36cf5f2020-06-10 18:38:05 +02001776 "%zd\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001777 len = strlen(pdata);
1778 }
1779 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001780 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001781 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001782 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001783 len = 2;
1784 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001785 else if ((size_t)idx <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001786 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001787 pdata[1] = (unsigned char)(idx & 0xff);
1788 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1789 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1790 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001791 len = 5;
1792 }
1793 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001794 PickleState *st = _Pickle_GetGlobalState();
1795 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001796 "memo id too large for LONG_BINPUT");
1797 return -1;
1798 }
1799 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001800 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001801 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001802
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001803 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001804}
1805
1806static PyObject *
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001807get_dotted_path(PyObject *obj, PyObject *name)
1808{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001809 _Py_static_string(PyId_dot, ".");
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001810 PyObject *dotted_path;
1811 Py_ssize_t i, n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001812
1813 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001814 if (dotted_path == NULL)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001815 return NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001816 n = PyList_GET_SIZE(dotted_path);
1817 assert(n >= 1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001818 for (i = 0; i < n; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001819 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001820 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
Antoine Pitrou6cd5eda2014-12-02 00:20:03 +01001821 if (obj == NULL)
1822 PyErr_Format(PyExc_AttributeError,
1823 "Can't pickle local object %R", name);
1824 else
1825 PyErr_Format(PyExc_AttributeError,
1826 "Can't pickle local attribute %R on %R", name, obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001827 Py_DECREF(dotted_path);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001828 return NULL;
1829 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001830 }
1831 return dotted_path;
1832}
1833
1834static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001835get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001836{
1837 Py_ssize_t i, n;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001838 PyObject *parent = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001839
1840 assert(PyList_CheckExact(names));
1841 Py_INCREF(obj);
1842 n = PyList_GET_SIZE(names);
1843 for (i = 0; i < n; i++) {
1844 PyObject *name = PyList_GET_ITEM(names, i);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001845 Py_XDECREF(parent);
1846 parent = obj;
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001847 (void)_PyObject_LookupAttr(parent, name, &obj);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001848 if (obj == NULL) {
1849 Py_DECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001850 return NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001851 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001852 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001853 if (pparent != NULL)
1854 *pparent = parent;
1855 else
1856 Py_XDECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001857 return obj;
1858}
1859
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001860
1861static PyObject *
1862getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1863{
1864 PyObject *dotted_path, *attr;
1865
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001866 if (allow_qualname) {
1867 dotted_path = get_dotted_path(obj, name);
1868 if (dotted_path == NULL)
1869 return NULL;
1870 attr = get_deep_attribute(obj, dotted_path, NULL);
1871 Py_DECREF(dotted_path);
1872 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001873 else {
1874 (void)_PyObject_LookupAttr(obj, name, &attr);
1875 }
1876 if (attr == NULL && !PyErr_Occurred()) {
1877 PyErr_Format(PyExc_AttributeError,
1878 "Can't get attribute %R on %R", name, obj);
1879 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001880 return attr;
1881}
1882
Eric Snow3f9eee62017-09-15 16:35:20 -06001883static int
1884_checkmodule(PyObject *module_name, PyObject *module,
1885 PyObject *global, PyObject *dotted_path)
1886{
1887 if (module == Py_None) {
1888 return -1;
1889 }
1890 if (PyUnicode_Check(module_name) &&
1891 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1892 return -1;
1893 }
1894
1895 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1896 if (candidate == NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001897 return -1;
1898 }
1899 if (candidate != global) {
1900 Py_DECREF(candidate);
1901 return -1;
1902 }
1903 Py_DECREF(candidate);
1904 return 0;
1905}
1906
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001907static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001908whichmodule(PyObject *global, PyObject *dotted_path)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001909{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001910 PyObject *module_name;
Eric Snow3f9eee62017-09-15 16:35:20 -06001911 PyObject *module = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001912 Py_ssize_t i;
Eric Snow3f9eee62017-09-15 16:35:20 -06001913 PyObject *modules;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001914 _Py_IDENTIFIER(__module__);
1915 _Py_IDENTIFIER(modules);
1916 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001917
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001918 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1919 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001920 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001921 if (module_name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001922 /* In some rare cases (e.g., bound methods of extension types),
1923 __module__ can be None. If it is so, then search sys.modules for
1924 the module of global. */
1925 if (module_name != Py_None)
1926 return module_name;
1927 Py_CLEAR(module_name);
1928 }
1929 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001930
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001931 /* Fallback on walking sys.modules */
Eric Snow3f9eee62017-09-15 16:35:20 -06001932 modules = _PySys_GetObjectId(&PyId_modules);
1933 if (modules == NULL) {
Victor Stinner1e53bba2013-07-16 22:26:05 +02001934 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001935 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001936 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001937 if (PyDict_CheckExact(modules)) {
1938 i = 0;
1939 while (PyDict_Next(modules, &i, &module_name, &module)) {
1940 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1941 Py_INCREF(module_name);
1942 return module_name;
1943 }
1944 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001945 return NULL;
Eric Snow3f9eee62017-09-15 16:35:20 -06001946 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001947 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001948 }
1949 else {
1950 PyObject *iterator = PyObject_GetIter(modules);
1951 if (iterator == NULL) {
1952 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001953 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001954 while ((module_name = PyIter_Next(iterator))) {
1955 module = PyObject_GetItem(modules, module_name);
1956 if (module == NULL) {
1957 Py_DECREF(module_name);
1958 Py_DECREF(iterator);
1959 return NULL;
1960 }
1961 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1962 Py_DECREF(module);
1963 Py_DECREF(iterator);
1964 return module_name;
1965 }
1966 Py_DECREF(module);
1967 Py_DECREF(module_name);
1968 if (PyErr_Occurred()) {
1969 Py_DECREF(iterator);
1970 return NULL;
1971 }
1972 }
1973 Py_DECREF(iterator);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974 }
1975
1976 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001977 module_name = _PyUnicode_FromId(&PyId___main__);
Victor Stinneraf46eb82017-09-05 23:30:16 +02001978 Py_XINCREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001979 return module_name;
1980}
1981
1982/* fast_save_enter() and fast_save_leave() are guards against recursive
1983 objects when Pickler is used with the "fast mode" (i.e., with object
1984 memoization disabled). If the nesting of a list or dict object exceed
1985 FAST_NESTING_LIMIT, these guards will start keeping an internal
1986 reference to the seen list or dict objects and check whether these objects
1987 are recursive. These are not strictly necessary, since save() has a
1988 hard-coded recursion limit, but they give a nicer error message than the
1989 typical RuntimeError. */
1990static int
1991fast_save_enter(PicklerObject *self, PyObject *obj)
1992{
1993 /* if fast_nesting < 0, we're doing an error exit. */
1994 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1995 PyObject *key = NULL;
1996 if (self->fast_memo == NULL) {
1997 self->fast_memo = PyDict_New();
1998 if (self->fast_memo == NULL) {
1999 self->fast_nesting = -1;
2000 return 0;
2001 }
2002 }
2003 key = PyLong_FromVoidPtr(obj);
Mat Mf76231f2017-11-13 02:50:16 -05002004 if (key == NULL) {
2005 self->fast_nesting = -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006 return 0;
Mat Mf76231f2017-11-13 02:50:16 -05002007 }
Serhiy Storchakab510e102020-10-26 12:47:57 +02002008 int r = PyDict_Contains(self->fast_memo, key);
2009 if (r > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002010 PyErr_Format(PyExc_ValueError,
2011 "fast mode: can't pickle cyclic objects "
2012 "including object type %.200s at %p",
Victor Stinnerdaa97562020-02-07 03:37:06 +01002013 Py_TYPE(obj)->tp_name, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 }
Serhiy Storchakab510e102020-10-26 12:47:57 +02002015 else if (r == 0) {
2016 r = PyDict_SetItem(self->fast_memo, key, Py_None);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002017 }
2018 Py_DECREF(key);
Serhiy Storchakab510e102020-10-26 12:47:57 +02002019 if (r != 0) {
2020 self->fast_nesting = -1;
2021 return 0;
2022 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002023 }
2024 return 1;
2025}
2026
2027static int
2028fast_save_leave(PicklerObject *self, PyObject *obj)
2029{
2030 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2031 PyObject *key = PyLong_FromVoidPtr(obj);
2032 if (key == NULL)
2033 return 0;
2034 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2035 Py_DECREF(key);
2036 return 0;
2037 }
2038 Py_DECREF(key);
2039 }
2040 return 1;
2041}
2042
2043static int
2044save_none(PicklerObject *self, PyObject *obj)
2045{
2046 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002047 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048 return -1;
2049
2050 return 0;
2051}
2052
2053static int
2054save_bool(PicklerObject *self, PyObject *obj)
2055{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002056 if (self->proto >= 2) {
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002057 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002058 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002059 return -1;
2060 }
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002061 else {
2062 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2063 * so that unpicklers written before bools were introduced unpickle them
2064 * as ints, but unpicklers after can recognize that bools were intended.
2065 * Note that protocol 2 added direct ways to pickle bools.
2066 */
2067 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2068 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2069 return -1;
2070 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002071 return 0;
2072}
2073
2074static int
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002075save_long(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002076{
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002077 PyObject *repr = NULL;
2078 Py_ssize_t size;
2079 long val;
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002080 int overflow;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002081 int status = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002082
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002083 val= PyLong_AsLongAndOverflow(obj, &overflow);
2084 if (!overflow && (sizeof(long) <= 4 ||
2085 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2086 {
Larry Hastings61272b72014-01-07 12:41:53 -08002087 /* result fits in a signed 4-byte integer.
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002088
2089 Note: we can't use -0x80000000L in the above condition because some
2090 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2091 before applying the unary minus when sizeof(long) <= 4. The
2092 resulting value stays unsigned which is commonly not what we want,
2093 so MSVC happily warns us about it. However, that result would have
2094 been fine because we guard for sizeof(long) <= 4 which turns the
2095 condition true in that particular case. */
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002096 char pdata[32];
2097 Py_ssize_t len = 0;
2098
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002099 if (self->bin) {
2100 pdata[1] = (unsigned char)(val & 0xff);
2101 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2102 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2103 pdata[4] = (unsigned char)((val >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002104
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002105 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2106 pdata[0] = BININT;
2107 len = 5;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002108 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002109 else if (pdata[2] != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002110 pdata[0] = BININT2;
2111 len = 3;
2112 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002113 else {
2114 pdata[0] = BININT1;
2115 len = 2;
2116 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002117 }
2118 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002119 sprintf(pdata, "%c%ld\n", INT, val);
2120 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002121 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002122 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002123 return -1;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002124
2125 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002126 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002127 assert(!PyErr_Occurred());
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002128
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002129 if (self->proto >= 2) {
2130 /* Linear-time pickling. */
2131 size_t nbits;
2132 size_t nbytes;
2133 unsigned char *pdata;
2134 char header[5];
2135 int i;
2136 int sign = _PyLong_Sign(obj);
2137
2138 if (sign == 0) {
2139 header[0] = LONG1;
2140 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002141 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002142 goto error;
2143 return 0;
2144 }
2145 nbits = _PyLong_NumBits(obj);
2146 if (nbits == (size_t)-1 && PyErr_Occurred())
2147 goto error;
2148 /* How many bytes do we need? There are nbits >> 3 full
2149 * bytes of data, and nbits & 7 leftover bits. If there
2150 * are any leftover bits, then we clearly need another
Min ho Kim96e12d52019-07-22 06:12:33 +10002151 * byte. What's not so obvious is that we *probably*
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002152 * need another byte even if there aren't any leftovers:
2153 * the most-significant bit of the most-significant byte
2154 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03002155 * opposite of the one we need. The exception is ints
2156 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002157 * its own 256's-complement, so has the right sign bit
2158 * even without the extra byte. That's a pain to check
2159 * for in advance, though, so we always grab an extra
2160 * byte at the start, and cut it back later if possible.
2161 */
2162 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01002163 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002164 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03002165 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002166 goto error;
2167 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002168 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002169 if (repr == NULL)
2170 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002171 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002172 i = _PyLong_AsByteArray((PyLongObject *)obj,
2173 pdata, nbytes,
2174 1 /* little endian */ , 1 /* signed */ );
2175 if (i < 0)
2176 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03002177 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002178 * needed. This is so iff the MSB is all redundant sign
2179 * bits.
2180 */
2181 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02002182 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002183 pdata[nbytes - 1] == 0xff &&
2184 (pdata[nbytes - 2] & 0x80) != 0) {
2185 nbytes--;
2186 }
2187
2188 if (nbytes < 256) {
2189 header[0] = LONG1;
2190 header[1] = (unsigned char)nbytes;
2191 size = 2;
2192 }
2193 else {
2194 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002195 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002196 for (i = 1; i < 5; i++) {
2197 header[i] = (unsigned char)(size & 0xff);
2198 size >>= 8;
2199 }
2200 size = 5;
2201 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002202 if (_Pickler_Write(self, header, size) < 0 ||
2203 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002204 goto error;
2205 }
2206 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002207 const char long_op = LONG;
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02002208 const char *string;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002209
Mark Dickinson8dd05142009-01-20 20:43:58 +00002210 /* proto < 2: write the repr and newline. This is quadratic-time (in
2211 the number of digits), in both directions. We add a trailing 'L'
2212 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002213
2214 repr = PyObject_Repr(obj);
2215 if (repr == NULL)
2216 goto error;
2217
Serhiy Storchaka06515832016-11-20 09:13:07 +02002218 string = PyUnicode_AsUTF8AndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002219 if (string == NULL)
2220 goto error;
2221
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002222 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2223 _Pickler_Write(self, string, size) < 0 ||
2224 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002225 goto error;
2226 }
2227
2228 if (0) {
2229 error:
2230 status = -1;
2231 }
2232 Py_XDECREF(repr);
2233
2234 return status;
2235}
2236
2237static int
2238save_float(PicklerObject *self, PyObject *obj)
2239{
2240 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2241
2242 if (self->bin) {
2243 char pdata[9];
2244 pdata[0] = BINFLOAT;
2245 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2246 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002247 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002248 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02002249 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002250 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00002251 int result = -1;
2252 char *buf = NULL;
2253 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002254
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002255 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002256 goto done;
2257
Serhiy Storchakac86ca262015-02-15 14:18:32 +02002258 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00002259 if (!buf) {
2260 PyErr_NoMemory();
2261 goto done;
2262 }
2263
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002264 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002265 goto done;
2266
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002267 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002268 goto done;
2269
2270 result = 0;
2271done:
2272 PyMem_Free(buf);
2273 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002274 }
2275
2276 return 0;
2277}
2278
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002279/* Perform direct write of the header and payload of the binary object.
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002280
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002281 The large contiguous data is written directly into the underlying file
2282 object, bypassing the output_buffer of the Pickler. We intentionally
2283 do not insert a protocol 4 frame opcode to make it possible to optimize
2284 file.read calls in the loader.
2285 */
2286static int
2287_Pickler_write_bytes(PicklerObject *self,
2288 const char *header, Py_ssize_t header_size,
2289 const char *data, Py_ssize_t data_size,
2290 PyObject *payload)
2291{
2292 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2293 int framing = self->framing;
2294
2295 if (bypass_buffer) {
2296 assert(self->output_buffer != NULL);
2297 /* Commit the previous frame. */
2298 if (_Pickler_CommitFrame(self)) {
2299 return -1;
2300 }
2301 /* Disable framing temporarily */
2302 self->framing = 0;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002303 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002304
2305 if (_Pickler_Write(self, header, header_size) < 0) {
2306 return -1;
2307 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002308
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002309 if (bypass_buffer && self->write != NULL) {
2310 /* Bypass the in-memory buffer to directly stream large data
2311 into the underlying file object. */
2312 PyObject *result, *mem = NULL;
2313 /* Dump the output buffer to the file. */
2314 if (_Pickler_FlushToFile(self) < 0) {
2315 return -1;
2316 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002317
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002318 /* Stream write the payload into the file without going through the
2319 output buffer. */
2320 if (payload == NULL) {
Serhiy Storchaka5b76bdb2018-01-13 00:28:31 +02002321 /* TODO: It would be better to use a memoryview with a linked
2322 original string if this is possible. */
2323 payload = mem = PyBytes_FromStringAndSize(data, data_size);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002324 if (payload == NULL) {
2325 return -1;
2326 }
2327 }
Petr Viktorinffd97532020-02-11 17:46:57 +01002328 result = PyObject_CallOneArg(self->write, payload);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002329 Py_XDECREF(mem);
2330 if (result == NULL) {
2331 return -1;
2332 }
2333 Py_DECREF(result);
2334
2335 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2336 if (_Pickler_ClearBuffer(self) < 0) {
2337 return -1;
2338 }
2339 }
2340 else {
2341 if (_Pickler_Write(self, data, data_size) < 0) {
2342 return -1;
2343 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002344 }
2345
2346 /* Re-enable framing for subsequent calls to _Pickler_Write. */
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002347 self->framing = framing;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002348
2349 return 0;
2350}
2351
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002352static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02002353_save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2354 Py_ssize_t size)
2355{
2356 assert(self->proto >= 3);
2357
2358 char header[9];
2359 Py_ssize_t len;
2360
2361 if (size < 0)
2362 return -1;
2363
2364 if (size <= 0xff) {
2365 header[0] = SHORT_BINBYTES;
2366 header[1] = (unsigned char)size;
2367 len = 2;
2368 }
2369 else if ((size_t)size <= 0xffffffffUL) {
2370 header[0] = BINBYTES;
2371 header[1] = (unsigned char)(size & 0xff);
2372 header[2] = (unsigned char)((size >> 8) & 0xff);
2373 header[3] = (unsigned char)((size >> 16) & 0xff);
2374 header[4] = (unsigned char)((size >> 24) & 0xff);
2375 len = 5;
2376 }
2377 else if (self->proto >= 4) {
2378 header[0] = BINBYTES8;
2379 _write_size64(header + 1, size);
2380 len = 9;
2381 }
2382 else {
2383 PyErr_SetString(PyExc_OverflowError,
2384 "serializing a bytes object larger than 4 GiB "
2385 "requires pickle protocol 4 or higher");
2386 return -1;
2387 }
2388
2389 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2390 return -1;
2391 }
2392
2393 if (memo_put(self, obj) < 0) {
2394 return -1;
2395 }
2396
2397 return 0;
2398}
2399
2400static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002401save_bytes(PicklerObject *self, PyObject *obj)
2402{
2403 if (self->proto < 3) {
2404 /* Older pickle protocols do not have an opcode for pickling bytes
2405 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002406 the __reduce__ method) to permit bytes object unpickling.
2407
2408 Here we use a hack to be compatible with Python 2. Since in Python
2409 2 'bytes' is just an alias for 'str' (which has different
2410 parameters than the actual bytes object), we use codecs.encode
2411 to create the appropriate 'str' object when unpickled using
2412 Python 2 *and* the appropriate 'bytes' object when unpickled
2413 using Python 3. Again this is a hack and we don't need to do this
2414 with newer protocols. */
Pierre Glaser289f1f82019-05-08 23:08:25 +02002415 PyObject *reduce_value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002416 int status;
2417
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002418 if (PyBytes_GET_SIZE(obj) == 0) {
2419 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2420 }
2421 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002422 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002423 PyObject *unicode_str =
2424 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2425 PyBytes_GET_SIZE(obj),
2426 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002427 _Py_IDENTIFIER(latin1);
2428
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002429 if (unicode_str == NULL)
2430 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002431 reduce_value = Py_BuildValue("(O(OO))",
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002432 st->codecs_encode, unicode_str,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002433 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002434 Py_DECREF(unicode_str);
2435 }
2436
2437 if (reduce_value == NULL)
2438 return -1;
2439
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002440 /* save_reduce() will memoize the object automatically. */
2441 status = save_reduce(self, reduce_value, obj);
2442 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002443 return status;
2444 }
2445 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002446 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2447 PyBytes_GET_SIZE(obj));
2448 }
2449}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002450
Antoine Pitrou91f43802019-05-26 17:10:09 +02002451static int
2452_save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2453 Py_ssize_t size)
2454{
2455 assert(self->proto >= 5);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002456
Antoine Pitrou91f43802019-05-26 17:10:09 +02002457 char header[9];
2458 Py_ssize_t len;
2459
2460 if (size < 0)
2461 return -1;
2462
2463 header[0] = BYTEARRAY8;
2464 _write_size64(header + 1, size);
2465 len = 9;
2466
2467 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2468 return -1;
2469 }
2470
2471 if (memo_put(self, obj) < 0) {
2472 return -1;
2473 }
2474
2475 return 0;
2476}
2477
2478static int
2479save_bytearray(PicklerObject *self, PyObject *obj)
2480{
2481 if (self->proto < 5) {
2482 /* Older pickle protocols do not have an opcode for pickling
2483 * bytearrays. */
2484 PyObject *reduce_value = NULL;
2485 int status;
2486
2487 if (PyByteArray_GET_SIZE(obj) == 0) {
2488 reduce_value = Py_BuildValue("(O())",
2489 (PyObject *) &PyByteArray_Type);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002490 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002491 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002492 PyObject *bytes_obj = PyBytes_FromObject(obj);
2493 if (bytes_obj != NULL) {
2494 reduce_value = Py_BuildValue("(O(O))",
2495 (PyObject *) &PyByteArray_Type,
2496 bytes_obj);
2497 Py_DECREF(bytes_obj);
2498 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002499 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002500 if (reduce_value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002501 return -1;
2502
Antoine Pitrou91f43802019-05-26 17:10:09 +02002503 /* save_reduce() will memoize the object automatically. */
2504 status = save_reduce(self, reduce_value, obj);
2505 Py_DECREF(reduce_value);
2506 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002507 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002508 else {
2509 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2510 PyByteArray_GET_SIZE(obj));
2511 }
2512}
2513
2514static int
2515save_picklebuffer(PicklerObject *self, PyObject *obj)
2516{
2517 if (self->proto < 5) {
2518 PickleState *st = _Pickle_GetGlobalState();
2519 PyErr_SetString(st->PicklingError,
2520 "PickleBuffer can only pickled with protocol >= 5");
2521 return -1;
2522 }
2523 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2524 if (view == NULL) {
2525 return -1;
2526 }
2527 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2528 PickleState *st = _Pickle_GetGlobalState();
2529 PyErr_SetString(st->PicklingError,
2530 "PickleBuffer can not be pickled when "
2531 "pointing to a non-contiguous buffer");
2532 return -1;
2533 }
2534 int in_band = 1;
2535 if (self->buffer_callback != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002536 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
Antoine Pitrou91f43802019-05-26 17:10:09 +02002537 if (ret == NULL) {
2538 return -1;
2539 }
2540 in_band = PyObject_IsTrue(ret);
2541 Py_DECREF(ret);
2542 if (in_band == -1) {
2543 return -1;
2544 }
2545 }
2546 if (in_band) {
2547 /* Write data in-band */
2548 if (view->readonly) {
2549 return _save_bytes_data(self, obj, (const char*) view->buf,
2550 view->len);
2551 }
2552 else {
2553 return _save_bytearray_data(self, obj, (const char*) view->buf,
2554 view->len);
2555 }
2556 }
2557 else {
2558 /* Write data out-of-band */
2559 const char next_buffer_op = NEXT_BUFFER;
2560 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2561 return -1;
2562 }
2563 if (view->readonly) {
2564 const char readonly_buffer_op = READONLY_BUFFER;
2565 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2566 return -1;
2567 }
2568 }
2569 }
2570 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002571}
2572
2573/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2574 backslash and newline characters to \uXXXX escapes. */
2575static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002576raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002577{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002578 char *p;
Victor Stinner049e5092014-08-17 22:20:00 +02002579 Py_ssize_t i, size;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002580 const void *data;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002581 unsigned int kind;
Victor Stinner358af132015-10-12 22:36:57 +02002582 _PyBytesWriter writer;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002583
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002584 if (PyUnicode_READY(obj))
2585 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002586
Victor Stinner358af132015-10-12 22:36:57 +02002587 _PyBytesWriter_Init(&writer);
2588
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002589 size = PyUnicode_GET_LENGTH(obj);
2590 data = PyUnicode_DATA(obj);
2591 kind = PyUnicode_KIND(obj);
Victor Stinner121aab42011-09-29 23:40:53 +02002592
Victor Stinner358af132015-10-12 22:36:57 +02002593 p = _PyBytesWriter_Alloc(&writer, size);
2594 if (p == NULL)
2595 goto error;
2596 writer.overallocate = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002597
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002598 for (i=0; i < size; i++) {
2599 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002600 /* Map 32-bit characters to '\Uxxxxxxxx' */
2601 if (ch >= 0x10000) {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002602 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002603 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2604 if (p == NULL)
2605 goto error;
2606
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002607 *p++ = '\\';
2608 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002609 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2610 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2611 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2612 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2613 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2615 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2616 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002617 }
Victor Stinner358af132015-10-12 22:36:57 +02002618 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
Serhiy Storchaka38ab7d42019-05-31 11:29:39 +03002619 else if (ch >= 256 ||
2620 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2621 ch == 0x1a)
2622 {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002623 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002624 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2625 if (p == NULL)
2626 goto error;
2627
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002628 *p++ = '\\';
2629 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002630 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2631 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2632 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2633 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002634 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002635 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002636 else
2637 *p++ = (char) ch;
2638 }
Victor Stinner358af132015-10-12 22:36:57 +02002639
2640 return _PyBytesWriter_Finish(&writer, p);
2641
2642error:
2643 _PyBytesWriter_Dealloc(&writer);
2644 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002645}
2646
2647static int
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002648write_unicode_binary(PicklerObject *self, PyObject *obj)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002649{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002650 char header[9];
2651 Py_ssize_t len;
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002652 PyObject *encoded = NULL;
2653 Py_ssize_t size;
2654 const char *data;
2655
2656 if (PyUnicode_READY(obj))
2657 return -1;
2658
2659 data = PyUnicode_AsUTF8AndSize(obj, &size);
2660 if (data == NULL) {
2661 /* Issue #8383: for strings with lone surrogates, fallback on the
2662 "surrogatepass" error handler. */
2663 PyErr_Clear();
2664 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2665 if (encoded == NULL)
2666 return -1;
2667
2668 data = PyBytes_AS_STRING(encoded);
2669 size = PyBytes_GET_SIZE(encoded);
2670 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002671
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002672 assert(size >= 0);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002673 if (size <= 0xff && self->proto >= 4) {
2674 header[0] = SHORT_BINUNICODE;
2675 header[1] = (unsigned char)(size & 0xff);
2676 len = 2;
2677 }
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002678 else if ((size_t)size <= 0xffffffffUL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002679 header[0] = BINUNICODE;
2680 header[1] = (unsigned char)(size & 0xff);
2681 header[2] = (unsigned char)((size >> 8) & 0xff);
2682 header[3] = (unsigned char)((size >> 16) & 0xff);
2683 header[4] = (unsigned char)((size >> 24) & 0xff);
2684 len = 5;
2685 }
2686 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002687 header[0] = BINUNICODE8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002688 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002689 len = 9;
2690 }
2691 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002692 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou91f43802019-05-26 17:10:09 +02002693 "serializing a string larger than 4 GiB "
2694 "requires pickle protocol 4 or higher");
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002695 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002696 return -1;
2697 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002698
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002699 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2700 Py_XDECREF(encoded);
2701 return -1;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002702 }
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002703 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002704 return 0;
2705}
2706
2707static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002708save_unicode(PicklerObject *self, PyObject *obj)
2709{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002710 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002711 if (write_unicode_binary(self, obj) < 0)
2712 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002713 }
2714 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002715 PyObject *encoded;
2716 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002717 const char unicode_op = UNICODE;
2718
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002719 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002720 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002721 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002722
Antoine Pitrou299978d2013-04-07 17:38:11 +02002723 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2724 Py_DECREF(encoded);
2725 return -1;
2726 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002727
2728 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002729 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2730 Py_DECREF(encoded);
2731 return -1;
2732 }
2733 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002734
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002735 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002736 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002737 }
2738 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002739 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002740
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002741 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002742}
2743
2744/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2745static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002746store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002747{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002748 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002749
2750 assert(PyTuple_Size(t) == len);
2751
2752 for (i = 0; i < len; i++) {
2753 PyObject *element = PyTuple_GET_ITEM(t, i);
2754
2755 if (element == NULL)
2756 return -1;
2757 if (save(self, element, 0) < 0)
2758 return -1;
2759 }
2760
2761 return 0;
2762}
2763
2764/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2765 * used across protocols to minimize the space needed to pickle them.
2766 * Tuples are also the only builtin immutable type that can be recursive
2767 * (a tuple can be reached from itself), and that requires some subtle
2768 * magic so that it works in all cases. IOW, this is a long routine.
2769 */
2770static int
2771save_tuple(PicklerObject *self, PyObject *obj)
2772{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002773 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002774
2775 const char mark_op = MARK;
2776 const char tuple_op = TUPLE;
2777 const char pop_op = POP;
2778 const char pop_mark_op = POP_MARK;
2779 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2780
2781 if ((len = PyTuple_Size(obj)) < 0)
2782 return -1;
2783
2784 if (len == 0) {
2785 char pdata[2];
2786
2787 if (self->proto) {
2788 pdata[0] = EMPTY_TUPLE;
2789 len = 1;
2790 }
2791 else {
2792 pdata[0] = MARK;
2793 pdata[1] = TUPLE;
2794 len = 2;
2795 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002796 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002797 return -1;
2798 return 0;
2799 }
2800
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002801 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002802 * saving the tuple elements, the tuple must be recursive, in
2803 * which case we'll pop everything we put on the stack, and fetch
2804 * its value from the memo.
2805 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002806 if (len <= 3 && self->proto >= 2) {
2807 /* Use TUPLE{1,2,3} opcodes. */
2808 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002809 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002810
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002811 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002812 /* pop the len elements */
2813 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002814 if (_Pickler_Write(self, &pop_op, 1) < 0)
2815 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002816 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002817 if (memo_get(self, obj) < 0)
2818 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002819
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002820 return 0;
2821 }
2822 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002823 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2824 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002825 }
2826 goto memoize;
2827 }
2828
2829 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2830 * Generate MARK e1 e2 ... TUPLE
2831 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002832 if (_Pickler_Write(self, &mark_op, 1) < 0)
2833 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002834
2835 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002836 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002837
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002838 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002839 /* pop the stack stuff we pushed */
2840 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002841 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2842 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002843 }
2844 else {
2845 /* Note that we pop one more than len, to remove
2846 * the MARK too.
2847 */
2848 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002849 if (_Pickler_Write(self, &pop_op, 1) < 0)
2850 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002851 }
2852 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002853 if (memo_get(self, obj) < 0)
2854 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002855
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002856 return 0;
2857 }
2858 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002859 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2860 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002861 }
2862
2863 memoize:
2864 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002865 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002867 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002868}
2869
2870/* iter is an iterator giving items, and we batch up chunks of
2871 * MARK item item ... item APPENDS
2872 * opcode sequences. Calling code should have arranged to first create an
2873 * empty list, or list-like object, for the APPENDS to operate on.
2874 * Returns 0 on success, <0 on error.
2875 */
2876static int
2877batch_list(PicklerObject *self, PyObject *iter)
2878{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002879 PyObject *obj = NULL;
2880 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002881 int i, n;
2882
2883 const char mark_op = MARK;
2884 const char append_op = APPEND;
2885 const char appends_op = APPENDS;
2886
2887 assert(iter != NULL);
2888
2889 /* XXX: I think this function could be made faster by avoiding the
2890 iterator interface and fetching objects directly from list using
2891 PyList_GET_ITEM.
2892 */
2893
2894 if (self->proto == 0) {
2895 /* APPENDS isn't available; do one at a time. */
2896 for (;;) {
2897 obj = PyIter_Next(iter);
2898 if (obj == NULL) {
2899 if (PyErr_Occurred())
2900 return -1;
2901 break;
2902 }
2903 i = save(self, obj, 0);
2904 Py_DECREF(obj);
2905 if (i < 0)
2906 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002907 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002908 return -1;
2909 }
2910 return 0;
2911 }
2912
2913 /* proto > 0: write in batches of BATCHSIZE. */
2914 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002915 /* Get first item */
2916 firstitem = PyIter_Next(iter);
2917 if (firstitem == NULL) {
2918 if (PyErr_Occurred())
2919 goto error;
2920
2921 /* nothing more to add */
2922 break;
2923 }
2924
2925 /* Try to get a second item */
2926 obj = PyIter_Next(iter);
2927 if (obj == NULL) {
2928 if (PyErr_Occurred())
2929 goto error;
2930
2931 /* Only one item to write */
2932 if (save(self, firstitem, 0) < 0)
2933 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002934 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002935 goto error;
2936 Py_CLEAR(firstitem);
2937 break;
2938 }
2939
2940 /* More than one item to write */
2941
2942 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002943 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002944 goto error;
2945
2946 if (save(self, firstitem, 0) < 0)
2947 goto error;
2948 Py_CLEAR(firstitem);
2949 n = 1;
2950
2951 /* Fetch and save up to BATCHSIZE items */
2952 while (obj) {
2953 if (save(self, obj, 0) < 0)
2954 goto error;
2955 Py_CLEAR(obj);
2956 n += 1;
2957
2958 if (n == BATCHSIZE)
2959 break;
2960
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002961 obj = PyIter_Next(iter);
2962 if (obj == NULL) {
2963 if (PyErr_Occurred())
2964 goto error;
2965 break;
2966 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002967 }
2968
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002969 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002970 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002971
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002972 } while (n == BATCHSIZE);
2973 return 0;
2974
2975 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002976 Py_XDECREF(firstitem);
2977 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002978 return -1;
2979}
2980
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002981/* This is a variant of batch_list() above, specialized for lists (with no
2982 * support for list subclasses). Like batch_list(), we batch up chunks of
2983 * MARK item item ... item APPENDS
2984 * opcode sequences. Calling code should have arranged to first create an
2985 * empty list, or list-like object, for the APPENDS to operate on.
2986 * Returns 0 on success, -1 on error.
2987 *
2988 * This version is considerably faster than batch_list(), if less general.
2989 *
2990 * Note that this only works for protocols > 0.
2991 */
2992static int
2993batch_list_exact(PicklerObject *self, PyObject *obj)
2994{
2995 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002996 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002997
2998 const char append_op = APPEND;
2999 const char appends_op = APPENDS;
3000 const char mark_op = MARK;
3001
3002 assert(obj != NULL);
3003 assert(self->proto > 0);
3004 assert(PyList_CheckExact(obj));
3005
3006 if (PyList_GET_SIZE(obj) == 1) {
3007 item = PyList_GET_ITEM(obj, 0);
3008 if (save(self, item, 0) < 0)
3009 return -1;
3010 if (_Pickler_Write(self, &append_op, 1) < 0)
3011 return -1;
3012 return 0;
3013 }
3014
3015 /* Write in batches of BATCHSIZE. */
3016 total = 0;
3017 do {
3018 this_batch = 0;
3019 if (_Pickler_Write(self, &mark_op, 1) < 0)
3020 return -1;
3021 while (total < PyList_GET_SIZE(obj)) {
3022 item = PyList_GET_ITEM(obj, total);
3023 if (save(self, item, 0) < 0)
3024 return -1;
3025 total++;
3026 if (++this_batch == BATCHSIZE)
3027 break;
3028 }
3029 if (_Pickler_Write(self, &appends_op, 1) < 0)
3030 return -1;
3031
3032 } while (total < PyList_GET_SIZE(obj));
3033
3034 return 0;
3035}
3036
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003037static int
3038save_list(PicklerObject *self, PyObject *obj)
3039{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003040 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003041 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003042 int status = 0;
3043
3044 if (self->fast && !fast_save_enter(self, obj))
3045 goto error;
3046
3047 /* Create an empty list. */
3048 if (self->bin) {
3049 header[0] = EMPTY_LIST;
3050 len = 1;
3051 }
3052 else {
3053 header[0] = MARK;
3054 header[1] = LIST;
3055 len = 2;
3056 }
3057
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003058 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003059 goto error;
3060
3061 /* Get list length, and bow out early if empty. */
3062 if ((len = PyList_Size(obj)) < 0)
3063 goto error;
3064
3065 if (memo_put(self, obj) < 0)
3066 goto error;
3067
3068 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003069 /* Materialize the list elements. */
3070 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003071 if (Py_EnterRecursiveCall(" while pickling an object"))
3072 goto error;
3073 status = batch_list_exact(self, obj);
3074 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003075 } else {
3076 PyObject *iter = PyObject_GetIter(obj);
3077 if (iter == NULL)
3078 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003079
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003080 if (Py_EnterRecursiveCall(" while pickling an object")) {
3081 Py_DECREF(iter);
3082 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003083 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003084 status = batch_list(self, iter);
3085 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003086 Py_DECREF(iter);
3087 }
3088 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003089 if (0) {
3090 error:
3091 status = -1;
3092 }
3093
3094 if (self->fast && !fast_save_leave(self, obj))
3095 status = -1;
3096
3097 return status;
3098}
3099
3100/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3101 * MARK key value ... key value SETITEMS
3102 * opcode sequences. Calling code should have arranged to first create an
3103 * empty dict, or dict-like object, for the SETITEMS to operate on.
3104 * Returns 0 on success, <0 on error.
3105 *
3106 * This is very much like batch_list(). The difference between saving
3107 * elements directly, and picking apart two-tuples, is so long-winded at
3108 * the C level, though, that attempts to combine these routines were too
3109 * ugly to bear.
3110 */
3111static int
3112batch_dict(PicklerObject *self, PyObject *iter)
3113{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003114 PyObject *obj = NULL;
3115 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003116 int i, n;
3117
3118 const char mark_op = MARK;
3119 const char setitem_op = SETITEM;
3120 const char setitems_op = SETITEMS;
3121
3122 assert(iter != NULL);
3123
3124 if (self->proto == 0) {
3125 /* SETITEMS isn't available; do one at a time. */
3126 for (;;) {
3127 obj = PyIter_Next(iter);
3128 if (obj == NULL) {
3129 if (PyErr_Occurred())
3130 return -1;
3131 break;
3132 }
3133 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3134 PyErr_SetString(PyExc_TypeError, "dict items "
3135 "iterator must return 2-tuples");
3136 return -1;
3137 }
3138 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3139 if (i >= 0)
3140 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3141 Py_DECREF(obj);
3142 if (i < 0)
3143 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003144 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003145 return -1;
3146 }
3147 return 0;
3148 }
3149
3150 /* proto > 0: write in batches of BATCHSIZE. */
3151 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003152 /* Get first item */
3153 firstitem = PyIter_Next(iter);
3154 if (firstitem == NULL) {
3155 if (PyErr_Occurred())
3156 goto error;
3157
3158 /* nothing more to add */
3159 break;
3160 }
3161 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3162 PyErr_SetString(PyExc_TypeError, "dict items "
3163 "iterator must return 2-tuples");
3164 goto error;
3165 }
3166
3167 /* Try to get a second item */
3168 obj = PyIter_Next(iter);
3169 if (obj == NULL) {
3170 if (PyErr_Occurred())
3171 goto error;
3172
3173 /* Only one item to write */
3174 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3175 goto error;
3176 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3177 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003178 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003179 goto error;
3180 Py_CLEAR(firstitem);
3181 break;
3182 }
3183
3184 /* More than one item to write */
3185
3186 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003187 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003188 goto error;
3189
3190 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3191 goto error;
3192 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3193 goto error;
3194 Py_CLEAR(firstitem);
3195 n = 1;
3196
3197 /* Fetch and save up to BATCHSIZE items */
3198 while (obj) {
3199 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3200 PyErr_SetString(PyExc_TypeError, "dict items "
3201 "iterator must return 2-tuples");
3202 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003203 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003204 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3205 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3206 goto error;
3207 Py_CLEAR(obj);
3208 n += 1;
3209
3210 if (n == BATCHSIZE)
3211 break;
3212
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003213 obj = PyIter_Next(iter);
3214 if (obj == NULL) {
3215 if (PyErr_Occurred())
3216 goto error;
3217 break;
3218 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003219 }
3220
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003221 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003222 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003223
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003224 } while (n == BATCHSIZE);
3225 return 0;
3226
3227 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003228 Py_XDECREF(firstitem);
3229 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003230 return -1;
3231}
3232
Collin Winter5c9b02d2009-05-25 05:43:30 +00003233/* This is a variant of batch_dict() above that specializes for dicts, with no
3234 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3235 * MARK key value ... key value SETITEMS
3236 * opcode sequences. Calling code should have arranged to first create an
3237 * empty dict, or dict-like object, for the SETITEMS to operate on.
3238 * Returns 0 on success, -1 on error.
3239 *
3240 * Note that this currently doesn't work for protocol 0.
3241 */
3242static int
3243batch_dict_exact(PicklerObject *self, PyObject *obj)
3244{
3245 PyObject *key = NULL, *value = NULL;
3246 int i;
3247 Py_ssize_t dict_size, ppos = 0;
3248
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00003249 const char mark_op = MARK;
3250 const char setitem_op = SETITEM;
3251 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00003252
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003253 assert(obj != NULL && PyDict_CheckExact(obj));
Collin Winter5c9b02d2009-05-25 05:43:30 +00003254 assert(self->proto > 0);
3255
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003256 dict_size = PyDict_GET_SIZE(obj);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003257
3258 /* Special-case len(d) == 1 to save space. */
3259 if (dict_size == 1) {
3260 PyDict_Next(obj, &ppos, &key, &value);
3261 if (save(self, key, 0) < 0)
3262 return -1;
3263 if (save(self, value, 0) < 0)
3264 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003265 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003266 return -1;
3267 return 0;
3268 }
3269
3270 /* Write in batches of BATCHSIZE. */
3271 do {
3272 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003273 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003274 return -1;
3275 while (PyDict_Next(obj, &ppos, &key, &value)) {
3276 if (save(self, key, 0) < 0)
3277 return -1;
3278 if (save(self, value, 0) < 0)
3279 return -1;
3280 if (++i == BATCHSIZE)
3281 break;
3282 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003283 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003284 return -1;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003285 if (PyDict_GET_SIZE(obj) != dict_size) {
Collin Winter5c9b02d2009-05-25 05:43:30 +00003286 PyErr_Format(
3287 PyExc_RuntimeError,
3288 "dictionary changed size during iteration");
3289 return -1;
3290 }
3291
3292 } while (i == BATCHSIZE);
3293 return 0;
3294}
3295
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003296static int
3297save_dict(PicklerObject *self, PyObject *obj)
3298{
3299 PyObject *items, *iter;
3300 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003301 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003302 int status = 0;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003303 assert(PyDict_Check(obj));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003304
3305 if (self->fast && !fast_save_enter(self, obj))
3306 goto error;
3307
3308 /* Create an empty dict. */
3309 if (self->bin) {
3310 header[0] = EMPTY_DICT;
3311 len = 1;
3312 }
3313 else {
3314 header[0] = MARK;
3315 header[1] = DICT;
3316 len = 2;
3317 }
3318
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003319 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003320 goto error;
3321
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003322 if (memo_put(self, obj) < 0)
3323 goto error;
3324
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003325 if (PyDict_GET_SIZE(obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003326 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00003327 if (PyDict_CheckExact(obj) && self->proto > 0) {
3328 /* We can take certain shortcuts if we know this is a dict and
3329 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003330 if (Py_EnterRecursiveCall(" while pickling an object"))
3331 goto error;
3332 status = batch_dict_exact(self, obj);
3333 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003334 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003335 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003336
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003337 items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003338 if (items == NULL)
3339 goto error;
3340 iter = PyObject_GetIter(items);
3341 Py_DECREF(items);
3342 if (iter == NULL)
3343 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003344 if (Py_EnterRecursiveCall(" while pickling an object")) {
3345 Py_DECREF(iter);
3346 goto error;
3347 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00003348 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003349 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003350 Py_DECREF(iter);
3351 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003352 }
3353
3354 if (0) {
3355 error:
3356 status = -1;
3357 }
3358
3359 if (self->fast && !fast_save_leave(self, obj))
3360 status = -1;
3361
3362 return status;
3363}
3364
3365static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003366save_set(PicklerObject *self, PyObject *obj)
3367{
3368 PyObject *item;
3369 int i;
3370 Py_ssize_t set_size, ppos = 0;
3371 Py_hash_t hash;
3372
3373 const char empty_set_op = EMPTY_SET;
3374 const char mark_op = MARK;
3375 const char additems_op = ADDITEMS;
3376
3377 if (self->proto < 4) {
3378 PyObject *items;
3379 PyObject *reduce_value;
3380 int status;
3381
3382 items = PySequence_List(obj);
3383 if (items == NULL) {
3384 return -1;
3385 }
3386 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3387 Py_DECREF(items);
3388 if (reduce_value == NULL) {
3389 return -1;
3390 }
3391 /* save_reduce() will memoize the object automatically. */
3392 status = save_reduce(self, reduce_value, obj);
3393 Py_DECREF(reduce_value);
3394 return status;
3395 }
3396
3397 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3398 return -1;
3399
3400 if (memo_put(self, obj) < 0)
3401 return -1;
3402
3403 set_size = PySet_GET_SIZE(obj);
3404 if (set_size == 0)
3405 return 0; /* nothing to do */
3406
3407 /* Write in batches of BATCHSIZE. */
3408 do {
3409 i = 0;
3410 if (_Pickler_Write(self, &mark_op, 1) < 0)
3411 return -1;
3412 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3413 if (save(self, item, 0) < 0)
3414 return -1;
3415 if (++i == BATCHSIZE)
3416 break;
3417 }
3418 if (_Pickler_Write(self, &additems_op, 1) < 0)
3419 return -1;
3420 if (PySet_GET_SIZE(obj) != set_size) {
3421 PyErr_Format(
3422 PyExc_RuntimeError,
3423 "set changed size during iteration");
3424 return -1;
3425 }
3426 } while (i == BATCHSIZE);
3427
3428 return 0;
3429}
3430
3431static int
3432save_frozenset(PicklerObject *self, PyObject *obj)
3433{
3434 PyObject *iter;
3435
3436 const char mark_op = MARK;
3437 const char frozenset_op = FROZENSET;
3438
3439 if (self->fast && !fast_save_enter(self, obj))
3440 return -1;
3441
3442 if (self->proto < 4) {
3443 PyObject *items;
3444 PyObject *reduce_value;
3445 int status;
3446
3447 items = PySequence_List(obj);
3448 if (items == NULL) {
3449 return -1;
3450 }
3451 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3452 items);
3453 Py_DECREF(items);
3454 if (reduce_value == NULL) {
3455 return -1;
3456 }
3457 /* save_reduce() will memoize the object automatically. */
3458 status = save_reduce(self, reduce_value, obj);
3459 Py_DECREF(reduce_value);
3460 return status;
3461 }
3462
3463 if (_Pickler_Write(self, &mark_op, 1) < 0)
3464 return -1;
3465
3466 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003467 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01003468 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003469 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003470 for (;;) {
3471 PyObject *item;
3472
3473 item = PyIter_Next(iter);
3474 if (item == NULL) {
3475 if (PyErr_Occurred()) {
3476 Py_DECREF(iter);
3477 return -1;
3478 }
3479 break;
3480 }
3481 if (save(self, item, 0) < 0) {
3482 Py_DECREF(item);
3483 Py_DECREF(iter);
3484 return -1;
3485 }
3486 Py_DECREF(item);
3487 }
3488 Py_DECREF(iter);
3489
3490 /* If the object is already in the memo, this means it is
3491 recursive. In this case, throw away everything we put on the
3492 stack, and fetch the object back from the memo. */
3493 if (PyMemoTable_Get(self->memo, obj)) {
3494 const char pop_mark_op = POP_MARK;
3495
3496 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3497 return -1;
3498 if (memo_get(self, obj) < 0)
3499 return -1;
3500 return 0;
3501 }
3502
3503 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3504 return -1;
3505 if (memo_put(self, obj) < 0)
3506 return -1;
3507
3508 return 0;
3509}
3510
3511static int
3512fix_imports(PyObject **module_name, PyObject **global_name)
3513{
3514 PyObject *key;
3515 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003516 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003517
3518 key = PyTuple_Pack(2, *module_name, *global_name);
3519 if (key == NULL)
3520 return -1;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003521 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003522 Py_DECREF(key);
3523 if (item) {
3524 PyObject *fixed_module_name;
3525 PyObject *fixed_global_name;
3526
3527 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3528 PyErr_Format(PyExc_RuntimeError,
3529 "_compat_pickle.REVERSE_NAME_MAPPING values "
3530 "should be 2-tuples, not %.200s",
3531 Py_TYPE(item)->tp_name);
3532 return -1;
3533 }
3534 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3535 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3536 if (!PyUnicode_Check(fixed_module_name) ||
3537 !PyUnicode_Check(fixed_global_name)) {
3538 PyErr_Format(PyExc_RuntimeError,
3539 "_compat_pickle.REVERSE_NAME_MAPPING values "
3540 "should be pairs of str, not (%.200s, %.200s)",
3541 Py_TYPE(fixed_module_name)->tp_name,
3542 Py_TYPE(fixed_global_name)->tp_name);
3543 return -1;
3544 }
3545
3546 Py_CLEAR(*module_name);
3547 Py_CLEAR(*global_name);
3548 Py_INCREF(fixed_module_name);
3549 Py_INCREF(fixed_global_name);
3550 *module_name = fixed_module_name;
3551 *global_name = fixed_global_name;
Serhiy Storchakabfe18242015-03-31 13:12:37 +03003552 return 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003553 }
3554 else if (PyErr_Occurred()) {
3555 return -1;
3556 }
3557
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003558 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003559 if (item) {
3560 if (!PyUnicode_Check(item)) {
3561 PyErr_Format(PyExc_RuntimeError,
3562 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3563 "should be strings, not %.200s",
3564 Py_TYPE(item)->tp_name);
3565 return -1;
3566 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003567 Py_INCREF(item);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003568 Py_XSETREF(*module_name, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003569 }
3570 else if (PyErr_Occurred()) {
3571 return -1;
3572 }
3573
3574 return 0;
3575}
3576
3577static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003578save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3579{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003580 PyObject *global_name = NULL;
3581 PyObject *module_name = NULL;
3582 PyObject *module = NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003583 PyObject *parent = NULL;
3584 PyObject *dotted_path = NULL;
3585 PyObject *lastname = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003586 PyObject *cls;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003587 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003588 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003589 _Py_IDENTIFIER(__name__);
3590 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003591
3592 const char global_op = GLOBAL;
3593
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003594 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003595 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003596 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003597 }
3598 else {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003599 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3600 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003601 if (global_name == NULL) {
3602 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3603 if (global_name == NULL)
3604 goto error;
3605 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003606 }
3607
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003608 dotted_path = get_dotted_path(module, global_name);
3609 if (dotted_path == NULL)
3610 goto error;
3611 module_name = whichmodule(obj, dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003612 if (module_name == NULL)
3613 goto error;
3614
3615 /* XXX: Change to use the import C API directly with level=0 to disallow
3616 relative imports.
3617
3618 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3619 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3620 custom import functions (IMHO, this would be a nice security
3621 feature). The import C API would need to be extended to support the
3622 extra parameters of __import__ to fix that. */
3623 module = PyImport_Import(module_name);
3624 if (module == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003625 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003626 "Can't pickle %R: import of module %R failed",
3627 obj, module_name);
3628 goto error;
3629 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003630 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3631 Py_INCREF(lastname);
3632 cls = get_deep_attribute(module, dotted_path, &parent);
3633 Py_CLEAR(dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003634 if (cls == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003635 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003636 "Can't pickle %R: attribute lookup %S on %S failed",
3637 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003638 goto error;
3639 }
3640 if (cls != obj) {
3641 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003642 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003643 "Can't pickle %R: it's not the same object as %S.%S",
3644 obj, module_name, global_name);
3645 goto error;
3646 }
3647 Py_DECREF(cls);
3648
3649 if (self->proto >= 2) {
3650 /* See whether this is in the extension registry, and if
3651 * so generate an EXT opcode.
3652 */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003653 PyObject *extension_key;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003654 PyObject *code_obj; /* extension code as Python object */
3655 long code; /* extension code as C value */
3656 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003657 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003658
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003659 extension_key = PyTuple_Pack(2, module_name, global_name);
3660 if (extension_key == NULL) {
3661 goto error;
3662 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003663 code_obj = PyDict_GetItemWithError(st->extension_registry,
3664 extension_key);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003665 Py_DECREF(extension_key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003666 /* The object is not registered in the extension registry.
3667 This is the most likely code path. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003668 if (code_obj == NULL) {
3669 if (PyErr_Occurred()) {
3670 goto error;
3671 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003672 goto gen_global;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003673 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003674
3675 /* XXX: pickle.py doesn't check neither the type, nor the range
3676 of the value returned by the extension_registry. It should for
3677 consistency. */
3678
3679 /* Verify code_obj has the right type and value. */
3680 if (!PyLong_Check(code_obj)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003681 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003682 "Can't pickle %R: extension code %R isn't an integer",
3683 obj, code_obj);
3684 goto error;
3685 }
3686 code = PyLong_AS_LONG(code_obj);
3687 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003688 if (!PyErr_Occurred())
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003689 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3690 "code %ld is out of range", obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003691 goto error;
3692 }
3693
3694 /* Generate an EXT opcode. */
3695 if (code <= 0xff) {
3696 pdata[0] = EXT1;
3697 pdata[1] = (unsigned char)code;
3698 n = 2;
3699 }
3700 else if (code <= 0xffff) {
3701 pdata[0] = EXT2;
3702 pdata[1] = (unsigned char)(code & 0xff);
3703 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3704 n = 3;
3705 }
3706 else {
3707 pdata[0] = EXT4;
3708 pdata[1] = (unsigned char)(code & 0xff);
3709 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3710 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3711 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3712 n = 5;
3713 }
3714
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003715 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003716 goto error;
3717 }
3718 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003719 gen_global:
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003720 if (parent == module) {
3721 Py_INCREF(lastname);
3722 Py_DECREF(global_name);
3723 global_name = lastname;
3724 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003725 if (self->proto >= 4) {
3726 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003727
Christian Heimese8b1ba12013-11-23 21:13:39 +01003728 if (save(self, module_name, 0) < 0)
3729 goto error;
3730 if (save(self, global_name, 0) < 0)
3731 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003732
3733 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3734 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003735 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003736 else if (parent != module) {
3737 PickleState *st = _Pickle_GetGlobalState();
3738 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3739 st->getattr, parent, lastname);
Alexey Izbyshevf8c06b02018-08-22 07:51:25 +03003740 if (reduce_value == NULL)
3741 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003742 status = save_reduce(self, reduce_value, NULL);
3743 Py_DECREF(reduce_value);
3744 if (status < 0)
3745 goto error;
3746 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003747 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003748 /* Generate a normal global opcode if we are using a pickle
3749 protocol < 4, or if the object is not registered in the
3750 extension registry. */
3751 PyObject *encoded;
3752 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003753
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003754 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003755 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003756
3757 /* For protocol < 3 and if the user didn't request against doing
3758 so, we convert module names to the old 2.x module names. */
3759 if (self->proto < 3 && self->fix_imports) {
3760 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003761 goto error;
3762 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003763 }
3764
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003765 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3766 both the module name and the global name using UTF-8. We do so
3767 only when we are using the pickle protocol newer than version
3768 3. This is to ensure compatibility with older Unpickler running
3769 on Python 2.x. */
3770 if (self->proto == 3) {
3771 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003772 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003773 else {
3774 unicode_encoder = PyUnicode_AsASCIIString;
3775 }
3776 encoded = unicode_encoder(module_name);
3777 if (encoded == NULL) {
3778 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003779 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003780 "can't pickle module identifier '%S' using "
3781 "pickle protocol %i",
3782 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003783 goto error;
3784 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003785 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3786 PyBytes_GET_SIZE(encoded)) < 0) {
3787 Py_DECREF(encoded);
3788 goto error;
3789 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003790 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003791 if(_Pickler_Write(self, "\n", 1) < 0)
3792 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003793
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003794 /* Save the name of the module. */
3795 encoded = unicode_encoder(global_name);
3796 if (encoded == NULL) {
3797 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003798 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003799 "can't pickle global identifier '%S' using "
3800 "pickle protocol %i",
3801 global_name, self->proto);
3802 goto error;
3803 }
3804 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3805 PyBytes_GET_SIZE(encoded)) < 0) {
3806 Py_DECREF(encoded);
3807 goto error;
3808 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003809 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003810 if (_Pickler_Write(self, "\n", 1) < 0)
3811 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003812 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003813 /* Memoize the object. */
3814 if (memo_put(self, obj) < 0)
3815 goto error;
3816 }
3817
3818 if (0) {
3819 error:
3820 status = -1;
3821 }
3822 Py_XDECREF(module_name);
3823 Py_XDECREF(global_name);
3824 Py_XDECREF(module);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003825 Py_XDECREF(parent);
3826 Py_XDECREF(dotted_path);
3827 Py_XDECREF(lastname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003828
3829 return status;
3830}
3831
3832static int
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003833save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3834{
3835 PyObject *reduce_value;
3836 int status;
3837
3838 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3839 if (reduce_value == NULL) {
3840 return -1;
3841 }
3842 status = save_reduce(self, reduce_value, obj);
3843 Py_DECREF(reduce_value);
3844 return status;
3845}
3846
3847static int
3848save_type(PicklerObject *self, PyObject *obj)
3849{
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003850 if (obj == (PyObject *)&_PyNone_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003851 return save_singleton_type(self, obj, Py_None);
3852 }
3853 else if (obj == (PyObject *)&PyEllipsis_Type) {
3854 return save_singleton_type(self, obj, Py_Ellipsis);
3855 }
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003856 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003857 return save_singleton_type(self, obj, Py_NotImplemented);
3858 }
3859 return save_global(self, obj, NULL);
3860}
3861
3862static int
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003863save_pers(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003864{
3865 PyObject *pid = NULL;
3866 int status = 0;
3867
3868 const char persid_op = PERSID;
3869 const char binpersid_op = BINPERSID;
3870
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003871 pid = call_method(self->pers_func, self->pers_func_self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003872 if (pid == NULL)
3873 return -1;
3874
3875 if (pid != Py_None) {
3876 if (self->bin) {
3877 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003878 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003879 goto error;
3880 }
3881 else {
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003882 PyObject *pid_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003883
3884 pid_str = PyObject_Str(pid);
3885 if (pid_str == NULL)
3886 goto error;
3887
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003888 /* XXX: Should it check whether the pid contains embedded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003889 newlines? */
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003890 if (!PyUnicode_IS_ASCII(pid_str)) {
3891 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3892 "persistent IDs in protocol 0 must be "
3893 "ASCII strings");
3894 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003895 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003896 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003897
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003898 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003899 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3900 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3901 _Pickler_Write(self, "\n", 1) < 0) {
3902 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003903 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003904 }
3905 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003906 }
3907 status = 1;
3908 }
3909
3910 if (0) {
3911 error:
3912 status = -1;
3913 }
3914 Py_XDECREF(pid);
3915
3916 return status;
3917}
3918
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003919static PyObject *
3920get_class(PyObject *obj)
3921{
3922 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003923 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003924
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003925 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3926 cls = (PyObject *) Py_TYPE(obj);
3927 Py_INCREF(cls);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003928 }
3929 return cls;
3930}
3931
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003932/* We're saving obj, and args is the 2-thru-5 tuple returned by the
3933 * appropriate __reduce__ method for obj.
3934 */
3935static int
3936save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3937{
3938 PyObject *callable;
3939 PyObject *argtup;
3940 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003941 PyObject *listitems = Py_None;
3942 PyObject *dictitems = Py_None;
Pierre Glaser65d98d02019-05-08 21:40:25 +02003943 PyObject *state_setter = Py_None;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003944 PickleState *st = _Pickle_GetGlobalState();
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003945 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003946 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003947
3948 const char reduce_op = REDUCE;
3949 const char build_op = BUILD;
3950 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003951 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003952
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003953 size = PyTuple_Size(args);
Pierre Glaser65d98d02019-05-08 21:40:25 +02003954 if (size < 2 || size > 6) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003955 PyErr_SetString(st->PicklingError, "tuple returned by "
Pierre Glaser65d98d02019-05-08 21:40:25 +02003956 "__reduce__ must contain 2 through 6 elements");
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003957 return -1;
3958 }
3959
Pierre Glaser65d98d02019-05-08 21:40:25 +02003960 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
3961 &callable, &argtup, &state, &listitems, &dictitems,
3962 &state_setter))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003963 return -1;
3964
3965 if (!PyCallable_Check(callable)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003966 PyErr_SetString(st->PicklingError, "first item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003967 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003968 return -1;
3969 }
3970 if (!PyTuple_Check(argtup)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003971 PyErr_SetString(st->PicklingError, "second item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003972 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003973 return -1;
3974 }
3975
3976 if (state == Py_None)
3977 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003978
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003979 if (listitems == Py_None)
3980 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003981 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003982 PyErr_Format(st->PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003983 "returned by __reduce__ must be an iterator, not %s",
3984 Py_TYPE(listitems)->tp_name);
3985 return -1;
3986 }
3987
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003988 if (dictitems == Py_None)
3989 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003990 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003991 PyErr_Format(st->PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003992 "returned by __reduce__ must be an iterator, not %s",
3993 Py_TYPE(dictitems)->tp_name);
3994 return -1;
3995 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003996
Pierre Glaser65d98d02019-05-08 21:40:25 +02003997 if (state_setter == Py_None)
3998 state_setter = NULL;
3999 else if (!PyCallable_Check(state_setter)) {
4000 PyErr_Format(st->PicklingError, "sixth element of the tuple "
4001 "returned by __reduce__ must be a function, not %s",
4002 Py_TYPE(state_setter)->tp_name);
4003 return -1;
4004 }
4005
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004006 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004007 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004008 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004009
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004010 if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
4011 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004012 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004013 if (name != NULL && PyUnicode_Check(name)) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004014 _Py_IDENTIFIER(__newobj_ex__);
Serhiy Storchakaf0f35a62017-01-09 10:09:43 +02004015 use_newobj_ex = _PyUnicode_EqualToASCIIId(
4016 name, &PyId___newobj_ex__);
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02004017 if (!use_newobj_ex) {
4018 _Py_IDENTIFIER(__newobj__);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02004019 use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02004020 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004021 }
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02004022 Py_XDECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004023 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004024
4025 if (use_newobj_ex) {
4026 PyObject *cls;
4027 PyObject *args;
4028 PyObject *kwargs;
4029
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004030 if (PyTuple_GET_SIZE(argtup) != 3) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004031 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004032 "length of the NEWOBJ_EX argument tuple must be "
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004033 "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004034 return -1;
4035 }
4036
4037 cls = PyTuple_GET_ITEM(argtup, 0);
4038 if (!PyType_Check(cls)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004039 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004040 "first item from NEWOBJ_EX argument tuple must "
4041 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
4042 return -1;
4043 }
4044 args = PyTuple_GET_ITEM(argtup, 1);
4045 if (!PyTuple_Check(args)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004046 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004047 "second item from NEWOBJ_EX argument tuple must "
4048 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
4049 return -1;
4050 }
4051 kwargs = PyTuple_GET_ITEM(argtup, 2);
4052 if (!PyDict_Check(kwargs)) {
Larry Hastings61272b72014-01-07 12:41:53 -08004053 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004054 "third item from NEWOBJ_EX argument tuple must "
4055 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
4056 return -1;
4057 }
4058
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004059 if (self->proto >= 4) {
4060 if (save(self, cls, 0) < 0 ||
4061 save(self, args, 0) < 0 ||
4062 save(self, kwargs, 0) < 0 ||
4063 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
4064 return -1;
4065 }
4066 }
4067 else {
4068 PyObject *newargs;
4069 PyObject *cls_new;
4070 Py_ssize_t i;
4071 _Py_IDENTIFIER(__new__);
4072
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004073 newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004074 if (newargs == NULL)
4075 return -1;
4076
4077 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
4078 if (cls_new == NULL) {
4079 Py_DECREF(newargs);
4080 return -1;
4081 }
4082 PyTuple_SET_ITEM(newargs, 0, cls_new);
4083 Py_INCREF(cls);
4084 PyTuple_SET_ITEM(newargs, 1, cls);
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004085 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03004086 PyObject *item = PyTuple_GET_ITEM(args, i);
4087 Py_INCREF(item);
4088 PyTuple_SET_ITEM(newargs, i + 2, item);
4089 }
4090
4091 callable = PyObject_Call(st->partial, newargs, kwargs);
4092 Py_DECREF(newargs);
4093 if (callable == NULL)
4094 return -1;
4095
4096 newargs = PyTuple_New(0);
4097 if (newargs == NULL) {
4098 Py_DECREF(callable);
4099 return -1;
4100 }
4101
4102 if (save(self, callable, 0) < 0 ||
4103 save(self, newargs, 0) < 0 ||
4104 _Pickler_Write(self, &reduce_op, 1) < 0) {
4105 Py_DECREF(newargs);
4106 Py_DECREF(callable);
4107 return -1;
4108 }
4109 Py_DECREF(newargs);
4110 Py_DECREF(callable);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004111 }
4112 }
4113 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004114 PyObject *cls;
4115 PyObject *newargtup;
4116 PyObject *obj_class;
4117 int p;
4118
4119 /* Sanity checks. */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004120 if (PyTuple_GET_SIZE(argtup) < 1) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004121 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004122 return -1;
4123 }
4124
4125 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004126 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004127 PyErr_SetString(st->PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004128 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004129 return -1;
4130 }
4131
4132 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01004133 obj_class = get_class(obj);
Zackery Spytz25d38972018-12-05 11:29:20 -07004134 if (obj_class == NULL) {
4135 return -1;
4136 }
4137 p = obj_class != cls;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004138 Py_DECREF(obj_class);
4139 if (p) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004140 PyErr_SetString(st->PicklingError, "args[0] from "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004141 "__newobj__ args has the wrong class");
4142 return -1;
4143 }
4144 }
4145 /* XXX: These calls save() are prone to infinite recursion. Imagine
4146 what happen if the value returned by the __reduce__() method of
4147 some extension type contains another object of the same type. Ouch!
4148
4149 Here is a quick example, that I ran into, to illustrate what I
4150 mean:
4151
4152 >>> import pickle, copyreg
4153 >>> copyreg.dispatch_table.pop(complex)
4154 >>> pickle.dumps(1+2j)
4155 Traceback (most recent call last):
4156 ...
Yury Selivanovf488fb42015-07-03 01:04:23 -04004157 RecursionError: maximum recursion depth exceeded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004158
4159 Removing the complex class from copyreg.dispatch_table made the
4160 __reduce_ex__() method emit another complex object:
4161
4162 >>> (1+1j).__reduce_ex__(2)
4163 (<function __newobj__ at 0xb7b71c3c>,
4164 (<class 'complex'>, (1+1j)), None, None, None)
4165
4166 Thus when save() was called on newargstup (the 2nd item) recursion
4167 ensued. Of course, the bug was in the complex class which had a
4168 broken __getnewargs__() that emitted another complex object. But,
4169 the point, here, is it is quite easy to end up with a broken reduce
4170 function. */
4171
4172 /* Save the class and its __new__ arguments. */
4173 if (save(self, cls, 0) < 0)
4174 return -1;
4175
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004176 newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004177 if (newargtup == NULL)
4178 return -1;
4179
4180 p = save(self, newargtup, 0);
4181 Py_DECREF(newargtup);
4182 if (p < 0)
4183 return -1;
4184
4185 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004186 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004187 return -1;
4188 }
4189 else { /* Not using NEWOBJ. */
4190 if (save(self, callable, 0) < 0 ||
4191 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004192 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004193 return -1;
4194 }
4195
4196 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
4197 the caller do not want to memoize the object. Not particularly useful,
4198 but that is to mimic the behavior save_reduce() in pickle.py when
4199 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004200 if (obj != NULL) {
4201 /* If the object is already in the memo, this means it is
4202 recursive. In this case, throw away everything we put on the
4203 stack, and fetch the object back from the memo. */
4204 if (PyMemoTable_Get(self->memo, obj)) {
4205 const char pop_op = POP;
4206
4207 if (_Pickler_Write(self, &pop_op, 1) < 0)
4208 return -1;
4209 if (memo_get(self, obj) < 0)
4210 return -1;
4211
4212 return 0;
4213 }
4214 else if (memo_put(self, obj) < 0)
4215 return -1;
4216 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004217
4218 if (listitems && batch_list(self, listitems) < 0)
4219 return -1;
4220
4221 if (dictitems && batch_dict(self, dictitems) < 0)
4222 return -1;
4223
4224 if (state) {
Pierre Glaser65d98d02019-05-08 21:40:25 +02004225 if (state_setter == NULL) {
4226 if (save(self, state, 0) < 0 ||
4227 _Pickler_Write(self, &build_op, 1) < 0)
4228 return -1;
4229 }
4230 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004231
Pierre Glaser65d98d02019-05-08 21:40:25 +02004232 /* If a state_setter is specified, call it instead of load_build to
4233 * update obj's with its previous state.
4234 * The first 4 save/write instructions push state_setter and its
4235 * tuple of expected arguments (obj, state) onto the stack. The
4236 * REDUCE opcode triggers the state_setter(obj, state) function
4237 * call. Finally, because state-updating routines only do in-place
4238 * modification, the whole operation has to be stack-transparent.
4239 * Thus, we finally pop the call's output from the stack.*/
4240
4241 const char tupletwo_op = TUPLE2;
4242 const char pop_op = POP;
4243 if (save(self, state_setter, 0) < 0 ||
4244 save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
4245 _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
4246 _Pickler_Write(self, &reduce_op, 1) < 0 ||
4247 _Pickler_Write(self, &pop_op, 1) < 0)
4248 return -1;
4249 }
4250 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004251 return 0;
4252}
4253
4254static int
4255save(PicklerObject *self, PyObject *obj, int pers_save)
4256{
4257 PyTypeObject *type;
4258 PyObject *reduce_func = NULL;
4259 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004260 int status = 0;
4261
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004262 if (_Pickler_OpcodeBoundary(self) < 0)
4263 return -1;
4264
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004265 /* The extra pers_save argument is necessary to avoid calling save_pers()
4266 on its returned object. */
4267 if (!pers_save && self->pers_func) {
4268 /* save_pers() returns:
4269 -1 to signal an error;
4270 0 if it did nothing successfully;
4271 1 if a persistent id was saved.
4272 */
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004273 if ((status = save_pers(self, obj)) != 0)
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004274 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004275 }
4276
4277 type = Py_TYPE(obj);
4278
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004279 /* The old cPickle had an optimization that used switch-case statement
4280 dispatching on the first letter of the type name. This has was removed
4281 since benchmarks shown that this optimization was actually slowing
4282 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004283
4284 /* Atom types; these aren't memoized, so don't check the memo. */
4285
4286 if (obj == Py_None) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004287 return save_none(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004288 }
4289 else if (obj == Py_False || obj == Py_True) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004290 return save_bool(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004291 }
4292 else if (type == &PyLong_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004293 return save_long(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004294 }
4295 else if (type == &PyFloat_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004296 return save_float(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004297 }
4298
4299 /* Check the memo to see if it has the object. If so, generate
4300 a GET (or BINGET) opcode, instead of pickling the object
4301 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004302 if (PyMemoTable_Get(self->memo, obj)) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004303 return memo_get(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004304 }
4305
4306 if (type == &PyBytes_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004307 return save_bytes(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004308 }
4309 else if (type == &PyUnicode_Type) {
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004310 return save_unicode(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004311 }
Serhiy Storchaka5d4cb542018-07-18 10:10:49 +03004312
4313 /* We're only calling Py_EnterRecursiveCall here so that atomic
4314 types above are pickled faster. */
4315 if (Py_EnterRecursiveCall(" while pickling an object")) {
4316 return -1;
4317 }
4318
4319 if (type == &PyDict_Type) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004320 status = save_dict(self, obj);
4321 goto done;
4322 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004323 else if (type == &PySet_Type) {
4324 status = save_set(self, obj);
4325 goto done;
4326 }
4327 else if (type == &PyFrozenSet_Type) {
4328 status = save_frozenset(self, obj);
4329 goto done;
4330 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004331 else if (type == &PyList_Type) {
4332 status = save_list(self, obj);
4333 goto done;
4334 }
4335 else if (type == &PyTuple_Type) {
4336 status = save_tuple(self, obj);
4337 goto done;
4338 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02004339 else if (type == &PyByteArray_Type) {
4340 status = save_bytearray(self, obj);
4341 goto done;
4342 }
4343 else if (type == &PyPickleBuffer_Type) {
4344 status = save_picklebuffer(self, obj);
4345 goto done;
4346 }
Pierre Glaser289f1f82019-05-08 23:08:25 +02004347
4348 /* Now, check reducer_override. If it returns NotImplemented,
4349 * fallback to save_type or save_global, and then perhaps to the
4350 * regular reduction mechanism.
4351 */
4352 if (self->reducer_override != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01004353 reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004354 if (reduce_value == NULL) {
4355 goto error;
4356 }
4357 if (reduce_value != Py_NotImplemented) {
4358 goto reduce;
4359 }
4360 Py_DECREF(reduce_value);
4361 reduce_value = NULL;
4362 }
4363
4364 if (type == &PyType_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08004365 status = save_type(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004366 goto done;
4367 }
4368 else if (type == &PyFunction_Type) {
4369 status = save_global(self, obj, NULL);
Alexandre Vassalottifc912852013-11-24 03:07:35 -08004370 goto done;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004371 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004372
4373 /* XXX: This part needs some unit tests. */
4374
4375 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004376 * self.dispatch_table, copyreg.dispatch_table, the object's
4377 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004378 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004379 if (self->dispatch_table == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004380 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08004381 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
4382 (PyObject *)type);
4383 if (reduce_func == NULL) {
4384 if (PyErr_Occurred()) {
4385 goto error;
4386 }
4387 } else {
4388 /* PyDict_GetItemWithError() returns a borrowed reference.
4389 Increase the reference count to be consistent with
4390 PyObject_GetItem and _PyObject_GetAttrId used below. */
4391 Py_INCREF(reduce_func);
4392 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004393 } else {
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08004394 reduce_func = PyObject_GetItem(self->dispatch_table,
4395 (PyObject *)type);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004396 if (reduce_func == NULL) {
4397 if (PyErr_ExceptionMatches(PyExc_KeyError))
4398 PyErr_Clear();
4399 else
4400 goto error;
4401 }
4402 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004403 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004404 Py_INCREF(obj);
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08004405 reduce_value = _Pickle_FastCall(reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004406 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02004407 else if (PyType_IsSubtype(type, &PyType_Type)) {
4408 status = save_global(self, obj, NULL);
4409 goto done;
4410 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004411 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004412 _Py_IDENTIFIER(__reduce__);
4413 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004414
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004415 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
4416 automatically defined as __reduce__. While this is convenient, this
4417 make it impossible to know which method was actually called. Of
4418 course, this is not a big deal. But still, it would be nice to let
4419 the user know which method was called when something go
4420 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
4421 don't actually have to check for a __reduce__ method. */
4422
4423 /* Check for a __reduce_ex__ method. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004424 if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
4425 goto error;
4426 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004427 if (reduce_func != NULL) {
4428 PyObject *proto;
4429 proto = PyLong_FromLong(self->proto);
4430 if (proto != NULL) {
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08004431 reduce_value = _Pickle_FastCall(reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004432 }
4433 }
4434 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004435 /* Check for a __reduce__ method. */
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03004436 if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
4437 goto error;
4438 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004439 if (reduce_func != NULL) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02004440 reduce_value = PyObject_CallNoArgs(reduce_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004441 }
4442 else {
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03004443 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004444 PyErr_Format(st->PicklingError,
4445 "can't pickle '%.200s' object: %R",
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004446 type->tp_name, obj);
4447 goto error;
4448 }
4449 }
4450 }
4451
4452 if (reduce_value == NULL)
4453 goto error;
4454
Pierre Glaser289f1f82019-05-08 23:08:25 +02004455 reduce:
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004456 if (PyUnicode_Check(reduce_value)) {
4457 status = save_global(self, obj, reduce_value);
4458 goto done;
4459 }
4460
4461 if (!PyTuple_Check(reduce_value)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004462 PickleState *st = _Pickle_GetGlobalState();
4463 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004464 "__reduce__ must return a string or tuple");
4465 goto error;
4466 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004467
4468 status = save_reduce(self, reduce_value, obj);
4469
4470 if (0) {
4471 error:
4472 status = -1;
4473 }
4474 done:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004475
Alexandre Vassalottidff18342008-07-13 18:48:30 +00004476 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004477 Py_XDECREF(reduce_func);
4478 Py_XDECREF(reduce_value);
4479
4480 return status;
4481}
4482
4483static int
4484dump(PicklerObject *self, PyObject *obj)
4485{
4486 const char stop_op = STOP;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004487 int status = -1;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004488 PyObject *tmp;
4489 _Py_IDENTIFIER(reducer_override);
4490
4491 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4492 &tmp) < 0) {
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004493 goto error;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004494 }
4495 /* Cache the reducer_override method, if it exists. */
4496 if (tmp != NULL) {
4497 Py_XSETREF(self->reducer_override, tmp);
4498 }
4499 else {
4500 Py_CLEAR(self->reducer_override);
4501 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004502
4503 if (self->proto >= 2) {
4504 char header[2];
4505
4506 header[0] = PROTO;
4507 assert(self->proto >= 0 && self->proto < 256);
4508 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004509 if (_Pickler_Write(self, header, 2) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004510 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004511 if (self->proto >= 4)
4512 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004513 }
4514
4515 if (save(self, obj, 0) < 0 ||
Serhiy Storchakac8695292018-04-04 00:11:27 +03004516 _Pickler_Write(self, &stop_op, 1) < 0 ||
4517 _Pickler_CommitFrame(self) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004518 goto error;
4519
4520 // Success
4521 status = 0;
4522
4523 error:
Serhiy Storchakac8695292018-04-04 00:11:27 +03004524 self->framing = 0;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004525
4526 /* Break the reference cycle we generated at the beginning this function
4527 * call when setting the reducer_override attribute of the Pickler instance
4528 * to a bound method of the same instance. This is important as the Pickler
4529 * instance holds a reference to each object it has pickled (through its
4530 * memo): thus, these objects wont be garbage-collected as long as the
4531 * Pickler itself is not collected. */
4532 Py_CLEAR(self->reducer_override);
4533 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004534}
4535
Larry Hastings61272b72014-01-07 12:41:53 -08004536/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004537
4538_pickle.Pickler.clear_memo
4539
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004540Clears the pickler's "memo".
4541
4542The memo is the data structure that remembers which objects the
4543pickler has already seen, so that shared or recursive objects are
4544pickled by reference and not by value. This method is useful when
4545re-using picklers.
Larry Hastings61272b72014-01-07 12:41:53 -08004546[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004547
Larry Hastings3cceb382014-01-04 11:09:09 -08004548static PyObject *
4549_pickle_Pickler_clear_memo_impl(PicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004550/*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004551{
4552 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004553 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004554
4555 Py_RETURN_NONE;
4556}
4557
Larry Hastings61272b72014-01-07 12:41:53 -08004558/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004559
4560_pickle.Pickler.dump
4561
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004562 obj: object
4563 /
4564
4565Write a pickled representation of the given object to the open file.
Larry Hastings61272b72014-01-07 12:41:53 -08004566[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004567
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004568static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004569_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
Larry Hastings581ee362014-01-28 05:00:08 -08004570/*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004571{
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004572 /* Check whether the Pickler was initialized correctly (issue3664).
4573 Developers often forget to call __init__() in their subclasses, which
4574 would trigger a segfault without this check. */
4575 if (self->write == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004576 PickleState *st = _Pickle_GetGlobalState();
4577 PyErr_Format(st->PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004578 "Pickler.__init__() was not called by %s.__init__()",
4579 Py_TYPE(self)->tp_name);
4580 return NULL;
4581 }
4582
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004583 if (_Pickler_ClearBuffer(self) < 0)
4584 return NULL;
4585
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004586 if (dump(self, obj) < 0)
4587 return NULL;
4588
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004589 if (_Pickler_FlushToFile(self) < 0)
4590 return NULL;
4591
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004592 Py_RETURN_NONE;
4593}
4594
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004595/*[clinic input]
4596
4597_pickle.Pickler.__sizeof__ -> Py_ssize_t
4598
4599Returns size in memory, in bytes.
4600[clinic start generated code]*/
4601
4602static Py_ssize_t
4603_pickle_Pickler___sizeof___impl(PicklerObject *self)
4604/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4605{
4606 Py_ssize_t res, s;
4607
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02004608 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004609 if (self->memo != NULL) {
4610 res += sizeof(PyMemoTable);
4611 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4612 }
4613 if (self->output_buffer != NULL) {
4614 s = _PySys_GetSizeOf(self->output_buffer);
4615 if (s == -1)
4616 return -1;
4617 res += s;
4618 }
4619 return res;
4620}
4621
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004622static struct PyMethodDef Pickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004623 _PICKLE_PICKLER_DUMP_METHODDEF
4624 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004625 _PICKLE_PICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004626 {NULL, NULL} /* sentinel */
4627};
4628
4629static void
4630Pickler_dealloc(PicklerObject *self)
4631{
4632 PyObject_GC_UnTrack(self);
4633
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004634 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004636 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004637 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004638 Py_XDECREF(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004639 Py_XDECREF(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004640 Py_XDECREF(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004641
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004642 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004643
4644 Py_TYPE(self)->tp_free((PyObject *)self);
4645}
4646
4647static int
4648Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4649{
4650 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004651 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004652 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004653 Py_VISIT(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004654 Py_VISIT(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004655 Py_VISIT(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004656 return 0;
4657}
4658
4659static int
4660Pickler_clear(PicklerObject *self)
4661{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004662 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004663 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004664 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004665 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666 Py_CLEAR(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004667 Py_CLEAR(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004668 Py_CLEAR(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004669
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004670 if (self->memo != NULL) {
4671 PyMemoTable *memo = self->memo;
4672 self->memo = NULL;
4673 PyMemoTable_Del(memo);
4674 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004675 return 0;
4676}
4677
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004678
Larry Hastings61272b72014-01-07 12:41:53 -08004679/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004680
4681_pickle.Pickler.__init__
4682
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004683 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004684 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004685 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004686 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004687
4688This takes a binary file for writing a pickle data stream.
4689
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004690The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00004691protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4692protocol is 4. It was introduced in Python 3.4, and is incompatible
4693with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004694
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004695Specifying a negative protocol version selects the highest protocol
4696version supported. The higher the protocol used, the more recent the
4697version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004698
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004699The *file* argument must have a write() method that accepts a single
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004700bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00004701writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004702this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004703
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004704If *fix_imports* is True and protocol is less than 3, pickle will try
4705to map the new Python 3 names to the old module names used in Python
47062, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02004707
4708If *buffer_callback* is None (the default), buffer views are
4709serialized into *file* as part of the pickle stream.
4710
4711If *buffer_callback* is not None, then it can be called any number
4712of times with a buffer view. If the callback returns a false value
4713(such as None), the given buffer is out-of-band; otherwise the
4714buffer is serialized in-band, i.e. inside the pickle stream.
4715
4716It is an error if *buffer_callback* is not None and *protocol*
4717is None or smaller than 5.
4718
Larry Hastings61272b72014-01-07 12:41:53 -08004719[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004720
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004721static int
Larry Hastings89964c42015-04-14 18:07:59 -04004722_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02004723 PyObject *protocol, int fix_imports,
4724 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00004725/*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004726{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004727 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004728 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004730 /* In case of multiple __init__() calls, clear previous content. */
4731 if (self->write != NULL)
4732 (void)Pickler_clear(self);
4733
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004734 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004735 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004736
4737 if (_Pickler_SetOutputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004738 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004739
Antoine Pitrou91f43802019-05-26 17:10:09 +02004740 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4741 return -1;
4742
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004743 /* memo and output_buffer may have already been created in _Pickler_New */
4744 if (self->memo == NULL) {
4745 self->memo = PyMemoTable_New();
4746 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004747 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004748 }
4749 self->output_len = 0;
4750 if (self->output_buffer == NULL) {
4751 self->max_output_len = WRITE_BUF_SIZE;
4752 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4753 self->max_output_len);
4754 if (self->output_buffer == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004755 return -1;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004756 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004758 self->fast = 0;
4759 self->fast_nesting = 0;
4760 self->fast_memo = NULL;
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004761
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004762 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4763 &self->pers_func, &self->pers_func_self) < 0)
4764 {
4765 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004766 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004767
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004768 if (_PyObject_LookupAttrId((PyObject *)self,
4769 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4770 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004771 }
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004772
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004773 return 0;
4774}
4775
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004776
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004777/* Define a proxy object for the Pickler's internal memo object. This is to
4778 * avoid breaking code like:
4779 * pickler.memo.clear()
4780 * and
4781 * pickler.memo = saved_memo
4782 * Is this a good idea? Not really, but we don't want to break code that uses
4783 * it. Note that we don't implement the entire mapping API here. This is
4784 * intentional, as these should be treated as black-box implementation details.
4785 */
4786
Larry Hastings61272b72014-01-07 12:41:53 -08004787/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004788_pickle.PicklerMemoProxy.clear
4789
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004790Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08004791[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004792
Larry Hastings3cceb382014-01-04 11:09:09 -08004793static PyObject *
4794_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004795/*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004796{
4797 if (self->pickler->memo)
4798 PyMemoTable_Clear(self->pickler->memo);
4799 Py_RETURN_NONE;
4800}
4801
Larry Hastings61272b72014-01-07 12:41:53 -08004802/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004803_pickle.PicklerMemoProxy.copy
4804
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004805Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08004806[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004807
Larry Hastings3cceb382014-01-04 11:09:09 -08004808static PyObject *
4809_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004810/*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004811{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004812 PyMemoTable *memo;
4813 PyObject *new_memo = PyDict_New();
4814 if (new_memo == NULL)
4815 return NULL;
4816
4817 memo = self->pickler->memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07004818 for (size_t i = 0; i < memo->mt_allocated; ++i) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004819 PyMemoEntry entry = memo->mt_table[i];
4820 if (entry.me_key != NULL) {
4821 int status;
4822 PyObject *key, *value;
4823
4824 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004825 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004826
4827 if (key == NULL || value == NULL) {
4828 Py_XDECREF(key);
4829 Py_XDECREF(value);
4830 goto error;
4831 }
4832 status = PyDict_SetItem(new_memo, key, value);
4833 Py_DECREF(key);
4834 Py_DECREF(value);
4835 if (status < 0)
4836 goto error;
4837 }
4838 }
4839 return new_memo;
4840
4841 error:
4842 Py_XDECREF(new_memo);
4843 return NULL;
4844}
4845
Larry Hastings61272b72014-01-07 12:41:53 -08004846/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004847_pickle.PicklerMemoProxy.__reduce__
4848
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004849Implement pickle support.
Larry Hastings61272b72014-01-07 12:41:53 -08004850[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004851
Larry Hastings3cceb382014-01-04 11:09:09 -08004852static PyObject *
4853_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004854/*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004855{
4856 PyObject *reduce_value, *dict_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08004857 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004858 if (contents == NULL)
4859 return NULL;
4860
4861 reduce_value = PyTuple_New(2);
4862 if (reduce_value == NULL) {
4863 Py_DECREF(contents);
4864 return NULL;
4865 }
4866 dict_args = PyTuple_New(1);
4867 if (dict_args == NULL) {
4868 Py_DECREF(contents);
4869 Py_DECREF(reduce_value);
4870 return NULL;
4871 }
4872 PyTuple_SET_ITEM(dict_args, 0, contents);
4873 Py_INCREF((PyObject *)&PyDict_Type);
4874 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4875 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4876 return reduce_value;
4877}
4878
4879static PyMethodDef picklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004880 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4881 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4882 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004883 {NULL, NULL} /* sentinel */
4884};
4885
4886static void
4887PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4888{
4889 PyObject_GC_UnTrack(self);
4890 Py_XDECREF(self->pickler);
4891 PyObject_GC_Del((PyObject *)self);
4892}
4893
4894static int
4895PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4896 visitproc visit, void *arg)
4897{
4898 Py_VISIT(self->pickler);
4899 return 0;
4900}
4901
4902static int
4903PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4904{
4905 Py_CLEAR(self->pickler);
4906 return 0;
4907}
4908
4909static PyTypeObject PicklerMemoProxyType = {
4910 PyVarObject_HEAD_INIT(NULL, 0)
4911 "_pickle.PicklerMemoProxy", /*tp_name*/
4912 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4913 0,
4914 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004915 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004916 0, /* tp_getattr */
4917 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004918 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004919 0, /* tp_repr */
4920 0, /* tp_as_number */
4921 0, /* tp_as_sequence */
4922 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00004923 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004924 0, /* tp_call */
4925 0, /* tp_str */
4926 PyObject_GenericGetAttr, /* tp_getattro */
4927 PyObject_GenericSetAttr, /* tp_setattro */
4928 0, /* tp_as_buffer */
4929 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4930 0, /* tp_doc */
4931 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4932 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4933 0, /* tp_richcompare */
4934 0, /* tp_weaklistoffset */
4935 0, /* tp_iter */
4936 0, /* tp_iternext */
4937 picklerproxy_methods, /* tp_methods */
4938};
4939
4940static PyObject *
4941PicklerMemoProxy_New(PicklerObject *pickler)
4942{
4943 PicklerMemoProxyObject *self;
4944
4945 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4946 if (self == NULL)
4947 return NULL;
4948 Py_INCREF(pickler);
4949 self->pickler = pickler;
4950 PyObject_GC_Track(self);
4951 return (PyObject *)self;
4952}
4953
4954/*****************************************************************************/
4955
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004956static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004957Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004958{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004959 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960}
4961
4962static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004963Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004964{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004965 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004966
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004967 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968 PyErr_SetString(PyExc_TypeError,
4969 "attribute deletion is not supported");
4970 return -1;
4971 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004972
Andy Lesterdffe4c02020-03-04 07:15:20 -06004973 if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004974 PicklerObject *pickler =
4975 ((PicklerMemoProxyObject *)obj)->pickler;
4976
4977 new_memo = PyMemoTable_Copy(pickler->memo);
4978 if (new_memo == NULL)
4979 return -1;
4980 }
4981 else if (PyDict_Check(obj)) {
4982 Py_ssize_t i = 0;
4983 PyObject *key, *value;
4984
4985 new_memo = PyMemoTable_New();
4986 if (new_memo == NULL)
4987 return -1;
4988
4989 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004990 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004991 PyObject *memo_obj;
4992
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004993 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004994 PyErr_SetString(PyExc_TypeError,
4995 "'memo' values must be 2-item tuples");
4996 goto error;
4997 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004998 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004999 if (memo_id == -1 && PyErr_Occurred())
5000 goto error;
5001 memo_obj = PyTuple_GET_ITEM(value, 1);
5002 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5003 goto error;
5004 }
5005 }
5006 else {
5007 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02005008 "'memo' attribute must be a PicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005009 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010 return -1;
5011 }
5012
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005013 PyMemoTable_Del(self->memo);
5014 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005015
5016 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005017
5018 error:
5019 if (new_memo)
5020 PyMemoTable_Del(new_memo);
5021 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005022}
5023
5024static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005025Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005026{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005027 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005028 PyErr_SetString(PyExc_AttributeError, "persistent_id");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005029 return NULL;
5030 }
5031 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005032}
5033
5034static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005035Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005036{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005037 if (value == NULL) {
5038 PyErr_SetString(PyExc_TypeError,
5039 "attribute deletion is not supported");
5040 return -1;
5041 }
5042 if (!PyCallable_Check(value)) {
5043 PyErr_SetString(PyExc_TypeError,
5044 "persistent_id must be a callable taking one argument");
5045 return -1;
5046 }
5047
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005048 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005049 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03005050 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005051
5052 return 0;
5053}
5054
5055static PyMemberDef Pickler_members[] = {
5056 {"bin", T_INT, offsetof(PicklerObject, bin)},
5057 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01005058 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005059 {NULL}
5060};
5061
5062static PyGetSetDef Pickler_getsets[] = {
5063 {"memo", (getter)Pickler_get_memo,
5064 (setter)Pickler_set_memo},
5065 {"persistent_id", (getter)Pickler_get_persid,
5066 (setter)Pickler_set_persid},
5067 {NULL}
5068};
5069
5070static PyTypeObject Pickler_Type = {
5071 PyVarObject_HEAD_INIT(NULL, 0)
5072 "_pickle.Pickler" , /*tp_name*/
5073 sizeof(PicklerObject), /*tp_basicsize*/
5074 0, /*tp_itemsize*/
5075 (destructor)Pickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02005076 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005077 0, /*tp_getattr*/
5078 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02005079 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005080 0, /*tp_repr*/
5081 0, /*tp_as_number*/
5082 0, /*tp_as_sequence*/
5083 0, /*tp_as_mapping*/
5084 0, /*tp_hash*/
5085 0, /*tp_call*/
5086 0, /*tp_str*/
5087 0, /*tp_getattro*/
5088 0, /*tp_setattro*/
5089 0, /*tp_as_buffer*/
5090 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08005091 _pickle_Pickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005092 (traverseproc)Pickler_traverse, /*tp_traverse*/
5093 (inquiry)Pickler_clear, /*tp_clear*/
5094 0, /*tp_richcompare*/
5095 0, /*tp_weaklistoffset*/
5096 0, /*tp_iter*/
5097 0, /*tp_iternext*/
5098 Pickler_methods, /*tp_methods*/
5099 Pickler_members, /*tp_members*/
5100 Pickler_getsets, /*tp_getset*/
5101 0, /*tp_base*/
5102 0, /*tp_dict*/
5103 0, /*tp_descr_get*/
5104 0, /*tp_descr_set*/
5105 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08005106 _pickle_Pickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005107 PyType_GenericAlloc, /*tp_alloc*/
5108 PyType_GenericNew, /*tp_new*/
5109 PyObject_GC_Del, /*tp_free*/
5110 0, /*tp_is_gc*/
5111};
5112
Victor Stinner121aab42011-09-29 23:40:53 +02005113/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005114
5115 XXX: It would be nice to able to avoid Python function call overhead, by
5116 using directly the C version of find_class(), when find_class() is not
5117 overridden by a subclass. Although, this could become rather hackish. A
5118 simpler optimization would be to call the C function when self is not a
5119 subclass instance. */
5120static PyObject *
5121find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
5122{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005123 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02005124
Victor Stinner55ba38a2016-12-09 16:09:30 +01005125 return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
5126 module_name, global_name, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005127}
5128
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005129static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005130marker(UnpicklerObject *self)
5131{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005132 Py_ssize_t mark;
5133
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005134 if (self->num_marks < 1) {
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005135 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005136 PyErr_SetString(st->UnpicklingError, "could not find MARK");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005137 return -1;
5138 }
5139
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005140 mark = self->marks[--self->num_marks];
5141 self->stack->mark_set = self->num_marks != 0;
5142 self->stack->fence = self->num_marks ?
5143 self->marks[self->num_marks - 1] : 0;
5144 return mark;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005145}
5146
5147static int
5148load_none(UnpicklerObject *self)
5149{
5150 PDATA_APPEND(self->stack, Py_None, -1);
5151 return 0;
5152}
5153
5154static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005155load_int(UnpicklerObject *self)
5156{
5157 PyObject *value;
5158 char *endptr, *s;
5159 Py_ssize_t len;
5160 long x;
5161
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005162 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005163 return -1;
5164 if (len < 2)
5165 return bad_readline();
5166
5167 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005168 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005169 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005170 x = strtol(s, &endptr, 0);
5171
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005172 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005173 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03005174 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005175 errno = 0;
5176 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005177 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005178 if (value == NULL) {
5179 PyErr_SetString(PyExc_ValueError,
5180 "could not convert string to int");
5181 return -1;
5182 }
5183 }
5184 else {
5185 if (len == 3 && (x == 0 || x == 1)) {
5186 if ((value = PyBool_FromLong(x)) == NULL)
5187 return -1;
5188 }
5189 else {
5190 if ((value = PyLong_FromLong(x)) == NULL)
5191 return -1;
5192 }
5193 }
5194
5195 PDATA_PUSH(self->stack, value, -1);
5196 return 0;
5197}
5198
5199static int
5200load_bool(UnpicklerObject *self, PyObject *boolean)
5201{
5202 assert(boolean == Py_True || boolean == Py_False);
5203 PDATA_APPEND(self->stack, boolean, -1);
5204 return 0;
5205}
5206
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005207/* s contains x bytes of an unsigned little-endian integer. Return its value
5208 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5209 */
5210static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005211calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005212{
5213 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005214 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005215 size_t x = 0;
5216
Serhiy Storchakae0606192015-09-29 22:10:07 +03005217 if (nbytes > (int)sizeof(size_t)) {
5218 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5219 * have 64-bit size that can't be represented on 32-bit platform.
5220 */
5221 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5222 if (s[i])
5223 return -1;
5224 }
5225 nbytes = (int)sizeof(size_t);
5226 }
5227 for (i = 0; i < nbytes; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005228 x |= (size_t) s[i] << (8 * i);
5229 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005230
5231 if (x > PY_SSIZE_T_MAX)
5232 return -1;
5233 else
5234 return (Py_ssize_t) x;
5235}
5236
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2 this is an unsigned
 * little-endian int, but when nbytes is 4 it is a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * Callers in this file pass 1, 2 or 4; nbytes must not exceed
 * sizeof(long).
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    /* Accumulate in an unsigned type: left-shifting a signed long into (or
       past) its sign bit is undefined behaviour, which the 4-byte case hits
       on platforms where long is 32 bits wide. */
    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is ux - 2**32; compute it
     * without overflowing, whatever the width of long.
     */
    if (nbytes == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
5263
5264static int
5265load_binintx(UnpicklerObject *self, char *s, int size)
5266{
5267 PyObject *value;
5268 long x;
5269
5270 x = calc_binint(s, size);
5271
5272 if ((value = PyLong_FromLong(x)) == NULL)
5273 return -1;
5274
5275 PDATA_PUSH(self->stack, value, -1);
5276 return 0;
5277}
5278
5279static int
5280load_binint(UnpicklerObject *self)
5281{
5282 char *s;
5283
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005284 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005285 return -1;
5286
5287 return load_binintx(self, s, 4);
5288}
5289
5290static int
5291load_binint1(UnpicklerObject *self)
5292{
5293 char *s;
5294
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005295 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005296 return -1;
5297
5298 return load_binintx(self, s, 1);
5299}
5300
5301static int
5302load_binint2(UnpicklerObject *self)
5303{
5304 char *s;
5305
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005306 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005307 return -1;
5308
5309 return load_binintx(self, s, 2);
5310}
5311
5312static int
5313load_long(UnpicklerObject *self)
5314{
5315 PyObject *value;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005316 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005317 Py_ssize_t len;
5318
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005319 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005320 return -1;
5321 if (len < 2)
5322 return bad_readline();
5323
Mark Dickinson8dd05142009-01-20 20:43:58 +00005324 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5325 the 'L' before calling PyLong_FromString. In order to maintain
5326 compatibility with Python 3.0.0, we don't actually *require*
5327 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005328 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00005329 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00005330 /* XXX: Should the base argument explicitly set to 10? */
5331 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00005332 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005333 return -1;
5334
5335 PDATA_PUSH(self->stack, value, -1);
5336 return 0;
5337}
5338
5339/* 'size' bytes contain the # of bytes of little-endian 256's-complement
5340 * data following.
5341 */
5342static int
5343load_counted_long(UnpicklerObject *self, int size)
5344{
5345 PyObject *value;
5346 char *nbytes;
5347 char *pdata;
5348
5349 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005350 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005351 return -1;
5352
5353 size = calc_binint(nbytes, size);
5354 if (size < 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005355 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005356 /* Corrupt or hostile pickle -- we never write one like this */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005357 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005358 "LONG pickle has negative byte count");
5359 return -1;
5360 }
5361
5362 if (size == 0)
5363 value = PyLong_FromLong(0L);
5364 else {
5365 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005366 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005367 return -1;
5368 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5369 1 /* little endian */ , 1 /* signed */ );
5370 }
5371 if (value == NULL)
5372 return -1;
5373 PDATA_PUSH(self->stack, value, -1);
5374 return 0;
5375}
5376
5377static int
5378load_float(UnpicklerObject *self)
5379{
5380 PyObject *value;
5381 char *endptr, *s;
5382 Py_ssize_t len;
5383 double d;
5384
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005385 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005386 return -1;
5387 if (len < 2)
5388 return bad_readline();
5389
5390 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00005391 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5392 if (d == -1.0 && PyErr_Occurred())
5393 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005394 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005395 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5396 return -1;
5397 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00005398 value = PyFloat_FromDouble(d);
5399 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005400 return -1;
5401
5402 PDATA_PUSH(self->stack, value, -1);
5403 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005404}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005405
5406static int
5407load_binfloat(UnpicklerObject *self)
5408{
5409 PyObject *value;
5410 double x;
5411 char *s;
5412
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005413 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005414 return -1;
5415
5416 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5417 if (x == -1.0 && PyErr_Occurred())
5418 return -1;
5419
5420 if ((value = PyFloat_FromDouble(x)) == NULL)
5421 return -1;
5422
5423 PDATA_PUSH(self->stack, value, -1);
5424 return 0;
5425}
5426
5427static int
5428load_string(UnpicklerObject *self)
5429{
5430 PyObject *bytes;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005431 PyObject *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005432 Py_ssize_t len;
5433 char *s, *p;
5434
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005435 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005436 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005437 /* Strip the newline */
5438 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005439 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005440 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005441 p = s + 1;
5442 len -= 2;
5443 }
5444 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005445 PickleState *st = _Pickle_GetGlobalState();
5446 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005447 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005448 return -1;
5449 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005450 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005451
5452 /* Use the PyBytes API to decode the string, since that is what is used
5453 to encode, and then coerce the result to Unicode. */
5454 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005455 if (bytes == NULL)
5456 return -1;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005457
5458 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5459 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5460 if (strcmp(self->encoding, "bytes") == 0) {
5461 obj = bytes;
5462 }
5463 else {
5464 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5465 Py_DECREF(bytes);
5466 if (obj == NULL) {
5467 return -1;
5468 }
5469 }
5470
5471 PDATA_PUSH(self->stack, obj, -1);
5472 return 0;
5473}
5474
5475static int
5476load_counted_binstring(UnpicklerObject *self, int nbytes)
5477{
5478 PyObject *obj;
5479 Py_ssize_t size;
5480 char *s;
5481
5482 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005483 return -1;
5484
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005485 size = calc_binsize(s, nbytes);
5486 if (size < 0) {
5487 PickleState *st = _Pickle_GetGlobalState();
5488 PyErr_Format(st->UnpicklingError,
5489 "BINSTRING exceeds system's maximum size of %zd bytes",
5490 PY_SSIZE_T_MAX);
5491 return -1;
5492 }
5493
5494 if (_Unpickler_Read(self, &s, size) < 0)
5495 return -1;
5496
5497 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5498 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5499 if (strcmp(self->encoding, "bytes") == 0) {
5500 obj = PyBytes_FromStringAndSize(s, size);
5501 }
5502 else {
5503 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5504 }
5505 if (obj == NULL) {
5506 return -1;
5507 }
5508
5509 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005510 return 0;
5511}
5512
5513static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005514load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005515{
5516 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005517 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005518 char *s;
5519
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005520 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005521 return -1;
5522
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005523 size = calc_binsize(s, nbytes);
5524 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005525 PyErr_Format(PyExc_OverflowError,
5526 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005527 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005528 return -1;
5529 }
5530
Antoine Pitrou91f43802019-05-26 17:10:09 +02005531 bytes = PyBytes_FromStringAndSize(NULL, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005532 if (bytes == NULL)
5533 return -1;
Antoine Pitrou91f43802019-05-26 17:10:09 +02005534 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5535 Py_DECREF(bytes);
5536 return -1;
5537 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005538
5539 PDATA_PUSH(self->stack, bytes, -1);
5540 return 0;
5541}
5542
5543static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02005544load_counted_bytearray(UnpicklerObject *self)
5545{
5546 PyObject *bytearray;
5547 Py_ssize_t size;
5548 char *s;
5549
5550 if (_Unpickler_Read(self, &s, 8) < 0) {
5551 return -1;
5552 }
5553
5554 size = calc_binsize(s, 8);
5555 if (size < 0) {
5556 PyErr_Format(PyExc_OverflowError,
5557 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5558 PY_SSIZE_T_MAX);
5559 return -1;
5560 }
5561
5562 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5563 if (bytearray == NULL) {
5564 return -1;
5565 }
5566 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5567 Py_DECREF(bytearray);
5568 return -1;
5569 }
5570
5571 PDATA_PUSH(self->stack, bytearray, -1);
5572 return 0;
5573}
5574
5575static int
5576load_next_buffer(UnpicklerObject *self)
5577{
5578 if (self->buffers == NULL) {
5579 PickleState *st = _Pickle_GetGlobalState();
5580 PyErr_SetString(st->UnpicklingError,
5581 "pickle stream refers to out-of-band data "
5582 "but no *buffers* argument was given");
5583 return -1;
5584 }
5585 PyObject *buf = PyIter_Next(self->buffers);
5586 if (buf == NULL) {
5587 if (!PyErr_Occurred()) {
5588 PickleState *st = _Pickle_GetGlobalState();
5589 PyErr_SetString(st->UnpicklingError,
5590 "not enough out-of-band buffers");
5591 }
5592 return -1;
5593 }
5594
5595 PDATA_PUSH(self->stack, buf, -1);
5596 return 0;
5597}
5598
5599static int
5600load_readonly_buffer(UnpicklerObject *self)
5601{
5602 Py_ssize_t len = Py_SIZE(self->stack);
5603 if (len <= self->stack->fence) {
5604 return Pdata_stack_underflow(self->stack);
5605 }
5606
5607 PyObject *obj = self->stack->data[len - 1];
5608 PyObject *view = PyMemoryView_FromObject(obj);
5609 if (view == NULL) {
5610 return -1;
5611 }
5612 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5613 /* Original object is writable */
5614 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5615 self->stack->data[len - 1] = view;
5616 Py_DECREF(obj);
5617 }
5618 else {
5619 /* Original object is read-only, no need to replace it */
5620 Py_DECREF(view);
5621 }
5622 return 0;
5623}
5624
5625static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005626load_unicode(UnpicklerObject *self)
5627{
5628 PyObject *str;
5629 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005630 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005631
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005632 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005633 return -1;
5634 if (len < 1)
5635 return bad_readline();
5636
5637 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5638 if (str == NULL)
5639 return -1;
5640
5641 PDATA_PUSH(self->stack, str, -1);
5642 return 0;
5643}
5644
5645static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005646load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005647{
5648 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005649 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005650 char *s;
5651
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005652 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005653 return -1;
5654
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005655 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005656 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005657 PyErr_Format(PyExc_OverflowError,
5658 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005659 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005660 return -1;
5661 }
5662
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005663 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005664 return -1;
5665
Victor Stinner485fb562010-04-13 11:07:24 +00005666 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005667 if (str == NULL)
5668 return -1;
5669
5670 PDATA_PUSH(self->stack, str, -1);
5671 return 0;
5672}
5673
5674static int
Victor Stinner21b47112016-03-14 18:09:39 +01005675load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005676{
5677 PyObject *tuple;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005678
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005679 if (Py_SIZE(self->stack) < len)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005680 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005681
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005682 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005683 if (tuple == NULL)
5684 return -1;
5685 PDATA_PUSH(self->stack, tuple, -1);
5686 return 0;
5687}
5688
5689static int
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005690load_tuple(UnpicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005691{
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005692 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005693
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005694 if ((i = marker(self)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005695 return -1;
5696
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005697 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005698}
5699
5700static int
5701load_empty_list(UnpicklerObject *self)
5702{
5703 PyObject *list;
5704
5705 if ((list = PyList_New(0)) == NULL)
5706 return -1;
5707 PDATA_PUSH(self->stack, list, -1);
5708 return 0;
5709}
5710
5711static int
5712load_empty_dict(UnpicklerObject *self)
5713{
5714 PyObject *dict;
5715
5716 if ((dict = PyDict_New()) == NULL)
5717 return -1;
5718 PDATA_PUSH(self->stack, dict, -1);
5719 return 0;
5720}
5721
5722static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005723load_empty_set(UnpicklerObject *self)
5724{
5725 PyObject *set;
5726
5727 if ((set = PySet_New(NULL)) == NULL)
5728 return -1;
5729 PDATA_PUSH(self->stack, set, -1);
5730 return 0;
5731}
5732
5733static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005734load_list(UnpicklerObject *self)
5735{
5736 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005737 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005738
5739 if ((i = marker(self)) < 0)
5740 return -1;
5741
5742 list = Pdata_poplist(self->stack, i);
5743 if (list == NULL)
5744 return -1;
5745 PDATA_PUSH(self->stack, list, -1);
5746 return 0;
5747}
5748
5749static int
5750load_dict(UnpicklerObject *self)
5751{
5752 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005753 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005754
5755 if ((i = marker(self)) < 0)
5756 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005757 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005758
5759 if ((dict = PyDict_New()) == NULL)
5760 return -1;
5761
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005762 if ((j - i) % 2 != 0) {
5763 PickleState *st = _Pickle_GetGlobalState();
5764 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
Serhiy Storchaka3ac53802015-12-07 11:32:00 +02005765 Py_DECREF(dict);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005766 return -1;
5767 }
5768
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005769 for (k = i + 1; k < j; k += 2) {
5770 key = self->stack->data[k - 1];
5771 value = self->stack->data[k];
5772 if (PyDict_SetItem(dict, key, value) < 0) {
5773 Py_DECREF(dict);
5774 return -1;
5775 }
5776 }
5777 Pdata_clear(self->stack, i);
5778 PDATA_PUSH(self->stack, dict, -1);
5779 return 0;
5780}
5781
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005782static int
5783load_frozenset(UnpicklerObject *self)
5784{
5785 PyObject *items;
5786 PyObject *frozenset;
5787 Py_ssize_t i;
5788
5789 if ((i = marker(self)) < 0)
5790 return -1;
5791
5792 items = Pdata_poptuple(self->stack, i);
5793 if (items == NULL)
5794 return -1;
5795
5796 frozenset = PyFrozenSet_New(items);
5797 Py_DECREF(items);
5798 if (frozenset == NULL)
5799 return -1;
5800
5801 PDATA_PUSH(self->stack, frozenset, -1);
5802 return 0;
5803}
5804
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005805static PyObject *
5806instantiate(PyObject *cls, PyObject *args)
5807{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005808 /* Caller must assure args are a tuple. Normally, args come from
5809 Pdata_poptuple which packs objects from the top of the stack
5810 into a newly created tuple. */
5811 assert(PyTuple_Check(args));
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005812 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5813 _Py_IDENTIFIER(__getinitargs__);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005814 _Py_IDENTIFIER(__new__);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02005815 PyObject *func;
5816 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5817 return NULL;
5818 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005819 if (func == NULL) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02005820 return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005821 }
5822 Py_DECREF(func);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005823 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005824 return PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005825}
5826
5827static int
5828load_obj(UnpicklerObject *self)
5829{
5830 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005831 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005832
5833 if ((i = marker(self)) < 0)
5834 return -1;
5835
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005836 if (Py_SIZE(self->stack) - i < 1)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005837 return Pdata_stack_underflow(self->stack);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005838
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005839 args = Pdata_poptuple(self->stack, i + 1);
5840 if (args == NULL)
5841 return -1;
5842
5843 PDATA_POP(self->stack, cls);
5844 if (cls) {
5845 obj = instantiate(cls, args);
5846 Py_DECREF(cls);
5847 }
5848 Py_DECREF(args);
5849 if (obj == NULL)
5850 return -1;
5851
5852 PDATA_PUSH(self->stack, obj, -1);
5853 return 0;
5854}
5855
5856static int
5857load_inst(UnpicklerObject *self)
5858{
5859 PyObject *cls = NULL;
5860 PyObject *args = NULL;
5861 PyObject *obj = NULL;
5862 PyObject *module_name;
5863 PyObject *class_name;
5864 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005865 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005866 char *s;
5867
5868 if ((i = marker(self)) < 0)
5869 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005870 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005871 return -1;
5872 if (len < 2)
5873 return bad_readline();
5874
5875 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5876 identifiers are permitted in Python 3.0, since the INST opcode is only
5877 supported by older protocols on Python 2.x. */
5878 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5879 if (module_name == NULL)
5880 return -1;
5881
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005882 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005883 if (len < 2) {
5884 Py_DECREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005885 return bad_readline();
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005886 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005887 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005888 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005889 cls = find_class(self, module_name, class_name);
5890 Py_DECREF(class_name);
5891 }
5892 }
5893 Py_DECREF(module_name);
5894
5895 if (cls == NULL)
5896 return -1;
5897
5898 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5899 obj = instantiate(cls, args);
5900 Py_DECREF(args);
5901 }
5902 Py_DECREF(cls);
5903
5904 if (obj == NULL)
5905 return -1;
5906
5907 PDATA_PUSH(self->stack, obj, -1);
5908 return 0;
5909}
5910
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005911static void
5912newobj_unpickling_error(const char * msg, int use_kwargs, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005913{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005914 PickleState *st = _Pickle_GetGlobalState();
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005915 PyErr_Format(st->UnpicklingError, msg,
5916 use_kwargs ? "NEWOBJ_EX" : "NEWOBJ",
5917 Py_TYPE(arg)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005918}
5919
5920static int
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005921load_newobj(UnpicklerObject *self, int use_kwargs)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005922{
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005923 PyObject *cls, *args, *kwargs = NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005924 PyObject *obj;
5925
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005926 /* Stack is ... cls args [kwargs], and we want to call
5927 * cls.__new__(cls, *args, **kwargs).
5928 */
5929 if (use_kwargs) {
5930 PDATA_POP(self->stack, kwargs);
5931 if (kwargs == NULL) {
5932 return -1;
5933 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005934 }
5935 PDATA_POP(self->stack, args);
5936 if (args == NULL) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005937 Py_XDECREF(kwargs);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005938 return -1;
5939 }
5940 PDATA_POP(self->stack, cls);
5941 if (cls == NULL) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005942 Py_XDECREF(kwargs);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005943 Py_DECREF(args);
5944 return -1;
5945 }
Larry Hastings61272b72014-01-07 12:41:53 -08005946
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005947 if (!PyType_Check(cls)) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005948 newobj_unpickling_error("%s class argument must be a type, not %.200s",
5949 use_kwargs, cls);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005950 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005951 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005952 if (((PyTypeObject *)cls)->tp_new == NULL) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005953 newobj_unpickling_error("%s class argument '%.200s' doesn't have __new__",
5954 use_kwargs, cls);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005955 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005956 }
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005957 if (!PyTuple_Check(args)) {
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005958 newobj_unpickling_error("%s args argument must be a tuple, not %.200s",
5959 use_kwargs, args);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005960 goto error;
5961 }
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005962 if (use_kwargs && !PyDict_Check(kwargs)) {
5963 newobj_unpickling_error("%s kwargs argument must be a dict, not %.200s",
5964 use_kwargs, kwargs);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005965 goto error;
5966 }
5967
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005968 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005969 if (obj == NULL) {
5970 goto error;
5971 }
5972 Py_XDECREF(kwargs);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005973 Py_DECREF(args);
5974 Py_DECREF(cls);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005975 PDATA_PUSH(self->stack, obj, -1);
5976 return 0;
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005977
5978error:
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03005979 Py_XDECREF(kwargs);
Serhiy Storchaka4f309ab2020-07-13 15:49:26 +03005980 Py_DECREF(args);
5981 Py_DECREF(cls);
5982 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005983}
5984
5985static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005986load_global(UnpicklerObject *self)
5987{
5988 PyObject *global = NULL;
5989 PyObject *module_name;
5990 PyObject *global_name;
5991 Py_ssize_t len;
5992 char *s;
5993
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005994 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005995 return -1;
5996 if (len < 2)
5997 return bad_readline();
5998 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5999 if (!module_name)
6000 return -1;
6001
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006002 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006003 if (len < 2) {
6004 Py_DECREF(module_name);
6005 return bad_readline();
6006 }
6007 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6008 if (global_name) {
6009 global = find_class(self, module_name, global_name);
6010 Py_DECREF(global_name);
6011 }
6012 }
6013 Py_DECREF(module_name);
6014
6015 if (global == NULL)
6016 return -1;
6017 PDATA_PUSH(self->stack, global, -1);
6018 return 0;
6019}
6020
6021static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006022load_stack_global(UnpicklerObject *self)
6023{
6024 PyObject *global;
6025 PyObject *module_name;
6026 PyObject *global_name;
6027
6028 PDATA_POP(self->stack, global_name);
6029 PDATA_POP(self->stack, module_name);
6030 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6031 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006032 PickleState *st = _Pickle_GetGlobalState();
6033 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006034 Py_XDECREF(global_name);
6035 Py_XDECREF(module_name);
6036 return -1;
6037 }
6038 global = find_class(self, module_name, global_name);
6039 Py_DECREF(global_name);
6040 Py_DECREF(module_name);
6041 if (global == NULL)
6042 return -1;
6043 PDATA_PUSH(self->stack, global, -1);
6044 return 0;
6045}
6046
6047static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006048load_persid(UnpicklerObject *self)
6049{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006050 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006051 Py_ssize_t len;
6052 char *s;
6053
6054 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006055 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006056 return -1;
Alexandre Vassalotti896414f2013-11-30 13:52:35 -08006057 if (len < 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006058 return bad_readline();
6059
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006060 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6061 if (pid == NULL) {
6062 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6063 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6064 "persistent IDs in protocol 0 must be "
6065 "ASCII strings");
6066 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006067 return -1;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006068 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006069
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006070 obj = call_method(self->pers_func, self->pers_func_self, pid);
6071 Py_DECREF(pid);
6072 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006073 return -1;
6074
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006075 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006076 return 0;
6077 }
6078 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006079 PickleState *st = _Pickle_GetGlobalState();
6080 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006081 "A load persistent id instruction was encountered,\n"
6082 "but no persistent_load function was specified.");
6083 return -1;
6084 }
6085}
6086
6087static int
6088load_binpersid(UnpicklerObject *self)
6089{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006090 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006091
6092 if (self->pers_func) {
6093 PDATA_POP(self->stack, pid);
6094 if (pid == NULL)
6095 return -1;
6096
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006097 obj = call_method(self->pers_func, self->pers_func_self, pid);
6098 Py_DECREF(pid);
6099 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006100 return -1;
6101
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006102 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006103 return 0;
6104 }
6105 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006106 PickleState *st = _Pickle_GetGlobalState();
6107 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006108 "A load persistent id instruction was encountered,\n"
6109 "but no persistent_load function was specified.");
6110 return -1;
6111 }
6112}
6113
6114static int
6115load_pop(UnpicklerObject *self)
6116{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006117 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006118
6119 /* Note that we split the (pickle.py) stack into two stacks,
6120 * an object stack and a mark stack. We have to be clever and
6121 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00006122 * mark stack first, and only signalling a stack underflow if
6123 * the object stack is empty and the mark stack doesn't match
6124 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006125 */
Collin Winter8ca69de2009-05-26 16:53:41 +00006126 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006127 self->num_marks--;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006128 self->stack->mark_set = self->num_marks != 0;
6129 self->stack->fence = self->num_marks ?
6130 self->marks[self->num_marks - 1] : 0;
6131 } else if (len <= self->stack->fence)
6132 return Pdata_stack_underflow(self->stack);
6133 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006134 len--;
6135 Py_DECREF(self->stack->data[len]);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006136 Py_SET_SIZE(self->stack, len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006137 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006138 return 0;
6139}
6140
6141static int
6142load_pop_mark(UnpicklerObject *self)
6143{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006144 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006145
6146 if ((i = marker(self)) < 0)
6147 return -1;
6148
6149 Pdata_clear(self->stack, i);
6150
6151 return 0;
6152}
6153
6154static int
6155load_dup(UnpicklerObject *self)
6156{
6157 PyObject *last;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006158 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006159
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006160 if (len <= self->stack->fence)
6161 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006162 last = self->stack->data[len - 1];
6163 PDATA_APPEND(self->stack, last, -1);
6164 return 0;
6165}
6166
6167static int
6168load_get(UnpicklerObject *self)
6169{
6170 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006171 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006172 Py_ssize_t len;
6173 char *s;
6174
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006175 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006176 return -1;
6177 if (len < 2)
6178 return bad_readline();
6179
6180 key = PyLong_FromString(s, NULL, 10);
6181 if (key == NULL)
6182 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006183 idx = PyLong_AsSsize_t(key);
6184 if (idx == -1 && PyErr_Occurred()) {
6185 Py_DECREF(key);
6186 return -1;
6187 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006188
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006189 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006190 if (value == NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006191 if (!PyErr_Occurred()) {
6192 PickleState *st = _Pickle_GetGlobalState();
6193 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6194 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006195 Py_DECREF(key);
6196 return -1;
6197 }
6198 Py_DECREF(key);
6199
6200 PDATA_APPEND(self->stack, value, -1);
6201 return 0;
6202}
6203
6204static int
6205load_binget(UnpicklerObject *self)
6206{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006207 PyObject *value;
6208 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006209 char *s;
6210
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006211 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006212 return -1;
6213
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006214 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006216 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006217 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006218 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006219 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006220 PickleState *st = _Pickle_GetGlobalState();
6221 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006222 Py_DECREF(key);
6223 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006224 return -1;
6225 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006226
6227 PDATA_APPEND(self->stack, value, -1);
6228 return 0;
6229}
6230
6231static int
6232load_long_binget(UnpicklerObject *self)
6233{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006234 PyObject *value;
6235 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006236 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006237
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006238 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006239 return -1;
6240
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006241 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006242
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006243 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006244 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006245 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006246 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006247 PickleState *st = _Pickle_GetGlobalState();
6248 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006249 Py_DECREF(key);
6250 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006251 return -1;
6252 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006253
6254 PDATA_APPEND(self->stack, value, -1);
6255 return 0;
6256}
6257
6258/* Push an object from the extension registry (EXT[124]). nbytes is
6259 * the number of bytes following the opcode, holding the index (code) value.
6260 */
6261static int
6262load_extension(UnpicklerObject *self, int nbytes)
6263{
6264 char *codebytes; /* the nbytes bytes after the opcode */
6265 long code; /* calc_binint returns long */
6266 PyObject *py_code; /* code as a Python int */
6267 PyObject *obj; /* the object to push */
6268 PyObject *pair; /* (module_name, class_name) */
6269 PyObject *module_name, *class_name;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006270 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006271
6272 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006273 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006274 return -1;
6275 code = calc_binint(codebytes, nbytes);
6276 if (code <= 0) { /* note that 0 is forbidden */
6277 /* Corrupt or hostile pickle. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006278 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006279 return -1;
6280 }
6281
6282 /* Look for the code in the cache. */
6283 py_code = PyLong_FromLong(code);
6284 if (py_code == NULL)
6285 return -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006286 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006287 if (obj != NULL) {
6288 /* Bingo. */
6289 Py_DECREF(py_code);
6290 PDATA_APPEND(self->stack, obj, -1);
6291 return 0;
6292 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006293 if (PyErr_Occurred()) {
6294 Py_DECREF(py_code);
6295 return -1;
6296 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006297
6298 /* Look up the (module_name, class_name) pair. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006299 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006300 if (pair == NULL) {
6301 Py_DECREF(py_code);
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006302 if (!PyErr_Occurred()) {
6303 PyErr_Format(PyExc_ValueError, "unregistered extension "
6304 "code %ld", code);
6305 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006306 return -1;
6307 }
6308 /* Since the extension registry is manipulable via Python code,
6309 * confirm that pair is really a 2-tuple of strings.
6310 */
Victor Stinnerb37672d2018-11-22 03:37:50 +01006311 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6312 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006313 }
Victor Stinnerb37672d2018-11-22 03:37:50 +01006314
6315 module_name = PyTuple_GET_ITEM(pair, 0);
6316 if (!PyUnicode_Check(module_name)) {
6317 goto error;
6318 }
6319
6320 class_name = PyTuple_GET_ITEM(pair, 1);
6321 if (!PyUnicode_Check(class_name)) {
6322 goto error;
6323 }
6324
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006325 /* Load the object. */
6326 obj = find_class(self, module_name, class_name);
6327 if (obj == NULL) {
6328 Py_DECREF(py_code);
6329 return -1;
6330 }
6331 /* Cache code -> obj. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006332 code = PyDict_SetItem(st->extension_cache, py_code, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006333 Py_DECREF(py_code);
6334 if (code < 0) {
6335 Py_DECREF(obj);
6336 return -1;
6337 }
6338 PDATA_PUSH(self->stack, obj, -1);
6339 return 0;
Victor Stinnerb37672d2018-11-22 03:37:50 +01006340
6341error:
6342 Py_DECREF(py_code);
6343 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6344 "isn't a 2-tuple of strings", code);
6345 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006346}
6347
6348static int
6349load_put(UnpicklerObject *self)
6350{
6351 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006352 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006353 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01006354 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006355
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006356 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006357 return -1;
6358 if (len < 2)
6359 return bad_readline();
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006360 if (Py_SIZE(self->stack) <= self->stack->fence)
6361 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006362 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006363
6364 key = PyLong_FromString(s, NULL, 10);
6365 if (key == NULL)
6366 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006367 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006368 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006369 if (idx < 0) {
6370 if (!PyErr_Occurred())
6371 PyErr_SetString(PyExc_ValueError,
6372 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006373 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006374 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006375
6376 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006377}
6378
6379static int
6380load_binput(UnpicklerObject *self)
6381{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006382 PyObject *value;
6383 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006384 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006385
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006386 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006387 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006388
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006389 if (Py_SIZE(self->stack) <= self->stack->fence)
6390 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006391 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006392
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006393 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006394
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006395 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006396}
6397
6398static int
6399load_long_binput(UnpicklerObject *self)
6400{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006401 PyObject *value;
6402 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006403 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006404
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006405 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006406 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006407
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006408 if (Py_SIZE(self->stack) <= self->stack->fence)
6409 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006410 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006411
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006412 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006413 if (idx < 0) {
6414 PyErr_SetString(PyExc_ValueError,
6415 "negative LONG_BINPUT argument");
6416 return -1;
6417 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006418
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006419 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006420}
6421
6422static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006423load_memoize(UnpicklerObject *self)
6424{
6425 PyObject *value;
6426
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006427 if (Py_SIZE(self->stack) <= self->stack->fence)
6428 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006429 value = self->stack->data[Py_SIZE(self->stack) - 1];
6430
6431 return _Unpickler_MemoPut(self, self->memo_len, value);
6432}
6433
6434static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006435do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006436{
6437 PyObject *value;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006438 PyObject *slice;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006439 PyObject *list;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006440 PyObject *result;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006441 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006442
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006443 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006444 if (x > len || x <= self->stack->fence)
6445 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006446 if (len == x) /* nothing to do */
6447 return 0;
6448
6449 list = self->stack->data[x - 1];
6450
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006451 if (PyList_CheckExact(list)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006452 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006453 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006454
6455 slice = Pdata_poplist(self->stack, x);
6456 if (!slice)
6457 return -1;
6458 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006459 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006460 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006461 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006462 }
6463 else {
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006464 PyObject *extend_func;
6465 _Py_IDENTIFIER(extend);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006466
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03006467 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6468 return -1;
6469 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006470 if (extend_func != NULL) {
6471 slice = Pdata_poplist(self->stack, x);
6472 if (!slice) {
6473 Py_DECREF(extend_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006474 return -1;
6475 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006476 result = _Pickle_FastCall(extend_func, slice);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006477 Py_DECREF(extend_func);
6478 if (result == NULL)
6479 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006480 Py_DECREF(result);
6481 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006482 else {
6483 PyObject *append_func;
6484 _Py_IDENTIFIER(append);
6485
6486 /* Even if the PEP 307 requires extend() and append() methods,
6487 fall back on append() if the object has no extend() method
6488 for backward compatibility. */
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006489 append_func = _PyObject_GetAttrId(list, &PyId_append);
6490 if (append_func == NULL)
6491 return -1;
6492 for (i = x; i < len; i++) {
6493 value = self->stack->data[i];
6494 result = _Pickle_FastCall(append_func, value);
6495 if (result == NULL) {
6496 Pdata_clear(self->stack, i + 1);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006497 Py_SET_SIZE(self->stack, x);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006498 Py_DECREF(append_func);
6499 return -1;
6500 }
6501 Py_DECREF(result);
6502 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006503 Py_SET_SIZE(self->stack, x);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006504 Py_DECREF(append_func);
6505 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006506 }
6507
6508 return 0;
6509}
6510
6511static int
6512load_append(UnpicklerObject *self)
6513{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006514 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6515 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006516 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006517}
6518
6519static int
6520load_appends(UnpicklerObject *self)
6521{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006522 Py_ssize_t i = marker(self);
6523 if (i < 0)
6524 return -1;
6525 return do_append(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006526}
6527
6528static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006529do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006530{
6531 PyObject *value, *key;
6532 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006533 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006534 int status = 0;
6535
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006536 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006537 if (x > len || x <= self->stack->fence)
6538 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006539 if (len == x) /* nothing to do */
6540 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02006541 if ((len - x) % 2 != 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006542 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006543 /* Currupt or hostile pickle -- we never write one like this. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006544 PyErr_SetString(st->UnpicklingError,
6545 "odd number of items for SETITEMS");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006546 return -1;
6547 }
6548
6549 /* Here, dict does not actually need to be a PyDict; it could be anything
6550 that supports the __setitem__ attribute. */
6551 dict = self->stack->data[x - 1];
6552
6553 for (i = x + 1; i < len; i += 2) {
6554 key = self->stack->data[i - 1];
6555 value = self->stack->data[i];
6556 if (PyObject_SetItem(dict, key, value) < 0) {
6557 status = -1;
6558 break;
6559 }
6560 }
6561
6562 Pdata_clear(self->stack, x);
6563 return status;
6564}
6565
6566static int
6567load_setitem(UnpicklerObject *self)
6568{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006569 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006570}
6571
6572static int
6573load_setitems(UnpicklerObject *self)
6574{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006575 Py_ssize_t i = marker(self);
6576 if (i < 0)
6577 return -1;
6578 return do_setitems(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006579}
6580
6581static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006582load_additems(UnpicklerObject *self)
6583{
6584 PyObject *set;
6585 Py_ssize_t mark, len, i;
6586
6587 mark = marker(self);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006588 if (mark < 0)
6589 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006590 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006591 if (mark > len || mark <= self->stack->fence)
6592 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006593 if (len == mark) /* nothing to do */
6594 return 0;
6595
6596 set = self->stack->data[mark - 1];
6597
6598 if (PySet_Check(set)) {
6599 PyObject *items;
6600 int status;
6601
6602 items = Pdata_poptuple(self->stack, mark);
6603 if (items == NULL)
6604 return -1;
6605
6606 status = _PySet_Update(set, items);
6607 Py_DECREF(items);
6608 return status;
6609 }
6610 else {
6611 PyObject *add_func;
6612 _Py_IDENTIFIER(add);
6613
6614 add_func = _PyObject_GetAttrId(set, &PyId_add);
6615 if (add_func == NULL)
6616 return -1;
6617 for (i = mark; i < len; i++) {
6618 PyObject *result;
6619 PyObject *item;
6620
6621 item = self->stack->data[i];
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006622 result = _Pickle_FastCall(add_func, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006623 if (result == NULL) {
6624 Pdata_clear(self->stack, i + 1);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006625 Py_SET_SIZE(self->stack, mark);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006626 return -1;
6627 }
6628 Py_DECREF(result);
6629 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006630 Py_SET_SIZE(self->stack, mark);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006631 }
6632
6633 return 0;
6634}
6635
6636static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006637load_build(UnpicklerObject *self)
6638{
6639 PyObject *state, *inst, *slotstate;
6640 PyObject *setstate;
6641 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006642 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006643
6644 /* Stack is ... instance, state. We want to leave instance at
6645 * the stack top, possibly mutated via instance.__setstate__(state).
6646 */
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006647 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6648 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006649
6650 PDATA_POP(self->stack, state);
6651 if (state == NULL)
6652 return -1;
6653
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006654 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006655
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006656 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6657 Py_DECREF(state);
6658 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006659 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006660 if (setstate != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006661 PyObject *result;
6662
6663 /* The explicit __setstate__ is responsible for everything. */
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006664 result = _Pickle_FastCall(setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006665 Py_DECREF(setstate);
6666 if (result == NULL)
6667 return -1;
6668 Py_DECREF(result);
6669 return 0;
6670 }
6671
6672 /* A default __setstate__. First see whether state embeds a
6673 * slot state dict too (a proto 2 addition).
6674 */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02006675 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006676 PyObject *tmp = state;
6677
6678 state = PyTuple_GET_ITEM(tmp, 0);
6679 slotstate = PyTuple_GET_ITEM(tmp, 1);
6680 Py_INCREF(state);
6681 Py_INCREF(slotstate);
6682 Py_DECREF(tmp);
6683 }
6684 else
6685 slotstate = NULL;
6686
6687 /* Set inst.__dict__ from the state dict (if any). */
6688 if (state != Py_None) {
6689 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006690 PyObject *d_key, *d_value;
6691 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006692 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006693
6694 if (!PyDict_Check(state)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006695 PickleState *st = _Pickle_GetGlobalState();
6696 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006697 goto error;
6698 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006699 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006700 if (dict == NULL)
6701 goto error;
6702
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006703 i = 0;
6704 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6705 /* normally the keys for instance attributes are
6706 interned. we should try to do that here. */
6707 Py_INCREF(d_key);
6708 if (PyUnicode_CheckExact(d_key))
6709 PyUnicode_InternInPlace(&d_key);
6710 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6711 Py_DECREF(d_key);
6712 goto error;
6713 }
6714 Py_DECREF(d_key);
6715 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006716 Py_DECREF(dict);
6717 }
6718
6719 /* Also set instance attributes from the slotstate dict (if any). */
6720 if (slotstate != NULL) {
6721 PyObject *d_key, *d_value;
6722 Py_ssize_t i;
6723
6724 if (!PyDict_Check(slotstate)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006725 PickleState *st = _Pickle_GetGlobalState();
6726 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006727 "slot state is not a dictionary");
6728 goto error;
6729 }
6730 i = 0;
6731 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6732 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6733 goto error;
6734 }
6735 }
6736
6737 if (0) {
6738 error:
6739 status = -1;
6740 }
6741
6742 Py_DECREF(state);
6743 Py_XDECREF(slotstate);
6744 return status;
6745}
6746
6747static int
6748load_mark(UnpicklerObject *self)
6749{
6750
6751 /* Note that we split the (pickle.py) stack into two stacks, an
6752 * object stack and a mark stack. Here we push a mark onto the
6753 * mark stack.
6754 */
6755
Sergey Fedoseev86b89912018-08-25 12:54:40 +05006756 if (self->num_marks >= self->marks_size) {
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006757 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6758 Py_ssize_t *marks_new = self->marks;
6759 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6760 if (marks_new == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006761 PyErr_NoMemory();
6762 return -1;
6763 }
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006764 self->marks = marks_new;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006765 self->marks_size = (Py_ssize_t)alloc;
6766 }
6767
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006768 self->stack->mark_set = 1;
6769 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006770
6771 return 0;
6772}
6773
6774static int
6775load_reduce(UnpicklerObject *self)
6776{
6777 PyObject *callable = NULL;
6778 PyObject *argtup = NULL;
6779 PyObject *obj = NULL;
6780
6781 PDATA_POP(self->stack, argtup);
6782 if (argtup == NULL)
6783 return -1;
6784 PDATA_POP(self->stack, callable);
6785 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00006786 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006787 Py_DECREF(callable);
6788 }
6789 Py_DECREF(argtup);
6790
6791 if (obj == NULL)
6792 return -1;
6793
6794 PDATA_PUSH(self->stack, obj, -1);
6795 return 0;
6796}
6797
6798/* Just raises an error if we don't know the protocol specified. PROTO
6799 * is the first opcode for protocols >= 2.
6800 */
6801static int
6802load_proto(UnpicklerObject *self)
6803{
6804 char *s;
6805 int i;
6806
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006807 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006808 return -1;
6809
6810 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006811 if (i <= HIGHEST_PROTOCOL) {
6812 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006813 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006814 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006815
6816 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6817 return -1;
6818}
6819
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006820static int
6821load_frame(UnpicklerObject *self)
6822{
6823 char *s;
6824 Py_ssize_t frame_len;
6825
6826 if (_Unpickler_Read(self, &s, 8) < 0)
6827 return -1;
6828
6829 frame_len = calc_binsize(s, 8);
6830 if (frame_len < 0) {
6831 PyErr_Format(PyExc_OverflowError,
6832 "FRAME length exceeds system's maximum of %zd bytes",
6833 PY_SSIZE_T_MAX);
6834 return -1;
6835 }
6836
6837 if (_Unpickler_Read(self, &s, frame_len) < 0)
6838 return -1;
6839
6840 /* Rewind to start of frame */
6841 self->next_read_idx -= frame_len;
6842 return 0;
6843}
6844
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006845static PyObject *
6846load(UnpicklerObject *self)
6847{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006848 PyObject *value = NULL;
Christian Heimes27ea78b2014-01-27 01:03:53 +01006849 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006850
6851 self->num_marks = 0;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006852 self->stack->mark_set = 0;
6853 self->stack->fence = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006854 self->proto = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006855 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006856 Pdata_clear(self->stack, 0);
6857
6858 /* Convenient macros for the dispatch while-switch loop just below. */
6859#define OP(opcode, load_func) \
6860 case opcode: if (load_func(self) < 0) break; continue;
6861
6862#define OP_ARG(opcode, load_func, arg) \
6863 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6864
6865 while (1) {
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006866 if (_Unpickler_Read(self, &s, 1) < 0) {
6867 PickleState *st = _Pickle_GetGlobalState();
6868 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6869 PyErr_Format(PyExc_EOFError, "Ran out of input");
6870 }
6871 return NULL;
6872 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006873
6874 switch ((enum opcode)s[0]) {
6875 OP(NONE, load_none)
6876 OP(BININT, load_binint)
6877 OP(BININT1, load_binint1)
6878 OP(BININT2, load_binint2)
6879 OP(INT, load_int)
6880 OP(LONG, load_long)
6881 OP_ARG(LONG1, load_counted_long, 1)
6882 OP_ARG(LONG4, load_counted_long, 4)
6883 OP(FLOAT, load_float)
6884 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006885 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6886 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6887 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
Antoine Pitrou91f43802019-05-26 17:10:09 +02006888 OP(BYTEARRAY8, load_counted_bytearray)
6889 OP(NEXT_BUFFER, load_next_buffer)
6890 OP(READONLY_BUFFER, load_readonly_buffer)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006891 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6892 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006893 OP(STRING, load_string)
6894 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006895 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6896 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6897 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006898 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6899 OP_ARG(TUPLE1, load_counted_tuple, 1)
6900 OP_ARG(TUPLE2, load_counted_tuple, 2)
6901 OP_ARG(TUPLE3, load_counted_tuple, 3)
6902 OP(TUPLE, load_tuple)
6903 OP(EMPTY_LIST, load_empty_list)
6904 OP(LIST, load_list)
6905 OP(EMPTY_DICT, load_empty_dict)
6906 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006907 OP(EMPTY_SET, load_empty_set)
6908 OP(ADDITEMS, load_additems)
6909 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006910 OP(OBJ, load_obj)
6911 OP(INST, load_inst)
Serhiy Storchakab4c98ed2020-07-18 11:11:21 +03006912 OP_ARG(NEWOBJ, load_newobj, 0)
6913 OP_ARG(NEWOBJ_EX, load_newobj, 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006914 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006915 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006916 OP(APPEND, load_append)
6917 OP(APPENDS, load_appends)
6918 OP(BUILD, load_build)
6919 OP(DUP, load_dup)
6920 OP(BINGET, load_binget)
6921 OP(LONG_BINGET, load_long_binget)
6922 OP(GET, load_get)
6923 OP(MARK, load_mark)
6924 OP(BINPUT, load_binput)
6925 OP(LONG_BINPUT, load_long_binput)
6926 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006927 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006928 OP(POP, load_pop)
6929 OP(POP_MARK, load_pop_mark)
6930 OP(SETITEM, load_setitem)
6931 OP(SETITEMS, load_setitems)
6932 OP(PERSID, load_persid)
6933 OP(BINPERSID, load_binpersid)
6934 OP(REDUCE, load_reduce)
6935 OP(PROTO, load_proto)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006936 OP(FRAME, load_frame)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006937 OP_ARG(EXT1, load_extension, 1)
6938 OP_ARG(EXT2, load_extension, 2)
6939 OP_ARG(EXT4, load_extension, 4)
6940 OP_ARG(NEWTRUE, load_bool, Py_True)
6941 OP_ARG(NEWFALSE, load_bool, Py_False)
6942
6943 case STOP:
6944 break;
6945
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006946 default:
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006947 {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006948 PickleState *st = _Pickle_GetGlobalState();
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006949 unsigned char c = (unsigned char) *s;
6950 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6951 PyErr_Format(st->UnpicklingError,
6952 "invalid load key, '%c'.", c);
6953 }
6954 else {
6955 PyErr_Format(st->UnpicklingError,
6956 "invalid load key, '\\x%02x'.", c);
6957 }
6958 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006959 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006960 }
6961
6962 break; /* and we are done! */
6963 }
6964
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006965 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006966 return NULL;
6967 }
6968
Victor Stinner2ae57e32013-10-31 13:39:23 +01006969 if (_Unpickler_SkipConsumed(self) < 0)
6970 return NULL;
6971
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006972 PDATA_POP(self->stack, value);
6973 return value;
6974}
6975
Larry Hastings61272b72014-01-07 12:41:53 -08006976/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006977
6978_pickle.Unpickler.load
6979
6980Load a pickle.
6981
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006982Read a pickled object representation from the open file object given
6983in the constructor, and return the reconstituted object hierarchy
6984specified therein.
Larry Hastings61272b72014-01-07 12:41:53 -08006985[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006986
Larry Hastings3cceb382014-01-04 11:09:09 -08006987static PyObject *
Larry Hastingsc2047262014-01-25 20:43:29 -08006988_pickle_Unpickler_load_impl(UnpicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006989/*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006990{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006991 UnpicklerObject *unpickler = (UnpicklerObject*)self;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006992
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006993 /* Check whether the Unpickler was initialized correctly. This prevents
6994 segfaulting if a subclass overridden __init__ with a function that does
6995 not call Unpickler.__init__(). Here, we simply ensure that self->read
6996 is not NULL. */
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006997 if (unpickler->read == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006998 PickleState *st = _Pickle_GetGlobalState();
6999 PyErr_Format(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007000 "Unpickler.__init__() was not called by %s.__init__()",
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007001 Py_TYPE(unpickler)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007002 return NULL;
7003 }
7004
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007005 return load(unpickler);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007006}
7007
7008/* The name of find_class() is misleading. In newer pickle protocols, this
7009 function is used for loading any global (i.e., functions), not just
7010 classes. The name is kept only for backward compatibility. */
7011
Larry Hastings61272b72014-01-07 12:41:53 -08007012/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007013
7014_pickle.Unpickler.find_class
7015
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007016 module_name: object
7017 global_name: object
7018 /
7019
7020Return an object from a specified module.
7021
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007022If necessary, the module will be imported. Subclasses may override
7023this method (e.g. to restrict unpickling of arbitrary classes and
7024functions).
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007025
7026This method is called whenever a class or a function object is
7027needed. Both arguments passed are str objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007028[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007029
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007030static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007031_pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7032 PyObject *module_name,
7033 PyObject *global_name)
7034/*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007035{
7036 PyObject *global;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007037 PyObject *module;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007038
Steve Dowerb82e17e2019-05-23 08:45:22 -07007039 if (PySys_Audit("pickle.find_class", "OO",
7040 module_name, global_name) < 0) {
7041 return NULL;
7042 }
7043
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007044 /* Try to map the old names used in Python 2.x to the new ones used in
7045 Python 3.x. We do this only with old pickle protocols and when the
7046 user has not disabled the feature. */
7047 if (self->proto < 3 && self->fix_imports) {
7048 PyObject *key;
7049 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007050 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007051
7052 /* Check if the global (i.e., a function or a class) was renamed
7053 or moved to another module. */
7054 key = PyTuple_Pack(2, module_name, global_name);
7055 if (key == NULL)
7056 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007057 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007058 Py_DECREF(key);
7059 if (item) {
7060 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7061 PyErr_Format(PyExc_RuntimeError,
7062 "_compat_pickle.NAME_MAPPING values should be "
7063 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7064 return NULL;
7065 }
7066 module_name = PyTuple_GET_ITEM(item, 0);
7067 global_name = PyTuple_GET_ITEM(item, 1);
7068 if (!PyUnicode_Check(module_name) ||
7069 !PyUnicode_Check(global_name)) {
7070 PyErr_Format(PyExc_RuntimeError,
7071 "_compat_pickle.NAME_MAPPING values should be "
7072 "pairs of str, not (%.200s, %.200s)",
7073 Py_TYPE(module_name)->tp_name,
7074 Py_TYPE(global_name)->tp_name);
7075 return NULL;
7076 }
7077 }
7078 else if (PyErr_Occurred()) {
7079 return NULL;
7080 }
Serhiy Storchakabfe18242015-03-31 13:12:37 +03007081 else {
7082 /* Check if the module was renamed. */
7083 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7084 if (item) {
7085 if (!PyUnicode_Check(item)) {
7086 PyErr_Format(PyExc_RuntimeError,
7087 "_compat_pickle.IMPORT_MAPPING values should be "
7088 "strings, not %.200s", Py_TYPE(item)->tp_name);
7089 return NULL;
7090 }
7091 module_name = item;
7092 }
7093 else if (PyErr_Occurred()) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007094 return NULL;
7095 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007096 }
7097 }
7098
tjb9004371c0a2019-02-18 23:30:51 +08007099 /*
7100 * we don't use PyImport_GetModule here, because it can return partially-
7101 * initialised modules, which then cause the getattribute to fail.
7102 */
7103 module = PyImport_Import(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007104 if (module == NULL) {
tjb9004371c0a2019-02-18 23:30:51 +08007105 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007106 }
Eric Snow3f9eee62017-09-15 16:35:20 -06007107 global = getattribute(module, global_name, self->proto >= 4);
7108 Py_DECREF(module);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007109 return global;
7110}
7111
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007112/*[clinic input]
7113
7114_pickle.Unpickler.__sizeof__ -> Py_ssize_t
7115
7116Returns size in memory, in bytes.
7117[clinic start generated code]*/
7118
7119static Py_ssize_t
7120_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7121/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7122{
7123 Py_ssize_t res;
7124
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02007125 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007126 if (self->memo != NULL)
7127 res += self->memo_size * sizeof(PyObject *);
7128 if (self->marks != NULL)
7129 res += self->marks_size * sizeof(Py_ssize_t);
7130 if (self->input_line != NULL)
7131 res += strlen(self->input_line) + 1;
7132 if (self->encoding != NULL)
7133 res += strlen(self->encoding) + 1;
7134 if (self->errors != NULL)
7135 res += strlen(self->errors) + 1;
7136 return res;
7137}
7138
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007139static struct PyMethodDef Unpickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007140 _PICKLE_UNPICKLER_LOAD_METHODDEF
7141 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007142 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007143 {NULL, NULL} /* sentinel */
7144};
7145
7146static void
7147Unpickler_dealloc(UnpicklerObject *self)
7148{
7149 PyObject_GC_UnTrack((PyObject *)self);
7150 Py_XDECREF(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007151 Py_XDECREF(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007152 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007153 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007154 Py_XDECREF(self->stack);
7155 Py_XDECREF(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007156 Py_XDECREF(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007157 if (self->buffer.buf != NULL) {
7158 PyBuffer_Release(&self->buffer);
7159 self->buffer.buf = NULL;
7160 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007161
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007162 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007163 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007164 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007165 PyMem_Free(self->encoding);
7166 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007167
7168 Py_TYPE(self)->tp_free((PyObject *)self);
7169}
7170
7171static int
7172Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7173{
7174 Py_VISIT(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007175 Py_VISIT(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007176 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007177 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007178 Py_VISIT(self->stack);
7179 Py_VISIT(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007180 Py_VISIT(self->buffers);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007181 return 0;
7182}
7183
7184static int
7185Unpickler_clear(UnpicklerObject *self)
7186{
7187 Py_CLEAR(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007188 Py_CLEAR(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007189 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007190 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007191 Py_CLEAR(self->stack);
7192 Py_CLEAR(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007193 Py_CLEAR(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007194 if (self->buffer.buf != NULL) {
7195 PyBuffer_Release(&self->buffer);
7196 self->buffer.buf = NULL;
7197 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007198
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007199 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007200 PyMem_Free(self->marks);
7201 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007202 PyMem_Free(self->input_line);
7203 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007204 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007205 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007206 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007207 self->errors = NULL;
7208
7209 return 0;
7210}
7211
Larry Hastings61272b72014-01-07 12:41:53 -08007212/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007213
7214_pickle.Unpickler.__init__
7215
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007216 file: object
7217 *
7218 fix_imports: bool = True
7219 encoding: str = 'ASCII'
7220 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007221 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007222
7223This takes a binary file for reading a pickle data stream.
7224
7225The protocol version of the pickle is detected automatically, so no
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007226protocol argument is needed. Bytes past the pickled object's
7227representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007228
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007229The argument *file* must have two methods, a read() method that takes
7230an integer argument, and a readline() method that requires no
7231arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007232binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007233other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007234
7235Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007236which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007237generated by Python 2. If *fix_imports* is True, pickle will try to
7238map the old Python 2 names to the new names used in Python 3. The
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007239*encoding* and *errors* tell pickle how to decode 8-bit string
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007240instances pickled by Python 2; these default to 'ASCII' and 'strict',
7241respectively. The *encoding* can be 'bytes' to read these 8-bit
7242string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007243[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007244
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007245static int
Larry Hastings89964c42015-04-14 18:07:59 -04007246_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7247 int fix_imports, const char *encoding,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007248 const char *errors, PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007249/*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007250{
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02007251 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007252
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007253 /* In case of multiple __init__() calls, clear previous content. */
7254 if (self->read != NULL)
7255 (void)Unpickler_clear(self);
7256
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007257 if (_Unpickler_SetInputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007258 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007259
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007260 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007261 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007262
Antoine Pitrou91f43802019-05-26 17:10:09 +02007263 if (_Unpickler_SetBuffers(self, buffers) < 0)
7264 return -1;
7265
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007266 self->fix_imports = fix_imports;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007267
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007268 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7269 &self->pers_func, &self->pers_func_self) < 0)
7270 {
7271 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007272 }
7273
7274 self->stack = (Pdata *)Pdata_New();
7275 if (self->stack == NULL)
Zackery Spytz4b430e52018-09-28 23:48:46 -06007276 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007278 self->memo_size = 32;
7279 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007280 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007281 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007282
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007283 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00007284
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007285 return 0;
7286}
7287
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007288
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007289/* Define a proxy object for the Unpickler's internal memo object. This is to
7290 * avoid breaking code like:
7291 * unpickler.memo.clear()
7292 * and
7293 * unpickler.memo = saved_memo
7294 * Is this a good idea? Not really, but we don't want to break code that uses
7295 * it. Note that we don't implement the entire mapping API here. This is
7296 * intentional, as these should be treated as black-box implementation details.
7297 *
7298 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02007299 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007300 */
7301
Larry Hastings61272b72014-01-07 12:41:53 -08007302/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007303_pickle.UnpicklerMemoProxy.clear
7304
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007305Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08007306[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007307
Larry Hastings3cceb382014-01-04 11:09:09 -08007308static PyObject *
7309_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007310/*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007311{
7312 _Unpickler_MemoCleanup(self->unpickler);
7313 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7314 if (self->unpickler->memo == NULL)
7315 return NULL;
7316 Py_RETURN_NONE;
7317}
7318
Larry Hastings61272b72014-01-07 12:41:53 -08007319/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007320_pickle.UnpicklerMemoProxy.copy
7321
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007322Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08007323[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007324
Larry Hastings3cceb382014-01-04 11:09:09 -08007325static PyObject *
7326_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007327/*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007328{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007329 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007330 PyObject *new_memo = PyDict_New();
7331 if (new_memo == NULL)
7332 return NULL;
7333
7334 for (i = 0; i < self->unpickler->memo_size; i++) {
7335 int status;
7336 PyObject *key, *value;
7337
7338 value = self->unpickler->memo[i];
7339 if (value == NULL)
7340 continue;
7341
7342 key = PyLong_FromSsize_t(i);
7343 if (key == NULL)
7344 goto error;
7345 status = PyDict_SetItem(new_memo, key, value);
7346 Py_DECREF(key);
7347 if (status < 0)
7348 goto error;
7349 }
7350 return new_memo;
7351
7352error:
7353 Py_DECREF(new_memo);
7354 return NULL;
7355}
7356
Larry Hastings61272b72014-01-07 12:41:53 -08007357/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007358_pickle.UnpicklerMemoProxy.__reduce__
7359
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007360Implement pickling support.
Larry Hastings61272b72014-01-07 12:41:53 -08007361[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007362
Larry Hastings3cceb382014-01-04 11:09:09 -08007363static PyObject *
7364_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007365/*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007366{
7367 PyObject *reduce_value;
7368 PyObject *constructor_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08007369 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007370 if (contents == NULL)
7371 return NULL;
7372
7373 reduce_value = PyTuple_New(2);
7374 if (reduce_value == NULL) {
7375 Py_DECREF(contents);
7376 return NULL;
7377 }
7378 constructor_args = PyTuple_New(1);
7379 if (constructor_args == NULL) {
7380 Py_DECREF(contents);
7381 Py_DECREF(reduce_value);
7382 return NULL;
7383 }
7384 PyTuple_SET_ITEM(constructor_args, 0, contents);
7385 Py_INCREF((PyObject *)&PyDict_Type);
7386 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7387 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7388 return reduce_value;
7389}
7390
7391static PyMethodDef unpicklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007392 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7393 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7394 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007395 {NULL, NULL} /* sentinel */
7396};
7397
7398static void
7399UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7400{
7401 PyObject_GC_UnTrack(self);
7402 Py_XDECREF(self->unpickler);
7403 PyObject_GC_Del((PyObject *)self);
7404}
7405
7406static int
7407UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7408 visitproc visit, void *arg)
7409{
7410 Py_VISIT(self->unpickler);
7411 return 0;
7412}
7413
7414static int
7415UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7416{
7417 Py_CLEAR(self->unpickler);
7418 return 0;
7419}
7420
7421static PyTypeObject UnpicklerMemoProxyType = {
7422 PyVarObject_HEAD_INIT(NULL, 0)
7423 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7424 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7425 0,
7426 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007427 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007428 0, /* tp_getattr */
7429 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007430 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007431 0, /* tp_repr */
7432 0, /* tp_as_number */
7433 0, /* tp_as_sequence */
7434 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00007435 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007436 0, /* tp_call */
7437 0, /* tp_str */
7438 PyObject_GenericGetAttr, /* tp_getattro */
7439 PyObject_GenericSetAttr, /* tp_setattro */
7440 0, /* tp_as_buffer */
7441 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7442 0, /* tp_doc */
7443 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7444 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7445 0, /* tp_richcompare */
7446 0, /* tp_weaklistoffset */
7447 0, /* tp_iter */
7448 0, /* tp_iternext */
7449 unpicklerproxy_methods, /* tp_methods */
7450};
7451
7452static PyObject *
7453UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7454{
7455 UnpicklerMemoProxyObject *self;
7456
7457 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7458 &UnpicklerMemoProxyType);
7459 if (self == NULL)
7460 return NULL;
7461 Py_INCREF(unpickler);
7462 self->unpickler = unpickler;
7463 PyObject_GC_Track(self);
7464 return (PyObject *)self;
7465}
7466
7467/*****************************************************************************/
7468
7469
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007470static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007471Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007472{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007473 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007474}
7475
7476static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007477Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007478{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007479 PyObject **new_memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007480 size_t new_memo_size = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007481
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007482 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007483 PyErr_SetString(PyExc_TypeError,
7484 "attribute deletion is not supported");
7485 return -1;
7486 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007487
Andy Lesterdffe4c02020-03-04 07:15:20 -06007488 if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007489 UnpicklerObject *unpickler =
7490 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7491
7492 new_memo_size = unpickler->memo_size;
7493 new_memo = _Unpickler_NewMemo(new_memo_size);
7494 if (new_memo == NULL)
7495 return -1;
7496
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007497 for (size_t i = 0; i < new_memo_size; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007498 Py_XINCREF(unpickler->memo[i]);
7499 new_memo[i] = unpickler->memo[i];
7500 }
7501 }
7502 else if (PyDict_Check(obj)) {
7503 Py_ssize_t i = 0;
7504 PyObject *key, *value;
7505
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02007506 new_memo_size = PyDict_GET_SIZE(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007507 new_memo = _Unpickler_NewMemo(new_memo_size);
7508 if (new_memo == NULL)
7509 return -1;
7510
7511 while (PyDict_Next(obj, &i, &key, &value)) {
7512 Py_ssize_t idx;
7513 if (!PyLong_Check(key)) {
7514 PyErr_SetString(PyExc_TypeError,
7515 "memo key must be integers");
7516 goto error;
7517 }
7518 idx = PyLong_AsSsize_t(key);
7519 if (idx == -1 && PyErr_Occurred())
7520 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02007521 if (idx < 0) {
7522 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02007523 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02007524 goto error;
7525 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007526 if (_Unpickler_MemoPut(self, idx, value) < 0)
7527 goto error;
7528 }
7529 }
7530 else {
7531 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02007532 "'memo' attribute must be an UnpicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007533 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007534 return -1;
7535 }
7536
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007537 _Unpickler_MemoCleanup(self);
7538 self->memo_size = new_memo_size;
7539 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007540
7541 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007542
7543 error:
7544 if (new_memo_size) {
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007545 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007546 Py_XDECREF(new_memo[i]);
7547 }
Victor Stinner00d7abd2020-12-01 09:56:42 +01007548 PyMem_Free(new_memo);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007549 }
7550 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007551}
7552
7553static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007554Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007555{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007556 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007557 PyErr_SetString(PyExc_AttributeError, "persistent_load");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007558 return NULL;
7559 }
7560 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007561}
7562
7563static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007564Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007565{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007566 if (value == NULL) {
7567 PyErr_SetString(PyExc_TypeError,
7568 "attribute deletion is not supported");
7569 return -1;
7570 }
7571 if (!PyCallable_Check(value)) {
7572 PyErr_SetString(PyExc_TypeError,
7573 "persistent_load must be a callable taking "
7574 "one argument");
7575 return -1;
7576 }
7577
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007578 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007579 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03007580 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007581
7582 return 0;
7583}
7584
7585static PyGetSetDef Unpickler_getsets[] = {
7586 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7587 {"persistent_load", (getter)Unpickler_get_persload,
7588 (setter)Unpickler_set_persload},
7589 {NULL}
7590};
7591
7592static PyTypeObject Unpickler_Type = {
7593 PyVarObject_HEAD_INIT(NULL, 0)
7594 "_pickle.Unpickler", /*tp_name*/
7595 sizeof(UnpicklerObject), /*tp_basicsize*/
7596 0, /*tp_itemsize*/
7597 (destructor)Unpickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007598 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007599 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007600 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007601 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007602 0, /*tp_repr*/
7603 0, /*tp_as_number*/
7604 0, /*tp_as_sequence*/
7605 0, /*tp_as_mapping*/
7606 0, /*tp_hash*/
7607 0, /*tp_call*/
7608 0, /*tp_str*/
7609 0, /*tp_getattro*/
7610 0, /*tp_setattro*/
7611 0, /*tp_as_buffer*/
7612 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007613 _pickle_Unpickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007614 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7615 (inquiry)Unpickler_clear, /*tp_clear*/
7616 0, /*tp_richcompare*/
7617 0, /*tp_weaklistoffset*/
7618 0, /*tp_iter*/
7619 0, /*tp_iternext*/
7620 Unpickler_methods, /*tp_methods*/
7621 0, /*tp_members*/
7622 Unpickler_getsets, /*tp_getset*/
7623 0, /*tp_base*/
7624 0, /*tp_dict*/
7625 0, /*tp_descr_get*/
7626 0, /*tp_descr_set*/
7627 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007628 _pickle_Unpickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007629 PyType_GenericAlloc, /*tp_alloc*/
7630 PyType_GenericNew, /*tp_new*/
7631 PyObject_GC_Del, /*tp_free*/
7632 0, /*tp_is_gc*/
7633};
7634
Larry Hastings61272b72014-01-07 12:41:53 -08007635/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007636
7637_pickle.dump
7638
7639 obj: object
7640 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007641 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007642 *
7643 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007644 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007645
7646Write a pickled representation of obj to the open file object file.
7647
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007648This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7649be more efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007650
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007651The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007652protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7653protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007654with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007655
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007656Specifying a negative protocol version selects the highest protocol
7657version supported. The higher the protocol used, the more recent the
7658version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007659
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007660The *file* argument must have a write() method that accepts a single
7661bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00007662writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007663this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007664
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007665If *fix_imports* is True and protocol is less than 3, pickle will try
7666to map the new Python 3 names to the old module names used in Python
76672, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007668
7669If *buffer_callback* is None (the default), buffer views are serialized
7670into *file* as part of the pickle stream. It is an error if
7671*buffer_callback* is not None and *protocol* is None or smaller than 5.
7672
Larry Hastings61272b72014-01-07 12:41:53 -08007673[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007674
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007675static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007676_pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007677 PyObject *protocol, int fix_imports,
7678 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007679/*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007680{
7681 PicklerObject *pickler = _Pickler_New();
7682
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007683 if (pickler == NULL)
7684 return NULL;
7685
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007686 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007687 goto error;
7688
7689 if (_Pickler_SetOutputStream(pickler, file) < 0)
7690 goto error;
7691
Antoine Pitrou91f43802019-05-26 17:10:09 +02007692 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7693 goto error;
7694
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007695 if (dump(pickler, obj) < 0)
7696 goto error;
7697
7698 if (_Pickler_FlushToFile(pickler) < 0)
7699 goto error;
7700
7701 Py_DECREF(pickler);
7702 Py_RETURN_NONE;
7703
7704 error:
7705 Py_XDECREF(pickler);
7706 return NULL;
7707}
7708
Larry Hastings61272b72014-01-07 12:41:53 -08007709/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007710
7711_pickle.dumps
7712
7713 obj: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007714 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007715 *
7716 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007717 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007718
7719Return the pickled representation of the object as a bytes object.
7720
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007721The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007722protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7723protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007724with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007725
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007726Specifying a negative protocol version selects the highest protocol
7727version supported. The higher the protocol used, the more recent the
7728version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007729
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007730If *fix_imports* is True and *protocol* is less than 3, pickle will
7731try to map the new Python 3 names to the old module names used in
7732Python 2, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007733
7734If *buffer_callback* is None (the default), buffer views are serialized
into the returned bytes as part of the pickle stream.  It is an error if
7736*buffer_callback* is not None and *protocol* is None or smaller than 5.
7737
Larry Hastings61272b72014-01-07 12:41:53 -08007738[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007739
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007740static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007741_pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007742 int fix_imports, PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007743/*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007744{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007745 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007746 PicklerObject *pickler = _Pickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007747
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007748 if (pickler == NULL)
7749 return NULL;
7750
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007751 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007752 goto error;
7753
Antoine Pitrou91f43802019-05-26 17:10:09 +02007754 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7755 goto error;
7756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007757 if (dump(pickler, obj) < 0)
7758 goto error;
7759
7760 result = _Pickler_GetString(pickler);
7761 Py_DECREF(pickler);
7762 return result;
7763
7764 error:
7765 Py_XDECREF(pickler);
7766 return NULL;
7767}
7768
Larry Hastings61272b72014-01-07 12:41:53 -08007769/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007770
7771_pickle.load
7772
7773 file: object
7774 *
7775 fix_imports: bool = True
7776 encoding: str = 'ASCII'
7777 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007778 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007779
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007780Read and return an object from the pickle data stored in a file.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007781
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007782This is equivalent to ``Unpickler(file).load()``, but may be more
7783efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007784
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007785The protocol version of the pickle is detected automatically, so no
7786protocol argument is needed. Bytes past the pickled object's
7787representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007788
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007789The argument *file* must have two methods, a read() method that takes
7790an integer argument, and a readline() method that requires no
7791arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007792binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007793other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007794
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007795Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007796which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007797generated by Python 2. If *fix_imports* is True, pickle will try to
7798map the old Python 2 names to the new names used in Python 3. The
7799*encoding* and *errors* tell pickle how to decode 8-bit string
7800instances pickled by Python 2; these default to 'ASCII' and 'strict',
7801respectively. The *encoding* can be 'bytes' to read these 8-bit
7802string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007803[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007804
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007805static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007806_pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007807 const char *encoding, const char *errors,
7808 PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007809/*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007810{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007811 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007812 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007813
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007814 if (unpickler == NULL)
7815 return NULL;
7816
7817 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7818 goto error;
7819
7820 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7821 goto error;
7822
Antoine Pitrou91f43802019-05-26 17:10:09 +02007823 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7824 goto error;
7825
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007826 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007827
7828 result = load(unpickler);
7829 Py_DECREF(unpickler);
7830 return result;
7831
7832 error:
7833 Py_XDECREF(unpickler);
7834 return NULL;
7835}
7836
Larry Hastings61272b72014-01-07 12:41:53 -08007837/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007838
7839_pickle.loads
7840
7841 data: object
Serhiy Storchaka531d1e52020-05-02 09:38:01 +03007842 /
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007843 *
7844 fix_imports: bool = True
7845 encoding: str = 'ASCII'
7846 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007847 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007848
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007849Read and return an object from the given pickle data.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007850
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007851The protocol version of the pickle is detected automatically, so no
7852protocol argument is needed. Bytes past the pickled object's
7853representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007854
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007855Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007856which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007857generated by Python 2. If *fix_imports* is True, pickle will try to
7858map the old Python 2 names to the new names used in Python 3. The
7859*encoding* and *errors* tell pickle how to decode 8-bit string
7860instances pickled by Python 2; these default to 'ASCII' and 'strict',
7861respectively. The *encoding* can be 'bytes' to read these 8-bit
7862string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007863[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007864
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007865static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007866_pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007867 const char *encoding, const char *errors,
7868 PyObject *buffers)
Serhiy Storchaka531d1e52020-05-02 09:38:01 +03007869/*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007870{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007871 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007872 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007873
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007874 if (unpickler == NULL)
7875 return NULL;
7876
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007877 if (_Unpickler_SetStringInput(unpickler, data) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007878 goto error;
7879
7880 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7881 goto error;
7882
Antoine Pitrou91f43802019-05-26 17:10:09 +02007883 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7884 goto error;
7885
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007886 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007887
7888 result = load(unpickler);
7889 Py_DECREF(unpickler);
7890 return result;
7891
7892 error:
7893 Py_XDECREF(unpickler);
7894 return NULL;
7895}
7896
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007897static struct PyMethodDef pickle_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007898 _PICKLE_DUMP_METHODDEF
7899 _PICKLE_DUMPS_METHODDEF
7900 _PICKLE_LOAD_METHODDEF
7901 _PICKLE_LOADS_METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007902 {NULL, NULL} /* sentinel */
7903};
7904
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007905static int
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007906pickle_clear(PyObject *m)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007907{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007908 _Pickle_ClearState(_Pickle_GetState(m));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007909 return 0;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007910}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007911
Stefan Krahf483b0f2013-12-14 13:43:10 +01007912static void
7913pickle_free(PyObject *m)
7914{
7915 _Pickle_ClearState(_Pickle_GetState(m));
7916}
7917
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007918static int
7919pickle_traverse(PyObject *m, visitproc visit, void *arg)
7920{
7921 PickleState *st = _Pickle_GetState(m);
7922 Py_VISIT(st->PickleError);
7923 Py_VISIT(st->PicklingError);
7924 Py_VISIT(st->UnpicklingError);
7925 Py_VISIT(st->dispatch_table);
7926 Py_VISIT(st->extension_registry);
7927 Py_VISIT(st->extension_cache);
7928 Py_VISIT(st->inverted_registry);
7929 Py_VISIT(st->name_mapping_2to3);
7930 Py_VISIT(st->import_mapping_2to3);
7931 Py_VISIT(st->name_mapping_3to2);
7932 Py_VISIT(st->import_mapping_3to2);
7933 Py_VISIT(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03007934 Py_VISIT(st->getattr);
Hai Shi1f577ce2020-03-02 14:28:44 +08007935 Py_VISIT(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007936 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007937}
7938
7939static struct PyModuleDef _picklemodule = {
7940 PyModuleDef_HEAD_INIT,
Stefan Krahf483b0f2013-12-14 13:43:10 +01007941 "_pickle", /* m_name */
7942 pickle_module_doc, /* m_doc */
7943 sizeof(PickleState), /* m_size */
7944 pickle_methods, /* m_methods */
7945 NULL, /* m_reload */
7946 pickle_traverse, /* m_traverse */
7947 pickle_clear, /* m_clear */
7948 (freefunc)pickle_free /* m_free */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007949};
7950
7951PyMODINIT_FUNC
7952PyInit__pickle(void)
7953{
7954 PyObject *m;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007955 PickleState *st;
7956
7957 m = PyState_FindModule(&_picklemodule);
7958 if (m) {
7959 Py_INCREF(m);
7960 return m;
7961 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007962
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007963 if (PyType_Ready(&Pdata_Type) < 0)
7964 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007965 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7966 return NULL;
7967 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7968 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007969
7970 /* Create the module and add the functions. */
7971 m = PyModule_Create(&_picklemodule);
7972 if (m == NULL)
7973 return NULL;
7974
Antoine Pitrou91f43802019-05-26 17:10:09 +02007975 /* Add types */
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007976 if (PyModule_AddType(m, &Pickler_Type) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007977 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007978 }
7979 if (PyModule_AddType(m, &Unpickler_Type) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007980 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007981 }
7982 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02007983 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09007984 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007985
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007986 st = _Pickle_GetState(m);
7987
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007988 /* Initialize the exceptions. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007989 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7990 if (st->PickleError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007991 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007992 st->PicklingError = \
7993 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7994 if (st->PicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007995 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007996 st->UnpicklingError = \
7997 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7998 if (st->UnpicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007999 return NULL;
8000
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008001 Py_INCREF(st->PickleError);
8002 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008003 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008004 Py_INCREF(st->PicklingError);
8005 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008006 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008007 Py_INCREF(st->UnpicklingError);
8008 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008009 return NULL;
8010
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008011 if (_Pickle_InitState(st) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008012 return NULL;
8013
8014 return m;
8015}