blob: 8dea2c6ea0c203e92c05c3aef48ffe95fe1953cf [file] [log] [blame]
/* pickle accelerator C extension: _pickle module.
 *
 * It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
 * and as an extension module (Py_BUILD_CORE_MODULE define) on other
 * platforms. */
Eric Snow2ebc5ce2017-09-07 23:51:28 -06006
Victor Stinner5c75f372019-04-17 23:02:26 +02007#if !defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE_MODULE)
8# error "Py_BUILD_CORE_BUILTIN or Py_BUILD_CORE_MODULE must be defined"
Eric Snow2ebc5ce2017-09-07 23:51:28 -06009#endif
10
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000011#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020012#include "structmember.h" // PyMemberDef
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000013
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -080014PyDoc_STRVAR(pickle_module_doc,
15"Optimized C implementation for the Python pickle module.");
16
Larry Hastings61272b72014-01-07 12:41:53 -080017/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080018module _pickle
Larry Hastingsc2047262014-01-25 20:43:29 -080019class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
20class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
21class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
22class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
Larry Hastings61272b72014-01-07 12:41:53 -080023[clinic start generated code]*/
Serhiy Storchaka1009bf12015-04-03 23:53:51 +030024/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -080025
/* Pickle protocol version limits.

   DEFAULT_PROTOCOL may only be raised once the oldest Python version that is
   still supported already understands the new value.  HIGHEST_PROTOCOL has to
   be raised whenever new opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 4,
    HIGHEST_PROTOCOL = 5
};
33
/* Pickle opcodes.

   Each opcode is a single character constant.  This list has to be kept in
   step with pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    /* Original opcodes. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO    = '\x80',
    NEWOBJ   = '\x81',
    EXT1     = '\x82',
    EXT2     = '\x83',
    EXT4     = '\x84',
    TUPLE1   = '\x85',
    TUPLE2   = '\x86',
    TUPLE3   = '\x87',
    NEWTRUE  = '\x88',
    NEWFALSE = '\x89',
    LONG1    = '\x8a',
    LONG4    = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95',

    /* Protocol 5. */
    BYTEARRAY8      = '\x96',
    NEXT_BUFFER     = '\x97',
    READONLY_BUFFER = '\x98'
};
114
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing an
       APPENDS/SETITEMS.  Should mirror pickle.Pickler._BATCHSIZE; nothing
       breaks if the two drift apart, but it is unclear that a difference
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth after which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size used when unpickling (disabled on unpeekable
       streams). */
    PREFETCH = 8192 * 16,

    /* Frame sizing parameters. */
    FRAME_SIZE_MIN = 4,
    FRAME_SIZE_TARGET = 64 * 1024,
    FRAME_HEADER_SIZE = 9
};
136
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800137/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800139/* State of the pickle module, per PEP 3121. */
140typedef struct {
141 /* Exception classes for pickle. */
142 PyObject *PickleError;
143 PyObject *PicklingError;
144 PyObject *UnpicklingError;
Larry Hastings61272b72014-01-07 12:41:53 -0800145
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800146 /* copyreg.dispatch_table, {type_object: pickling_function} */
147 PyObject *dispatch_table;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000148
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800149 /* For the extension opcodes EXT1, EXT2 and EXT4. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000150
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800151 /* copyreg._extension_registry, {(module_name, function_name): code} */
152 PyObject *extension_registry;
153 /* copyreg._extension_cache, {code: object} */
154 PyObject *extension_cache;
155 /* copyreg._inverted_registry, {code: (module_name, function_name)} */
156 PyObject *inverted_registry;
157
158 /* Import mappings for compatibility with Python 2.x */
159
160 /* _compat_pickle.NAME_MAPPING,
161 {(oldmodule, oldname): (newmodule, newname)} */
162 PyObject *name_mapping_2to3;
163 /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
164 PyObject *import_mapping_2to3;
165 /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
166 PyObject *name_mapping_3to2;
167 PyObject *import_mapping_3to2;
168
169 /* codecs.encode, used for saving bytes in older protocols */
170 PyObject *codecs_encode;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300171 /* builtins.getattr, used for saving nested names with protocol < 4 */
172 PyObject *getattr;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300173 /* functools.partial, used for implementing __newobj_ex__ with protocols
174 2 and 3 */
175 PyObject *partial;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800176} PickleState;
177
178/* Forward declaration of the _pickle module definition. */
179static struct PyModuleDef _picklemodule;
180
181/* Given a module object, get its per-module state. */
182static PickleState *
183_Pickle_GetState(PyObject *module)
184{
185 return (PickleState *)PyModule_GetState(module);
186}
187
188/* Find the module instance imported in the currently running sub-interpreter
189 and get its state. */
190static PickleState *
191_Pickle_GetGlobalState(void)
192{
193 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
194}
195
196/* Clear the given pickle module state. */
197static void
198_Pickle_ClearState(PickleState *st)
199{
200 Py_CLEAR(st->PickleError);
201 Py_CLEAR(st->PicklingError);
202 Py_CLEAR(st->UnpicklingError);
203 Py_CLEAR(st->dispatch_table);
204 Py_CLEAR(st->extension_registry);
205 Py_CLEAR(st->extension_cache);
206 Py_CLEAR(st->inverted_registry);
207 Py_CLEAR(st->name_mapping_2to3);
208 Py_CLEAR(st->import_mapping_2to3);
209 Py_CLEAR(st->name_mapping_3to2);
210 Py_CLEAR(st->import_mapping_3to2);
211 Py_CLEAR(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300212 Py_CLEAR(st->getattr);
Victor Stinner9ba97df2015-11-17 12:15:07 +0100213 Py_CLEAR(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800214}
215
216/* Initialize the given pickle module state. */
217static int
218_Pickle_InitState(PickleState *st)
219{
220 PyObject *copyreg = NULL;
221 PyObject *compat_pickle = NULL;
222 PyObject *codecs = NULL;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300223 PyObject *functools = NULL;
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200224 _Py_IDENTIFIER(getattr);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800225
Serhiy Storchakabb86bf42018-12-11 08:28:18 +0200226 st->getattr = _PyEval_GetBuiltinId(&PyId_getattr);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300227 if (st->getattr == NULL)
228 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300229
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800230 copyreg = PyImport_ImportModule("copyreg");
231 if (!copyreg)
232 goto error;
233 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
234 if (!st->dispatch_table)
235 goto error;
236 if (!PyDict_CheckExact(st->dispatch_table)) {
237 PyErr_Format(PyExc_RuntimeError,
238 "copyreg.dispatch_table should be a dict, not %.200s",
239 Py_TYPE(st->dispatch_table)->tp_name);
240 goto error;
241 }
242 st->extension_registry = \
243 PyObject_GetAttrString(copyreg, "_extension_registry");
244 if (!st->extension_registry)
245 goto error;
246 if (!PyDict_CheckExact(st->extension_registry)) {
247 PyErr_Format(PyExc_RuntimeError,
248 "copyreg._extension_registry should be a dict, "
249 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
250 goto error;
251 }
252 st->inverted_registry = \
253 PyObject_GetAttrString(copyreg, "_inverted_registry");
254 if (!st->inverted_registry)
255 goto error;
256 if (!PyDict_CheckExact(st->inverted_registry)) {
257 PyErr_Format(PyExc_RuntimeError,
258 "copyreg._inverted_registry should be a dict, "
259 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
260 goto error;
261 }
262 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
263 if (!st->extension_cache)
264 goto error;
265 if (!PyDict_CheckExact(st->extension_cache)) {
266 PyErr_Format(PyExc_RuntimeError,
267 "copyreg._extension_cache should be a dict, "
268 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
269 goto error;
270 }
271 Py_CLEAR(copyreg);
272
273 /* Load the 2.x -> 3.x stdlib module mapping tables */
274 compat_pickle = PyImport_ImportModule("_compat_pickle");
275 if (!compat_pickle)
276 goto error;
277 st->name_mapping_2to3 = \
278 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
279 if (!st->name_mapping_2to3)
280 goto error;
281 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
282 PyErr_Format(PyExc_RuntimeError,
283 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
284 Py_TYPE(st->name_mapping_2to3)->tp_name);
285 goto error;
286 }
287 st->import_mapping_2to3 = \
288 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
289 if (!st->import_mapping_2to3)
290 goto error;
291 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
292 PyErr_Format(PyExc_RuntimeError,
293 "_compat_pickle.IMPORT_MAPPING should be a dict, "
294 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
295 goto error;
296 }
297 /* ... and the 3.x -> 2.x mapping tables */
298 st->name_mapping_3to2 = \
299 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
300 if (!st->name_mapping_3to2)
301 goto error;
302 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
303 PyErr_Format(PyExc_RuntimeError,
304 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
305 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
306 goto error;
307 }
308 st->import_mapping_3to2 = \
309 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
310 if (!st->import_mapping_3to2)
311 goto error;
312 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
313 PyErr_Format(PyExc_RuntimeError,
314 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
315 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
316 goto error;
317 }
318 Py_CLEAR(compat_pickle);
319
320 codecs = PyImport_ImportModule("codecs");
321 if (codecs == NULL)
322 goto error;
323 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
324 if (st->codecs_encode == NULL) {
325 goto error;
326 }
327 if (!PyCallable_Check(st->codecs_encode)) {
328 PyErr_Format(PyExc_RuntimeError,
329 "codecs.encode should be a callable, not %.200s",
330 Py_TYPE(st->codecs_encode)->tp_name);
331 goto error;
332 }
333 Py_CLEAR(codecs);
334
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300335 functools = PyImport_ImportModule("functools");
336 if (!functools)
337 goto error;
338 st->partial = PyObject_GetAttrString(functools, "partial");
339 if (!st->partial)
340 goto error;
341 Py_CLEAR(functools);
342
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800343 return 0;
344
345 error:
346 Py_CLEAR(copyreg);
347 Py_CLEAR(compat_pickle);
348 Py_CLEAR(codecs);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300349 Py_CLEAR(functools);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800350 _Pickle_ClearState(st);
351 return -1;
352}
353
354/* Helper for calling a function with a single argument quickly.
355
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800356 This function steals the reference of the given argument. */
357static PyObject *
358_Pickle_FastCall(PyObject *func, PyObject *obj)
359{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800360 PyObject *result;
361
Petr Viktorinffd97532020-02-11 17:46:57 +0100362 result = PyObject_CallOneArg(func, obj);
Victor Stinner75210692016-08-19 18:59:15 +0200363 Py_DECREF(obj);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800364 return result;
365}
366
367/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000368
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200369/* Retrieve and deconstruct a method for avoiding a reference cycle
370 (pickler -> bound method of pickler -> pickler) */
371static int
372init_method_ref(PyObject *self, _Py_Identifier *name,
373 PyObject **method_func, PyObject **method_self)
374{
375 PyObject *func, *func2;
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200376 int ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200377
378 /* *method_func and *method_self should be consistent. All refcount decrements
379 should be occurred after setting *method_self and *method_func. */
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200380 ret = _PyObject_LookupAttrId(self, name, &func);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200381 if (func == NULL) {
382 *method_self = NULL;
383 Py_CLEAR(*method_func);
Serhiy Storchakaf320be72018-01-25 10:49:40 +0200384 return ret;
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200385 }
386
387 if (PyMethod_Check(func) && PyMethod_GET_SELF(func) == self) {
388 /* Deconstruct a bound Python method */
389 func2 = PyMethod_GET_FUNCTION(func);
390 Py_INCREF(func2);
391 *method_self = self; /* borrowed */
392 Py_XSETREF(*method_func, func2);
393 Py_DECREF(func);
394 return 0;
395 }
396 else {
397 *method_self = NULL;
398 Py_XSETREF(*method_func, func);
399 return 0;
400 }
401}
402
403/* Bind a method if it was deconstructed */
404static PyObject *
405reconstruct_method(PyObject *func, PyObject *self)
406{
407 if (self) {
408 return PyMethod_New(func, self);
409 }
410 else {
411 Py_INCREF(func);
412 return func;
413 }
414}
415
416static PyObject *
417call_method(PyObject *func, PyObject *self, PyObject *obj)
418{
419 if (self) {
420 return PyObject_CallFunctionObjArgs(func, self, obj, NULL);
421 }
422 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100423 return PyObject_CallOneArg(func, obj);
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200424 }
425}
426
427/*************************************************************************/
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429/* Internal data type used as the unpickling stack. */
430typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000431 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000432 PyObject **data;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200433 int mark_set; /* is MARK set? */
434 Py_ssize_t fence; /* position of top MARK or 0 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000435 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000436} Pdata;
437
438static void
439Pdata_dealloc(Pdata *self)
440{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200441 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000442 while (--i >= 0) {
443 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000444 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000445 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000446 PyObject_Del(self);
447}
448
449static PyTypeObject Pdata_Type = {
450 PyVarObject_HEAD_INIT(NULL, 0)
451 "_pickle.Pdata", /*tp_name*/
452 sizeof(Pdata), /*tp_basicsize*/
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +0200453 sizeof(PyObject *), /*tp_itemsize*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000454 (destructor)Pdata_dealloc, /*tp_dealloc*/
455};
456
457static PyObject *
458Pdata_New(void)
459{
460 Pdata *self;
461
462 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
463 return NULL;
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100464 Py_SET_SIZE(self, 0);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200465 self->mark_set = 0;
466 self->fence = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000467 self->allocated = 8;
468 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000469 if (self->data)
470 return (PyObject *)self;
471 Py_DECREF(self);
472 return PyErr_NoMemory();
473}
474
475
476/* Retain only the initial clearto items. If clearto >= the current
477 * number of items, this is a (non-erroneous) NOP.
478 */
479static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200480Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000481{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200482 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000483
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200484 assert(clearto >= self->fence);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000485 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000486 return 0;
487
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000488 while (--i >= clearto) {
489 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100491 Py_SET_SIZE(self, clearto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000492 return 0;
493}
494
495static int
496Pdata_grow(Pdata *self)
497{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000498 PyObject **data = self->data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200499 size_t allocated = (size_t)self->allocated;
500 size_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000501
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000502 new_allocated = (allocated >> 3) + 6;
503 /* check for integer overflow */
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200504 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000505 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000506 new_allocated += allocated;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500507 PyMem_RESIZE(data, PyObject *, new_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000508 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000509 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000510
511 self->data = data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200512 self->allocated = (Py_ssize_t)new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000513 return 0;
514
515 nomemory:
516 PyErr_NoMemory();
517 return -1;
518}
519
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200520static int
521Pdata_stack_underflow(Pdata *self)
522{
523 PickleState *st = _Pickle_GetGlobalState();
524 PyErr_SetString(st->UnpicklingError,
525 self->mark_set ?
526 "unexpected MARK found" :
527 "unpickling stack underflow");
528 return -1;
529}
530
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000531/* D is a Pdata*. Pop the topmost element and store it into V, which
532 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
533 * is raised and V is set to NULL.
534 */
535static PyObject *
536Pdata_pop(Pdata *self)
537{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200538 if (Py_SIZE(self) <= self->fence) {
539 Pdata_stack_underflow(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000540 return NULL;
541 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100542 Py_SET_SIZE(self, Py_SIZE(self) - 1);
543 return self->data[Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000544}
545#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
546
547static int
548Pdata_push(Pdata *self, PyObject *obj)
549{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000550 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000551 return -1;
552 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100553 self->data[Py_SIZE(self)] = obj;
554 Py_SET_SIZE(self, Py_SIZE(self) + 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000555 return 0;
556}
557
/* Push O on stack D, handing ownership of O over to the stack.  If the push
   fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) \
    do {                                    \
        if (Pdata_push((D), (O)) < 0) {     \
            return (ER);                    \
        }                                   \
    } while (0)

/* Push O on stack D after taking a new reference to O.  If the push fails,
   the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) \
    do {                                    \
        Py_INCREF((O));                     \
        if (Pdata_push((D), (O)) < 0) {     \
            return (ER);                    \
        }                                   \
    } while (0)
566
567static PyObject *
568Pdata_poptuple(Pdata *self, Py_ssize_t start)
569{
570 PyObject *tuple;
571 Py_ssize_t len, i, j;
572
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200573 if (start < self->fence) {
574 Pdata_stack_underflow(self);
575 return NULL;
576 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000577 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000578 tuple = PyTuple_New(len);
579 if (tuple == NULL)
580 return NULL;
581 for (i = start, j = 0; j < len; i++, j++)
582 PyTuple_SET_ITEM(tuple, j, self->data[i]);
583
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100584 Py_SET_SIZE(self, start);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000585 return tuple;
586}
587
588static PyObject *
589Pdata_poplist(Pdata *self, Py_ssize_t start)
590{
591 PyObject *list;
592 Py_ssize_t len, i, j;
593
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000594 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000595 list = PyList_New(len);
596 if (list == NULL)
597 return NULL;
598 for (i = start, j = 0; j < len; i++, j++)
599 PyList_SET_ITEM(list, j, self->data[i]);
600
Victor Stinner60ac6ed2020-02-07 23:18:08 +0100601 Py_SET_SIZE(self, start);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000602 return list;
603}
604
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000605typedef struct {
606 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200607 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000608} PyMemoEntry;
609
610typedef struct {
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700611 size_t mt_mask;
612 size_t mt_used;
613 size_t mt_allocated;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000614 PyMemoEntry *mt_table;
615} PyMemoTable;
616
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000617typedef struct PicklerObject {
618 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000619 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000620 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000621 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000622 PyObject *pers_func; /* persistent_id() method, can be NULL */
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200623 PyObject *pers_func_self; /* borrowed reference to self if pers_func
624 is an unbound method, NULL otherwise */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100625 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Pierre Glaser289f1f82019-05-08 23:08:25 +0200626 PyObject *reducer_override; /* hook for invoking user-defined callbacks
627 instead of save_global when pickling
628 functions and classes*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000629
630 PyObject *write; /* write() method of the output stream. */
631 PyObject *output_buffer; /* Write into a local bytearray buffer before
632 flushing to the stream. */
633 Py_ssize_t output_len; /* Length of output_buffer. */
634 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000635 int proto; /* Pickle protocol number, >= 0 */
636 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100637 int framing; /* True when framing is enabled, proto >= 4 */
638 Py_ssize_t frame_start; /* Position in output_buffer where the
Martin Pantera90a4a92016-05-30 04:04:50 +0000639 current frame begins. -1 if there
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100640 is no frame currently open. */
641
642 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000643 int fast; /* Enable fast mode if set to a true value.
644 The fast mode disable the usage of memo,
645 therefore speeding the pickling process by
646 not generating superfluous PUT opcodes. It
647 should not be used if with self-referential
648 objects. */
649 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000650 int fix_imports; /* Indicate whether Pickler should fix
651 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000652 PyObject *fast_memo;
Antoine Pitrou91f43802019-05-26 17:10:09 +0200653 PyObject *buffer_callback; /* Callback for out-of-band buffers, or NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000654} PicklerObject;
655
656typedef struct UnpicklerObject {
657 PyObject_HEAD
658 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000659
660 /* The unpickler memo is just an array of PyObject *s. Using a dict
661 is unnecessary, since the keys are contiguous ints. */
662 PyObject **memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700663 size_t memo_size; /* Capacity of the memo array */
664 size_t memo_len; /* Number of objects in the memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000666 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Serhiy Storchaka986375e2017-11-30 22:48:31 +0200667 PyObject *pers_func_self; /* borrowed reference to self if pers_func
668 is an unbound method, NULL otherwise */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000669
670 Py_buffer buffer;
671 char *input_buffer;
672 char *input_line;
673 Py_ssize_t input_len;
674 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000675 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100676
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000677 PyObject *read; /* read() method of the input stream. */
Antoine Pitrou91f43802019-05-26 17:10:09 +0200678 PyObject *readinto; /* readinto() method of the input stream. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000679 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000680 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrou91f43802019-05-26 17:10:09 +0200681 PyObject *buffers; /* iterable of out-of-band buffers, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000682
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000683 char *encoding; /* Name of the encoding to be used for
684 decoding strings pickled using Python
685 2.x. The default value is "ASCII" */
686 char *errors; /* Name of errors handling scheme to used when
687 decoding strings. The default value is
688 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500689 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000690 objects. */
691 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
692 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000693 int proto; /* Protocol of the pickle loaded. */
694 int fix_imports; /* Indicate whether Unpickler should fix
695 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000696} UnpicklerObject;
697
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200698typedef struct {
699 PyObject_HEAD
700 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
701} PicklerMemoProxyObject;
702
703typedef struct {
704 PyObject_HEAD
705 UnpicklerObject *unpickler;
706} UnpicklerMemoProxyObject;
707
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000708/* Forward declarations */
709static int save(PicklerObject *, PyObject *, int);
710static int save_reduce(PicklerObject *, PyObject *, PyObject *);
711static PyTypeObject Pickler_Type;
712static PyTypeObject Unpickler_Type;
713
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200714#include "clinic/_pickle.c.h"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000715
/*************************************************************************
 A custom hashtable mapping void* to Python ints.  The pickler uses it for
 memoization.  Compared with a PyDict, the custom hashtable skips a bunch of
 unnecessary object creation, which makes a huge performance difference. */

/* Number of slots in a freshly created memo table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value after each probe. */
#define PERTURB_SHIFT 5
724
725
726static PyMemoTable *
727PyMemoTable_New(void)
728{
729 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
730 if (memo == NULL) {
731 PyErr_NoMemory();
732 return NULL;
733 }
734
735 memo->mt_used = 0;
736 memo->mt_allocated = MT_MINSIZE;
737 memo->mt_mask = MT_MINSIZE - 1;
738 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
739 if (memo->mt_table == NULL) {
740 PyMem_FREE(memo);
741 PyErr_NoMemory();
742 return NULL;
743 }
744 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
745
746 return memo;
747}
748
749static PyMemoTable *
750PyMemoTable_Copy(PyMemoTable *self)
751{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000752 PyMemoTable *new = PyMemoTable_New();
753 if (new == NULL)
754 return NULL;
755
756 new->mt_used = self->mt_used;
757 new->mt_allocated = self->mt_allocated;
758 new->mt_mask = self->mt_mask;
759 /* The table we get from _New() is probably smaller than we wanted.
760 Free it and allocate one that's the right size. */
761 PyMem_FREE(new->mt_table);
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500762 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000763 if (new->mt_table == NULL) {
764 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200765 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000766 return NULL;
767 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700768 for (size_t i = 0; i < self->mt_allocated; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000769 Py_XINCREF(self->mt_table[i].me_key);
770 }
771 memcpy(new->mt_table, self->mt_table,
772 sizeof(PyMemoEntry) * self->mt_allocated);
773
774 return new;
775}
776
777static Py_ssize_t
778PyMemoTable_Size(PyMemoTable *self)
779{
780 return self->mt_used;
781}
782
783static int
784PyMemoTable_Clear(PyMemoTable *self)
785{
786 Py_ssize_t i = self->mt_allocated;
787
788 while (--i >= 0) {
789 Py_XDECREF(self->mt_table[i].me_key);
790 }
791 self->mt_used = 0;
792 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
793 return 0;
794}
795
796static void
797PyMemoTable_Del(PyMemoTable *self)
798{
799 if (self == NULL)
800 return;
801 PyMemoTable_Clear(self);
802
803 PyMem_FREE(self->mt_table);
804 PyMem_FREE(self);
805}
806
807/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
808 can be considerably simpler than dictobject.c's lookdict(). */
809static PyMemoEntry *
810_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
811{
812 size_t i;
813 size_t perturb;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700814 size_t mask = self->mt_mask;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000815 PyMemoEntry *table = self->mt_table;
816 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000817 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000818
819 i = hash & mask;
820 entry = &table[i];
821 if (entry->me_key == NULL || entry->me_key == key)
822 return entry;
823
824 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
825 i = (i << 2) + i + perturb + 1;
826 entry = &table[i & mask];
827 if (entry->me_key == NULL || entry->me_key == key)
828 return entry;
829 }
Barry Warsawb2e57942017-09-14 18:13:16 -0700830 Py_UNREACHABLE();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000831}
832
833/* Returns -1 on failure, 0 on success. */
834static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700835_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000836{
837 PyMemoEntry *oldtable = NULL;
838 PyMemoEntry *oldentry, *newentry;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700839 size_t new_size = MT_MINSIZE;
840 size_t to_process;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000841
842 assert(min_size > 0);
843
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700844 if (min_size > PY_SSIZE_T_MAX) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000845 PyErr_NoMemory();
846 return -1;
847 }
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700848
849 /* Find the smallest valid table size >= min_size. */
850 while (new_size < min_size) {
851 new_size <<= 1;
852 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000853 /* new_size needs to be a power of two. */
854 assert((new_size & (new_size - 1)) == 0);
855
856 /* Allocate new table. */
857 oldtable = self->mt_table;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500858 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000859 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200860 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000861 PyErr_NoMemory();
862 return -1;
863 }
864 self->mt_allocated = new_size;
865 self->mt_mask = new_size - 1;
866 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
867
868 /* Copy entries from the old table. */
869 to_process = self->mt_used;
870 for (oldentry = oldtable; to_process > 0; oldentry++) {
871 if (oldentry->me_key != NULL) {
872 to_process--;
873 /* newentry is a pointer to a chunk of the new
874 mt_table, so we're setting the key:value pair
875 in-place. */
876 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
877 newentry->me_key = oldentry->me_key;
878 newentry->me_value = oldentry->me_value;
879 }
880 }
881
882 /* Deallocate the old table. */
883 PyMem_FREE(oldtable);
884 return 0;
885}
886
887/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200888static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889PyMemoTable_Get(PyMemoTable *self, PyObject *key)
890{
891 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
892 if (entry->me_key == NULL)
893 return NULL;
894 return &entry->me_value;
895}
896
897/* Returns -1 on failure, 0 on success. */
898static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200899PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000900{
901 PyMemoEntry *entry;
902
903 assert(key != NULL);
904
905 entry = _PyMemoTable_Lookup(self, key);
906 if (entry->me_key != NULL) {
907 entry->me_value = value;
908 return 0;
909 }
910 Py_INCREF(key);
911 entry->me_key = key;
912 entry->me_value = value;
913 self->mt_used++;
914
915 /* If we added a key, we can safely resize. Otherwise just return!
916 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
917 *
918 * Quadrupling the size improves average table sparseness
919 * (reducing collisions) at the cost of some memory. It also halves
920 * the number of expensive resize operations in a growing memo table.
921 *
922 * Very large memo tables (over 50K items) use doubling instead.
923 * This may help applications with severe memory constraints.
924 */
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700925 if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000926 return 0;
Benjamin Petersona4ae8282018-09-20 18:36:40 -0700927 }
928 // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow.
929 size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used;
930 return _PyMemoTable_ResizeTable(self, desired_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000931}
932
933#undef MT_MINSIZE
934#undef PERTURB_SHIFT
935
936/*************************************************************************/
937
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000938
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000939static int
940_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000941{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300942 Py_XSETREF(self->output_buffer,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200943 PyBytes_FromStringAndSize(NULL, self->max_output_len));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000944 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000945 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000946 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100947 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000948 return 0;
949}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000950
/* Store `value` into out[0..7] as an 8-byte little-endian integer.  On
   platforms where size_t is narrower than 8 bytes the high bytes are zero. */
static void
_write_size64(char *out, size_t value)
{
    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Emit the lowest byte, then shift it away.  Once all of value's bytes
       have been consumed the remainder is zero, which pads the output. */
    for (int pos = 0; pos < 8; pos++) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
    }
}
965
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100966static int
967_Pickler_CommitFrame(PicklerObject *self)
968{
969 size_t frame_len;
970 char *qdata;
971
972 if (!self->framing || self->frame_start == -1)
973 return 0;
974 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
975 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
Serhiy Storchaka1211c9a2018-01-20 16:42:44 +0200976 if (frame_len >= FRAME_SIZE_MIN) {
977 qdata[0] = FRAME;
978 _write_size64(qdata + 1, frame_len);
979 }
980 else {
981 memmove(qdata, qdata + FRAME_HEADER_SIZE, frame_len);
982 self->output_len -= FRAME_HEADER_SIZE;
983 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100984 self->frame_start = -1;
985 return 0;
986}
987
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000988static PyObject *
989_Pickler_GetString(PicklerObject *self)
990{
991 PyObject *output_buffer = self->output_buffer;
992
993 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100994
995 if (_Pickler_CommitFrame(self))
996 return NULL;
997
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000998 self->output_buffer = NULL;
999 /* Resize down to exact size */
1000 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
1001 return NULL;
1002 return output_buffer;
1003}
1004
1005static int
1006_Pickler_FlushToFile(PicklerObject *self)
1007{
1008 PyObject *output, *result;
1009
1010 assert(self->write != NULL);
1011
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001012 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001013 output = _Pickler_GetString(self);
1014 if (output == NULL)
1015 return -1;
1016
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001017 result = _Pickle_FastCall(self->write, output);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001018 Py_XDECREF(result);
1019 return (result == NULL) ? -1 : 0;
1020}
1021
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001022static int
1023_Pickler_OpcodeBoundary(PicklerObject *self)
1024{
1025 Py_ssize_t frame_len;
1026
1027 if (!self->framing || self->frame_start == -1) {
1028 return 0;
1029 }
1030 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
1031 if (frame_len >= FRAME_SIZE_TARGET) {
1032 if(_Pickler_CommitFrame(self)) {
1033 return -1;
1034 }
Leo Ariasc3d95082018-02-03 18:36:10 -06001035 /* Flush the content of the committed frame to the underlying
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01001036 * file and reuse the pickler buffer for the next frame so as
1037 * to limit memory usage when dumping large complex objects to
1038 * a file.
1039 *
1040 * self->write is NULL when called via dumps.
1041 */
1042 if (self->write != NULL) {
1043 if (_Pickler_FlushToFile(self) < 0) {
1044 return -1;
1045 }
1046 if (_Pickler_ClearBuffer(self) < 0) {
1047 return -1;
1048 }
1049 }
1050 }
1051 return 0;
1052}
1053
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001054static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001055_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001056{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001057 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001058 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001059 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001060
1061 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001062 need_new_frame = (self->framing && self->frame_start == -1);
1063
1064 if (need_new_frame)
1065 n = data_len + FRAME_HEADER_SIZE;
1066 else
1067 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001068
1069 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001070 if (required > self->max_output_len) {
1071 /* Make place in buffer for the pickle chunk */
1072 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
1073 PyErr_NoMemory();
1074 return -1;
1075 }
1076 self->max_output_len = (self->output_len + n) / 2 * 3;
1077 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
1078 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001079 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001080 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001081 if (need_new_frame) {
1082 /* Setup new frame */
1083 Py_ssize_t frame_start = self->output_len;
1084 self->frame_start = frame_start;
1085 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
1086 /* Write an invalid value, for debugging */
1087 buffer[frame_start + i] = 0xFE;
1088 }
1089 self->output_len += FRAME_HEADER_SIZE;
1090 }
1091 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001092 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001093 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001094 buffer[self->output_len + i] = s[i];
1095 }
1096 }
1097 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001098 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001099 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001100 self->output_len += data_len;
1101 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001102}
1103
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001104static PicklerObject *
1105_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001106{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001107 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001108
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001109 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1110 if (self == NULL)
1111 return NULL;
1112
1113 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01001114 self->dispatch_table = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001115 self->buffer_callback = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001116 self->write = NULL;
1117 self->proto = 0;
1118 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001119 self->framing = 0;
1120 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001121 self->fast = 0;
1122 self->fast_nesting = 0;
1123 self->fix_imports = 0;
1124 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001125 self->max_output_len = WRITE_BUF_SIZE;
1126 self->output_len = 0;
Pierre Glaser289f1f82019-05-08 23:08:25 +02001127 self->reducer_override = NULL;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001128
1129 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001130 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1131 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +02001132
1133 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +02001134 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001135 return NULL;
1136 }
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001137
1138 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001139 return self;
1140}
1141
1142static int
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001143_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001144{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001145 long proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001146
Serhiy Storchaka279f4462019-09-14 12:24:05 +03001147 if (protocol == Py_None) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001148 proto = DEFAULT_PROTOCOL;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001149 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001150 else {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001151 proto = PyLong_AsLong(protocol);
1152 if (proto < 0) {
1153 if (proto == -1 && PyErr_Occurred())
1154 return -1;
1155 proto = HIGHEST_PROTOCOL;
1156 }
1157 else if (proto > HIGHEST_PROTOCOL) {
1158 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1159 HIGHEST_PROTOCOL);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001161 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001162 }
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001163 self->proto = (int)proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001164 self->bin = proto > 0;
1165 self->fix_imports = fix_imports && proto < 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001166 return 0;
1167}
1168
1169/* Returns -1 (with an exception set) on failure, 0 on success. This may
1170 be called once on a freshly created Pickler. */
1171static int
1172_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1173{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001174 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001175 assert(file != NULL);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001176 if (_PyObject_LookupAttrId(file, &PyId_write, &self->write) < 0) {
1177 return -1;
1178 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001179 if (self->write == NULL) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001180 PyErr_SetString(PyExc_TypeError,
1181 "file must have a 'write' attribute");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001182 return -1;
1183 }
1184
1185 return 0;
1186}
1187
Antoine Pitrou91f43802019-05-26 17:10:09 +02001188static int
1189_Pickler_SetBufferCallback(PicklerObject *self, PyObject *buffer_callback)
1190{
1191 if (buffer_callback == Py_None) {
1192 buffer_callback = NULL;
1193 }
1194 if (buffer_callback != NULL && self->proto < 5) {
1195 PyErr_SetString(PyExc_ValueError,
1196 "buffer_callback needs protocol >= 5");
1197 return -1;
1198 }
1199
1200 Py_XINCREF(buffer_callback);
1201 self->buffer_callback = buffer_callback;
1202 return 0;
1203}
1204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001205/* Returns the size of the input on success, -1 on failure. This takes its
1206 own reference to `input`. */
1207static Py_ssize_t
1208_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1209{
1210 if (self->buffer.buf != NULL)
1211 PyBuffer_Release(&self->buffer);
1212 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1213 return -1;
1214 self->input_buffer = self->buffer.buf;
1215 self->input_len = self->buffer.len;
1216 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001217 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001218 return self->input_len;
1219}
1220
Antoine Pitrou04248a82010-10-12 20:51:21 +00001221static int
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001222bad_readline(void)
1223{
1224 PickleState *st = _Pickle_GetGlobalState();
1225 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
1226 return -1;
1227}
1228
Antoine Pitrou91f43802019-05-26 17:10:09 +02001229/* Skip any consumed data that was only prefetched using peek() */
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001230static int
Antoine Pitrou04248a82010-10-12 20:51:21 +00001231_Unpickler_SkipConsumed(UnpicklerObject *self)
1232{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001233 Py_ssize_t consumed;
1234 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001235
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001236 consumed = self->next_read_idx - self->prefetched_idx;
1237 if (consumed <= 0)
1238 return 0;
1239
1240 assert(self->peek); /* otherwise we did something wrong */
Martin Panter6245cb32016-04-15 02:14:19 +00001241 /* This makes a useless copy... */
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001242 r = PyObject_CallFunction(self->read, "n", consumed);
1243 if (r == NULL)
1244 return -1;
1245 Py_DECREF(r);
1246
1247 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001248 return 0;
1249}
1250
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251static const Py_ssize_t READ_WHOLE_LINE = -1;
1252
1253/* If reading from a file, we need to only pull the bytes we need, since there
1254 may be multiple pickle objects arranged contiguously in the same input
1255 buffer.
1256
1257 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1258 bytes from the input stream/buffer.
1259
1260 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1261 failure; on success, returns the number of bytes read from the file.
1262
1263 On success, self->input_len will be 0; this is intentional so that when
1264 unpickling from a file, the "we've run out of data" code paths will trigger,
1265 causing the Unpickler to go back to the file for more data. Use the returned
1266 size to tell you how much data you can process. */
1267static Py_ssize_t
1268_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1269{
1270 PyObject *data;
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001271 Py_ssize_t read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001272
1273 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +02001274
Antoine Pitrou04248a82010-10-12 20:51:21 +00001275 if (_Unpickler_SkipConsumed(self) < 0)
1276 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001277
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001278 if (n == READ_WHOLE_LINE) {
Victor Stinner2ff58a22019-06-17 14:27:23 +02001279 data = PyObject_CallNoArgs(self->readline);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001280 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001281 else {
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001282 PyObject *len;
1283 /* Prefetch some data without advancing the file pointer, if possible */
1284 if (self->peek && n < PREFETCH) {
1285 len = PyLong_FromSsize_t(PREFETCH);
1286 if (len == NULL)
1287 return -1;
1288 data = _Pickle_FastCall(self->peek, len);
1289 if (data == NULL) {
1290 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1291 return -1;
1292 /* peek() is probably not supported by the given file object */
1293 PyErr_Clear();
1294 Py_CLEAR(self->peek);
1295 }
1296 else {
1297 read_size = _Unpickler_SetStringInput(self, data);
1298 Py_DECREF(data);
1299 self->prefetched_idx = 0;
1300 if (n <= read_size)
1301 return n;
1302 }
1303 }
1304 len = PyLong_FromSsize_t(n);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001305 if (len == NULL)
1306 return -1;
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001307 data = _Pickle_FastCall(self->read, len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001308 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001309 if (data == NULL)
1310 return -1;
1311
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001312 read_size = _Unpickler_SetStringInput(self, data);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001313 Py_DECREF(data);
1314 return read_size;
1315}
1316
Victor Stinner19ed27e2016-05-20 11:42:37 +02001317/* Don't call it directly: use _Unpickler_Read() */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001318static Py_ssize_t
Victor Stinner19ed27e2016-05-20 11:42:37 +02001319_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001320{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001321 Py_ssize_t num_read;
1322
Benjamin Peterson6aa15642015-09-27 01:16:03 -07001323 *s = NULL;
Benjamin Petersone48cf7e2015-09-26 00:08:34 -07001324 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1325 PickleState *st = _Pickle_GetGlobalState();
1326 PyErr_SetString(st->UnpicklingError,
1327 "read would overflow (invalid bytecode)");
1328 return -1;
1329 }
Victor Stinner19ed27e2016-05-20 11:42:37 +02001330
1331 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1332 assert(self->next_read_idx + n > self->input_len);
1333
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001334 if (!self->read)
1335 return bad_readline();
1336
Antoine Pitrou91f43802019-05-26 17:10:09 +02001337 /* Extend the buffer to satisfy desired size */
Antoine Pitrou04248a82010-10-12 20:51:21 +00001338 num_read = _Unpickler_ReadFromFile(self, n);
1339 if (num_read < 0)
1340 return -1;
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001341 if (num_read < n)
1342 return bad_readline();
Antoine Pitrou04248a82010-10-12 20:51:21 +00001343 *s = self->input_buffer;
1344 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 return n;
1346}
1347
Antoine Pitrou91f43802019-05-26 17:10:09 +02001348/* Read `n` bytes from the unpickler's data source, storing the result in `buf`.
1349 *
1350 * This should only be used for non-small data reads where potentially
1351 * avoiding a copy is beneficial. This method does not try to prefetch
1352 * more data into the input buffer.
1353 *
1354 * _Unpickler_Read() is recommended in most cases.
1355 */
1356static Py_ssize_t
1357_Unpickler_ReadInto(UnpicklerObject *self, char *buf, Py_ssize_t n)
1358{
1359 assert(n != READ_WHOLE_LINE);
1360
1361 /* Read from available buffer data, if any */
1362 Py_ssize_t in_buffer = self->input_len - self->next_read_idx;
1363 if (in_buffer > 0) {
1364 Py_ssize_t to_read = Py_MIN(in_buffer, n);
1365 memcpy(buf, self->input_buffer + self->next_read_idx, to_read);
1366 self->next_read_idx += to_read;
1367 buf += to_read;
1368 n -= to_read;
1369 if (n == 0) {
1370 /* Entire read was satisfied from buffer */
1371 return n;
1372 }
1373 }
1374
1375 /* Read from file */
Antoine Pitrou9f378722020-02-23 23:33:53 +01001376 if (!self->read) {
1377 /* We're unpickling memory, this means the input is truncated */
Antoine Pitrou91f43802019-05-26 17:10:09 +02001378 return bad_readline();
1379 }
1380 if (_Unpickler_SkipConsumed(self) < 0) {
1381 return -1;
1382 }
1383
Antoine Pitrou9f378722020-02-23 23:33:53 +01001384 if (!self->readinto) {
1385 /* readinto() not supported on file-like object, fall back to read()
1386 * and copy into destination buffer (bpo-39681) */
1387 PyObject* len = PyLong_FromSsize_t(n);
1388 if (len == NULL) {
1389 return -1;
1390 }
1391 PyObject* data = _Pickle_FastCall(self->read, len);
1392 if (data == NULL) {
1393 return -1;
1394 }
1395 if (!PyBytes_Check(data)) {
1396 PyErr_Format(PyExc_ValueError,
1397 "read() returned non-bytes object (%R)",
1398 Py_TYPE(data));
1399 Py_DECREF(data);
1400 return -1;
1401 }
1402 Py_ssize_t read_size = PyBytes_GET_SIZE(data);
1403 if (read_size < n) {
1404 Py_DECREF(data);
1405 return bad_readline();
1406 }
1407 memcpy(buf, PyBytes_AS_STRING(data), n);
1408 Py_DECREF(data);
1409 return n;
1410 }
1411
Antoine Pitrou91f43802019-05-26 17:10:09 +02001412 /* Call readinto() into user buffer */
1413 PyObject *buf_obj = PyMemoryView_FromMemory(buf, n, PyBUF_WRITE);
1414 if (buf_obj == NULL) {
1415 return -1;
1416 }
1417 PyObject *read_size_obj = _Pickle_FastCall(self->readinto, buf_obj);
1418 if (read_size_obj == NULL) {
1419 return -1;
1420 }
1421 Py_ssize_t read_size = PyLong_AsSsize_t(read_size_obj);
1422 Py_DECREF(read_size_obj);
1423
1424 if (read_size < 0) {
1425 if (!PyErr_Occurred()) {
1426 PyErr_SetString(PyExc_ValueError,
1427 "readinto() returned negative size");
1428 }
1429 return -1;
1430 }
1431 if (read_size < n) {
1432 return bad_readline();
1433 }
1434 return n;
1435}
1436
/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.

   This should be used for all data reads, rather than accessing the unpickler's
   input buffer directly. This macro deals correctly with reading from input
   streams, which the input buffer doesn't deal with.

   Fast path: when the input buffer still holds at least `n` bytes past
   next_read_idx, `*s` is pointed at them and next_read_idx advances by `n`.
   Otherwise the work is delegated to _Unpickler_ReadImpl().

   `self` and `n` are evaluated more than once, so they must not have side
   effects.

   Returns -1 (with an exception set) on failure. On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n)                               \
    (((self)->input_len - (self)->next_read_idx >= (n))           \
         ? (*(s) = (self)->input_buffer + (self)->next_read_idx,  \
            (self)->next_read_idx += (n),                         \
            (n))                                                  \
         : _Unpickler_ReadImpl(self, (s), (n)))
1456
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001457static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001458_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1459 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001461 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001462 if (input_line == NULL) {
1463 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001464 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001465 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001466
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001467 memcpy(input_line, line, len);
1468 input_line[len] = '\0';
1469 self->input_line = input_line;
1470 *result = self->input_line;
1471 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001472}
1473
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474/* Read a line from the input stream/buffer. If we run off the end of the input
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001475 before hitting \n, raise an error.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001476
1477 Returns the number of chars read, or -1 on failure. */
1478static Py_ssize_t
1479_Unpickler_Readline(UnpicklerObject *self, char **result)
1480{
1481 Py_ssize_t i, num_read;
1482
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001483 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001484 if (self->input_buffer[i] == '\n') {
1485 char *line_start = self->input_buffer + self->next_read_idx;
1486 num_read = i - self->next_read_idx + 1;
1487 self->next_read_idx = i + 1;
1488 return _Unpickler_CopyLine(self, line_start, num_read, result);
1489 }
1490 }
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001491 if (!self->read)
1492 return bad_readline();
Victor Stinner121aab42011-09-29 23:40:53 +02001493
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03001494 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1495 if (num_read < 0)
1496 return -1;
1497 if (num_read == 0 || self->input_buffer[num_read - 1] != '\n')
1498 return bad_readline();
1499 self->next_read_idx = num_read;
1500 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001501}
1502
1503/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1504 will be modified in place. */
1505static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001506_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001507{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001508 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001509
1510 assert(new_size > self->memo_size);
1511
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001512 PyObject **memo_new = self->memo;
1513 PyMem_RESIZE(memo_new, PyObject *, new_size);
1514 if (memo_new == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001515 PyErr_NoMemory();
1516 return -1;
1517 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001518 self->memo = memo_new;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001519 for (i = self->memo_size; i < new_size; i++)
1520 self->memo[i] = NULL;
1521 self->memo_size = new_size;
1522 return 0;
1523}
1524
1525/* Returns NULL if idx is out of bounds. */
1526static PyObject *
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001527_Unpickler_MemoGet(UnpicklerObject *self, size_t idx)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001528{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001529 if (idx >= self->memo_size)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001530 return NULL;
1531
1532 return self->memo[idx];
1533}
1534
1535/* Returns -1 (with an exception set) on failure, 0 on success.
1536 This takes its own reference to `value`. */
1537static int
Benjamin Petersona4ae8282018-09-20 18:36:40 -07001538_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001539{
1540 PyObject *old_item;
1541
1542 if (idx >= self->memo_size) {
1543 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1544 return -1;
1545 assert(idx < self->memo_size);
1546 }
1547 Py_INCREF(value);
1548 old_item = self->memo[idx];
1549 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001550 if (old_item != NULL) {
1551 Py_DECREF(old_item);
1552 }
1553 else {
1554 self->memo_len++;
1555 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001556 return 0;
1557}
1558
1559static PyObject **
1560_Unpickler_NewMemo(Py_ssize_t new_size)
1561{
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001562 PyObject **memo = PyMem_NEW(PyObject *, new_size);
Victor Stinner42024562013-07-12 00:53:57 +02001563 if (memo == NULL) {
1564 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001565 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001566 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001567 memset(memo, 0, new_size * sizeof(PyObject *));
1568 return memo;
1569}
1570
1571/* Free the unpickler's memo, taking care to decref any items left in it. */
1572static void
1573_Unpickler_MemoCleanup(UnpicklerObject *self)
1574{
1575 Py_ssize_t i;
1576 PyObject **memo = self->memo;
1577
1578 if (self->memo == NULL)
1579 return;
1580 self->memo = NULL;
1581 i = self->memo_size;
1582 while (--i >= 0) {
1583 Py_XDECREF(memo[i]);
1584 }
1585 PyMem_FREE(memo);
1586}
1587
1588static UnpicklerObject *
1589_Unpickler_New(void)
1590{
1591 UnpicklerObject *self;
1592
1593 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1594 if (self == NULL)
1595 return NULL;
1596
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001597 self->pers_func = NULL;
1598 self->input_buffer = NULL;
1599 self->input_line = NULL;
1600 self->input_len = 0;
1601 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001602 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001603 self->read = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001604 self->readinto = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001605 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001606 self->peek = NULL;
Antoine Pitrou91f43802019-05-26 17:10:09 +02001607 self->buffers = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001608 self->encoding = NULL;
1609 self->errors = NULL;
1610 self->marks = NULL;
1611 self->num_marks = 0;
1612 self->marks_size = 0;
1613 self->proto = 0;
1614 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001615 memset(&self->buffer, 0, sizeof(Py_buffer));
1616 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001617 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001618 self->memo = _Unpickler_NewMemo(self->memo_size);
1619 self->stack = (Pdata *)Pdata_New();
1620
1621 if (self->memo == NULL || self->stack == NULL) {
1622 Py_DECREF(self);
1623 return NULL;
1624 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001625
Zackery Spytz359bd4f2019-04-23 05:56:08 -06001626 PyObject_GC_Track(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001627 return self;
1628}
1629
1630/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001631 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001632static int
1633_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1634{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001635 _Py_IDENTIFIER(peek);
1636 _Py_IDENTIFIER(read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001637 _Py_IDENTIFIER(readinto);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001638 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001639
Antoine Pitrou9f378722020-02-23 23:33:53 +01001640 /* Optional file methods */
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001641 if (_PyObject_LookupAttrId(file, &PyId_peek, &self->peek) < 0) {
1642 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001643 }
Antoine Pitrou9f378722020-02-23 23:33:53 +01001644 if (_PyObject_LookupAttrId(file, &PyId_readinto, &self->readinto) < 0) {
1645 return -1;
1646 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001647 (void)_PyObject_LookupAttrId(file, &PyId_read, &self->read);
1648 (void)_PyObject_LookupAttrId(file, &PyId_readline, &self->readline);
Antoine Pitrou9f378722020-02-23 23:33:53 +01001649 if (!self->readline || !self->read) {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001650 if (!PyErr_Occurred()) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001651 PyErr_SetString(PyExc_TypeError,
Antoine Pitrou9f378722020-02-23 23:33:53 +01001652 "file must have 'read' and 'readline' attributes");
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001653 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001654 Py_CLEAR(self->read);
Antoine Pitrou91f43802019-05-26 17:10:09 +02001655 Py_CLEAR(self->readinto);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001656 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001657 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001658 return -1;
1659 }
1660 return 0;
1661}
1662
1663/* Returns -1 (with an exception set) on failure, 0 on success. This may
Antoine Pitrou91f43802019-05-26 17:10:09 +02001664 be called once on a freshly created Unpickler. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001665static int
1666_Unpickler_SetInputEncoding(UnpicklerObject *self,
1667 const char *encoding,
1668 const char *errors)
1669{
1670 if (encoding == NULL)
1671 encoding = "ASCII";
1672 if (errors == NULL)
1673 errors = "strict";
1674
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001675 self->encoding = _PyMem_Strdup(encoding);
1676 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001677 if (self->encoding == NULL || self->errors == NULL) {
1678 PyErr_NoMemory();
1679 return -1;
1680 }
1681 return 0;
1682}
1683
Antoine Pitrou91f43802019-05-26 17:10:09 +02001684/* Returns -1 (with an exception set) on failure, 0 on success. This may
1685 be called once on a freshly created Unpickler. */
1686static int
1687_Unpickler_SetBuffers(UnpicklerObject *self, PyObject *buffers)
1688{
Markus Mohrhard898318b2019-07-26 00:00:34 +08001689 if (buffers == NULL || buffers == Py_None) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02001690 self->buffers = NULL;
1691 }
1692 else {
1693 self->buffers = PyObject_GetIter(buffers);
1694 if (self->buffers == NULL) {
1695 return -1;
1696 }
1697 }
1698 return 0;
1699}
1700
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001701/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001702static int
1703memo_get(PicklerObject *self, PyObject *key)
1704{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001705 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001706 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001707 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001708
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001709 value = PyMemoTable_Get(self->memo, key);
1710 if (value == NULL) {
1711 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001712 return -1;
1713 }
1714
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001715 if (!self->bin) {
1716 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001717 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1718 "%" PY_FORMAT_SIZE_T "d\n", *value);
1719 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001720 }
1721 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001722 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001723 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001724 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001725 len = 2;
1726 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001727 else if ((size_t)*value <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001728 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001729 pdata[1] = (unsigned char)(*value & 0xff);
1730 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1731 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1732 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001733 len = 5;
1734 }
1735 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001736 PickleState *st = _Pickle_GetGlobalState();
1737 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001738 "memo id too large for LONG_BINGET");
1739 return -1;
1740 }
1741 }
1742
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001743 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001744 return -1;
1745
1746 return 0;
1747}
1748
1749/* Store an object in the memo, assign it a new unique ID based on the number
1750 of objects currently stored in the memo and generate a PUT opcode. */
1751static int
1752memo_put(PicklerObject *self, PyObject *obj)
1753{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001754 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001755 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001756 Py_ssize_t idx;
1757
1758 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001759
1760 if (self->fast)
1761 return 0;
1762
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001763 idx = PyMemoTable_Size(self->memo);
1764 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1765 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001766
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001767 if (self->proto >= 4) {
1768 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1769 return -1;
1770 return 0;
1771 }
1772 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001773 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001774 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001775 "%" PY_FORMAT_SIZE_T "d\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001776 len = strlen(pdata);
1777 }
1778 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001779 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001781 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001782 len = 2;
1783 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001784 else if ((size_t)idx <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001785 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001786 pdata[1] = (unsigned char)(idx & 0xff);
1787 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1788 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1789 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001790 len = 5;
1791 }
1792 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001793 PickleState *st = _Pickle_GetGlobalState();
1794 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001795 "memo id too large for LONG_BINPUT");
1796 return -1;
1797 }
1798 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001799 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001800 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001801
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001802 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001803}
1804
1805static PyObject *
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001806get_dotted_path(PyObject *obj, PyObject *name)
1807{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001808 _Py_static_string(PyId_dot, ".");
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001809 PyObject *dotted_path;
1810 Py_ssize_t i, n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001811
1812 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001813 if (dotted_path == NULL)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001814 return NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001815 n = PyList_GET_SIZE(dotted_path);
1816 assert(n >= 1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001817 for (i = 0; i < n; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001818 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
Serhiy Storchaka9937d902017-01-09 10:04:34 +02001819 if (_PyUnicode_EqualToASCIIString(subpath, "<locals>")) {
Antoine Pitrou6cd5eda2014-12-02 00:20:03 +01001820 if (obj == NULL)
1821 PyErr_Format(PyExc_AttributeError,
1822 "Can't pickle local object %R", name);
1823 else
1824 PyErr_Format(PyExc_AttributeError,
1825 "Can't pickle local attribute %R on %R", name, obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001826 Py_DECREF(dotted_path);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001827 return NULL;
1828 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001829 }
1830 return dotted_path;
1831}
1832
1833static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001834get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001835{
1836 Py_ssize_t i, n;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001837 PyObject *parent = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001838
1839 assert(PyList_CheckExact(names));
1840 Py_INCREF(obj);
1841 n = PyList_GET_SIZE(names);
1842 for (i = 0; i < n; i++) {
1843 PyObject *name = PyList_GET_ITEM(names, i);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001844 Py_XDECREF(parent);
1845 parent = obj;
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001846 (void)_PyObject_LookupAttr(parent, name, &obj);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001847 if (obj == NULL) {
1848 Py_DECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001849 return NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001850 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001851 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001852 if (pparent != NULL)
1853 *pparent = parent;
1854 else
1855 Py_XDECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001856 return obj;
1857}
1858
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001859
1860static PyObject *
1861getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1862{
1863 PyObject *dotted_path, *attr;
1864
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001865 if (allow_qualname) {
1866 dotted_path = get_dotted_path(obj, name);
1867 if (dotted_path == NULL)
1868 return NULL;
1869 attr = get_deep_attribute(obj, dotted_path, NULL);
1870 Py_DECREF(dotted_path);
1871 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001872 else {
1873 (void)_PyObject_LookupAttr(obj, name, &attr);
1874 }
1875 if (attr == NULL && !PyErr_Occurred()) {
1876 PyErr_Format(PyExc_AttributeError,
1877 "Can't get attribute %R on %R", name, obj);
1878 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001879 return attr;
1880}
1881
Eric Snow3f9eee62017-09-15 16:35:20 -06001882static int
1883_checkmodule(PyObject *module_name, PyObject *module,
1884 PyObject *global, PyObject *dotted_path)
1885{
1886 if (module == Py_None) {
1887 return -1;
1888 }
1889 if (PyUnicode_Check(module_name) &&
1890 _PyUnicode_EqualToASCIIString(module_name, "__main__")) {
1891 return -1;
1892 }
1893
1894 PyObject *candidate = get_deep_attribute(module, dotted_path, NULL);
1895 if (candidate == NULL) {
Eric Snow3f9eee62017-09-15 16:35:20 -06001896 return -1;
1897 }
1898 if (candidate != global) {
1899 Py_DECREF(candidate);
1900 return -1;
1901 }
1902 Py_DECREF(candidate);
1903 return 0;
1904}
1905
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001906static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001907whichmodule(PyObject *global, PyObject *dotted_path)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001908{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001909 PyObject *module_name;
Eric Snow3f9eee62017-09-15 16:35:20 -06001910 PyObject *module = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001911 Py_ssize_t i;
Eric Snow3f9eee62017-09-15 16:35:20 -06001912 PyObject *modules;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001913 _Py_IDENTIFIER(__module__);
1914 _Py_IDENTIFIER(modules);
1915 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001916
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001917 if (_PyObject_LookupAttrId(global, &PyId___module__, &module_name) < 0) {
1918 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001919 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02001920 if (module_name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001921 /* In some rare cases (e.g., bound methods of extension types),
1922 __module__ can be None. If it is so, then search sys.modules for
1923 the module of global. */
1924 if (module_name != Py_None)
1925 return module_name;
1926 Py_CLEAR(module_name);
1927 }
1928 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001929
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001930 /* Fallback on walking sys.modules */
Eric Snow3f9eee62017-09-15 16:35:20 -06001931 modules = _PySys_GetObjectId(&PyId_modules);
1932 if (modules == NULL) {
Victor Stinner1e53bba2013-07-16 22:26:05 +02001933 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001934 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001935 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001936 if (PyDict_CheckExact(modules)) {
1937 i = 0;
1938 while (PyDict_Next(modules, &i, &module_name, &module)) {
1939 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1940 Py_INCREF(module_name);
1941 return module_name;
1942 }
1943 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001944 return NULL;
Eric Snow3f9eee62017-09-15 16:35:20 -06001945 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001946 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001947 }
1948 else {
1949 PyObject *iterator = PyObject_GetIter(modules);
1950 if (iterator == NULL) {
1951 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001952 }
Eric Snow3f9eee62017-09-15 16:35:20 -06001953 while ((module_name = PyIter_Next(iterator))) {
1954 module = PyObject_GetItem(modules, module_name);
1955 if (module == NULL) {
1956 Py_DECREF(module_name);
1957 Py_DECREF(iterator);
1958 return NULL;
1959 }
1960 if (_checkmodule(module_name, module, global, dotted_path) == 0) {
1961 Py_DECREF(module);
1962 Py_DECREF(iterator);
1963 return module_name;
1964 }
1965 Py_DECREF(module);
1966 Py_DECREF(module_name);
1967 if (PyErr_Occurred()) {
1968 Py_DECREF(iterator);
1969 return NULL;
1970 }
1971 }
1972 Py_DECREF(iterator);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 }
1974
1975 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001976 module_name = _PyUnicode_FromId(&PyId___main__);
Victor Stinneraf46eb82017-09-05 23:30:16 +02001977 Py_XINCREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978 return module_name;
1979}
1980
1981/* fast_save_enter() and fast_save_leave() are guards against recursive
1982 objects when Pickler is used with the "fast mode" (i.e., with object
1983 memoization disabled). If the nesting of a list or dict object exceed
1984 FAST_NESTING_LIMIT, these guards will start keeping an internal
1985 reference to the seen list or dict objects and check whether these objects
1986 are recursive. These are not strictly necessary, since save() has a
1987 hard-coded recursion limit, but they give a nicer error message than the
1988 typical RuntimeError. */
1989static int
1990fast_save_enter(PicklerObject *self, PyObject *obj)
1991{
1992 /* if fast_nesting < 0, we're doing an error exit. */
1993 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1994 PyObject *key = NULL;
1995 if (self->fast_memo == NULL) {
1996 self->fast_memo = PyDict_New();
1997 if (self->fast_memo == NULL) {
1998 self->fast_nesting = -1;
1999 return 0;
2000 }
2001 }
2002 key = PyLong_FromVoidPtr(obj);
Mat Mf76231f2017-11-13 02:50:16 -05002003 if (key == NULL) {
2004 self->fast_nesting = -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005 return 0;
Mat Mf76231f2017-11-13 02:50:16 -05002006 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08002007 if (PyDict_GetItemWithError(self->fast_memo, key)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008 Py_DECREF(key);
2009 PyErr_Format(PyExc_ValueError,
2010 "fast mode: can't pickle cyclic objects "
2011 "including object type %.200s at %p",
Victor Stinnerdaa97562020-02-07 03:37:06 +01002012 Py_TYPE(obj)->tp_name, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002013 self->fast_nesting = -1;
2014 return 0;
2015 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08002016 if (PyErr_Occurred()) {
Mat Mf76231f2017-11-13 02:50:16 -05002017 Py_DECREF(key);
2018 self->fast_nesting = -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08002019 return 0;
2020 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
2022 Py_DECREF(key);
2023 self->fast_nesting = -1;
2024 return 0;
2025 }
2026 Py_DECREF(key);
2027 }
2028 return 1;
2029}
2030
2031static int
2032fast_save_leave(PicklerObject *self, PyObject *obj)
2033{
2034 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
2035 PyObject *key = PyLong_FromVoidPtr(obj);
2036 if (key == NULL)
2037 return 0;
2038 if (PyDict_DelItem(self->fast_memo, key) < 0) {
2039 Py_DECREF(key);
2040 return 0;
2041 }
2042 Py_DECREF(key);
2043 }
2044 return 1;
2045}
2046
2047static int
2048save_none(PicklerObject *self, PyObject *obj)
2049{
2050 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002051 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002052 return -1;
2053
2054 return 0;
2055}
2056
2057static int
2058save_bool(PicklerObject *self, PyObject *obj)
2059{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002060 if (self->proto >= 2) {
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002061 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002062 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002063 return -1;
2064 }
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08002065 else {
2066 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
2067 * so that unpicklers written before bools were introduced unpickle them
2068 * as ints, but unpicklers after can recognize that bools were intended.
2069 * Note that protocol 2 added direct ways to pickle bools.
2070 */
2071 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
2072 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
2073 return -1;
2074 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002075 return 0;
2076}
2077
2078static int
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002079save_long(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002080{
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002081 PyObject *repr = NULL;
2082 Py_ssize_t size;
2083 long val;
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002084 int overflow;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002085 int status = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002086
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002087 val= PyLong_AsLongAndOverflow(obj, &overflow);
2088 if (!overflow && (sizeof(long) <= 4 ||
2089 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1))))
2090 {
Larry Hastings61272b72014-01-07 12:41:53 -08002091 /* result fits in a signed 4-byte integer.
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002092
2093 Note: we can't use -0x80000000L in the above condition because some
2094 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
2095 before applying the unary minus when sizeof(long) <= 4. The
2096 resulting value stays unsigned which is commonly not what we want,
2097 so MSVC happily warns us about it. However, that result would have
2098 been fine because we guard for sizeof(long) <= 4 which turns the
2099 condition true in that particular case. */
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002100 char pdata[32];
2101 Py_ssize_t len = 0;
2102
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002103 if (self->bin) {
2104 pdata[1] = (unsigned char)(val & 0xff);
2105 pdata[2] = (unsigned char)((val >> 8) & 0xff);
2106 pdata[3] = (unsigned char)((val >> 16) & 0xff);
2107 pdata[4] = (unsigned char)((val >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002108
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002109 if ((pdata[4] != 0) || (pdata[3] != 0)) {
2110 pdata[0] = BININT;
2111 len = 5;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002112 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002113 else if (pdata[2] != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002114 pdata[0] = BININT2;
2115 len = 3;
2116 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002117 else {
2118 pdata[0] = BININT1;
2119 len = 2;
2120 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002121 }
2122 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002123 sprintf(pdata, "%c%ld\n", INT, val);
2124 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002125 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002126 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002127 return -1;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08002128
2129 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002130 }
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002131 assert(!PyErr_Occurred());
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002132
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002133 if (self->proto >= 2) {
2134 /* Linear-time pickling. */
2135 size_t nbits;
2136 size_t nbytes;
2137 unsigned char *pdata;
2138 char header[5];
2139 int i;
2140 int sign = _PyLong_Sign(obj);
2141
2142 if (sign == 0) {
2143 header[0] = LONG1;
2144 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002145 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002146 goto error;
2147 return 0;
2148 }
2149 nbits = _PyLong_NumBits(obj);
2150 if (nbits == (size_t)-1 && PyErr_Occurred())
2151 goto error;
2152 /* How many bytes do we need? There are nbits >> 3 full
2153 * bytes of data, and nbits & 7 leftover bits. If there
2154 * are any leftover bits, then we clearly need another
Min ho Kim96e12d52019-07-22 06:12:33 +10002155 * byte. What's not so obvious is that we *probably*
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002156 * need another byte even if there aren't any leftovers:
2157 * the most-significant bit of the most-significant byte
2158 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03002159 * opposite of the one we need. The exception is ints
2160 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002161 * its own 256's-complement, so has the right sign bit
2162 * even without the extra byte. That's a pain to check
2163 * for in advance, though, so we always grab an extra
2164 * byte at the start, and cut it back later if possible.
2165 */
2166 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01002167 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002168 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03002169 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002170 goto error;
2171 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002172 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002173 if (repr == NULL)
2174 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00002175 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002176 i = _PyLong_AsByteArray((PyLongObject *)obj,
2177 pdata, nbytes,
2178 1 /* little endian */ , 1 /* signed */ );
2179 if (i < 0)
2180 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03002181 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002182 * needed. This is so iff the MSB is all redundant sign
2183 * bits.
2184 */
2185 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02002186 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002187 pdata[nbytes - 1] == 0xff &&
2188 (pdata[nbytes - 2] & 0x80) != 0) {
2189 nbytes--;
2190 }
2191
2192 if (nbytes < 256) {
2193 header[0] = LONG1;
2194 header[1] = (unsigned char)nbytes;
2195 size = 2;
2196 }
2197 else {
2198 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002199 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002200 for (i = 1; i < 5; i++) {
2201 header[i] = (unsigned char)(size & 0xff);
2202 size >>= 8;
2203 }
2204 size = 5;
2205 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002206 if (_Pickler_Write(self, header, size) < 0 ||
2207 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002208 goto error;
2209 }
2210 else {
Serhiy Storchaka3daaafb2017-11-16 09:44:43 +02002211 const char long_op = LONG;
Serhiy Storchaka85b0f5b2016-11-20 10:16:47 +02002212 const char *string;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002213
Mark Dickinson8dd05142009-01-20 20:43:58 +00002214 /* proto < 2: write the repr and newline. This is quadratic-time (in
2215 the number of digits), in both directions. We add a trailing 'L'
2216 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002217
2218 repr = PyObject_Repr(obj);
2219 if (repr == NULL)
2220 goto error;
2221
Serhiy Storchaka06515832016-11-20 09:13:07 +02002222 string = PyUnicode_AsUTF8AndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002223 if (string == NULL)
2224 goto error;
2225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002226 if (_Pickler_Write(self, &long_op, 1) < 0 ||
2227 _Pickler_Write(self, string, size) < 0 ||
2228 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002229 goto error;
2230 }
2231
2232 if (0) {
2233 error:
2234 status = -1;
2235 }
2236 Py_XDECREF(repr);
2237
2238 return status;
2239}
2240
2241static int
2242save_float(PicklerObject *self, PyObject *obj)
2243{
2244 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
2245
2246 if (self->bin) {
2247 char pdata[9];
2248 pdata[0] = BINFLOAT;
2249 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2250 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002251 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002252 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02002253 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002254 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00002255 int result = -1;
2256 char *buf = NULL;
2257 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002259 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002260 goto done;
2261
Serhiy Storchakac86ca262015-02-15 14:18:32 +02002262 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00002263 if (!buf) {
2264 PyErr_NoMemory();
2265 goto done;
2266 }
2267
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002268 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002269 goto done;
2270
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002271 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002272 goto done;
2273
2274 result = 0;
2275done:
2276 PyMem_Free(buf);
2277 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002278 }
2279
2280 return 0;
2281}
2282
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002283/* Perform direct write of the header and payload of the binary object.
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002284
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002285 The large contiguous data is written directly into the underlying file
2286 object, bypassing the output_buffer of the Pickler. We intentionally
2287 do not insert a protocol 4 frame opcode to make it possible to optimize
2288 file.read calls in the loader.
2289 */
2290static int
2291_Pickler_write_bytes(PicklerObject *self,
2292 const char *header, Py_ssize_t header_size,
2293 const char *data, Py_ssize_t data_size,
2294 PyObject *payload)
2295{
2296 int bypass_buffer = (data_size >= FRAME_SIZE_TARGET);
2297 int framing = self->framing;
2298
2299 if (bypass_buffer) {
2300 assert(self->output_buffer != NULL);
2301 /* Commit the previous frame. */
2302 if (_Pickler_CommitFrame(self)) {
2303 return -1;
2304 }
2305 /* Disable framing temporarily */
2306 self->framing = 0;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002307 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002308
2309 if (_Pickler_Write(self, header, header_size) < 0) {
2310 return -1;
2311 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002312
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002313 if (bypass_buffer && self->write != NULL) {
2314 /* Bypass the in-memory buffer to directly stream large data
2315 into the underlying file object. */
2316 PyObject *result, *mem = NULL;
2317 /* Dump the output buffer to the file. */
2318 if (_Pickler_FlushToFile(self) < 0) {
2319 return -1;
2320 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002321
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002322 /* Stream write the payload into the file without going through the
2323 output buffer. */
2324 if (payload == NULL) {
Serhiy Storchaka5b76bdb2018-01-13 00:28:31 +02002325 /* TODO: It would be better to use a memoryview with a linked
2326 original string if this is possible. */
2327 payload = mem = PyBytes_FromStringAndSize(data, data_size);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002328 if (payload == NULL) {
2329 return -1;
2330 }
2331 }
Petr Viktorinffd97532020-02-11 17:46:57 +01002332 result = PyObject_CallOneArg(self->write, payload);
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002333 Py_XDECREF(mem);
2334 if (result == NULL) {
2335 return -1;
2336 }
2337 Py_DECREF(result);
2338
2339 /* Reinitialize the buffer for subsequent calls to _Pickler_Write. */
2340 if (_Pickler_ClearBuffer(self) < 0) {
2341 return -1;
2342 }
2343 }
2344 else {
2345 if (_Pickler_Write(self, data, data_size) < 0) {
2346 return -1;
2347 }
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002348 }
2349
2350 /* Re-enable framing for subsequent calls to _Pickler_Write. */
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002351 self->framing = framing;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002352
2353 return 0;
2354}
2355
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002356static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02002357_save_bytes_data(PicklerObject *self, PyObject *obj, const char *data,
2358 Py_ssize_t size)
2359{
2360 assert(self->proto >= 3);
2361
2362 char header[9];
2363 Py_ssize_t len;
2364
2365 if (size < 0)
2366 return -1;
2367
2368 if (size <= 0xff) {
2369 header[0] = SHORT_BINBYTES;
2370 header[1] = (unsigned char)size;
2371 len = 2;
2372 }
2373 else if ((size_t)size <= 0xffffffffUL) {
2374 header[0] = BINBYTES;
2375 header[1] = (unsigned char)(size & 0xff);
2376 header[2] = (unsigned char)((size >> 8) & 0xff);
2377 header[3] = (unsigned char)((size >> 16) & 0xff);
2378 header[4] = (unsigned char)((size >> 24) & 0xff);
2379 len = 5;
2380 }
2381 else if (self->proto >= 4) {
2382 header[0] = BINBYTES8;
2383 _write_size64(header + 1, size);
2384 len = 9;
2385 }
2386 else {
2387 PyErr_SetString(PyExc_OverflowError,
2388 "serializing a bytes object larger than 4 GiB "
2389 "requires pickle protocol 4 or higher");
2390 return -1;
2391 }
2392
2393 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2394 return -1;
2395 }
2396
2397 if (memo_put(self, obj) < 0) {
2398 return -1;
2399 }
2400
2401 return 0;
2402}
2403
2404static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002405save_bytes(PicklerObject *self, PyObject *obj)
2406{
2407 if (self->proto < 3) {
2408 /* Older pickle protocols do not have an opcode for pickling bytes
2409 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002410 the __reduce__ method) to permit bytes object unpickling.
2411
2412 Here we use a hack to be compatible with Python 2. Since in Python
2413 2 'bytes' is just an alias for 'str' (which has different
2414 parameters than the actual bytes object), we use codecs.encode
2415 to create the appropriate 'str' object when unpickled using
2416 Python 2 *and* the appropriate 'bytes' object when unpickled
2417 using Python 3. Again this is a hack and we don't need to do this
2418 with newer protocols. */
Pierre Glaser289f1f82019-05-08 23:08:25 +02002419 PyObject *reduce_value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002420 int status;
2421
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002422 if (PyBytes_GET_SIZE(obj) == 0) {
2423 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2424 }
2425 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002426 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002427 PyObject *unicode_str =
2428 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2429 PyBytes_GET_SIZE(obj),
2430 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002431 _Py_IDENTIFIER(latin1);
2432
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002433 if (unicode_str == NULL)
2434 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002435 reduce_value = Py_BuildValue("(O(OO))",
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002436 st->codecs_encode, unicode_str,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002437 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002438 Py_DECREF(unicode_str);
2439 }
2440
2441 if (reduce_value == NULL)
2442 return -1;
2443
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002444 /* save_reduce() will memoize the object automatically. */
2445 status = save_reduce(self, reduce_value, obj);
2446 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002447 return status;
2448 }
2449 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002450 return _save_bytes_data(self, obj, PyBytes_AS_STRING(obj),
2451 PyBytes_GET_SIZE(obj));
2452 }
2453}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002454
Antoine Pitrou91f43802019-05-26 17:10:09 +02002455static int
2456_save_bytearray_data(PicklerObject *self, PyObject *obj, const char *data,
2457 Py_ssize_t size)
2458{
2459 assert(self->proto >= 5);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002460
Antoine Pitrou91f43802019-05-26 17:10:09 +02002461 char header[9];
2462 Py_ssize_t len;
2463
2464 if (size < 0)
2465 return -1;
2466
2467 header[0] = BYTEARRAY8;
2468 _write_size64(header + 1, size);
2469 len = 9;
2470
2471 if (_Pickler_write_bytes(self, header, len, data, size, obj) < 0) {
2472 return -1;
2473 }
2474
2475 if (memo_put(self, obj) < 0) {
2476 return -1;
2477 }
2478
2479 return 0;
2480}
2481
2482static int
2483save_bytearray(PicklerObject *self, PyObject *obj)
2484{
2485 if (self->proto < 5) {
2486 /* Older pickle protocols do not have an opcode for pickling
2487 * bytearrays. */
2488 PyObject *reduce_value = NULL;
2489 int status;
2490
2491 if (PyByteArray_GET_SIZE(obj) == 0) {
2492 reduce_value = Py_BuildValue("(O())",
2493 (PyObject *) &PyByteArray_Type);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002494 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002495 else {
Antoine Pitrou91f43802019-05-26 17:10:09 +02002496 PyObject *bytes_obj = PyBytes_FromObject(obj);
2497 if (bytes_obj != NULL) {
2498 reduce_value = Py_BuildValue("(O(O))",
2499 (PyObject *) &PyByteArray_Type,
2500 bytes_obj);
2501 Py_DECREF(bytes_obj);
2502 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002503 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002504 if (reduce_value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002505 return -1;
2506
Antoine Pitrou91f43802019-05-26 17:10:09 +02002507 /* save_reduce() will memoize the object automatically. */
2508 status = save_reduce(self, reduce_value, obj);
2509 Py_DECREF(reduce_value);
2510 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002511 }
Antoine Pitrou91f43802019-05-26 17:10:09 +02002512 else {
2513 return _save_bytearray_data(self, obj, PyByteArray_AS_STRING(obj),
2514 PyByteArray_GET_SIZE(obj));
2515 }
2516}
2517
2518static int
2519save_picklebuffer(PicklerObject *self, PyObject *obj)
2520{
2521 if (self->proto < 5) {
2522 PickleState *st = _Pickle_GetGlobalState();
2523 PyErr_SetString(st->PicklingError,
2524 "PickleBuffer can only pickled with protocol >= 5");
2525 return -1;
2526 }
2527 const Py_buffer* view = PyPickleBuffer_GetBuffer(obj);
2528 if (view == NULL) {
2529 return -1;
2530 }
2531 if (view->suboffsets != NULL || !PyBuffer_IsContiguous(view, 'A')) {
2532 PickleState *st = _Pickle_GetGlobalState();
2533 PyErr_SetString(st->PicklingError,
2534 "PickleBuffer can not be pickled when "
2535 "pointing to a non-contiguous buffer");
2536 return -1;
2537 }
2538 int in_band = 1;
2539 if (self->buffer_callback != NULL) {
Petr Viktorinffd97532020-02-11 17:46:57 +01002540 PyObject *ret = PyObject_CallOneArg(self->buffer_callback, obj);
Antoine Pitrou91f43802019-05-26 17:10:09 +02002541 if (ret == NULL) {
2542 return -1;
2543 }
2544 in_band = PyObject_IsTrue(ret);
2545 Py_DECREF(ret);
2546 if (in_band == -1) {
2547 return -1;
2548 }
2549 }
2550 if (in_band) {
2551 /* Write data in-band */
2552 if (view->readonly) {
2553 return _save_bytes_data(self, obj, (const char*) view->buf,
2554 view->len);
2555 }
2556 else {
2557 return _save_bytearray_data(self, obj, (const char*) view->buf,
2558 view->len);
2559 }
2560 }
2561 else {
2562 /* Write data out-of-band */
2563 const char next_buffer_op = NEXT_BUFFER;
2564 if (_Pickler_Write(self, &next_buffer_op, 1) < 0) {
2565 return -1;
2566 }
2567 if (view->readonly) {
2568 const char readonly_buffer_op = READONLY_BUFFER;
2569 if (_Pickler_Write(self, &readonly_buffer_op, 1) < 0) {
2570 return -1;
2571 }
2572 }
2573 }
2574 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002575}
2576
2577/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2578 backslash and newline characters to \uXXXX escapes. */
2579static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002580raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002581{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002582 char *p;
Victor Stinner049e5092014-08-17 22:20:00 +02002583 Py_ssize_t i, size;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03002584 const void *data;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002585 unsigned int kind;
Victor Stinner358af132015-10-12 22:36:57 +02002586 _PyBytesWriter writer;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002587
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002588 if (PyUnicode_READY(obj))
2589 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002590
Victor Stinner358af132015-10-12 22:36:57 +02002591 _PyBytesWriter_Init(&writer);
2592
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002593 size = PyUnicode_GET_LENGTH(obj);
2594 data = PyUnicode_DATA(obj);
2595 kind = PyUnicode_KIND(obj);
Victor Stinner121aab42011-09-29 23:40:53 +02002596
Victor Stinner358af132015-10-12 22:36:57 +02002597 p = _PyBytesWriter_Alloc(&writer, size);
2598 if (p == NULL)
2599 goto error;
2600 writer.overallocate = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002601
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002602 for (i=0; i < size; i++) {
2603 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002604 /* Map 32-bit characters to '\Uxxxxxxxx' */
2605 if (ch >= 0x10000) {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002606 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002607 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2608 if (p == NULL)
2609 goto error;
2610
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002611 *p++ = '\\';
2612 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002613 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2614 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2615 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2616 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2617 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2618 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2619 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2620 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002621 }
Victor Stinner358af132015-10-12 22:36:57 +02002622 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
Serhiy Storchaka38ab7d42019-05-31 11:29:39 +03002623 else if (ch >= 256 ||
2624 ch == '\\' || ch == 0 || ch == '\n' || ch == '\r' ||
2625 ch == 0x1a)
2626 {
Raymond Hettinger15f44ab2016-08-30 10:47:49 -07002627 /* -1: subtract 1 preallocated byte */
Victor Stinner358af132015-10-12 22:36:57 +02002628 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2629 if (p == NULL)
2630 goto error;
2631
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002632 *p++ = '\\';
2633 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002634 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2635 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2636 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2637 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002638 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002639 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002640 else
2641 *p++ = (char) ch;
2642 }
Victor Stinner358af132015-10-12 22:36:57 +02002643
2644 return _PyBytesWriter_Finish(&writer, p);
2645
2646error:
2647 _PyBytesWriter_Dealloc(&writer);
2648 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002649}
2650
2651static int
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002652write_unicode_binary(PicklerObject *self, PyObject *obj)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002653{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002654 char header[9];
2655 Py_ssize_t len;
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002656 PyObject *encoded = NULL;
2657 Py_ssize_t size;
2658 const char *data;
2659
2660 if (PyUnicode_READY(obj))
2661 return -1;
2662
2663 data = PyUnicode_AsUTF8AndSize(obj, &size);
2664 if (data == NULL) {
2665 /* Issue #8383: for strings with lone surrogates, fallback on the
2666 "surrogatepass" error handler. */
2667 PyErr_Clear();
2668 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2669 if (encoded == NULL)
2670 return -1;
2671
2672 data = PyBytes_AS_STRING(encoded);
2673 size = PyBytes_GET_SIZE(encoded);
2674 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002675
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002676 assert(size >= 0);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002677 if (size <= 0xff && self->proto >= 4) {
2678 header[0] = SHORT_BINUNICODE;
2679 header[1] = (unsigned char)(size & 0xff);
2680 len = 2;
2681 }
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002682 else if ((size_t)size <= 0xffffffffUL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002683 header[0] = BINUNICODE;
2684 header[1] = (unsigned char)(size & 0xff);
2685 header[2] = (unsigned char)((size >> 8) & 0xff);
2686 header[3] = (unsigned char)((size >> 16) & 0xff);
2687 header[4] = (unsigned char)((size >> 24) & 0xff);
2688 len = 5;
2689 }
2690 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002691 header[0] = BINUNICODE8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002692 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002693 len = 9;
2694 }
2695 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002696 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou91f43802019-05-26 17:10:09 +02002697 "serializing a string larger than 4 GiB "
2698 "requires pickle protocol 4 or higher");
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002699 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002700 return -1;
2701 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002702
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002703 if (_Pickler_write_bytes(self, header, len, data, size, encoded) < 0) {
2704 Py_XDECREF(encoded);
2705 return -1;
Olivier Grisel3cd7c6e2018-01-06 16:18:54 +01002706 }
Serhiy Storchaka0a2da502018-01-11 13:03:20 +02002707 Py_XDECREF(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002708 return 0;
2709}
2710
2711static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002712save_unicode(PicklerObject *self, PyObject *obj)
2713{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002714 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002715 if (write_unicode_binary(self, obj) < 0)
2716 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002717 }
2718 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002719 PyObject *encoded;
2720 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002721 const char unicode_op = UNICODE;
2722
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002723 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002724 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002725 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002726
Antoine Pitrou299978d2013-04-07 17:38:11 +02002727 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2728 Py_DECREF(encoded);
2729 return -1;
2730 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002731
2732 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002733 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2734 Py_DECREF(encoded);
2735 return -1;
2736 }
2737 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002738
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002739 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002741 }
2742 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002743 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002744
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002745 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002746}
2747
2748/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2749static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002750store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002751{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002752 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002753
2754 assert(PyTuple_Size(t) == len);
2755
2756 for (i = 0; i < len; i++) {
2757 PyObject *element = PyTuple_GET_ITEM(t, i);
2758
2759 if (element == NULL)
2760 return -1;
2761 if (save(self, element, 0) < 0)
2762 return -1;
2763 }
2764
2765 return 0;
2766}
2767
2768/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2769 * used across protocols to minimize the space needed to pickle them.
2770 * Tuples are also the only builtin immutable type that can be recursive
2771 * (a tuple can be reached from itself), and that requires some subtle
2772 * magic so that it works in all cases. IOW, this is a long routine.
2773 */
2774static int
2775save_tuple(PicklerObject *self, PyObject *obj)
2776{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002777 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002778
2779 const char mark_op = MARK;
2780 const char tuple_op = TUPLE;
2781 const char pop_op = POP;
2782 const char pop_mark_op = POP_MARK;
2783 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2784
2785 if ((len = PyTuple_Size(obj)) < 0)
2786 return -1;
2787
2788 if (len == 0) {
2789 char pdata[2];
2790
2791 if (self->proto) {
2792 pdata[0] = EMPTY_TUPLE;
2793 len = 1;
2794 }
2795 else {
2796 pdata[0] = MARK;
2797 pdata[1] = TUPLE;
2798 len = 2;
2799 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002800 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002801 return -1;
2802 return 0;
2803 }
2804
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002805 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002806 * saving the tuple elements, the tuple must be recursive, in
2807 * which case we'll pop everything we put on the stack, and fetch
2808 * its value from the memo.
2809 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002810 if (len <= 3 && self->proto >= 2) {
2811 /* Use TUPLE{1,2,3} opcodes. */
2812 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002813 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002814
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002815 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002816 /* pop the len elements */
2817 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002818 if (_Pickler_Write(self, &pop_op, 1) < 0)
2819 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002820 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002821 if (memo_get(self, obj) < 0)
2822 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002823
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002824 return 0;
2825 }
2826 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002827 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2828 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002829 }
2830 goto memoize;
2831 }
2832
2833 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2834 * Generate MARK e1 e2 ... TUPLE
2835 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002836 if (_Pickler_Write(self, &mark_op, 1) < 0)
2837 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002838
2839 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002840 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002841
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002842 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002843 /* pop the stack stuff we pushed */
2844 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002845 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2846 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002847 }
2848 else {
2849 /* Note that we pop one more than len, to remove
2850 * the MARK too.
2851 */
2852 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002853 if (_Pickler_Write(self, &pop_op, 1) < 0)
2854 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002855 }
2856 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002857 if (memo_get(self, obj) < 0)
2858 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002859
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002860 return 0;
2861 }
2862 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002863 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2864 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002865 }
2866
2867 memoize:
2868 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002869 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002870
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002871 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002872}
2873
2874/* iter is an iterator giving items, and we batch up chunks of
2875 * MARK item item ... item APPENDS
2876 * opcode sequences. Calling code should have arranged to first create an
2877 * empty list, or list-like object, for the APPENDS to operate on.
2878 * Returns 0 on success, <0 on error.
2879 */
2880static int
2881batch_list(PicklerObject *self, PyObject *iter)
2882{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002883 PyObject *obj = NULL;
2884 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 int i, n;
2886
2887 const char mark_op = MARK;
2888 const char append_op = APPEND;
2889 const char appends_op = APPENDS;
2890
2891 assert(iter != NULL);
2892
2893 /* XXX: I think this function could be made faster by avoiding the
2894 iterator interface and fetching objects directly from list using
2895 PyList_GET_ITEM.
2896 */
2897
2898 if (self->proto == 0) {
2899 /* APPENDS isn't available; do one at a time. */
2900 for (;;) {
2901 obj = PyIter_Next(iter);
2902 if (obj == NULL) {
2903 if (PyErr_Occurred())
2904 return -1;
2905 break;
2906 }
2907 i = save(self, obj, 0);
2908 Py_DECREF(obj);
2909 if (i < 0)
2910 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002911 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002912 return -1;
2913 }
2914 return 0;
2915 }
2916
2917 /* proto > 0: write in batches of BATCHSIZE. */
2918 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002919 /* Get first item */
2920 firstitem = PyIter_Next(iter);
2921 if (firstitem == NULL) {
2922 if (PyErr_Occurred())
2923 goto error;
2924
2925 /* nothing more to add */
2926 break;
2927 }
2928
2929 /* Try to get a second item */
2930 obj = PyIter_Next(iter);
2931 if (obj == NULL) {
2932 if (PyErr_Occurred())
2933 goto error;
2934
2935 /* Only one item to write */
2936 if (save(self, firstitem, 0) < 0)
2937 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002938 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002939 goto error;
2940 Py_CLEAR(firstitem);
2941 break;
2942 }
2943
2944 /* More than one item to write */
2945
2946 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002947 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002948 goto error;
2949
2950 if (save(self, firstitem, 0) < 0)
2951 goto error;
2952 Py_CLEAR(firstitem);
2953 n = 1;
2954
2955 /* Fetch and save up to BATCHSIZE items */
2956 while (obj) {
2957 if (save(self, obj, 0) < 0)
2958 goto error;
2959 Py_CLEAR(obj);
2960 n += 1;
2961
2962 if (n == BATCHSIZE)
2963 break;
2964
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002965 obj = PyIter_Next(iter);
2966 if (obj == NULL) {
2967 if (PyErr_Occurred())
2968 goto error;
2969 break;
2970 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002971 }
2972
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002973 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002974 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002975
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002976 } while (n == BATCHSIZE);
2977 return 0;
2978
2979 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002980 Py_XDECREF(firstitem);
2981 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002982 return -1;
2983}
2984
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002985/* This is a variant of batch_list() above, specialized for lists (with no
2986 * support for list subclasses). Like batch_list(), we batch up chunks of
2987 * MARK item item ... item APPENDS
2988 * opcode sequences. Calling code should have arranged to first create an
2989 * empty list, or list-like object, for the APPENDS to operate on.
2990 * Returns 0 on success, -1 on error.
2991 *
2992 * This version is considerably faster than batch_list(), if less general.
2993 *
2994 * Note that this only works for protocols > 0.
2995 */
2996static int
2997batch_list_exact(PicklerObject *self, PyObject *obj)
2998{
2999 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003000 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003001
3002 const char append_op = APPEND;
3003 const char appends_op = APPENDS;
3004 const char mark_op = MARK;
3005
3006 assert(obj != NULL);
3007 assert(self->proto > 0);
3008 assert(PyList_CheckExact(obj));
3009
3010 if (PyList_GET_SIZE(obj) == 1) {
3011 item = PyList_GET_ITEM(obj, 0);
3012 if (save(self, item, 0) < 0)
3013 return -1;
3014 if (_Pickler_Write(self, &append_op, 1) < 0)
3015 return -1;
3016 return 0;
3017 }
3018
3019 /* Write in batches of BATCHSIZE. */
3020 total = 0;
3021 do {
3022 this_batch = 0;
3023 if (_Pickler_Write(self, &mark_op, 1) < 0)
3024 return -1;
3025 while (total < PyList_GET_SIZE(obj)) {
3026 item = PyList_GET_ITEM(obj, total);
3027 if (save(self, item, 0) < 0)
3028 return -1;
3029 total++;
3030 if (++this_batch == BATCHSIZE)
3031 break;
3032 }
3033 if (_Pickler_Write(self, &appends_op, 1) < 0)
3034 return -1;
3035
3036 } while (total < PyList_GET_SIZE(obj));
3037
3038 return 0;
3039}
3040
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003041static int
3042save_list(PicklerObject *self, PyObject *obj)
3043{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003044 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003045 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003046 int status = 0;
3047
3048 if (self->fast && !fast_save_enter(self, obj))
3049 goto error;
3050
3051 /* Create an empty list. */
3052 if (self->bin) {
3053 header[0] = EMPTY_LIST;
3054 len = 1;
3055 }
3056 else {
3057 header[0] = MARK;
3058 header[1] = LIST;
3059 len = 2;
3060 }
3061
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003062 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003063 goto error;
3064
3065 /* Get list length, and bow out early if empty. */
3066 if ((len = PyList_Size(obj)) < 0)
3067 goto error;
3068
3069 if (memo_put(self, obj) < 0)
3070 goto error;
3071
3072 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003073 /* Materialize the list elements. */
3074 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003075 if (Py_EnterRecursiveCall(" while pickling an object"))
3076 goto error;
3077 status = batch_list_exact(self, obj);
3078 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003079 } else {
3080 PyObject *iter = PyObject_GetIter(obj);
3081 if (iter == NULL)
3082 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003083
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003084 if (Py_EnterRecursiveCall(" while pickling an object")) {
3085 Py_DECREF(iter);
3086 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003087 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003088 status = batch_list(self, iter);
3089 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003090 Py_DECREF(iter);
3091 }
3092 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003093 if (0) {
3094 error:
3095 status = -1;
3096 }
3097
3098 if (self->fast && !fast_save_leave(self, obj))
3099 status = -1;
3100
3101 return status;
3102}
3103
3104/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
3105 * MARK key value ... key value SETITEMS
3106 * opcode sequences. Calling code should have arranged to first create an
3107 * empty dict, or dict-like object, for the SETITEMS to operate on.
3108 * Returns 0 on success, <0 on error.
3109 *
3110 * This is very much like batch_list(). The difference between saving
3111 * elements directly, and picking apart two-tuples, is so long-winded at
3112 * the C level, though, that attempts to combine these routines were too
3113 * ugly to bear.
3114 */
3115static int
3116batch_dict(PicklerObject *self, PyObject *iter)
3117{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003118 PyObject *obj = NULL;
3119 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003120 int i, n;
3121
3122 const char mark_op = MARK;
3123 const char setitem_op = SETITEM;
3124 const char setitems_op = SETITEMS;
3125
3126 assert(iter != NULL);
3127
3128 if (self->proto == 0) {
3129 /* SETITEMS isn't available; do one at a time. */
3130 for (;;) {
3131 obj = PyIter_Next(iter);
3132 if (obj == NULL) {
3133 if (PyErr_Occurred())
3134 return -1;
3135 break;
3136 }
3137 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3138 PyErr_SetString(PyExc_TypeError, "dict items "
3139 "iterator must return 2-tuples");
3140 return -1;
3141 }
3142 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
3143 if (i >= 0)
3144 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
3145 Py_DECREF(obj);
3146 if (i < 0)
3147 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003148 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003149 return -1;
3150 }
3151 return 0;
3152 }
3153
3154 /* proto > 0: write in batches of BATCHSIZE. */
3155 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003156 /* Get first item */
3157 firstitem = PyIter_Next(iter);
3158 if (firstitem == NULL) {
3159 if (PyErr_Occurred())
3160 goto error;
3161
3162 /* nothing more to add */
3163 break;
3164 }
3165 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
3166 PyErr_SetString(PyExc_TypeError, "dict items "
3167 "iterator must return 2-tuples");
3168 goto error;
3169 }
3170
3171 /* Try to get a second item */
3172 obj = PyIter_Next(iter);
3173 if (obj == NULL) {
3174 if (PyErr_Occurred())
3175 goto error;
3176
3177 /* Only one item to write */
3178 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3179 goto error;
3180 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3181 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003182 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003183 goto error;
3184 Py_CLEAR(firstitem);
3185 break;
3186 }
3187
3188 /* More than one item to write */
3189
3190 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003191 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003192 goto error;
3193
3194 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
3195 goto error;
3196 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
3197 goto error;
3198 Py_CLEAR(firstitem);
3199 n = 1;
3200
3201 /* Fetch and save up to BATCHSIZE items */
3202 while (obj) {
3203 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
3204 PyErr_SetString(PyExc_TypeError, "dict items "
3205 "iterator must return 2-tuples");
3206 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003207 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003208 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
3209 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
3210 goto error;
3211 Py_CLEAR(obj);
3212 n += 1;
3213
3214 if (n == BATCHSIZE)
3215 break;
3216
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003217 obj = PyIter_Next(iter);
3218 if (obj == NULL) {
3219 if (PyErr_Occurred())
3220 goto error;
3221 break;
3222 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003223 }
3224
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003225 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003226 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003227
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003228 } while (n == BATCHSIZE);
3229 return 0;
3230
3231 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00003232 Py_XDECREF(firstitem);
3233 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003234 return -1;
3235}
3236
Collin Winter5c9b02d2009-05-25 05:43:30 +00003237/* This is a variant of batch_dict() above that specializes for dicts, with no
3238 * support for dict subclasses. Like batch_dict(), we batch up chunks of
3239 * MARK key value ... key value SETITEMS
3240 * opcode sequences. Calling code should have arranged to first create an
3241 * empty dict, or dict-like object, for the SETITEMS to operate on.
3242 * Returns 0 on success, -1 on error.
3243 *
3244 * Note that this currently doesn't work for protocol 0.
3245 */
3246static int
3247batch_dict_exact(PicklerObject *self, PyObject *obj)
3248{
3249 PyObject *key = NULL, *value = NULL;
3250 int i;
3251 Py_ssize_t dict_size, ppos = 0;
3252
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00003253 const char mark_op = MARK;
3254 const char setitem_op = SETITEM;
3255 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00003256
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003257 assert(obj != NULL && PyDict_CheckExact(obj));
Collin Winter5c9b02d2009-05-25 05:43:30 +00003258 assert(self->proto > 0);
3259
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003260 dict_size = PyDict_GET_SIZE(obj);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003261
3262 /* Special-case len(d) == 1 to save space. */
3263 if (dict_size == 1) {
3264 PyDict_Next(obj, &ppos, &key, &value);
3265 if (save(self, key, 0) < 0)
3266 return -1;
3267 if (save(self, value, 0) < 0)
3268 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003269 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003270 return -1;
3271 return 0;
3272 }
3273
3274 /* Write in batches of BATCHSIZE. */
3275 do {
3276 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003277 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003278 return -1;
3279 while (PyDict_Next(obj, &ppos, &key, &value)) {
3280 if (save(self, key, 0) < 0)
3281 return -1;
3282 if (save(self, value, 0) < 0)
3283 return -1;
3284 if (++i == BATCHSIZE)
3285 break;
3286 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003287 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00003288 return -1;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003289 if (PyDict_GET_SIZE(obj) != dict_size) {
Collin Winter5c9b02d2009-05-25 05:43:30 +00003290 PyErr_Format(
3291 PyExc_RuntimeError,
3292 "dictionary changed size during iteration");
3293 return -1;
3294 }
3295
3296 } while (i == BATCHSIZE);
3297 return 0;
3298}
3299
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003300static int
3301save_dict(PicklerObject *self, PyObject *obj)
3302{
3303 PyObject *items, *iter;
3304 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003305 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003306 int status = 0;
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003307 assert(PyDict_Check(obj));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003308
3309 if (self->fast && !fast_save_enter(self, obj))
3310 goto error;
3311
3312 /* Create an empty dict. */
3313 if (self->bin) {
3314 header[0] = EMPTY_DICT;
3315 len = 1;
3316 }
3317 else {
3318 header[0] = MARK;
3319 header[1] = DICT;
3320 len = 2;
3321 }
3322
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003323 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003324 goto error;
3325
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003326 if (memo_put(self, obj) < 0)
3327 goto error;
3328
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02003329 if (PyDict_GET_SIZE(obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003330 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00003331 if (PyDict_CheckExact(obj) && self->proto > 0) {
3332 /* We can take certain shortcuts if we know this is a dict and
3333 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003334 if (Py_EnterRecursiveCall(" while pickling an object"))
3335 goto error;
3336 status = batch_dict_exact(self, obj);
3337 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003338 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003339 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003340
Jeroen Demeyer762f93f2019-07-08 10:19:25 +02003341 items = _PyObject_CallMethodIdNoArgs(obj, &PyId_items);
Collin Winter5c9b02d2009-05-25 05:43:30 +00003342 if (items == NULL)
3343 goto error;
3344 iter = PyObject_GetIter(items);
3345 Py_DECREF(items);
3346 if (iter == NULL)
3347 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003348 if (Py_EnterRecursiveCall(" while pickling an object")) {
3349 Py_DECREF(iter);
3350 goto error;
3351 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00003352 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003353 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00003354 Py_DECREF(iter);
3355 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003356 }
3357
3358 if (0) {
3359 error:
3360 status = -1;
3361 }
3362
3363 if (self->fast && !fast_save_leave(self, obj))
3364 status = -1;
3365
3366 return status;
3367}
3368
3369static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003370save_set(PicklerObject *self, PyObject *obj)
3371{
3372 PyObject *item;
3373 int i;
3374 Py_ssize_t set_size, ppos = 0;
3375 Py_hash_t hash;
3376
3377 const char empty_set_op = EMPTY_SET;
3378 const char mark_op = MARK;
3379 const char additems_op = ADDITEMS;
3380
3381 if (self->proto < 4) {
3382 PyObject *items;
3383 PyObject *reduce_value;
3384 int status;
3385
3386 items = PySequence_List(obj);
3387 if (items == NULL) {
3388 return -1;
3389 }
3390 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
3391 Py_DECREF(items);
3392 if (reduce_value == NULL) {
3393 return -1;
3394 }
3395 /* save_reduce() will memoize the object automatically. */
3396 status = save_reduce(self, reduce_value, obj);
3397 Py_DECREF(reduce_value);
3398 return status;
3399 }
3400
3401 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
3402 return -1;
3403
3404 if (memo_put(self, obj) < 0)
3405 return -1;
3406
3407 set_size = PySet_GET_SIZE(obj);
3408 if (set_size == 0)
3409 return 0; /* nothing to do */
3410
3411 /* Write in batches of BATCHSIZE. */
3412 do {
3413 i = 0;
3414 if (_Pickler_Write(self, &mark_op, 1) < 0)
3415 return -1;
3416 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
3417 if (save(self, item, 0) < 0)
3418 return -1;
3419 if (++i == BATCHSIZE)
3420 break;
3421 }
3422 if (_Pickler_Write(self, &additems_op, 1) < 0)
3423 return -1;
3424 if (PySet_GET_SIZE(obj) != set_size) {
3425 PyErr_Format(
3426 PyExc_RuntimeError,
3427 "set changed size during iteration");
3428 return -1;
3429 }
3430 } while (i == BATCHSIZE);
3431
3432 return 0;
3433}
3434
3435static int
3436save_frozenset(PicklerObject *self, PyObject *obj)
3437{
3438 PyObject *iter;
3439
3440 const char mark_op = MARK;
3441 const char frozenset_op = FROZENSET;
3442
3443 if (self->fast && !fast_save_enter(self, obj))
3444 return -1;
3445
3446 if (self->proto < 4) {
3447 PyObject *items;
3448 PyObject *reduce_value;
3449 int status;
3450
3451 items = PySequence_List(obj);
3452 if (items == NULL) {
3453 return -1;
3454 }
3455 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3456 items);
3457 Py_DECREF(items);
3458 if (reduce_value == NULL) {
3459 return -1;
3460 }
3461 /* save_reduce() will memoize the object automatically. */
3462 status = save_reduce(self, reduce_value, obj);
3463 Py_DECREF(reduce_value);
3464 return status;
3465 }
3466
3467 if (_Pickler_Write(self, &mark_op, 1) < 0)
3468 return -1;
3469
3470 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003471 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01003472 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003473 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003474 for (;;) {
3475 PyObject *item;
3476
3477 item = PyIter_Next(iter);
3478 if (item == NULL) {
3479 if (PyErr_Occurred()) {
3480 Py_DECREF(iter);
3481 return -1;
3482 }
3483 break;
3484 }
3485 if (save(self, item, 0) < 0) {
3486 Py_DECREF(item);
3487 Py_DECREF(iter);
3488 return -1;
3489 }
3490 Py_DECREF(item);
3491 }
3492 Py_DECREF(iter);
3493
3494 /* If the object is already in the memo, this means it is
3495 recursive. In this case, throw away everything we put on the
3496 stack, and fetch the object back from the memo. */
3497 if (PyMemoTable_Get(self->memo, obj)) {
3498 const char pop_mark_op = POP_MARK;
3499
3500 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3501 return -1;
3502 if (memo_get(self, obj) < 0)
3503 return -1;
3504 return 0;
3505 }
3506
3507 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3508 return -1;
3509 if (memo_put(self, obj) < 0)
3510 return -1;
3511
3512 return 0;
3513}
3514
3515static int
3516fix_imports(PyObject **module_name, PyObject **global_name)
3517{
3518 PyObject *key;
3519 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003520 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003521
3522 key = PyTuple_Pack(2, *module_name, *global_name);
3523 if (key == NULL)
3524 return -1;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003525 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003526 Py_DECREF(key);
3527 if (item) {
3528 PyObject *fixed_module_name;
3529 PyObject *fixed_global_name;
3530
3531 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3532 PyErr_Format(PyExc_RuntimeError,
3533 "_compat_pickle.REVERSE_NAME_MAPPING values "
3534 "should be 2-tuples, not %.200s",
3535 Py_TYPE(item)->tp_name);
3536 return -1;
3537 }
3538 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3539 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3540 if (!PyUnicode_Check(fixed_module_name) ||
3541 !PyUnicode_Check(fixed_global_name)) {
3542 PyErr_Format(PyExc_RuntimeError,
3543 "_compat_pickle.REVERSE_NAME_MAPPING values "
3544 "should be pairs of str, not (%.200s, %.200s)",
3545 Py_TYPE(fixed_module_name)->tp_name,
3546 Py_TYPE(fixed_global_name)->tp_name);
3547 return -1;
3548 }
3549
3550 Py_CLEAR(*module_name);
3551 Py_CLEAR(*global_name);
3552 Py_INCREF(fixed_module_name);
3553 Py_INCREF(fixed_global_name);
3554 *module_name = fixed_module_name;
3555 *global_name = fixed_global_name;
Serhiy Storchakabfe18242015-03-31 13:12:37 +03003556 return 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003557 }
3558 else if (PyErr_Occurred()) {
3559 return -1;
3560 }
3561
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003562 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003563 if (item) {
3564 if (!PyUnicode_Check(item)) {
3565 PyErr_Format(PyExc_RuntimeError,
3566 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3567 "should be strings, not %.200s",
3568 Py_TYPE(item)->tp_name);
3569 return -1;
3570 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003571 Py_INCREF(item);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003572 Py_XSETREF(*module_name, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003573 }
3574 else if (PyErr_Occurred()) {
3575 return -1;
3576 }
3577
3578 return 0;
3579}
3580
3581static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003582save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3583{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003584 PyObject *global_name = NULL;
3585 PyObject *module_name = NULL;
3586 PyObject *module = NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003587 PyObject *parent = NULL;
3588 PyObject *dotted_path = NULL;
3589 PyObject *lastname = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003590 PyObject *cls;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003591 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003592 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003593 _Py_IDENTIFIER(__name__);
3594 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003595
3596 const char global_op = GLOBAL;
3597
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003598 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003599 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003600 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003601 }
3602 else {
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003603 if (_PyObject_LookupAttrId(obj, &PyId___qualname__, &global_name) < 0)
3604 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003605 if (global_name == NULL) {
3606 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3607 if (global_name == NULL)
3608 goto error;
3609 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003610 }
3611
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003612 dotted_path = get_dotted_path(module, global_name);
3613 if (dotted_path == NULL)
3614 goto error;
3615 module_name = whichmodule(obj, dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003616 if (module_name == NULL)
3617 goto error;
3618
3619 /* XXX: Change to use the import C API directly with level=0 to disallow
3620 relative imports.
3621
3622 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3623 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3624 custom import functions (IMHO, this would be a nice security
3625 feature). The import C API would need to be extended to support the
3626 extra parameters of __import__ to fix that. */
3627 module = PyImport_Import(module_name);
3628 if (module == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003629 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003630 "Can't pickle %R: import of module %R failed",
3631 obj, module_name);
3632 goto error;
3633 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003634 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3635 Py_INCREF(lastname);
3636 cls = get_deep_attribute(module, dotted_path, &parent);
3637 Py_CLEAR(dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003638 if (cls == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003639 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003640 "Can't pickle %R: attribute lookup %S on %S failed",
3641 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003642 goto error;
3643 }
3644 if (cls != obj) {
3645 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003646 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003647 "Can't pickle %R: it's not the same object as %S.%S",
3648 obj, module_name, global_name);
3649 goto error;
3650 }
3651 Py_DECREF(cls);
3652
3653 if (self->proto >= 2) {
3654 /* See whether this is in the extension registry, and if
3655 * so generate an EXT opcode.
3656 */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003657 PyObject *extension_key;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003658 PyObject *code_obj; /* extension code as Python object */
3659 long code; /* extension code as C value */
3660 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003661 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003662
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003663 extension_key = PyTuple_Pack(2, module_name, global_name);
3664 if (extension_key == NULL) {
3665 goto error;
3666 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003667 code_obj = PyDict_GetItemWithError(st->extension_registry,
3668 extension_key);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003669 Py_DECREF(extension_key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003670 /* The object is not registered in the extension registry.
3671 This is the most likely code path. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003672 if (code_obj == NULL) {
3673 if (PyErr_Occurred()) {
3674 goto error;
3675 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003676 goto gen_global;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003677 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003678
3679 /* XXX: pickle.py doesn't check neither the type, nor the range
3680 of the value returned by the extension_registry. It should for
3681 consistency. */
3682
3683 /* Verify code_obj has the right type and value. */
3684 if (!PyLong_Check(code_obj)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003685 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003686 "Can't pickle %R: extension code %R isn't an integer",
3687 obj, code_obj);
3688 goto error;
3689 }
3690 code = PyLong_AS_LONG(code_obj);
3691 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003692 if (!PyErr_Occurred())
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003693 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3694 "code %ld is out of range", obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003695 goto error;
3696 }
3697
3698 /* Generate an EXT opcode. */
3699 if (code <= 0xff) {
3700 pdata[0] = EXT1;
3701 pdata[1] = (unsigned char)code;
3702 n = 2;
3703 }
3704 else if (code <= 0xffff) {
3705 pdata[0] = EXT2;
3706 pdata[1] = (unsigned char)(code & 0xff);
3707 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3708 n = 3;
3709 }
3710 else {
3711 pdata[0] = EXT4;
3712 pdata[1] = (unsigned char)(code & 0xff);
3713 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3714 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3715 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3716 n = 5;
3717 }
3718
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003719 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003720 goto error;
3721 }
3722 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003723 gen_global:
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003724 if (parent == module) {
3725 Py_INCREF(lastname);
3726 Py_DECREF(global_name);
3727 global_name = lastname;
3728 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003729 if (self->proto >= 4) {
3730 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003731
Christian Heimese8b1ba12013-11-23 21:13:39 +01003732 if (save(self, module_name, 0) < 0)
3733 goto error;
3734 if (save(self, global_name, 0) < 0)
3735 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003736
3737 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3738 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003739 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003740 else if (parent != module) {
3741 PickleState *st = _Pickle_GetGlobalState();
3742 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3743 st->getattr, parent, lastname);
Alexey Izbyshevf8c06b02018-08-22 07:51:25 +03003744 if (reduce_value == NULL)
3745 goto error;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003746 status = save_reduce(self, reduce_value, NULL);
3747 Py_DECREF(reduce_value);
3748 if (status < 0)
3749 goto error;
3750 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003751 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003752 /* Generate a normal global opcode if we are using a pickle
3753 protocol < 4, or if the object is not registered in the
3754 extension registry. */
3755 PyObject *encoded;
3756 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003757
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003758 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003759 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003760
3761 /* For protocol < 3 and if the user didn't request against doing
3762 so, we convert module names to the old 2.x module names. */
3763 if (self->proto < 3 && self->fix_imports) {
3764 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003765 goto error;
3766 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003767 }
3768
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003769 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3770 both the module name and the global name using UTF-8. We do so
3771 only when we are using the pickle protocol newer than version
3772 3. This is to ensure compatibility with older Unpickler running
3773 on Python 2.x. */
3774 if (self->proto == 3) {
3775 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003776 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003777 else {
3778 unicode_encoder = PyUnicode_AsASCIIString;
3779 }
3780 encoded = unicode_encoder(module_name);
3781 if (encoded == NULL) {
3782 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003783 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003784 "can't pickle module identifier '%S' using "
3785 "pickle protocol %i",
3786 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003787 goto error;
3788 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003789 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3790 PyBytes_GET_SIZE(encoded)) < 0) {
3791 Py_DECREF(encoded);
3792 goto error;
3793 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003794 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003795 if(_Pickler_Write(self, "\n", 1) < 0)
3796 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003797
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003798 /* Save the name of the module. */
3799 encoded = unicode_encoder(global_name);
3800 if (encoded == NULL) {
3801 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003802 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003803 "can't pickle global identifier '%S' using "
3804 "pickle protocol %i",
3805 global_name, self->proto);
3806 goto error;
3807 }
3808 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3809 PyBytes_GET_SIZE(encoded)) < 0) {
3810 Py_DECREF(encoded);
3811 goto error;
3812 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003813 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003814 if (_Pickler_Write(self, "\n", 1) < 0)
3815 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003816 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003817 /* Memoize the object. */
3818 if (memo_put(self, obj) < 0)
3819 goto error;
3820 }
3821
3822 if (0) {
3823 error:
3824 status = -1;
3825 }
3826 Py_XDECREF(module_name);
3827 Py_XDECREF(global_name);
3828 Py_XDECREF(module);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003829 Py_XDECREF(parent);
3830 Py_XDECREF(dotted_path);
3831 Py_XDECREF(lastname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003832
3833 return status;
3834}
3835
3836static int
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003837save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3838{
3839 PyObject *reduce_value;
3840 int status;
3841
3842 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3843 if (reduce_value == NULL) {
3844 return -1;
3845 }
3846 status = save_reduce(self, reduce_value, obj);
3847 Py_DECREF(reduce_value);
3848 return status;
3849}
3850
3851static int
3852save_type(PicklerObject *self, PyObject *obj)
3853{
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003854 if (obj == (PyObject *)&_PyNone_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003855 return save_singleton_type(self, obj, Py_None);
3856 }
3857 else if (obj == (PyObject *)&PyEllipsis_Type) {
3858 return save_singleton_type(self, obj, Py_Ellipsis);
3859 }
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003860 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003861 return save_singleton_type(self, obj, Py_NotImplemented);
3862 }
3863 return save_global(self, obj, NULL);
3864}
3865
3866static int
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003867save_pers(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003868{
3869 PyObject *pid = NULL;
3870 int status = 0;
3871
3872 const char persid_op = PERSID;
3873 const char binpersid_op = BINPERSID;
3874
Serhiy Storchaka986375e2017-11-30 22:48:31 +02003875 pid = call_method(self->pers_func, self->pers_func_self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003876 if (pid == NULL)
3877 return -1;
3878
3879 if (pid != Py_None) {
3880 if (self->bin) {
3881 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003882 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003883 goto error;
3884 }
3885 else {
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003886 PyObject *pid_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003887
3888 pid_str = PyObject_Str(pid);
3889 if (pid_str == NULL)
3890 goto error;
3891
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003892 /* XXX: Should it check whether the pid contains embedded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003893 newlines? */
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003894 if (!PyUnicode_IS_ASCII(pid_str)) {
3895 PyErr_SetString(_Pickle_GetGlobalState()->PicklingError,
3896 "persistent IDs in protocol 0 must be "
3897 "ASCII strings");
3898 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003899 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003900 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003901
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003902 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003903 _Pickler_Write(self, PyUnicode_DATA(pid_str),
3904 PyUnicode_GET_LENGTH(pid_str)) < 0 ||
3905 _Pickler_Write(self, "\n", 1) < 0) {
3906 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003907 goto error;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03003908 }
3909 Py_DECREF(pid_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003910 }
3911 status = 1;
3912 }
3913
3914 if (0) {
3915 error:
3916 status = -1;
3917 }
3918 Py_XDECREF(pid);
3919
3920 return status;
3921}
3922
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003923static PyObject *
3924get_class(PyObject *obj)
3925{
3926 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003927 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003928
Serhiy Storchakaf320be72018-01-25 10:49:40 +02003929 if (_PyObject_LookupAttrId(obj, &PyId___class__, &cls) == 0) {
3930 cls = (PyObject *) Py_TYPE(obj);
3931 Py_INCREF(cls);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003932 }
3933 return cls;
3934}
3935
/* Pickle obj through the tuple returned by its reduction function.
 *
 * We're saving obj, and args is the 2-through-6 tuple returned by the
 * appropriate __reduce__ method for obj:
 *
 *     (callable, argtup[, state[, listitems[, dictitems[, state_setter]]]])
 *
 * Optional items that are None are treated as if they were absent.
 *
 * obj may be NULL; in that case the reconstructed object is not memoized.
 * NOTE(review): the state_setter branch at the end passes obj to save()
 * without a NULL check -- confirm that no caller combines obj == NULL with
 * a state_setter.
 *
 * Returns 0 on success and -1 on failure.
 */
static int
save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
{
    PyObject *callable;
    PyObject *argtup;
    PyObject *state = NULL;
    PyObject *listitems = Py_None;
    PyObject *dictitems = Py_None;
    PyObject *state_setter = Py_None;
    PickleState *st = _Pickle_GetGlobalState();
    Py_ssize_t size;
    int use_newobj = 0, use_newobj_ex = 0;

    const char reduce_op = REDUCE;
    const char build_op = BUILD;
    const char newobj_op = NEWOBJ;
    const char newobj_ex_op = NEWOBJ_EX;

    /* Check the length up front so that a bad length is reported as a
       PicklingError with a pickle-specific message. */
    size = PyTuple_Size(args);
    if (size < 2 || size > 6) {
        PyErr_SetString(st->PicklingError, "tuple returned by "
                        "__reduce__ must contain 2 through 6 elements");
        return -1;
    }

    /* Items missing from the tuple leave the corresponding variable at its
       initial value (NULL for state, Py_None for the others). */
    if (!PyArg_UnpackTuple(args, "save_reduce", 2, 6,
                           &callable, &argtup, &state, &listitems, &dictitems,
                           &state_setter))
        return -1;

    if (!PyCallable_Check(callable)) {
        PyErr_SetString(st->PicklingError, "first item of the tuple "
                        "returned by __reduce__ must be callable");
        return -1;
    }
    if (!PyTuple_Check(argtup)) {
        PyErr_SetString(st->PicklingError, "second item of the tuple "
                        "returned by __reduce__ must be a tuple");
        return -1;
    }

    /* From here on, NULL (not None) means "not provided" for every
       optional item. */
    if (state == Py_None)
        state = NULL;

    if (listitems == Py_None)
        listitems = NULL;
    else if (!PyIter_Check(listitems)) {
        PyErr_Format(st->PicklingError, "fourth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(listitems)->tp_name);
        return -1;
    }

    if (dictitems == Py_None)
        dictitems = NULL;
    else if (!PyIter_Check(dictitems)) {
        PyErr_Format(st->PicklingError, "fifth element of the tuple "
                     "returned by __reduce__ must be an iterator, not %s",
                     Py_TYPE(dictitems)->tp_name);
        return -1;
    }

    if (state_setter == Py_None)
        state_setter = NULL;
    else if (!PyCallable_Check(state_setter)) {
        PyErr_Format(st->PicklingError, "sixth element of the tuple "
                     "returned by __reduce__ must be a function, not %s",
                     Py_TYPE(state_setter)->tp_name);
        return -1;
    }

    /* For protocol 2 and above, a callable whose __name__ is
       "__newobj_ex__" or "__newobj__" selects the dedicated code paths
       below instead of a plain REDUCE. */
    if (self->proto >= 2) {
        PyObject *name;
        _Py_IDENTIFIER(__name__);

        if (_PyObject_LookupAttrId(callable, &PyId___name__, &name) < 0) {
            return -1;
        }
        if (name != NULL && PyUnicode_Check(name)) {
            _Py_IDENTIFIER(__newobj_ex__);
            use_newobj_ex = _PyUnicode_EqualToASCIIId(
                    name, &PyId___newobj_ex__);
            if (!use_newobj_ex) {
                _Py_IDENTIFIER(__newobj__);
                use_newobj = _PyUnicode_EqualToASCIIId(name, &PyId___newobj__);
            }
        }
        Py_XDECREF(name);
    }

    if (use_newobj_ex) {
        PyObject *cls;
        PyObject *args;     /* NB: shadows the function parameter `args`. */
        PyObject *kwargs;

        /* argtup must be exactly (cls, args, kwargs). */
        if (PyTuple_GET_SIZE(argtup) != 3) {
            PyErr_Format(st->PicklingError,
                         "length of the NEWOBJ_EX argument tuple must be "
                         "exactly 3, not %zd", PyTuple_GET_SIZE(argtup));
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_Format(st->PicklingError,
                         "first item from NEWOBJ_EX argument tuple must "
                         "be a class, not %.200s", Py_TYPE(cls)->tp_name);
            return -1;
        }
        args = PyTuple_GET_ITEM(argtup, 1);
        if (!PyTuple_Check(args)) {
            PyErr_Format(st->PicklingError,
                         "second item from NEWOBJ_EX argument tuple must "
                         "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
            return -1;
        }
        kwargs = PyTuple_GET_ITEM(argtup, 2);
        if (!PyDict_Check(kwargs)) {
            PyErr_Format(st->PicklingError,
                         "third item from NEWOBJ_EX argument tuple must "
                         "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
            return -1;
        }

        if (self->proto >= 4) {
            /* The NEWOBJ_EX opcode is only written for protocol 4 and
               above: push cls, args and kwargs, then the opcode. */
            if (save(self, cls, 0) < 0 ||
                save(self, args, 0) < 0 ||
                save(self, kwargs, 0) < 0 ||
                _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
                return -1;
            }
        }
        else {
            /* Protocols 2 and 3: emulate NEWOBJ_EX.  Build
               st->partial(cls.__new__, cls, *args, **kwargs)
               (st->partial is presumably functools.partial -- set up
               elsewhere in this file) and pickle a REDUCE call of that
               object with an empty argument tuple. */
            PyObject *newargs;
            PyObject *cls_new;
            Py_ssize_t i;
            _Py_IDENTIFIER(__new__);

            newargs = PyTuple_New(PyTuple_GET_SIZE(args) + 2);
            if (newargs == NULL)
                return -1;

            cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
            if (cls_new == NULL) {
                Py_DECREF(newargs);
                return -1;
            }
            /* PyTuple_SET_ITEM steals a reference, hence the Py_INCREF on
               the borrowed cls and on each borrowed item below. */
            PyTuple_SET_ITEM(newargs, 0, cls_new);
            Py_INCREF(cls);
            PyTuple_SET_ITEM(newargs, 1, cls);
            for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
                PyObject *item = PyTuple_GET_ITEM(args, i);
                Py_INCREF(item);
                PyTuple_SET_ITEM(newargs, i + 2, item);
            }

            /* From this point `callable` is an owned reference that must
               be released on every exit path of this branch. */
            callable = PyObject_Call(st->partial, newargs, kwargs);
            Py_DECREF(newargs);
            if (callable == NULL)
                return -1;

            newargs = PyTuple_New(0);
            if (newargs == NULL) {
                Py_DECREF(callable);
                return -1;
            }

            if (save(self, callable, 0) < 0 ||
                save(self, newargs, 0) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0) {
                Py_DECREF(newargs);
                Py_DECREF(callable);
                return -1;
            }
            Py_DECREF(newargs);
            Py_DECREF(callable);
        }
    }
    else if (use_newobj) {
        PyObject *cls;
        PyObject *newargtup;
        PyObject *obj_class;
        int p;

        /* Sanity checks. */
        if (PyTuple_GET_SIZE(argtup) < 1) {
            PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
            return -1;
        }

        cls = PyTuple_GET_ITEM(argtup, 0);
        if (!PyType_Check(cls)) {
            PyErr_SetString(st->PicklingError, "args[0] from "
                            "__newobj__ args is not a type");
            return -1;
        }

        /* When the object being pickled is known, its class must be the
           very class passed as the first __newobj__ argument. */
        if (obj != NULL) {
            obj_class = get_class(obj);
            if (obj_class == NULL) {
                return -1;
            }
            /* Only the pointer identity is needed, so the reference can be
               dropped before the result is acted upon. */
            p = obj_class != cls;
            Py_DECREF(obj_class);
            if (p) {
                PyErr_SetString(st->PicklingError, "args[0] from "
                                "__newobj__ args has the wrong class");
                return -1;
            }
        }
        /* XXX: These calls to save() are prone to infinite recursion.
           Imagine what happens if the value returned by the __reduce__()
           method of some extension type contains another object of the
           same type. Ouch!

           Here is a quick example, that I ran into, to illustrate what I
           mean:

             >>> import pickle, copyreg
             >>> copyreg.dispatch_table.pop(complex)
             >>> pickle.dumps(1+2j)
             Traceback (most recent call last):
               ...
             RecursionError: maximum recursion depth exceeded

           Removing the complex class from copyreg.dispatch_table made the
           __reduce_ex__() method emit another complex object:

             >>> (1+1j).__reduce_ex__(2)
             (<function __newobj__ at 0xb7b71c3c>,
               (<class 'complex'>, (1+1j)), None, None, None)

           Thus when save() was called on newargtup (the 2nd item)
           recursion ensued. Of course, the bug was in the complex class
           which had a broken __getnewargs__() that emitted another complex
           object. But the point here is that it is quite easy to end up
           with a broken reduce function. */

        /* Save the class and its __new__ arguments. */
        if (save(self, cls, 0) < 0)
            return -1;

        /* Everything after the class in argtup is the argument tuple for
           __new__. */
        newargtup = PyTuple_GetSlice(argtup, 1, PyTuple_GET_SIZE(argtup));
        if (newargtup == NULL)
            return -1;

        p = save(self, newargtup, 0);
        Py_DECREF(newargtup);
        if (p < 0)
            return -1;

        /* Add NEWOBJ opcode. */
        if (_Pickler_Write(self, &newobj_op, 1) < 0)
            return -1;
    }
    else { /* Not using NEWOBJ. */
        if (save(self, callable, 0) < 0 ||
            save(self, argtup, 0) < 0 ||
            _Pickler_Write(self, &reduce_op, 1) < 0)
            return -1;
    }

    /* obj can be NULL when save_reduce() is used directly. A NULL obj means
       the caller does not want to memoize the object. Not particularly
       useful, but that is to mimic the behavior of save_reduce() in
       pickle.py when obj is None. */
    if (obj != NULL) {
        /* If the object is already in the memo, this means it is
           recursive. In this case, throw away everything we put on the
           stack, and fetch the object back from the memo. */
        if (PyMemoTable_Get(self->memo, obj)) {
            const char pop_op = POP;

            if (_Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else if (memo_put(self, obj) < 0)
            return -1;
    }

    /* Emit the optional list items, dict items and state, in that order. */
    if (listitems && batch_list(self, listitems) < 0)
        return -1;

    if (dictitems && batch_dict(self, dictitems) < 0)
        return -1;

    if (state) {
        if (state_setter == NULL) {
            /* Default path: push the state and apply it with BUILD. */
            if (save(self, state, 0) < 0 ||
                _Pickler_Write(self, &build_op, 1) < 0)
                return -1;
        }
        else {

            /* If a state_setter is specified, call it instead of load_build
             * to update obj with its previous state.
             * The first 4 save/write instructions push state_setter and its
             * tuple of expected arguments (obj, state) onto the stack. The
             * REDUCE opcode triggers the state_setter(obj, state) function
             * call. Finally, because state-updating routines only do
             * in-place modification, the whole operation has to be
             * stack-transparent. Thus, we finally pop the call's output
             * from the stack. */

            const char tupletwo_op = TUPLE2;
            const char pop_op = POP;
            if (save(self, state_setter, 0) < 0 ||
                save(self, obj, 0) < 0 || save(self, state, 0) < 0 ||
                _Pickler_Write(self, &tupletwo_op, 1) < 0 ||
                _Pickler_Write(self, &reduce_op, 1) < 0 ||
                _Pickler_Write(self, &pop_op, 1) < 0)
                return -1;
        }
    }
    return 0;
}
4257
/* Pickle a single object, dispatching on its exact type.
 *
 * The order of attempts is: the persistent-id hook (skipped when pers_save
 * is nonzero), atomic types, the memo, bytes/str, the built-in containers
 * and buffers, self->reducer_override, types and functions, and finally a
 * reduction function looked up in a dispatch table or via
 * __reduce_ex__ / __reduce__.
 *
 * Returns 0 on success and -1 on failure.  When save_pers() handles the
 * object, its nonzero status (1 if a persistent id was saved) is returned
 * unchanged.
 */
static int
save(PicklerObject *self, PyObject *obj, int pers_save)
{
    PyTypeObject *type;
    /* Both are owned references (or NULL) once set; they are released at
       the `done` label. */
    PyObject *reduce_func = NULL;
    PyObject *reduce_value = NULL;
    int status = 0;

    if (_Pickler_OpcodeBoundary(self) < 0)
        return -1;

    /* The extra pers_save argument is necessary to avoid calling save_pers()
       on its returned object. */
    if (!pers_save && self->pers_func) {
        /* save_pers() returns:
            -1   to signal an error;
             0   if it did nothing successfully;
             1   if a persistent id was saved.
         */
        if ((status = save_pers(self, obj)) != 0)
            return status;
    }

    type = Py_TYPE(obj);

    /* The old cPickle had an optimization that used switch-case statement
       dispatching on the first letter of the type name.  This was removed
       since benchmarks showed that this optimization was actually slowing
       things down. */

    /* Atom types; these aren't memoized, so don't check the memo. */

    if (obj == Py_None) {
        return save_none(self, obj);
    }
    else if (obj == Py_False || obj == Py_True) {
        return save_bool(self, obj);
    }
    else if (type == &PyLong_Type) {
        return save_long(self, obj);
    }
    else if (type == &PyFloat_Type) {
        return save_float(self, obj);
    }

    /* Check the memo to see if it has the object. If so, generate
       a GET (or BINGET) opcode, instead of pickling the object
       once again. */
    if (PyMemoTable_Get(self->memo, obj)) {
        return memo_get(self, obj);
    }

    if (type == &PyBytes_Type) {
        return save_bytes(self, obj);
    }
    else if (type == &PyUnicode_Type) {
        return save_unicode(self, obj);
    }

    /* We're only calling Py_EnterRecursiveCall here so that atomic
       types above are pickled faster.  Every path below this point must
       leave through the `done` label so that Py_LeaveRecursiveCall() is
       called. */
    if (Py_EnterRecursiveCall(" while pickling an object")) {
        return -1;
    }

    if (type == &PyDict_Type) {
        status = save_dict(self, obj);
        goto done;
    }
    else if (type == &PySet_Type) {
        status = save_set(self, obj);
        goto done;
    }
    else if (type == &PyFrozenSet_Type) {
        status = save_frozenset(self, obj);
        goto done;
    }
    else if (type == &PyList_Type) {
        status = save_list(self, obj);
        goto done;
    }
    else if (type == &PyTuple_Type) {
        status = save_tuple(self, obj);
        goto done;
    }
    else if (type == &PyByteArray_Type) {
        status = save_bytearray(self, obj);
        goto done;
    }
    else if (type == &PyPickleBuffer_Type) {
        status = save_picklebuffer(self, obj);
        goto done;
    }

    /* Now, check reducer_override.  If it returns NotImplemented,
     * fallback to save_type or save_global, and then perhaps to the
     * regular reduction mechanism.
     */
    if (self->reducer_override != NULL) {
        reduce_value = PyObject_CallOneArg(self->reducer_override, obj);
        if (reduce_value == NULL) {
            goto error;
        }
        if (reduce_value != Py_NotImplemented) {
            goto reduce;
        }
        /* NotImplemented: drop it and continue with the default logic. */
        Py_DECREF(reduce_value);
        reduce_value = NULL;
    }

    if (type == &PyType_Type) {
        status = save_type(self, obj);
        goto done;
    }
    else if (type == &PyFunction_Type) {
        status = save_global(self, obj, NULL);
        goto done;
    }

    /* XXX: This part needs some unit tests. */

    /* Get a reduction callable, and call it.  This may come from
     * self.dispatch_table, copyreg.dispatch_table, the object's
     * __reduce_ex__ method, or the object's __reduce__ method.
     */
    if (self->dispatch_table == NULL) {
        /* No per-pickler table: use the dict held in the module state. */
        PickleState *st = _Pickle_GetGlobalState();
        reduce_func = PyDict_GetItemWithError(st->dispatch_table,
                                              (PyObject *)type);
        if (reduce_func == NULL) {
            /* NULL without an exception just means "no entry". */
            if (PyErr_Occurred()) {
                goto error;
            }
        } else {
            /* PyDict_GetItemWithError() returns a borrowed reference.
               Increase the reference count to be consistent with
               PyObject_GetItem and _PyObject_GetAttrId used below. */
            Py_INCREF(reduce_func);
        }
    } else {
        /* The per-pickler table is accessed through the generic item
           protocol; a KeyError means "no entry". */
        reduce_func = PyObject_GetItem(self->dispatch_table,
                                       (PyObject *)type);
        if (reduce_func == NULL) {
            if (PyErr_ExceptionMatches(PyExc_KeyError))
                PyErr_Clear();
            else
                goto error;
        }
    }
    if (reduce_func != NULL) {
        /* NOTE(review): the extra reference taken on obj here is presumably
           consumed by _Pickle_FastCall() -- confirm against its definition. */
        Py_INCREF(obj);
        reduce_value = _Pickle_FastCall(reduce_func, obj);
    }
    else if (PyType_IsSubtype(type, &PyType_Type)) {
        /* obj is an instance of a metaclass, i.e. a class: save it as a
           global. */
        status = save_global(self, obj, NULL);
        goto done;
    }
    else {
        _Py_IDENTIFIER(__reduce__);
        _Py_IDENTIFIER(__reduce_ex__);

        /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
           automatically defined as __reduce__. While this is convenient,
           it makes it impossible to know which method was actually called.
           Of course, this is not a big deal. But still, it would be nice to
           let the user know which method was called when something goes
           wrong. Incidentally, this means if __reduce_ex__ is not defined,
           we don't actually have to check for a __reduce__ method. */

        /* Check for a __reduce_ex__ method. */
        if (_PyObject_LookupAttrId(obj, &PyId___reduce_ex__, &reduce_func) < 0) {
            goto error;
        }
        if (reduce_func != NULL) {
            PyObject *proto;
            proto = PyLong_FromLong(self->proto);
            /* If proto could not be created, reduce_value stays NULL and
               the check after this block jumps to `error`. */
            if (proto != NULL) {
                reduce_value = _Pickle_FastCall(reduce_func, proto);
            }
        }
        else {
            /* Check for a __reduce__ method. */
            if (_PyObject_LookupAttrId(obj, &PyId___reduce__, &reduce_func) < 0) {
                goto error;
            }
            if (reduce_func != NULL) {
                reduce_value = PyObject_CallNoArgs(reduce_func);
            }
            else {
                PickleState *st = _Pickle_GetGlobalState();
                PyErr_Format(st->PicklingError,
                             "can't pickle '%.200s' object: %R",
                             type->tp_name, obj);
                goto error;
            }
        }
    }

    if (reduce_value == NULL)
        goto error;

  reduce:
    /* A string result names a global to save in place of obj; otherwise
       the result must be a tuple, which is handed to save_reduce(). */
    if (PyUnicode_Check(reduce_value)) {
        status = save_global(self, obj, reduce_value);
        goto done;
    }

    if (!PyTuple_Check(reduce_value)) {
        PickleState *st = _Pickle_GetGlobalState();
        PyErr_SetString(st->PicklingError,
                        "__reduce__ must return a string or tuple");
        goto error;
    }

    status = save_reduce(self, reduce_value, obj);

    /* The `error` label is only reachable by goto; normal control flow
       skips this block. */
    if (0) {
  error:
        status = -1;
    }
  done:

    Py_LeaveRecursiveCall();
    Py_XDECREF(reduce_func);
    Py_XDECREF(reduce_value);

    return status;
}
4486
4487static int
4488dump(PicklerObject *self, PyObject *obj)
4489{
4490 const char stop_op = STOP;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004491 int status = -1;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004492 PyObject *tmp;
4493 _Py_IDENTIFIER(reducer_override);
4494
4495 if (_PyObject_LookupAttrId((PyObject *)self, &PyId_reducer_override,
4496 &tmp) < 0) {
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004497 goto error;
Pierre Glaser289f1f82019-05-08 23:08:25 +02004498 }
4499 /* Cache the reducer_override method, if it exists. */
4500 if (tmp != NULL) {
4501 Py_XSETREF(self->reducer_override, tmp);
4502 }
4503 else {
4504 Py_CLEAR(self->reducer_override);
4505 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004506
4507 if (self->proto >= 2) {
4508 char header[2];
4509
4510 header[0] = PROTO;
4511 assert(self->proto >= 0 && self->proto < 256);
4512 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004513 if (_Pickler_Write(self, header, 2) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004514 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004515 if (self->proto >= 4)
4516 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004517 }
4518
4519 if (save(self, obj, 0) < 0 ||
Serhiy Storchakac8695292018-04-04 00:11:27 +03004520 _Pickler_Write(self, &stop_op, 1) < 0 ||
4521 _Pickler_CommitFrame(self) < 0)
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004522 goto error;
4523
4524 // Success
4525 status = 0;
4526
4527 error:
Serhiy Storchakac8695292018-04-04 00:11:27 +03004528 self->framing = 0;
Pierre Glaser0f2f35e2020-02-02 19:55:21 +01004529
4530 /* Break the reference cycle we generated at the beginning this function
4531 * call when setting the reducer_override attribute of the Pickler instance
4532 * to a bound method of the same instance. This is important as the Pickler
4533 * instance holds a reference to each object it has pickled (through its
4534 * memo): thus, these objects wont be garbage-collected as long as the
4535 * Pickler itself is not collected. */
4536 Py_CLEAR(self->reducer_override);
4537 return status;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004538}
4539
Larry Hastings61272b72014-01-07 12:41:53 -08004540/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004541
4542_pickle.Pickler.clear_memo
4543
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004544Clears the pickler's "memo".
4545
4546The memo is the data structure that remembers which objects the
4547pickler has already seen, so that shared or recursive objects are
4548pickled by reference and not by value. This method is useful when
4549re-using picklers.
Larry Hastings61272b72014-01-07 12:41:53 -08004550[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004551
Larry Hastings3cceb382014-01-04 11:09:09 -08004552static PyObject *
4553_pickle_Pickler_clear_memo_impl(PicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004554/*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004555{
4556 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004557 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004558
4559 Py_RETURN_NONE;
4560}
4561
Larry Hastings61272b72014-01-07 12:41:53 -08004562/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004563
4564_pickle.Pickler.dump
4565
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004566 obj: object
4567 /
4568
4569Write a pickled representation of the given object to the open file.
Larry Hastings61272b72014-01-07 12:41:53 -08004570[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004571
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004572static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004573_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
Larry Hastings581ee362014-01-28 05:00:08 -08004574/*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004575{
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004576 /* Check whether the Pickler was initialized correctly (issue3664).
4577 Developers often forget to call __init__() in their subclasses, which
4578 would trigger a segfault without this check. */
4579 if (self->write == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004580 PickleState *st = _Pickle_GetGlobalState();
4581 PyErr_Format(st->PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004582 "Pickler.__init__() was not called by %s.__init__()",
4583 Py_TYPE(self)->tp_name);
4584 return NULL;
4585 }
4586
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004587 if (_Pickler_ClearBuffer(self) < 0)
4588 return NULL;
4589
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004590 if (dump(self, obj) < 0)
4591 return NULL;
4592
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004593 if (_Pickler_FlushToFile(self) < 0)
4594 return NULL;
4595
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004596 Py_RETURN_NONE;
4597}
4598
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004599/*[clinic input]
4600
4601_pickle.Pickler.__sizeof__ -> Py_ssize_t
4602
4603Returns size in memory, in bytes.
4604[clinic start generated code]*/
4605
4606static Py_ssize_t
4607_pickle_Pickler___sizeof___impl(PicklerObject *self)
4608/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4609{
4610 Py_ssize_t res, s;
4611
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02004612 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004613 if (self->memo != NULL) {
4614 res += sizeof(PyMemoTable);
4615 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4616 }
4617 if (self->output_buffer != NULL) {
4618 s = _PySys_GetSizeOf(self->output_buffer);
4619 if (s == -1)
4620 return -1;
4621 res += s;
4622 }
4623 return res;
4624}
4625
/* Method table of the Pickler type.  Each entry is a *_METHODDEF macro
   (presumably produced by Argument Clinic for the functions above --
   defined outside this chunk); the table ends with a NULL sentinel. */
static struct PyMethodDef Pickler_methods[] = {
    _PICKLE_PICKLER_DUMP_METHODDEF
    _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
    _PICKLE_PICKLER___SIZEOF___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4632
/* Deallocator for Pickler objects: stop GC tracking, release every object
   reference held by the pickler, delete the memo table and free the
   object's memory. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the garbage collector does not look at the object
       while its fields are being torn down. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->dispatch_table);
    Py_XDECREF(self->fast_memo);
    Py_XDECREF(self->reducer_override);
    Py_XDECREF(self->buffer_callback);

    /* NOTE(review): called without a NULL check here, whereas
       Pickler_clear() checks first -- presumably PyMemoTable_Del()
       tolerates NULL; confirm against its definition. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4650
/* GC traversal for Pickler objects: report the object references held by
   the pickler to the garbage collector.  Py_VISIT returns early from this
   function if the visit callback returns a nonzero value.
   Note that output_buffer is not visited here although Pickler_clear()
   releases it. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->dispatch_table);
    Py_VISIT(self->fast_memo);
    Py_VISIT(self->reducer_override);
    Py_VISIT(self->buffer_callback);
    return 0;
}
4662
4663static int
4664Pickler_clear(PicklerObject *self)
4665{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004666 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004667 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004668 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004669 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004670 Py_CLEAR(self->fast_memo);
Pierre Glaser289f1f82019-05-08 23:08:25 +02004671 Py_CLEAR(self->reducer_override);
Antoine Pitrou91f43802019-05-26 17:10:09 +02004672 Py_CLEAR(self->buffer_callback);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004674 if (self->memo != NULL) {
4675 PyMemoTable *memo = self->memo;
4676 self->memo = NULL;
4677 PyMemoTable_Del(memo);
4678 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004679 return 0;
4680}
4681
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004682
Larry Hastings61272b72014-01-07 12:41:53 -08004683/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004684
4685_pickle.Pickler.__init__
4686
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004687 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004688 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004689 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03004690 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004691
4692This takes a binary file for writing a pickle data stream.
4693
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004694The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00004695protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
4696protocol is 4. It was introduced in Python 3.4, and is incompatible
4697with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004698
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004699Specifying a negative protocol version selects the highest protocol
4700version supported. The higher the protocol used, the more recent the
4701version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004702
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004703The *file* argument must have a write() method that accepts a single
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004704bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00004705writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004706this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004707
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004708If *fix_imports* is True and protocol is less than 3, pickle will try
4709to map the new Python 3 names to the old module names used in Python
47102, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02004711
4712If *buffer_callback* is None (the default), buffer views are
4713serialized into *file* as part of the pickle stream.
4714
4715If *buffer_callback* is not None, then it can be called any number
4716of times with a buffer view. If the callback returns a false value
4717(such as None), the given buffer is out-of-band; otherwise the
4718buffer is serialized in-band, i.e. inside the pickle stream.
4719
4720It is an error if *buffer_callback* is not None and *protocol*
4721is None or smaller than 5.
4722
Larry Hastings61272b72014-01-07 12:41:53 -08004723[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004724
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004725static int
Larry Hastings89964c42015-04-14 18:07:59 -04004726_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02004727 PyObject *protocol, int fix_imports,
4728 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00004729/*[clinic end generated code: output=0abedc50590d259b input=a7c969699bf5dad3]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004730{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004731 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004732 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004733
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004734 /* In case of multiple __init__() calls, clear previous content. */
4735 if (self->write != NULL)
4736 (void)Pickler_clear(self);
4737
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004738 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004739 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004740
4741 if (_Pickler_SetOutputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004742 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004743
Antoine Pitrou91f43802019-05-26 17:10:09 +02004744 if (_Pickler_SetBufferCallback(self, buffer_callback) < 0)
4745 return -1;
4746
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004747 /* memo and output_buffer may have already been created in _Pickler_New */
4748 if (self->memo == NULL) {
4749 self->memo = PyMemoTable_New();
4750 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004751 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004752 }
4753 self->output_len = 0;
4754 if (self->output_buffer == NULL) {
4755 self->max_output_len = WRITE_BUF_SIZE;
4756 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4757 self->max_output_len);
4758 if (self->output_buffer == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004759 return -1;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004760 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004761
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004762 self->fast = 0;
4763 self->fast_nesting = 0;
4764 self->fast_memo = NULL;
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004765
Serhiy Storchaka986375e2017-11-30 22:48:31 +02004766 if (init_method_ref((PyObject *)self, &PyId_persistent_id,
4767 &self->pers_func, &self->pers_func_self) < 0)
4768 {
4769 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004770 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03004771
Serhiy Storchakaf320be72018-01-25 10:49:40 +02004772 if (_PyObject_LookupAttrId((PyObject *)self,
4773 &PyId_dispatch_table, &self->dispatch_table) < 0) {
4774 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004775 }
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004776
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004777 return 0;
4778}
4779
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004780
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004781/* Define a proxy object for the Pickler's internal memo object. This is to
4782 * avoid breaking code like:
4783 * pickler.memo.clear()
4784 * and
4785 * pickler.memo = saved_memo
4786 * Is this a good idea? Not really, but we don't want to break code that uses
4787 * it. Note that we don't implement the entire mapping API here. This is
4788 * intentional, as these should be treated as black-box implementation details.
4789 */
4790
Larry Hastings61272b72014-01-07 12:41:53 -08004791/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004792_pickle.PicklerMemoProxy.clear
4793
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004794Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08004795[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004796
Larry Hastings3cceb382014-01-04 11:09:09 -08004797static PyObject *
4798_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004799/*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004800{
4801 if (self->pickler->memo)
4802 PyMemoTable_Clear(self->pickler->memo);
4803 Py_RETURN_NONE;
4804}
4805
Larry Hastings61272b72014-01-07 12:41:53 -08004806/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004807_pickle.PicklerMemoProxy.copy
4808
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004809Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08004810[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004811
Larry Hastings3cceb382014-01-04 11:09:09 -08004812static PyObject *
4813_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004814/*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004815{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004816 PyMemoTable *memo;
4817 PyObject *new_memo = PyDict_New();
4818 if (new_memo == NULL)
4819 return NULL;
4820
4821 memo = self->pickler->memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07004822 for (size_t i = 0; i < memo->mt_allocated; ++i) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004823 PyMemoEntry entry = memo->mt_table[i];
4824 if (entry.me_key != NULL) {
4825 int status;
4826 PyObject *key, *value;
4827
4828 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004829 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004830
4831 if (key == NULL || value == NULL) {
4832 Py_XDECREF(key);
4833 Py_XDECREF(value);
4834 goto error;
4835 }
4836 status = PyDict_SetItem(new_memo, key, value);
4837 Py_DECREF(key);
4838 Py_DECREF(value);
4839 if (status < 0)
4840 goto error;
4841 }
4842 }
4843 return new_memo;
4844
4845 error:
4846 Py_XDECREF(new_memo);
4847 return NULL;
4848}
4849
Larry Hastings61272b72014-01-07 12:41:53 -08004850/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004851_pickle.PicklerMemoProxy.__reduce__
4852
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004853Implement pickle support.
Larry Hastings61272b72014-01-07 12:41:53 -08004854[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004855
Larry Hastings3cceb382014-01-04 11:09:09 -08004856static PyObject *
4857_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004858/*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859{
4860 PyObject *reduce_value, *dict_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08004861 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 if (contents == NULL)
4863 return NULL;
4864
4865 reduce_value = PyTuple_New(2);
4866 if (reduce_value == NULL) {
4867 Py_DECREF(contents);
4868 return NULL;
4869 }
4870 dict_args = PyTuple_New(1);
4871 if (dict_args == NULL) {
4872 Py_DECREF(contents);
4873 Py_DECREF(reduce_value);
4874 return NULL;
4875 }
4876 PyTuple_SET_ITEM(dict_args, 0, contents);
4877 Py_INCREF((PyObject *)&PyDict_Type);
4878 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4879 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4880 return reduce_value;
4881}
4882
/* Method table of PicklerMemoProxyType (clear, copy, __reduce__).  The
   *_METHODDEF macros are not defined in this part of the file; presumably
   they come from the Argument Clinic generated header -- confirm there. */
static PyMethodDef picklerproxy_methods[] = {
    _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}                /* sentinel */
};
4889
/* tp_dealloc slot of PicklerMemoProxyType: stop GC tracking, release the
   reference to the proxied pickler, then free the proxy itself. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
4897
/* tp_traverse slot of PicklerMemoProxyType: the proxied pickler is the only
   object reference the proxy holds. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4905
/* tp_clear slot of PicklerMemoProxyType: drop the reference to the proxied
   pickler.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4912
4913static PyTypeObject PicklerMemoProxyType = {
4914 PyVarObject_HEAD_INIT(NULL, 0)
4915 "_pickle.PicklerMemoProxy", /*tp_name*/
4916 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4917 0,
4918 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004919 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004920 0, /* tp_getattr */
4921 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02004922 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004923 0, /* tp_repr */
4924 0, /* tp_as_number */
4925 0, /* tp_as_sequence */
4926 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00004927 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004928 0, /* tp_call */
4929 0, /* tp_str */
4930 PyObject_GenericGetAttr, /* tp_getattro */
4931 PyObject_GenericSetAttr, /* tp_setattro */
4932 0, /* tp_as_buffer */
4933 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4934 0, /* tp_doc */
4935 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4936 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4937 0, /* tp_richcompare */
4938 0, /* tp_weaklistoffset */
4939 0, /* tp_iter */
4940 0, /* tp_iternext */
4941 picklerproxy_methods, /* tp_methods */
4942};
4943
4944static PyObject *
4945PicklerMemoProxy_New(PicklerObject *pickler)
4946{
4947 PicklerMemoProxyObject *self;
4948
4949 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4950 if (self == NULL)
4951 return NULL;
4952 Py_INCREF(pickler);
4953 self->pickler = pickler;
4954 PyObject_GC_Track(self);
4955 return (PyObject *)self;
4956}
4957
4958/*****************************************************************************/
4959
/* Getter for the Pickler "memo" attribute: returns a new PicklerMemoProxy
   bound to this pickler (NULL if creating the proxy fails). */
static PyObject *
Pickler_get_memo(PicklerObject *self, void *Py_UNUSED(ignored))
{
    return PicklerMemoProxy_New(self);
}
4965
4966static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02004967Pickler_set_memo(PicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004969 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004970
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004971 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004972 PyErr_SetString(PyExc_TypeError,
4973 "attribute deletion is not supported");
4974 return -1;
4975 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004976
Andy Lesterdffe4c02020-03-04 07:15:20 -06004977 if (Py_IS_TYPE(obj, &PicklerMemoProxyType)) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004978 PicklerObject *pickler =
4979 ((PicklerMemoProxyObject *)obj)->pickler;
4980
4981 new_memo = PyMemoTable_Copy(pickler->memo);
4982 if (new_memo == NULL)
4983 return -1;
4984 }
4985 else if (PyDict_Check(obj)) {
4986 Py_ssize_t i = 0;
4987 PyObject *key, *value;
4988
4989 new_memo = PyMemoTable_New();
4990 if (new_memo == NULL)
4991 return -1;
4992
4993 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004994 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004995 PyObject *memo_obj;
4996
Serhiy Storchakafff9a312017-03-21 08:53:25 +02004997 if (!PyTuple_Check(value) || PyTuple_GET_SIZE(value) != 2) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004998 PyErr_SetString(PyExc_TypeError,
4999 "'memo' values must be 2-item tuples");
5000 goto error;
5001 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005002 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005003 if (memo_id == -1 && PyErr_Occurred())
5004 goto error;
5005 memo_obj = PyTuple_GET_ITEM(value, 1);
5006 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
5007 goto error;
5008 }
5009 }
5010 else {
5011 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02005012 "'memo' attribute must be a PicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005013 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005014 return -1;
5015 }
5016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005017 PyMemoTable_Del(self->memo);
5018 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005019
5020 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005021
5022 error:
5023 if (new_memo)
5024 PyMemoTable_Del(new_memo);
5025 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005026}
5027
5028static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005029Pickler_get_persid(PicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005030{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005031 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005032 PyErr_SetString(PyExc_AttributeError, "persistent_id");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005033 return NULL;
5034 }
5035 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005036}
5037
5038static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02005039Pickler_set_persid(PicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005040{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005041 if (value == NULL) {
5042 PyErr_SetString(PyExc_TypeError,
5043 "attribute deletion is not supported");
5044 return -1;
5045 }
5046 if (!PyCallable_Check(value)) {
5047 PyErr_SetString(PyExc_TypeError,
5048 "persistent_id must be a callable taking one argument");
5049 return -1;
5050 }
5051
Serhiy Storchaka986375e2017-11-30 22:48:31 +02005052 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005053 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03005054 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005055
5056 return 0;
5057}
5058
/* Struct fields of PicklerObject exposed directly as attributes of Pickler
   instances (installed as tp_members of Pickler_Type). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}  /* sentinel */
};
5065
/* Computed attributes of Pickler instances (installed as tp_getset of
   Pickler_Type): "memo" and "persistent_id", each with a getter and a
   setter. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel */
};
5073
5074static PyTypeObject Pickler_Type = {
5075 PyVarObject_HEAD_INIT(NULL, 0)
5076 "_pickle.Pickler" , /*tp_name*/
5077 sizeof(PicklerObject), /*tp_basicsize*/
5078 0, /*tp_itemsize*/
5079 (destructor)Pickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02005080 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005081 0, /*tp_getattr*/
5082 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02005083 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005084 0, /*tp_repr*/
5085 0, /*tp_as_number*/
5086 0, /*tp_as_sequence*/
5087 0, /*tp_as_mapping*/
5088 0, /*tp_hash*/
5089 0, /*tp_call*/
5090 0, /*tp_str*/
5091 0, /*tp_getattro*/
5092 0, /*tp_setattro*/
5093 0, /*tp_as_buffer*/
5094 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08005095 _pickle_Pickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005096 (traverseproc)Pickler_traverse, /*tp_traverse*/
5097 (inquiry)Pickler_clear, /*tp_clear*/
5098 0, /*tp_richcompare*/
5099 0, /*tp_weaklistoffset*/
5100 0, /*tp_iter*/
5101 0, /*tp_iternext*/
5102 Pickler_methods, /*tp_methods*/
5103 Pickler_members, /*tp_members*/
5104 Pickler_getsets, /*tp_getset*/
5105 0, /*tp_base*/
5106 0, /*tp_dict*/
5107 0, /*tp_descr_get*/
5108 0, /*tp_descr_set*/
5109 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08005110 _pickle_Pickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005111 PyType_GenericAlloc, /*tp_alloc*/
5112 PyType_GenericNew, /*tp_new*/
5113 PyObject_GC_Del, /*tp_free*/
5114 0, /*tp_is_gc*/
5115};
5116
/* Temporary helper for calling self.find_class().

   Calls the find_class method of `self` with (module_name, global_name)
   through the regular method-call machinery and returns its result (NULL
   with an exception set on failure).

   XXX: It would be nice to be able to avoid Python function call overhead,
   by using directly the C version of find_class(), when find_class() is not
   overridden by a subclass.  Although, this could become rather hackish.  A
   simpler optimization would be to call the C function when self is not a
   subclass instance. */
static PyObject *
find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
{
    _Py_IDENTIFIER(find_class);

    return _PyObject_CallMethodIdObjArgs((PyObject *)self, &PyId_find_class,
                                         module_name, global_name, NULL);
}
5132
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005133static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005134marker(UnpicklerObject *self)
5135{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005136 Py_ssize_t mark;
5137
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005138 if (self->num_marks < 1) {
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005139 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005140 PyErr_SetString(st->UnpicklingError, "could not find MARK");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005141 return -1;
5142 }
5143
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005144 mark = self->marks[--self->num_marks];
5145 self->stack->mark_set = self->num_marks != 0;
5146 self->stack->fence = self->num_marks ?
5147 self->marks[self->num_marks - 1] : 0;
5148 return mark;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005149}
5150
/* Put Py_None on the unpickler stack.  Returns 0 on success; -1 is the
   value handed to PDATA_APPEND as the error return. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
5157
5158static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005159load_int(UnpicklerObject *self)
5160{
5161 PyObject *value;
5162 char *endptr, *s;
5163 Py_ssize_t len;
5164 long x;
5165
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005166 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005167 return -1;
5168 if (len < 2)
5169 return bad_readline();
5170
5171 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005172 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005173 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005174 x = strtol(s, &endptr, 0);
5175
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005176 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005177 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03005178 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005179 errno = 0;
5180 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005181 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005182 if (value == NULL) {
5183 PyErr_SetString(PyExc_ValueError,
5184 "could not convert string to int");
5185 return -1;
5186 }
5187 }
5188 else {
5189 if (len == 3 && (x == 0 || x == 1)) {
5190 if ((value = PyBool_FromLong(x)) == NULL)
5191 return -1;
5192 }
5193 else {
5194 if ((value = PyLong_FromLong(x)) == NULL)
5195 return -1;
5196 }
5197 }
5198
5199 PDATA_PUSH(self->stack, value, -1);
5200 return 0;
5201}
5202
/* Put the given bool singleton (Py_True or Py_False, checked by assertion)
   on the unpickler stack.  Returns 0 on success; -1 is the value handed to
   PDATA_APPEND as the error return. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
5210
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005211/* s contains x bytes of an unsigned little-endian integer. Return its value
5212 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
5213 */
5214static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005215calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005216{
5217 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005218 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005219 size_t x = 0;
5220
Serhiy Storchakae0606192015-09-29 22:10:07 +03005221 if (nbytes > (int)sizeof(size_t)) {
5222 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
5223 * have 64-bit size that can't be represented on 32-bit platform.
5224 */
5225 for (i = (int)sizeof(size_t); i < nbytes; i++) {
5226 if (s[i])
5227 return -1;
5228 }
5229 nbytes = (int)sizeof(size_t);
5230 }
5231 for (i = 0; i < nbytes; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005232 x |= (size_t) s[i] << (8 * i);
5233 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005234
5235 if (x > PY_SSIZE_T_MAX)
5236 return -1;
5237 else
5238 return (Py_ssize_t) x;
5239}
5240
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * Callers in this file pass nbytes of 1, 2 or 4 only.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long bits = 0;

    /* Accumulate in an unsigned type: shifting a set bit into the sign bit
     * of a signed long is undefined behaviour, which the 4-byte case would
     * hit on platforms where long is 32 bits wide.
     */
    for (int i = 0; i < nbytes; i++) {
        bits |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set, compute the two's-complement value
     * arithmetically so the result is the same whatever the width of long.
     */
    if (nbytes == 4 && (bits & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - bits) - 1;
    }

    return (long)bits;
}
5267
5268static int
5269load_binintx(UnpicklerObject *self, char *s, int size)
5270{
5271 PyObject *value;
5272 long x;
5273
5274 x = calc_binint(s, size);
5275
5276 if ((value = PyLong_FromLong(x)) == NULL)
5277 return -1;
5278
5279 PDATA_PUSH(self->stack, value, -1);
5280 return 0;
5281}
5282
5283static int
5284load_binint(UnpicklerObject *self)
5285{
5286 char *s;
5287
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005288 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005289 return -1;
5290
5291 return load_binintx(self, s, 4);
5292}
5293
5294static int
5295load_binint1(UnpicklerObject *self)
5296{
5297 char *s;
5298
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005299 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005300 return -1;
5301
5302 return load_binintx(self, s, 1);
5303}
5304
5305static int
5306load_binint2(UnpicklerObject *self)
5307{
5308 char *s;
5309
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005310 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005311 return -1;
5312
5313 return load_binintx(self, s, 2);
5314}
5315
5316static int
5317load_long(UnpicklerObject *self)
5318{
5319 PyObject *value;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005320 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005321 Py_ssize_t len;
5322
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005323 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005324 return -1;
5325 if (len < 2)
5326 return bad_readline();
5327
Mark Dickinson8dd05142009-01-20 20:43:58 +00005328 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
5329 the 'L' before calling PyLong_FromString. In order to maintain
5330 compatibility with Python 3.0.0, we don't actually *require*
5331 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005332 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00005333 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00005334 /* XXX: Should the base argument explicitly set to 10? */
5335 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00005336 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005337 return -1;
5338
5339 PDATA_PUSH(self->stack, value, -1);
5340 return 0;
5341}
5342
5343/* 'size' bytes contain the # of bytes of little-endian 256's-complement
5344 * data following.
5345 */
5346static int
5347load_counted_long(UnpicklerObject *self, int size)
5348{
5349 PyObject *value;
5350 char *nbytes;
5351 char *pdata;
5352
5353 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005354 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005355 return -1;
5356
5357 size = calc_binint(nbytes, size);
5358 if (size < 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005359 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005360 /* Corrupt or hostile pickle -- we never write one like this */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005361 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005362 "LONG pickle has negative byte count");
5363 return -1;
5364 }
5365
5366 if (size == 0)
5367 value = PyLong_FromLong(0L);
5368 else {
5369 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005370 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005371 return -1;
5372 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
5373 1 /* little endian */ , 1 /* signed */ );
5374 }
5375 if (value == NULL)
5376 return -1;
5377 PDATA_PUSH(self->stack, value, -1);
5378 return 0;
5379}
5380
5381static int
5382load_float(UnpicklerObject *self)
5383{
5384 PyObject *value;
5385 char *endptr, *s;
5386 Py_ssize_t len;
5387 double d;
5388
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005389 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005390 return -1;
5391 if (len < 2)
5392 return bad_readline();
5393
5394 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00005395 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
5396 if (d == -1.0 && PyErr_Occurred())
5397 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005398 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005399 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
5400 return -1;
5401 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00005402 value = PyFloat_FromDouble(d);
5403 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005404 return -1;
5405
5406 PDATA_PUSH(self->stack, value, -1);
5407 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005408}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005409
5410static int
5411load_binfloat(UnpicklerObject *self)
5412{
5413 PyObject *value;
5414 double x;
5415 char *s;
5416
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005417 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005418 return -1;
5419
5420 x = _PyFloat_Unpack8((unsigned char *)s, 0);
5421 if (x == -1.0 && PyErr_Occurred())
5422 return -1;
5423
5424 if ((value = PyFloat_FromDouble(x)) == NULL)
5425 return -1;
5426
5427 PDATA_PUSH(self->stack, value, -1);
5428 return 0;
5429}
5430
5431static int
5432load_string(UnpicklerObject *self)
5433{
5434 PyObject *bytes;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005435 PyObject *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005436 Py_ssize_t len;
5437 char *s, *p;
5438
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005439 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005440 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005441 /* Strip the newline */
5442 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005443 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005444 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005445 p = s + 1;
5446 len -= 2;
5447 }
5448 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005449 PickleState *st = _Pickle_GetGlobalState();
5450 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005451 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005452 return -1;
5453 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07005454 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005455
5456 /* Use the PyBytes API to decode the string, since that is what is used
5457 to encode, and then coerce the result to Unicode. */
5458 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005459 if (bytes == NULL)
5460 return -1;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005461
5462 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
5463 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5464 if (strcmp(self->encoding, "bytes") == 0) {
5465 obj = bytes;
5466 }
5467 else {
5468 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
5469 Py_DECREF(bytes);
5470 if (obj == NULL) {
5471 return -1;
5472 }
5473 }
5474
5475 PDATA_PUSH(self->stack, obj, -1);
5476 return 0;
5477}
5478
5479static int
5480load_counted_binstring(UnpicklerObject *self, int nbytes)
5481{
5482 PyObject *obj;
5483 Py_ssize_t size;
5484 char *s;
5485
5486 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005487 return -1;
5488
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08005489 size = calc_binsize(s, nbytes);
5490 if (size < 0) {
5491 PickleState *st = _Pickle_GetGlobalState();
5492 PyErr_Format(st->UnpicklingError,
5493 "BINSTRING exceeds system's maximum size of %zd bytes",
5494 PY_SSIZE_T_MAX);
5495 return -1;
5496 }
5497
5498 if (_Unpickler_Read(self, &s, size) < 0)
5499 return -1;
5500
5501 /* Convert Python 2.x strings to bytes if the *encoding* given to the
5502 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
5503 if (strcmp(self->encoding, "bytes") == 0) {
5504 obj = PyBytes_FromStringAndSize(s, size);
5505 }
5506 else {
5507 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
5508 }
5509 if (obj == NULL) {
5510 return -1;
5511 }
5512
5513 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005514 return 0;
5515}
5516
5517static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005518load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005519{
5520 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005521 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005522 char *s;
5523
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005524 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005525 return -1;
5526
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005527 size = calc_binsize(s, nbytes);
5528 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005529 PyErr_Format(PyExc_OverflowError,
5530 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005531 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005532 return -1;
5533 }
5534
Antoine Pitrou91f43802019-05-26 17:10:09 +02005535 bytes = PyBytes_FromStringAndSize(NULL, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005536 if (bytes == NULL)
5537 return -1;
Antoine Pitrou91f43802019-05-26 17:10:09 +02005538 if (_Unpickler_ReadInto(self, PyBytes_AS_STRING(bytes), size) < 0) {
5539 Py_DECREF(bytes);
5540 return -1;
5541 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005542
5543 PDATA_PUSH(self->stack, bytes, -1);
5544 return 0;
5545}
5546
5547static int
Antoine Pitrou91f43802019-05-26 17:10:09 +02005548load_counted_bytearray(UnpicklerObject *self)
5549{
5550 PyObject *bytearray;
5551 Py_ssize_t size;
5552 char *s;
5553
5554 if (_Unpickler_Read(self, &s, 8) < 0) {
5555 return -1;
5556 }
5557
5558 size = calc_binsize(s, 8);
5559 if (size < 0) {
5560 PyErr_Format(PyExc_OverflowError,
5561 "BYTEARRAY8 exceeds system's maximum size of %zd bytes",
5562 PY_SSIZE_T_MAX);
5563 return -1;
5564 }
5565
5566 bytearray = PyByteArray_FromStringAndSize(NULL, size);
5567 if (bytearray == NULL) {
5568 return -1;
5569 }
5570 if (_Unpickler_ReadInto(self, PyByteArray_AS_STRING(bytearray), size) < 0) {
5571 Py_DECREF(bytearray);
5572 return -1;
5573 }
5574
5575 PDATA_PUSH(self->stack, bytearray, -1);
5576 return 0;
5577}
5578
5579static int
5580load_next_buffer(UnpicklerObject *self)
5581{
5582 if (self->buffers == NULL) {
5583 PickleState *st = _Pickle_GetGlobalState();
5584 PyErr_SetString(st->UnpicklingError,
5585 "pickle stream refers to out-of-band data "
5586 "but no *buffers* argument was given");
5587 return -1;
5588 }
5589 PyObject *buf = PyIter_Next(self->buffers);
5590 if (buf == NULL) {
5591 if (!PyErr_Occurred()) {
5592 PickleState *st = _Pickle_GetGlobalState();
5593 PyErr_SetString(st->UnpicklingError,
5594 "not enough out-of-band buffers");
5595 }
5596 return -1;
5597 }
5598
5599 PDATA_PUSH(self->stack, buf, -1);
5600 return 0;
5601}
5602
5603static int
5604load_readonly_buffer(UnpicklerObject *self)
5605{
5606 Py_ssize_t len = Py_SIZE(self->stack);
5607 if (len <= self->stack->fence) {
5608 return Pdata_stack_underflow(self->stack);
5609 }
5610
5611 PyObject *obj = self->stack->data[len - 1];
5612 PyObject *view = PyMemoryView_FromObject(obj);
5613 if (view == NULL) {
5614 return -1;
5615 }
5616 if (!PyMemoryView_GET_BUFFER(view)->readonly) {
5617 /* Original object is writable */
5618 PyMemoryView_GET_BUFFER(view)->readonly = 1;
5619 self->stack->data[len - 1] = view;
5620 Py_DECREF(obj);
5621 }
5622 else {
5623 /* Original object is read-only, no need to replace it */
5624 Py_DECREF(view);
5625 }
5626 return 0;
5627}
5628
5629static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005630load_unicode(UnpicklerObject *self)
5631{
5632 PyObject *str;
5633 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01005634 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005635
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005636 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005637 return -1;
5638 if (len < 1)
5639 return bad_readline();
5640
5641 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5642 if (str == NULL)
5643 return -1;
5644
5645 PDATA_PUSH(self->stack, str, -1);
5646 return 0;
5647}
5648
5649static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005650load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005651{
5652 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005653 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005654 char *s;
5655
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005656 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005657 return -1;
5658
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005659 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005660 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005661 PyErr_Format(PyExc_OverflowError,
5662 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005663 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005664 return -1;
5665 }
5666
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005667 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005668 return -1;
5669
Victor Stinner485fb562010-04-13 11:07:24 +00005670 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005671 if (str == NULL)
5672 return -1;
5673
5674 PDATA_PUSH(self->stack, str, -1);
5675 return 0;
5676}
5677
5678static int
Victor Stinner21b47112016-03-14 18:09:39 +01005679load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005680{
5681 PyObject *tuple;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005682
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005683 if (Py_SIZE(self->stack) < len)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005684 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005685
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005686 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005687 if (tuple == NULL)
5688 return -1;
5689 PDATA_PUSH(self->stack, tuple, -1);
5690 return 0;
5691}
5692
5693static int
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005694load_tuple(UnpicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005695{
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005696 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005697
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005698 if ((i = marker(self)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005699 return -1;
5700
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005701 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005702}
5703
5704static int
5705load_empty_list(UnpicklerObject *self)
5706{
5707 PyObject *list;
5708
5709 if ((list = PyList_New(0)) == NULL)
5710 return -1;
5711 PDATA_PUSH(self->stack, list, -1);
5712 return 0;
5713}
5714
5715static int
5716load_empty_dict(UnpicklerObject *self)
5717{
5718 PyObject *dict;
5719
5720 if ((dict = PyDict_New()) == NULL)
5721 return -1;
5722 PDATA_PUSH(self->stack, dict, -1);
5723 return 0;
5724}
5725
5726static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005727load_empty_set(UnpicklerObject *self)
5728{
5729 PyObject *set;
5730
5731 if ((set = PySet_New(NULL)) == NULL)
5732 return -1;
5733 PDATA_PUSH(self->stack, set, -1);
5734 return 0;
5735}
5736
5737static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005738load_list(UnpicklerObject *self)
5739{
5740 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005741 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005742
5743 if ((i = marker(self)) < 0)
5744 return -1;
5745
5746 list = Pdata_poplist(self->stack, i);
5747 if (list == NULL)
5748 return -1;
5749 PDATA_PUSH(self->stack, list, -1);
5750 return 0;
5751}
5752
5753static int
5754load_dict(UnpicklerObject *self)
5755{
5756 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005757 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005758
5759 if ((i = marker(self)) < 0)
5760 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005761 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005762
5763 if ((dict = PyDict_New()) == NULL)
5764 return -1;
5765
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005766 if ((j - i) % 2 != 0) {
5767 PickleState *st = _Pickle_GetGlobalState();
5768 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
Serhiy Storchaka3ac53802015-12-07 11:32:00 +02005769 Py_DECREF(dict);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005770 return -1;
5771 }
5772
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005773 for (k = i + 1; k < j; k += 2) {
5774 key = self->stack->data[k - 1];
5775 value = self->stack->data[k];
5776 if (PyDict_SetItem(dict, key, value) < 0) {
5777 Py_DECREF(dict);
5778 return -1;
5779 }
5780 }
5781 Pdata_clear(self->stack, i);
5782 PDATA_PUSH(self->stack, dict, -1);
5783 return 0;
5784}
5785
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005786static int
5787load_frozenset(UnpicklerObject *self)
5788{
5789 PyObject *items;
5790 PyObject *frozenset;
5791 Py_ssize_t i;
5792
5793 if ((i = marker(self)) < 0)
5794 return -1;
5795
5796 items = Pdata_poptuple(self->stack, i);
5797 if (items == NULL)
5798 return -1;
5799
5800 frozenset = PyFrozenSet_New(items);
5801 Py_DECREF(items);
5802 if (frozenset == NULL)
5803 return -1;
5804
5805 PDATA_PUSH(self->stack, frozenset, -1);
5806 return 0;
5807}
5808
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005809static PyObject *
5810instantiate(PyObject *cls, PyObject *args)
5811{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005812 /* Caller must assure args are a tuple. Normally, args come from
5813 Pdata_poptuple which packs objects from the top of the stack
5814 into a newly created tuple. */
5815 assert(PyTuple_Check(args));
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005816 if (!PyTuple_GET_SIZE(args) && PyType_Check(cls)) {
5817 _Py_IDENTIFIER(__getinitargs__);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005818 _Py_IDENTIFIER(__new__);
Serhiy Storchakaf320be72018-01-25 10:49:40 +02005819 PyObject *func;
5820 if (_PyObject_LookupAttrId(cls, &PyId___getinitargs__, &func) < 0) {
5821 return NULL;
5822 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005823 if (func == NULL) {
Jeroen Demeyer59ad1102019-07-11 10:59:05 +02005824 return _PyObject_CallMethodIdOneArg(cls, &PyId___new__, cls);
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005825 }
5826 Py_DECREF(func);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005827 }
Serhiy Storchaka04e36af2017-10-22 21:31:34 +03005828 return PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005829}
5830
5831static int
5832load_obj(UnpicklerObject *self)
5833{
5834 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005835 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005836
5837 if ((i = marker(self)) < 0)
5838 return -1;
5839
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005840 if (Py_SIZE(self->stack) - i < 1)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005841 return Pdata_stack_underflow(self->stack);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005842
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005843 args = Pdata_poptuple(self->stack, i + 1);
5844 if (args == NULL)
5845 return -1;
5846
5847 PDATA_POP(self->stack, cls);
5848 if (cls) {
5849 obj = instantiate(cls, args);
5850 Py_DECREF(cls);
5851 }
5852 Py_DECREF(args);
5853 if (obj == NULL)
5854 return -1;
5855
5856 PDATA_PUSH(self->stack, obj, -1);
5857 return 0;
5858}
5859
5860static int
5861load_inst(UnpicklerObject *self)
5862{
5863 PyObject *cls = NULL;
5864 PyObject *args = NULL;
5865 PyObject *obj = NULL;
5866 PyObject *module_name;
5867 PyObject *class_name;
5868 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005869 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005870 char *s;
5871
5872 if ((i = marker(self)) < 0)
5873 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005874 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005875 return -1;
5876 if (len < 2)
5877 return bad_readline();
5878
5879 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5880 identifiers are permitted in Python 3.0, since the INST opcode is only
5881 supported by older protocols on Python 2.x. */
5882 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5883 if (module_name == NULL)
5884 return -1;
5885
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005886 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005887 if (len < 2) {
5888 Py_DECREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005889 return bad_readline();
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005890 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005891 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005892 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005893 cls = find_class(self, module_name, class_name);
5894 Py_DECREF(class_name);
5895 }
5896 }
5897 Py_DECREF(module_name);
5898
5899 if (cls == NULL)
5900 return -1;
5901
5902 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5903 obj = instantiate(cls, args);
5904 Py_DECREF(args);
5905 }
5906 Py_DECREF(cls);
5907
5908 if (obj == NULL)
5909 return -1;
5910
5911 PDATA_PUSH(self->stack, obj, -1);
5912 return 0;
5913}
5914
5915static int
5916load_newobj(UnpicklerObject *self)
5917{
5918 PyObject *args = NULL;
5919 PyObject *clsraw = NULL;
5920 PyTypeObject *cls; /* clsraw cast to its true type */
5921 PyObject *obj;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005922 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005923
5924 /* Stack is ... cls argtuple, and we want to call
5925 * cls.__new__(cls, *argtuple).
5926 */
5927 PDATA_POP(self->stack, args);
5928 if (args == NULL)
5929 goto error;
5930 if (!PyTuple_Check(args)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005931 PyErr_SetString(st->UnpicklingError,
5932 "NEWOBJ expected an arg " "tuple.");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005933 goto error;
5934 }
5935
5936 PDATA_POP(self->stack, clsraw);
5937 cls = (PyTypeObject *)clsraw;
5938 if (cls == NULL)
5939 goto error;
5940 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005941 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005942 "isn't a type object");
5943 goto error;
5944 }
5945 if (cls->tp_new == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005946 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005947 "has NULL tp_new");
5948 goto error;
5949 }
5950
5951 /* Call __new__. */
5952 obj = cls->tp_new(cls, args, NULL);
5953 if (obj == NULL)
5954 goto error;
5955
5956 Py_DECREF(args);
5957 Py_DECREF(clsraw);
5958 PDATA_PUSH(self->stack, obj, -1);
5959 return 0;
5960
5961 error:
5962 Py_XDECREF(args);
5963 Py_XDECREF(clsraw);
5964 return -1;
5965}
5966
5967static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005968load_newobj_ex(UnpicklerObject *self)
5969{
5970 PyObject *cls, *args, *kwargs;
5971 PyObject *obj;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005972 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005973
5974 PDATA_POP(self->stack, kwargs);
5975 if (kwargs == NULL) {
5976 return -1;
5977 }
5978 PDATA_POP(self->stack, args);
5979 if (args == NULL) {
5980 Py_DECREF(kwargs);
5981 return -1;
5982 }
5983 PDATA_POP(self->stack, cls);
5984 if (cls == NULL) {
5985 Py_DECREF(kwargs);
5986 Py_DECREF(args);
5987 return -1;
5988 }
Larry Hastings61272b72014-01-07 12:41:53 -08005989
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005990 if (!PyType_Check(cls)) {
Larry Hastings61272b72014-01-07 12:41:53 -08005991 PyErr_Format(st->UnpicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005992 "NEWOBJ_EX class argument must be a type, not %.200s",
5993 Py_TYPE(cls)->tp_name);
Miss Islington (bot)57c984f2020-07-13 06:09:27 -07005994 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005995 }
5996
5997 if (((PyTypeObject *)cls)->tp_new == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005998 PyErr_SetString(st->UnpicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005999 "NEWOBJ_EX class argument doesn't have __new__");
Miss Islington (bot)57c984f2020-07-13 06:09:27 -07006000 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006001 }
Miss Islington (bot)57c984f2020-07-13 06:09:27 -07006002 if (!PyTuple_Check(args)) {
6003 PyErr_Format(st->UnpicklingError,
6004 "NEWOBJ_EX args argument must be a tuple, not %.200s",
6005 Py_TYPE(args)->tp_name);
6006 goto error;
6007 }
6008 if (!PyDict_Check(kwargs)) {
6009 PyErr_Format(st->UnpicklingError,
6010 "NEWOBJ_EX kwargs argument must be a dict, not %.200s",
6011 Py_TYPE(kwargs)->tp_name);
6012 goto error;
6013 }
6014
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006015 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
6016 Py_DECREF(kwargs);
6017 Py_DECREF(args);
6018 Py_DECREF(cls);
6019 if (obj == NULL) {
6020 return -1;
6021 }
6022 PDATA_PUSH(self->stack, obj, -1);
6023 return 0;
Miss Islington (bot)57c984f2020-07-13 06:09:27 -07006024
6025error:
6026 Py_DECREF(kwargs);
6027 Py_DECREF(args);
6028 Py_DECREF(cls);
6029 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006030}
6031
6032static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006033load_global(UnpicklerObject *self)
6034{
6035 PyObject *global = NULL;
6036 PyObject *module_name;
6037 PyObject *global_name;
6038 Py_ssize_t len;
6039 char *s;
6040
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006041 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006042 return -1;
6043 if (len < 2)
6044 return bad_readline();
6045 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6046 if (!module_name)
6047 return -1;
6048
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006049 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006050 if (len < 2) {
6051 Py_DECREF(module_name);
6052 return bad_readline();
6053 }
6054 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
6055 if (global_name) {
6056 global = find_class(self, module_name, global_name);
6057 Py_DECREF(global_name);
6058 }
6059 }
6060 Py_DECREF(module_name);
6061
6062 if (global == NULL)
6063 return -1;
6064 PDATA_PUSH(self->stack, global, -1);
6065 return 0;
6066}
6067
6068static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006069load_stack_global(UnpicklerObject *self)
6070{
6071 PyObject *global;
6072 PyObject *module_name;
6073 PyObject *global_name;
6074
6075 PDATA_POP(self->stack, global_name);
6076 PDATA_POP(self->stack, module_name);
6077 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
6078 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006079 PickleState *st = _Pickle_GetGlobalState();
6080 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006081 Py_XDECREF(global_name);
6082 Py_XDECREF(module_name);
6083 return -1;
6084 }
6085 global = find_class(self, module_name, global_name);
6086 Py_DECREF(global_name);
6087 Py_DECREF(module_name);
6088 if (global == NULL)
6089 return -1;
6090 PDATA_PUSH(self->stack, global, -1);
6091 return 0;
6092}
6093
6094static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006095load_persid(UnpicklerObject *self)
6096{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006097 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006098 Py_ssize_t len;
6099 char *s;
6100
6101 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006102 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006103 return -1;
Alexandre Vassalotti896414f2013-11-30 13:52:35 -08006104 if (len < 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006105 return bad_readline();
6106
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006107 pid = PyUnicode_DecodeASCII(s, len - 1, "strict");
6108 if (pid == NULL) {
6109 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
6110 PyErr_SetString(_Pickle_GetGlobalState()->UnpicklingError,
6111 "persistent IDs in protocol 0 must be "
6112 "ASCII strings");
6113 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006114 return -1;
Serhiy Storchakadec25af2016-07-17 11:24:17 +03006115 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006116
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006117 obj = call_method(self->pers_func, self->pers_func_self, pid);
6118 Py_DECREF(pid);
6119 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006120 return -1;
6121
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006122 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006123 return 0;
6124 }
6125 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006126 PickleState *st = _Pickle_GetGlobalState();
6127 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006128 "A load persistent id instruction was encountered,\n"
6129 "but no persistent_load function was specified.");
6130 return -1;
6131 }
6132}
6133
6134static int
6135load_binpersid(UnpicklerObject *self)
6136{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006137 PyObject *pid, *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006138
6139 if (self->pers_func) {
6140 PDATA_POP(self->stack, pid);
6141 if (pid == NULL)
6142 return -1;
6143
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006144 obj = call_method(self->pers_func, self->pers_func_self, pid);
6145 Py_DECREF(pid);
6146 if (obj == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006147 return -1;
6148
Serhiy Storchaka986375e2017-11-30 22:48:31 +02006149 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006150 return 0;
6151 }
6152 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006153 PickleState *st = _Pickle_GetGlobalState();
6154 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006155 "A load persistent id instruction was encountered,\n"
6156 "but no persistent_load function was specified.");
6157 return -1;
6158 }
6159}
6160
6161static int
6162load_pop(UnpicklerObject *self)
6163{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006164 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006165
6166 /* Note that we split the (pickle.py) stack into two stacks,
6167 * an object stack and a mark stack. We have to be clever and
6168 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00006169 * mark stack first, and only signalling a stack underflow if
6170 * the object stack is empty and the mark stack doesn't match
6171 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006172 */
Collin Winter8ca69de2009-05-26 16:53:41 +00006173 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006174 self->num_marks--;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006175 self->stack->mark_set = self->num_marks != 0;
6176 self->stack->fence = self->num_marks ?
6177 self->marks[self->num_marks - 1] : 0;
6178 } else if (len <= self->stack->fence)
6179 return Pdata_stack_underflow(self->stack);
6180 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006181 len--;
6182 Py_DECREF(self->stack->data[len]);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006183 Py_SET_SIZE(self->stack, len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006184 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006185 return 0;
6186}
6187
6188static int
6189load_pop_mark(UnpicklerObject *self)
6190{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006191 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006192
6193 if ((i = marker(self)) < 0)
6194 return -1;
6195
6196 Pdata_clear(self->stack, i);
6197
6198 return 0;
6199}
6200
6201static int
6202load_dup(UnpicklerObject *self)
6203{
6204 PyObject *last;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006205 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006206
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006207 if (len <= self->stack->fence)
6208 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006209 last = self->stack->data[len - 1];
6210 PDATA_APPEND(self->stack, last, -1);
6211 return 0;
6212}
6213
6214static int
6215load_get(UnpicklerObject *self)
6216{
6217 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006218 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006219 Py_ssize_t len;
6220 char *s;
6221
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006222 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006223 return -1;
6224 if (len < 2)
6225 return bad_readline();
6226
6227 key = PyLong_FromString(s, NULL, 10);
6228 if (key == NULL)
6229 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006230 idx = PyLong_AsSsize_t(key);
6231 if (idx == -1 && PyErr_Occurred()) {
6232 Py_DECREF(key);
6233 return -1;
6234 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006235
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006236 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006237 if (value == NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006238 if (!PyErr_Occurred()) {
6239 PickleState *st = _Pickle_GetGlobalState();
6240 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
6241 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006242 Py_DECREF(key);
6243 return -1;
6244 }
6245 Py_DECREF(key);
6246
6247 PDATA_APPEND(self->stack, value, -1);
6248 return 0;
6249}
6250
6251static int
6252load_binget(UnpicklerObject *self)
6253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006254 PyObject *value;
6255 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006256 char *s;
6257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006258 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006259 return -1;
6260
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006261 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006262
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006263 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006264 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006265 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006266 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006267 PickleState *st = _Pickle_GetGlobalState();
6268 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006269 Py_DECREF(key);
6270 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006271 return -1;
6272 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006273
6274 PDATA_APPEND(self->stack, value, -1);
6275 return 0;
6276}
6277
6278static int
6279load_long_binget(UnpicklerObject *self)
6280{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006281 PyObject *value;
6282 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006283 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006284
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006285 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006286 return -1;
6287
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006288 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006290 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006291 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006292 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006293 if (key != NULL) {
Claudiu Popa6f03b232019-11-24 20:15:08 +01006294 PickleState *st = _Pickle_GetGlobalState();
6295 PyErr_Format(st->UnpicklingError, "Memo value not found at index %ld", idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02006296 Py_DECREF(key);
6297 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006298 return -1;
6299 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006300
6301 PDATA_APPEND(self->stack, value, -1);
6302 return 0;
6303}
6304
6305/* Push an object from the extension registry (EXT[124]). nbytes is
6306 * the number of bytes following the opcode, holding the index (code) value.
6307 */
6308static int
6309load_extension(UnpicklerObject *self, int nbytes)
6310{
6311 char *codebytes; /* the nbytes bytes after the opcode */
6312 long code; /* calc_binint returns long */
6313 PyObject *py_code; /* code as a Python int */
6314 PyObject *obj; /* the object to push */
6315 PyObject *pair; /* (module_name, class_name) */
6316 PyObject *module_name, *class_name;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006317 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006318
6319 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006320 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006321 return -1;
6322 code = calc_binint(codebytes, nbytes);
6323 if (code <= 0) { /* note that 0 is forbidden */
6324 /* Corrupt or hostile pickle. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006325 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006326 return -1;
6327 }
6328
6329 /* Look for the code in the cache. */
6330 py_code = PyLong_FromLong(code);
6331 if (py_code == NULL)
6332 return -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006333 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006334 if (obj != NULL) {
6335 /* Bingo. */
6336 Py_DECREF(py_code);
6337 PDATA_APPEND(self->stack, obj, -1);
6338 return 0;
6339 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006340 if (PyErr_Occurred()) {
6341 Py_DECREF(py_code);
6342 return -1;
6343 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006344
6345 /* Look up the (module_name, class_name) pair. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006346 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006347 if (pair == NULL) {
6348 Py_DECREF(py_code);
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08006349 if (!PyErr_Occurred()) {
6350 PyErr_Format(PyExc_ValueError, "unregistered extension "
6351 "code %ld", code);
6352 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006353 return -1;
6354 }
6355 /* Since the extension registry is manipulable via Python code,
6356 * confirm that pair is really a 2-tuple of strings.
6357 */
Victor Stinnerb37672d2018-11-22 03:37:50 +01006358 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2) {
6359 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006360 }
Victor Stinnerb37672d2018-11-22 03:37:50 +01006361
6362 module_name = PyTuple_GET_ITEM(pair, 0);
6363 if (!PyUnicode_Check(module_name)) {
6364 goto error;
6365 }
6366
6367 class_name = PyTuple_GET_ITEM(pair, 1);
6368 if (!PyUnicode_Check(class_name)) {
6369 goto error;
6370 }
6371
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006372 /* Load the object. */
6373 obj = find_class(self, module_name, class_name);
6374 if (obj == NULL) {
6375 Py_DECREF(py_code);
6376 return -1;
6377 }
6378 /* Cache code -> obj. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006379 code = PyDict_SetItem(st->extension_cache, py_code, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006380 Py_DECREF(py_code);
6381 if (code < 0) {
6382 Py_DECREF(obj);
6383 return -1;
6384 }
6385 PDATA_PUSH(self->stack, obj, -1);
6386 return 0;
Victor Stinnerb37672d2018-11-22 03:37:50 +01006387
6388error:
6389 Py_DECREF(py_code);
6390 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
6391 "isn't a 2-tuple of strings", code);
6392 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006393}
6394
6395static int
6396load_put(UnpicklerObject *self)
6397{
6398 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006399 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006400 Py_ssize_t len;
Victor Stinnerb110dad2016-12-09 17:06:43 +01006401 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006402
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006403 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006404 return -1;
6405 if (len < 2)
6406 return bad_readline();
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006407 if (Py_SIZE(self->stack) <= self->stack->fence)
6408 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006409 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006410
6411 key = PyLong_FromString(s, NULL, 10);
6412 if (key == NULL)
6413 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006414 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006415 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006416 if (idx < 0) {
6417 if (!PyErr_Occurred())
6418 PyErr_SetString(PyExc_ValueError,
6419 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006420 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006421 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006422
6423 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006424}
6425
6426static int
6427load_binput(UnpicklerObject *self)
6428{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006429 PyObject *value;
6430 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006431 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006432
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006433 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006434 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006435
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006436 if (Py_SIZE(self->stack) <= self->stack->fence)
6437 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006438 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006439
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006440 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006442 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006443}
6444
6445static int
6446load_long_binput(UnpicklerObject *self)
6447{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006448 PyObject *value;
6449 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006450 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006451
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006452 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006453 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006454
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006455 if (Py_SIZE(self->stack) <= self->stack->fence)
6456 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006457 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006458
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006459 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02006460 if (idx < 0) {
6461 PyErr_SetString(PyExc_ValueError,
6462 "negative LONG_BINPUT argument");
6463 return -1;
6464 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006465
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006466 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006467}
6468
6469static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006470load_memoize(UnpicklerObject *self)
6471{
6472 PyObject *value;
6473
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006474 if (Py_SIZE(self->stack) <= self->stack->fence)
6475 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006476 value = self->stack->data[Py_SIZE(self->stack) - 1];
6477
6478 return _Unpickler_MemoPut(self, self->memo_len, value);
6479}
6480
6481static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006482do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006483{
6484 PyObject *value;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006485 PyObject *slice;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006486 PyObject *list;
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006487 PyObject *result;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006488 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006489
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006490 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006491 if (x > len || x <= self->stack->fence)
6492 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006493 if (len == x) /* nothing to do */
6494 return 0;
6495
6496 list = self->stack->data[x - 1];
6497
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006498 if (PyList_CheckExact(list)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006499 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006500 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006501
6502 slice = Pdata_poplist(self->stack, x);
6503 if (!slice)
6504 return -1;
6505 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006506 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006507 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006508 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006509 }
6510 else {
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006511 PyObject *extend_func;
6512 _Py_IDENTIFIER(extend);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006513
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03006514 if (_PyObject_LookupAttrId(list, &PyId_extend, &extend_func) < 0) {
6515 return -1;
6516 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006517 if (extend_func != NULL) {
6518 slice = Pdata_poplist(self->stack, x);
6519 if (!slice) {
6520 Py_DECREF(extend_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006521 return -1;
6522 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006523 result = _Pickle_FastCall(extend_func, slice);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006524 Py_DECREF(extend_func);
6525 if (result == NULL)
6526 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006527 Py_DECREF(result);
6528 }
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006529 else {
6530 PyObject *append_func;
6531 _Py_IDENTIFIER(append);
6532
6533 /* Even if the PEP 307 requires extend() and append() methods,
6534 fall back on append() if the object has no extend() method
6535 for backward compatibility. */
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006536 append_func = _PyObject_GetAttrId(list, &PyId_append);
6537 if (append_func == NULL)
6538 return -1;
6539 for (i = x; i < len; i++) {
6540 value = self->stack->data[i];
6541 result = _Pickle_FastCall(append_func, value);
6542 if (result == NULL) {
6543 Pdata_clear(self->stack, i + 1);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006544 Py_SET_SIZE(self->stack, x);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006545 Py_DECREF(append_func);
6546 return -1;
6547 }
6548 Py_DECREF(result);
6549 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006550 Py_SET_SIZE(self->stack, x);
Serhiy Storchakabee09ae2017-02-02 11:12:47 +02006551 Py_DECREF(append_func);
6552 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006553 }
6554
6555 return 0;
6556}
6557
6558static int
6559load_append(UnpicklerObject *self)
6560{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006561 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
6562 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006563 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006564}
6565
6566static int
6567load_appends(UnpicklerObject *self)
6568{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006569 Py_ssize_t i = marker(self);
6570 if (i < 0)
6571 return -1;
6572 return do_append(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006573}
6574
6575static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006576do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006577{
6578 PyObject *value, *key;
6579 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006580 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006581 int status = 0;
6582
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006583 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006584 if (x > len || x <= self->stack->fence)
6585 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006586 if (len == x) /* nothing to do */
6587 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02006588 if ((len - x) % 2 != 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006589 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006590 /* Currupt or hostile pickle -- we never write one like this. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006591 PyErr_SetString(st->UnpicklingError,
6592 "odd number of items for SETITEMS");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006593 return -1;
6594 }
6595
6596 /* Here, dict does not actually need to be a PyDict; it could be anything
6597 that supports the __setitem__ attribute. */
6598 dict = self->stack->data[x - 1];
6599
6600 for (i = x + 1; i < len; i += 2) {
6601 key = self->stack->data[i - 1];
6602 value = self->stack->data[i];
6603 if (PyObject_SetItem(dict, key, value) < 0) {
6604 status = -1;
6605 break;
6606 }
6607 }
6608
6609 Pdata_clear(self->stack, x);
6610 return status;
6611}
6612
6613static int
6614load_setitem(UnpicklerObject *self)
6615{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006616 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006617}
6618
6619static int
6620load_setitems(UnpicklerObject *self)
6621{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006622 Py_ssize_t i = marker(self);
6623 if (i < 0)
6624 return -1;
6625 return do_setitems(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006626}
6627
6628static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006629load_additems(UnpicklerObject *self)
6630{
6631 PyObject *set;
6632 Py_ssize_t mark, len, i;
6633
6634 mark = marker(self);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02006635 if (mark < 0)
6636 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006637 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006638 if (mark > len || mark <= self->stack->fence)
6639 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006640 if (len == mark) /* nothing to do */
6641 return 0;
6642
6643 set = self->stack->data[mark - 1];
6644
6645 if (PySet_Check(set)) {
6646 PyObject *items;
6647 int status;
6648
6649 items = Pdata_poptuple(self->stack, mark);
6650 if (items == NULL)
6651 return -1;
6652
6653 status = _PySet_Update(set, items);
6654 Py_DECREF(items);
6655 return status;
6656 }
6657 else {
6658 PyObject *add_func;
6659 _Py_IDENTIFIER(add);
6660
6661 add_func = _PyObject_GetAttrId(set, &PyId_add);
6662 if (add_func == NULL)
6663 return -1;
6664 for (i = mark; i < len; i++) {
6665 PyObject *result;
6666 PyObject *item;
6667
6668 item = self->stack->data[i];
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006669 result = _Pickle_FastCall(add_func, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006670 if (result == NULL) {
6671 Pdata_clear(self->stack, i + 1);
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006672 Py_SET_SIZE(self->stack, mark);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006673 return -1;
6674 }
6675 Py_DECREF(result);
6676 }
Victor Stinner60ac6ed2020-02-07 23:18:08 +01006677 Py_SET_SIZE(self->stack, mark);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006678 }
6679
6680 return 0;
6681}
6682
6683static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006684load_build(UnpicklerObject *self)
6685{
6686 PyObject *state, *inst, *slotstate;
6687 PyObject *setstate;
6688 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006689 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006690
6691 /* Stack is ... instance, state. We want to leave instance at
6692 * the stack top, possibly mutated via instance.__setstate__(state).
6693 */
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006694 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6695 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006696
6697 PDATA_POP(self->stack, state);
6698 if (state == NULL)
6699 return -1;
6700
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006701 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006702
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006703 if (_PyObject_LookupAttrId(inst, &PyId___setstate__, &setstate) < 0) {
6704 Py_DECREF(state);
6705 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006706 }
Serhiy Storchakaf320be72018-01-25 10:49:40 +02006707 if (setstate != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006708 PyObject *result;
6709
6710 /* The explicit __setstate__ is responsible for everything. */
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006711 result = _Pickle_FastCall(setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006712 Py_DECREF(setstate);
6713 if (result == NULL)
6714 return -1;
6715 Py_DECREF(result);
6716 return 0;
6717 }
6718
6719 /* A default __setstate__. First see whether state embeds a
6720 * slot state dict too (a proto 2 addition).
6721 */
Serhiy Storchakafff9a312017-03-21 08:53:25 +02006722 if (PyTuple_Check(state) && PyTuple_GET_SIZE(state) == 2) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006723 PyObject *tmp = state;
6724
6725 state = PyTuple_GET_ITEM(tmp, 0);
6726 slotstate = PyTuple_GET_ITEM(tmp, 1);
6727 Py_INCREF(state);
6728 Py_INCREF(slotstate);
6729 Py_DECREF(tmp);
6730 }
6731 else
6732 slotstate = NULL;
6733
6734 /* Set inst.__dict__ from the state dict (if any). */
6735 if (state != Py_None) {
6736 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006737 PyObject *d_key, *d_value;
6738 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006739 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006740
6741 if (!PyDict_Check(state)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006742 PickleState *st = _Pickle_GetGlobalState();
6743 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006744 goto error;
6745 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006746 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006747 if (dict == NULL)
6748 goto error;
6749
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006750 i = 0;
6751 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6752 /* normally the keys for instance attributes are
6753 interned. we should try to do that here. */
6754 Py_INCREF(d_key);
6755 if (PyUnicode_CheckExact(d_key))
6756 PyUnicode_InternInPlace(&d_key);
6757 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6758 Py_DECREF(d_key);
6759 goto error;
6760 }
6761 Py_DECREF(d_key);
6762 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006763 Py_DECREF(dict);
6764 }
6765
6766 /* Also set instance attributes from the slotstate dict (if any). */
6767 if (slotstate != NULL) {
6768 PyObject *d_key, *d_value;
6769 Py_ssize_t i;
6770
6771 if (!PyDict_Check(slotstate)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006772 PickleState *st = _Pickle_GetGlobalState();
6773 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006774 "slot state is not a dictionary");
6775 goto error;
6776 }
6777 i = 0;
6778 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6779 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6780 goto error;
6781 }
6782 }
6783
6784 if (0) {
6785 error:
6786 status = -1;
6787 }
6788
6789 Py_DECREF(state);
6790 Py_XDECREF(slotstate);
6791 return status;
6792}
6793
6794static int
6795load_mark(UnpicklerObject *self)
6796{
6797
6798 /* Note that we split the (pickle.py) stack into two stacks, an
6799 * object stack and a mark stack. Here we push a mark onto the
6800 * mark stack.
6801 */
6802
Sergey Fedoseev86b89912018-08-25 12:54:40 +05006803 if (self->num_marks >= self->marks_size) {
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006804 size_t alloc = ((size_t)self->num_marks << 1) + 20;
6805 Py_ssize_t *marks_new = self->marks;
6806 PyMem_RESIZE(marks_new, Py_ssize_t, alloc);
6807 if (marks_new == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006808 PyErr_NoMemory();
6809 return -1;
6810 }
Sergey Fedoseev90555ec2018-08-25 15:41:58 +05006811 self->marks = marks_new;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006812 self->marks_size = (Py_ssize_t)alloc;
6813 }
6814
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006815 self->stack->mark_set = 1;
6816 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006817
6818 return 0;
6819}
6820
6821static int
6822load_reduce(UnpicklerObject *self)
6823{
6824 PyObject *callable = NULL;
6825 PyObject *argtup = NULL;
6826 PyObject *obj = NULL;
6827
6828 PDATA_POP(self->stack, argtup);
6829 if (argtup == NULL)
6830 return -1;
6831 PDATA_POP(self->stack, callable);
6832 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00006833 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006834 Py_DECREF(callable);
6835 }
6836 Py_DECREF(argtup);
6837
6838 if (obj == NULL)
6839 return -1;
6840
6841 PDATA_PUSH(self->stack, obj, -1);
6842 return 0;
6843}
6844
6845/* Just raises an error if we don't know the protocol specified. PROTO
6846 * is the first opcode for protocols >= 2.
6847 */
6848static int
6849load_proto(UnpicklerObject *self)
6850{
6851 char *s;
6852 int i;
6853
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006854 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006855 return -1;
6856
6857 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006858 if (i <= HIGHEST_PROTOCOL) {
6859 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006860 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006861 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006862
6863 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6864 return -1;
6865}
6866
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006867static int
6868load_frame(UnpicklerObject *self)
6869{
6870 char *s;
6871 Py_ssize_t frame_len;
6872
6873 if (_Unpickler_Read(self, &s, 8) < 0)
6874 return -1;
6875
6876 frame_len = calc_binsize(s, 8);
6877 if (frame_len < 0) {
6878 PyErr_Format(PyExc_OverflowError,
6879 "FRAME length exceeds system's maximum of %zd bytes",
6880 PY_SSIZE_T_MAX);
6881 return -1;
6882 }
6883
6884 if (_Unpickler_Read(self, &s, frame_len) < 0)
6885 return -1;
6886
6887 /* Rewind to start of frame */
6888 self->next_read_idx -= frame_len;
6889 return 0;
6890}
6891
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006892static PyObject *
6893load(UnpicklerObject *self)
6894{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006895 PyObject *value = NULL;
Christian Heimes27ea78b2014-01-27 01:03:53 +01006896 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006897
6898 self->num_marks = 0;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006899 self->stack->mark_set = 0;
6900 self->stack->fence = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006901 self->proto = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006902 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006903 Pdata_clear(self->stack, 0);
6904
6905 /* Convenient macros for the dispatch while-switch loop just below. */
6906#define OP(opcode, load_func) \
6907 case opcode: if (load_func(self) < 0) break; continue;
6908
6909#define OP_ARG(opcode, load_func, arg) \
6910 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6911
6912 while (1) {
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006913 if (_Unpickler_Read(self, &s, 1) < 0) {
6914 PickleState *st = _Pickle_GetGlobalState();
6915 if (PyErr_ExceptionMatches(st->UnpicklingError)) {
6916 PyErr_Format(PyExc_EOFError, "Ran out of input");
6917 }
6918 return NULL;
6919 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006920
6921 switch ((enum opcode)s[0]) {
6922 OP(NONE, load_none)
6923 OP(BININT, load_binint)
6924 OP(BININT1, load_binint1)
6925 OP(BININT2, load_binint2)
6926 OP(INT, load_int)
6927 OP(LONG, load_long)
6928 OP_ARG(LONG1, load_counted_long, 1)
6929 OP_ARG(LONG4, load_counted_long, 4)
6930 OP(FLOAT, load_float)
6931 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006932 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6933 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6934 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
Antoine Pitrou91f43802019-05-26 17:10:09 +02006935 OP(BYTEARRAY8, load_counted_bytearray)
6936 OP(NEXT_BUFFER, load_next_buffer)
6937 OP(READONLY_BUFFER, load_readonly_buffer)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006938 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6939 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006940 OP(STRING, load_string)
6941 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006942 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6943 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6944 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006945 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6946 OP_ARG(TUPLE1, load_counted_tuple, 1)
6947 OP_ARG(TUPLE2, load_counted_tuple, 2)
6948 OP_ARG(TUPLE3, load_counted_tuple, 3)
6949 OP(TUPLE, load_tuple)
6950 OP(EMPTY_LIST, load_empty_list)
6951 OP(LIST, load_list)
6952 OP(EMPTY_DICT, load_empty_dict)
6953 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006954 OP(EMPTY_SET, load_empty_set)
6955 OP(ADDITEMS, load_additems)
6956 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006957 OP(OBJ, load_obj)
6958 OP(INST, load_inst)
6959 OP(NEWOBJ, load_newobj)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006960 OP(NEWOBJ_EX, load_newobj_ex)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006961 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006962 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006963 OP(APPEND, load_append)
6964 OP(APPENDS, load_appends)
6965 OP(BUILD, load_build)
6966 OP(DUP, load_dup)
6967 OP(BINGET, load_binget)
6968 OP(LONG_BINGET, load_long_binget)
6969 OP(GET, load_get)
6970 OP(MARK, load_mark)
6971 OP(BINPUT, load_binput)
6972 OP(LONG_BINPUT, load_long_binput)
6973 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006974 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006975 OP(POP, load_pop)
6976 OP(POP_MARK, load_pop_mark)
6977 OP(SETITEM, load_setitem)
6978 OP(SETITEMS, load_setitems)
6979 OP(PERSID, load_persid)
6980 OP(BINPERSID, load_binpersid)
6981 OP(REDUCE, load_reduce)
6982 OP(PROTO, load_proto)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006983 OP(FRAME, load_frame)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006984 OP_ARG(EXT1, load_extension, 1)
6985 OP_ARG(EXT2, load_extension, 2)
6986 OP_ARG(EXT4, load_extension, 4)
6987 OP_ARG(NEWTRUE, load_bool, Py_True)
6988 OP_ARG(NEWFALSE, load_bool, Py_False)
6989
6990 case STOP:
6991 break;
6992
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006993 default:
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006994 {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006995 PickleState *st = _Pickle_GetGlobalState();
Serhiy Storchaka90493ab2016-09-06 23:55:11 +03006996 unsigned char c = (unsigned char) *s;
6997 if (0x20 <= c && c <= 0x7e && c != '\'' && c != '\\') {
6998 PyErr_Format(st->UnpicklingError,
6999 "invalid load key, '%c'.", c);
7000 }
7001 else {
7002 PyErr_Format(st->UnpicklingError,
7003 "invalid load key, '\\x%02x'.", c);
7004 }
7005 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007006 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007007 }
7008
7009 break; /* and we are done! */
7010 }
7011
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08007012 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007013 return NULL;
7014 }
7015
Victor Stinner2ae57e32013-10-31 13:39:23 +01007016 if (_Unpickler_SkipConsumed(self) < 0)
7017 return NULL;
7018
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007019 PDATA_POP(self->stack, value);
7020 return value;
7021}
7022
Larry Hastings61272b72014-01-07 12:41:53 -08007023/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007024
7025_pickle.Unpickler.load
7026
7027Load a pickle.
7028
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007029Read a pickled object representation from the open file object given
7030in the constructor, and return the reconstituted object hierarchy
7031specified therein.
Larry Hastings61272b72014-01-07 12:41:53 -08007032[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007033
Larry Hastings3cceb382014-01-04 11:09:09 -08007034static PyObject *
Larry Hastingsc2047262014-01-25 20:43:29 -08007035_pickle_Unpickler_load_impl(UnpicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007036/*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007037{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007038 UnpicklerObject *unpickler = (UnpicklerObject*)self;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007039
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007040 /* Check whether the Unpickler was initialized correctly. This prevents
7041 segfaulting if a subclass overridden __init__ with a function that does
7042 not call Unpickler.__init__(). Here, we simply ensure that self->read
7043 is not NULL. */
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007044 if (unpickler->read == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007045 PickleState *st = _Pickle_GetGlobalState();
7046 PyErr_Format(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007047 "Unpickler.__init__() was not called by %s.__init__()",
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007048 Py_TYPE(unpickler)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007049 return NULL;
7050 }
7051
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007052 return load(unpickler);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007053}
7054
7055/* The name of find_class() is misleading. In newer pickle protocols, this
7056 function is used for loading any global (i.e., functions), not just
7057 classes. The name is kept only for backward compatibility. */
7058
Larry Hastings61272b72014-01-07 12:41:53 -08007059/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007060
7061_pickle.Unpickler.find_class
7062
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007063 module_name: object
7064 global_name: object
7065 /
7066
7067Return an object from a specified module.
7068
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007069If necessary, the module will be imported. Subclasses may override
7070this method (e.g. to restrict unpickling of arbitrary classes and
7071functions).
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007072
7073This method is called whenever a class or a function object is
7074needed. Both arguments passed are str objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007075[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007076
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007077static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007078_pickle_Unpickler_find_class_impl(UnpicklerObject *self,
7079 PyObject *module_name,
7080 PyObject *global_name)
7081/*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007082{
7083 PyObject *global;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007084 PyObject *module;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007085
Steve Dowerb82e17e2019-05-23 08:45:22 -07007086 if (PySys_Audit("pickle.find_class", "OO",
7087 module_name, global_name) < 0) {
7088 return NULL;
7089 }
7090
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007091 /* Try to map the old names used in Python 2.x to the new ones used in
7092 Python 3.x. We do this only with old pickle protocols and when the
7093 user has not disabled the feature. */
7094 if (self->proto < 3 && self->fix_imports) {
7095 PyObject *key;
7096 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007097 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007098
7099 /* Check if the global (i.e., a function or a class) was renamed
7100 or moved to another module. */
7101 key = PyTuple_Pack(2, module_name, global_name);
7102 if (key == NULL)
7103 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007104 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007105 Py_DECREF(key);
7106 if (item) {
7107 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
7108 PyErr_Format(PyExc_RuntimeError,
7109 "_compat_pickle.NAME_MAPPING values should be "
7110 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
7111 return NULL;
7112 }
7113 module_name = PyTuple_GET_ITEM(item, 0);
7114 global_name = PyTuple_GET_ITEM(item, 1);
7115 if (!PyUnicode_Check(module_name) ||
7116 !PyUnicode_Check(global_name)) {
7117 PyErr_Format(PyExc_RuntimeError,
7118 "_compat_pickle.NAME_MAPPING values should be "
7119 "pairs of str, not (%.200s, %.200s)",
7120 Py_TYPE(module_name)->tp_name,
7121 Py_TYPE(global_name)->tp_name);
7122 return NULL;
7123 }
7124 }
7125 else if (PyErr_Occurred()) {
7126 return NULL;
7127 }
Serhiy Storchakabfe18242015-03-31 13:12:37 +03007128 else {
7129 /* Check if the module was renamed. */
7130 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
7131 if (item) {
7132 if (!PyUnicode_Check(item)) {
7133 PyErr_Format(PyExc_RuntimeError,
7134 "_compat_pickle.IMPORT_MAPPING values should be "
7135 "strings, not %.200s", Py_TYPE(item)->tp_name);
7136 return NULL;
7137 }
7138 module_name = item;
7139 }
7140 else if (PyErr_Occurred()) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007141 return NULL;
7142 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007143 }
7144 }
7145
tjb9004371c0a2019-02-18 23:30:51 +08007146 /*
7147 * we don't use PyImport_GetModule here, because it can return partially-
7148 * initialised modules, which then cause the getattribute to fail.
7149 */
7150 module = PyImport_Import(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007151 if (module == NULL) {
tjb9004371c0a2019-02-18 23:30:51 +08007152 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007153 }
Eric Snow3f9eee62017-09-15 16:35:20 -06007154 global = getattribute(module, global_name, self->proto >= 4);
7155 Py_DECREF(module);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007156 return global;
7157}
7158
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007159/*[clinic input]
7160
7161_pickle.Unpickler.__sizeof__ -> Py_ssize_t
7162
7163Returns size in memory, in bytes.
7164[clinic start generated code]*/
7165
7166static Py_ssize_t
7167_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
7168/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
7169{
7170 Py_ssize_t res;
7171
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02007172 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007173 if (self->memo != NULL)
7174 res += self->memo_size * sizeof(PyObject *);
7175 if (self->marks != NULL)
7176 res += self->marks_size * sizeof(Py_ssize_t);
7177 if (self->input_line != NULL)
7178 res += strlen(self->input_line) + 1;
7179 if (self->encoding != NULL)
7180 res += strlen(self->encoding) + 1;
7181 if (self->errors != NULL)
7182 res += strlen(self->errors) + 1;
7183 return res;
7184}
7185
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007186static struct PyMethodDef Unpickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007187 _PICKLE_UNPICKLER_LOAD_METHODDEF
7188 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02007189 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007190 {NULL, NULL} /* sentinel */
7191};
7192
7193static void
7194Unpickler_dealloc(UnpicklerObject *self)
7195{
7196 PyObject_GC_UnTrack((PyObject *)self);
7197 Py_XDECREF(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007198 Py_XDECREF(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007199 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007200 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007201 Py_XDECREF(self->stack);
7202 Py_XDECREF(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007203 Py_XDECREF(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007204 if (self->buffer.buf != NULL) {
7205 PyBuffer_Release(&self->buffer);
7206 self->buffer.buf = NULL;
7207 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007208
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007209 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007210 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007211 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007212 PyMem_Free(self->encoding);
7213 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007214
7215 Py_TYPE(self)->tp_free((PyObject *)self);
7216}
7217
7218static int
7219Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
7220{
7221 Py_VISIT(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007222 Py_VISIT(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007223 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007224 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007225 Py_VISIT(self->stack);
7226 Py_VISIT(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007227 Py_VISIT(self->buffers);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007228 return 0;
7229}
7230
7231static int
7232Unpickler_clear(UnpicklerObject *self)
7233{
7234 Py_CLEAR(self->readline);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007235 Py_CLEAR(self->readinto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007236 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00007237 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007238 Py_CLEAR(self->stack);
7239 Py_CLEAR(self->pers_func);
Antoine Pitrou91f43802019-05-26 17:10:09 +02007240 Py_CLEAR(self->buffers);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007241 if (self->buffer.buf != NULL) {
7242 PyBuffer_Release(&self->buffer);
7243 self->buffer.buf = NULL;
7244 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007245
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007246 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007247 PyMem_Free(self->marks);
7248 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007249 PyMem_Free(self->input_line);
7250 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007251 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007252 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02007253 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007254 self->errors = NULL;
7255
7256 return 0;
7257}
7258
Larry Hastings61272b72014-01-07 12:41:53 -08007259/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007260
7261_pickle.Unpickler.__init__
7262
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007263 file: object
7264 *
7265 fix_imports: bool = True
7266 encoding: str = 'ASCII'
7267 errors: str = 'strict'
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007268 buffers: object(c_default="NULL") = ()
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007269
7270This takes a binary file for reading a pickle data stream.
7271
7272The protocol version of the pickle is detected automatically, so no
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007273protocol argument is needed. Bytes past the pickled object's
7274representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007275
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007276The argument *file* must have two methods, a read() method that takes
7277an integer argument, and a readline() method that requires no
7278arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007279binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007280other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007281
7282Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007283which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007284generated by Python 2. If *fix_imports* is True, pickle will try to
7285map the old Python 2 names to the new names used in Python 3. The
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007286*encoding* and *errors* tell pickle how to decode 8-bit string
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007287instances pickled by Python 2; these default to 'ASCII' and 'strict',
7288respectively. The *encoding* can be 'bytes' to read these 8-bit
7289string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007290[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007291
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007292static int
Larry Hastings89964c42015-04-14 18:07:59 -04007293_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
7294 int fix_imports, const char *encoding,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007295 const char *errors, PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007296/*[clinic end generated code: output=09f0192649ea3f85 input=ca4c1faea9553121]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007297{
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02007298 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007299
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007300 /* In case of multiple __init__() calls, clear previous content. */
7301 if (self->read != NULL)
7302 (void)Unpickler_clear(self);
7303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007304 if (_Unpickler_SetInputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007305 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007306
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007307 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007308 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007309
Antoine Pitrou91f43802019-05-26 17:10:09 +02007310 if (_Unpickler_SetBuffers(self, buffers) < 0)
7311 return -1;
7312
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007313 self->fix_imports = fix_imports;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007314
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007315 if (init_method_ref((PyObject *)self, &PyId_persistent_load,
7316 &self->pers_func, &self->pers_func_self) < 0)
7317 {
7318 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007319 }
7320
7321 self->stack = (Pdata *)Pdata_New();
7322 if (self->stack == NULL)
Zackery Spytz4b430e52018-09-28 23:48:46 -06007323 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007324
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007325 self->memo_size = 32;
7326 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007327 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007328 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007329
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007330 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00007331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007332 return 0;
7333}
7334
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007335
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007336/* Define a proxy object for the Unpickler's internal memo object. This is to
7337 * avoid breaking code like:
7338 * unpickler.memo.clear()
7339 * and
7340 * unpickler.memo = saved_memo
7341 * Is this a good idea? Not really, but we don't want to break code that uses
7342 * it. Note that we don't implement the entire mapping API here. This is
7343 * intentional, as these should be treated as black-box implementation details.
7344 *
7345 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02007346 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007347 */
7348
Larry Hastings61272b72014-01-07 12:41:53 -08007349/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007350_pickle.UnpicklerMemoProxy.clear
7351
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007352Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08007353[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007354
Larry Hastings3cceb382014-01-04 11:09:09 -08007355static PyObject *
7356_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007357/*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007358{
7359 _Unpickler_MemoCleanup(self->unpickler);
7360 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
7361 if (self->unpickler->memo == NULL)
7362 return NULL;
7363 Py_RETURN_NONE;
7364}
7365
Larry Hastings61272b72014-01-07 12:41:53 -08007366/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007367_pickle.UnpicklerMemoProxy.copy
7368
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007369Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08007370[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007371
Larry Hastings3cceb382014-01-04 11:09:09 -08007372static PyObject *
7373_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007374/*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007375{
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007376 size_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007377 PyObject *new_memo = PyDict_New();
7378 if (new_memo == NULL)
7379 return NULL;
7380
7381 for (i = 0; i < self->unpickler->memo_size; i++) {
7382 int status;
7383 PyObject *key, *value;
7384
7385 value = self->unpickler->memo[i];
7386 if (value == NULL)
7387 continue;
7388
7389 key = PyLong_FromSsize_t(i);
7390 if (key == NULL)
7391 goto error;
7392 status = PyDict_SetItem(new_memo, key, value);
7393 Py_DECREF(key);
7394 if (status < 0)
7395 goto error;
7396 }
7397 return new_memo;
7398
7399error:
7400 Py_DECREF(new_memo);
7401 return NULL;
7402}
7403
Larry Hastings61272b72014-01-07 12:41:53 -08007404/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007405_pickle.UnpicklerMemoProxy.__reduce__
7406
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007407Implement pickling support.
Larry Hastings61272b72014-01-07 12:41:53 -08007408[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007409
Larry Hastings3cceb382014-01-04 11:09:09 -08007410static PyObject *
7411_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08007412/*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007413{
7414 PyObject *reduce_value;
7415 PyObject *constructor_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08007416 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007417 if (contents == NULL)
7418 return NULL;
7419
7420 reduce_value = PyTuple_New(2);
7421 if (reduce_value == NULL) {
7422 Py_DECREF(contents);
7423 return NULL;
7424 }
7425 constructor_args = PyTuple_New(1);
7426 if (constructor_args == NULL) {
7427 Py_DECREF(contents);
7428 Py_DECREF(reduce_value);
7429 return NULL;
7430 }
7431 PyTuple_SET_ITEM(constructor_args, 0, contents);
7432 Py_INCREF((PyObject *)&PyDict_Type);
7433 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
7434 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
7435 return reduce_value;
7436}
7437
7438static PyMethodDef unpicklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007439 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
7440 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
7441 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007442 {NULL, NULL} /* sentinel */
7443};
7444
7445static void
7446UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
7447{
7448 PyObject_GC_UnTrack(self);
7449 Py_XDECREF(self->unpickler);
7450 PyObject_GC_Del((PyObject *)self);
7451}
7452
7453static int
7454UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
7455 visitproc visit, void *arg)
7456{
7457 Py_VISIT(self->unpickler);
7458 return 0;
7459}
7460
7461static int
7462UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
7463{
7464 Py_CLEAR(self->unpickler);
7465 return 0;
7466}
7467
7468static PyTypeObject UnpicklerMemoProxyType = {
7469 PyVarObject_HEAD_INIT(NULL, 0)
7470 "_pickle.UnpicklerMemoProxy", /*tp_name*/
7471 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
7472 0,
7473 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007474 0, /* tp_vectorcall_offset */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007475 0, /* tp_getattr */
7476 0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007477 0, /* tp_as_async */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007478 0, /* tp_repr */
7479 0, /* tp_as_number */
7480 0, /* tp_as_sequence */
7481 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00007482 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007483 0, /* tp_call */
7484 0, /* tp_str */
7485 PyObject_GenericGetAttr, /* tp_getattro */
7486 PyObject_GenericSetAttr, /* tp_setattro */
7487 0, /* tp_as_buffer */
7488 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
7489 0, /* tp_doc */
7490 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
7491 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
7492 0, /* tp_richcompare */
7493 0, /* tp_weaklistoffset */
7494 0, /* tp_iter */
7495 0, /* tp_iternext */
7496 unpicklerproxy_methods, /* tp_methods */
7497};
7498
7499static PyObject *
7500UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
7501{
7502 UnpicklerMemoProxyObject *self;
7503
7504 self = PyObject_GC_New(UnpicklerMemoProxyObject,
7505 &UnpicklerMemoProxyType);
7506 if (self == NULL)
7507 return NULL;
7508 Py_INCREF(unpickler);
7509 self->unpickler = unpickler;
7510 PyObject_GC_Track(self);
7511 return (PyObject *)self;
7512}
7513
7514/*****************************************************************************/
7515
7516
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007517static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007518Unpickler_get_memo(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007519{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007520 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007521}
7522
7523static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007524Unpickler_set_memo(UnpicklerObject *self, PyObject *obj, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007525{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007526 PyObject **new_memo;
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007527 size_t new_memo_size = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007528
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007529 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007530 PyErr_SetString(PyExc_TypeError,
7531 "attribute deletion is not supported");
7532 return -1;
7533 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007534
Andy Lesterdffe4c02020-03-04 07:15:20 -06007535 if (Py_IS_TYPE(obj, &UnpicklerMemoProxyType)) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007536 UnpicklerObject *unpickler =
7537 ((UnpicklerMemoProxyObject *)obj)->unpickler;
7538
7539 new_memo_size = unpickler->memo_size;
7540 new_memo = _Unpickler_NewMemo(new_memo_size);
7541 if (new_memo == NULL)
7542 return -1;
7543
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007544 for (size_t i = 0; i < new_memo_size; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007545 Py_XINCREF(unpickler->memo[i]);
7546 new_memo[i] = unpickler->memo[i];
7547 }
7548 }
7549 else if (PyDict_Check(obj)) {
7550 Py_ssize_t i = 0;
7551 PyObject *key, *value;
7552
Serhiy Storchaka5ab81d72016-12-16 16:18:57 +02007553 new_memo_size = PyDict_GET_SIZE(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007554 new_memo = _Unpickler_NewMemo(new_memo_size);
7555 if (new_memo == NULL)
7556 return -1;
7557
7558 while (PyDict_Next(obj, &i, &key, &value)) {
7559 Py_ssize_t idx;
7560 if (!PyLong_Check(key)) {
7561 PyErr_SetString(PyExc_TypeError,
7562 "memo key must be integers");
7563 goto error;
7564 }
7565 idx = PyLong_AsSsize_t(key);
7566 if (idx == -1 && PyErr_Occurred())
7567 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02007568 if (idx < 0) {
7569 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02007570 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02007571 goto error;
7572 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007573 if (_Unpickler_MemoPut(self, idx, value) < 0)
7574 goto error;
7575 }
7576 }
7577 else {
7578 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka34fd4c22018-11-05 16:20:25 +02007579 "'memo' attribute must be an UnpicklerMemoProxy object "
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007580 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007581 return -1;
7582 }
7583
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007584 _Unpickler_MemoCleanup(self);
7585 self->memo_size = new_memo_size;
7586 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007587
7588 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007589
7590 error:
7591 if (new_memo_size) {
Benjamin Petersona4ae8282018-09-20 18:36:40 -07007592 for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007593 Py_XDECREF(new_memo[i]);
7594 }
7595 PyMem_FREE(new_memo);
7596 }
7597 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007598}
7599
7600static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007601Unpickler_get_persload(UnpicklerObject *self, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007602{
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007603 if (self->pers_func == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007604 PyErr_SetString(PyExc_AttributeError, "persistent_load");
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007605 return NULL;
7606 }
7607 return reconstruct_method(self->pers_func, self->pers_func_self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007608}
7609
7610static int
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +02007611Unpickler_set_persload(UnpicklerObject *self, PyObject *value, void *Py_UNUSED(ignored))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007612{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007613 if (value == NULL) {
7614 PyErr_SetString(PyExc_TypeError,
7615 "attribute deletion is not supported");
7616 return -1;
7617 }
7618 if (!PyCallable_Check(value)) {
7619 PyErr_SetString(PyExc_TypeError,
7620 "persistent_load must be a callable taking "
7621 "one argument");
7622 return -1;
7623 }
7624
Serhiy Storchaka986375e2017-11-30 22:48:31 +02007625 self->pers_func_self = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007626 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03007627 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007628
7629 return 0;
7630}
7631
7632static PyGetSetDef Unpickler_getsets[] = {
7633 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
7634 {"persistent_load", (getter)Unpickler_get_persload,
7635 (setter)Unpickler_set_persload},
7636 {NULL}
7637};
7638
7639static PyTypeObject Unpickler_Type = {
7640 PyVarObject_HEAD_INIT(NULL, 0)
7641 "_pickle.Unpickler", /*tp_name*/
7642 sizeof(UnpicklerObject), /*tp_basicsize*/
7643 0, /*tp_itemsize*/
7644 (destructor)Unpickler_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007645 0, /*tp_vectorcall_offset*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007646 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007647 0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02007648 0, /*tp_as_async*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007649 0, /*tp_repr*/
7650 0, /*tp_as_number*/
7651 0, /*tp_as_sequence*/
7652 0, /*tp_as_mapping*/
7653 0, /*tp_hash*/
7654 0, /*tp_call*/
7655 0, /*tp_str*/
7656 0, /*tp_getattro*/
7657 0, /*tp_setattro*/
7658 0, /*tp_as_buffer*/
7659 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007660 _pickle_Unpickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007661 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7662 (inquiry)Unpickler_clear, /*tp_clear*/
7663 0, /*tp_richcompare*/
7664 0, /*tp_weaklistoffset*/
7665 0, /*tp_iter*/
7666 0, /*tp_iternext*/
7667 Unpickler_methods, /*tp_methods*/
7668 0, /*tp_members*/
7669 Unpickler_getsets, /*tp_getset*/
7670 0, /*tp_base*/
7671 0, /*tp_dict*/
7672 0, /*tp_descr_get*/
7673 0, /*tp_descr_set*/
7674 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007675 _pickle_Unpickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007676 PyType_GenericAlloc, /*tp_alloc*/
7677 PyType_GenericNew, /*tp_new*/
7678 PyObject_GC_Del, /*tp_free*/
7679 0, /*tp_is_gc*/
7680};
7681
Larry Hastings61272b72014-01-07 12:41:53 -08007682/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007683
7684_pickle.dump
7685
7686 obj: object
7687 file: object
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007688 protocol: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007689 *
7690 fix_imports: bool = True
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007691 buffer_callback: object = None
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007692
7693Write a pickled representation of obj to the open file object file.
7694
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007695This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7696be more efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007697
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007698The optional *protocol* argument tells the pickler to use the given
Mark Dickinsone9652e82020-01-24 10:03:22 +00007699protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
7700protocol is 4. It was introduced in Python 3.4, and is incompatible
Łukasz Langac51d8c92018-04-03 23:06:53 -07007701with previous versions.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007702
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007703Specifying a negative protocol version selects the highest protocol
7704version supported. The higher the protocol used, the more recent the
7705version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007706
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007707The *file* argument must have a write() method that accepts a single
7708bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00007709writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007710this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007711
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007712If *fix_imports* is True and protocol is less than 3, pickle will try
7713to map the new Python 3 names to the old module names used in Python
77142, so that the pickle data stream is readable with Python 2.
Antoine Pitrou91f43802019-05-26 17:10:09 +02007715
7716If *buffer_callback* is None (the default), buffer views are serialized
7717into *file* as part of the pickle stream. It is an error if
7718*buffer_callback* is not None and *protocol* is None or smaller than 5.
7719
Larry Hastings61272b72014-01-07 12:41:53 -08007720[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007721
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007722static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007723_pickle_dump_impl(PyObject *module, PyObject *obj, PyObject *file,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007724 PyObject *protocol, int fix_imports,
7725 PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007726/*[clinic end generated code: output=706186dba996490c input=5ed6653da99cd97c]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007727{
7728 PicklerObject *pickler = _Pickler_New();
7729
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007730 if (pickler == NULL)
7731 return NULL;
7732
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007733 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007734 goto error;
7735
7736 if (_Pickler_SetOutputStream(pickler, file) < 0)
7737 goto error;
7738
Antoine Pitrou91f43802019-05-26 17:10:09 +02007739 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7740 goto error;
7741
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007742 if (dump(pickler, obj) < 0)
7743 goto error;
7744
7745 if (_Pickler_FlushToFile(pickler) < 0)
7746 goto error;
7747
7748 Py_DECREF(pickler);
7749 Py_RETURN_NONE;
7750
7751 error:
7752 Py_XDECREF(pickler);
7753 return NULL;
7754}
7755
/*[clinic input]

_pickle.dumps

    obj: object
    protocol: object = None
    *
    fix_imports: bool = True
    buffer_callback: object = None

Return the pickled representation of the object as a bytes object.

The optional *protocol* argument tells the pickler to use the given
protocol; supported protocols are 0, 1, 2, 3, 4 and 5. The default
protocol is 4. It was introduced in Python 3.4, and is incompatible
with previous versions.

Specifying a negative protocol version selects the highest protocol
version supported. The higher the protocol used, the more recent the
version of Python needed to read the pickle produced.

If *fix_imports* is True and *protocol* is less than 3, pickle will
try to map the new Python 3 names to the old module names used in
Python 2, so that the pickle data stream is readable with Python 2.

If *buffer_callback* is None (the default), buffer views are serialized
into *file* as part of the pickle stream. It is an error if
*buffer_callback* is not None and *protocol* is None or smaller than 5.

[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007786
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007787static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007788_pickle_dumps_impl(PyObject *module, PyObject *obj, PyObject *protocol,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007789 int fix_imports, PyObject *buffer_callback)
Mark Dickinsone9652e82020-01-24 10:03:22 +00007790/*[clinic end generated code: output=fbab0093a5580fdf input=e543272436c6f987]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007791{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007792 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007793 PicklerObject *pickler = _Pickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007794
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007795 if (pickler == NULL)
7796 return NULL;
7797
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007798 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007799 goto error;
7800
Antoine Pitrou91f43802019-05-26 17:10:09 +02007801 if (_Pickler_SetBufferCallback(pickler, buffer_callback) < 0)
7802 goto error;
7803
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007804 if (dump(pickler, obj) < 0)
7805 goto error;
7806
7807 result = _Pickler_GetString(pickler);
7808 Py_DECREF(pickler);
7809 return result;
7810
7811 error:
7812 Py_XDECREF(pickler);
7813 return NULL;
7814}
7815
/*[clinic input]

_pickle.load

    file: object
    *
    fix_imports: bool = True
    encoding: str = 'ASCII'
    errors: str = 'strict'
    buffers: object(c_default="NULL") = ()

Read and return an object from the pickle data stored in a file.

This is equivalent to ``Unpickler(file).load()``, but may be more
efficient.

The protocol version of the pickle is detected automatically, so no
protocol argument is needed. Bytes past the pickled object's
representation are ignored.

The argument *file* must have two methods, a read() method that takes
an integer argument, and a readline() method that requires no
arguments. Both methods should return bytes. Thus *file* can be a
binary file object opened for reading, an io.BytesIO object, or any
other custom object that meets this interface.

Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
which are used to control compatibility support for pickle stream
generated by Python 2. If *fix_imports* is True, pickle will try to
map the old Python 2 names to the new names used in Python 3. The
*encoding* and *errors* tell pickle how to decode 8-bit string
instances pickled by Python 2; these default to 'ASCII' and 'strict',
respectively. The *encoding* can be 'bytes' to read these 8-bit
string instances as bytes objects.
[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007851
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007852static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007853_pickle_load_impl(PyObject *module, PyObject *file, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007854 const char *encoding, const char *errors,
7855 PyObject *buffers)
Serhiy Storchaka279f4462019-09-14 12:24:05 +03007856/*[clinic end generated code: output=250452d141c23e76 input=46c7c31c92f4f371]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007857{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007858 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007859 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007860
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007861 if (unpickler == NULL)
7862 return NULL;
7863
7864 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7865 goto error;
7866
7867 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7868 goto error;
7869
Antoine Pitrou91f43802019-05-26 17:10:09 +02007870 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7871 goto error;
7872
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007873 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007874
7875 result = load(unpickler);
7876 Py_DECREF(unpickler);
7877 return result;
7878
7879 error:
7880 Py_XDECREF(unpickler);
7881 return NULL;
7882}
7883
/*[clinic input]

_pickle.loads

    data: object
    /
    *
    fix_imports: bool = True
    encoding: str = 'ASCII'
    errors: str = 'strict'
    buffers: object(c_default="NULL") = ()

Read and return an object from the given pickle data.

The protocol version of the pickle is detected automatically, so no
protocol argument is needed. Bytes past the pickled object's
representation are ignored.

Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
which are used to control compatibility support for pickle stream
generated by Python 2. If *fix_imports* is True, pickle will try to
map the old Python 2 names to the new names used in Python 3. The
*encoding* and *errors* tell pickle how to decode 8-bit string
instances pickled by Python 2; these default to 'ASCII' and 'strict',
respectively. The *encoding* can be 'bytes' to read these 8-bit
string instances as bytes objects.
[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007911
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007912static PyObject *
Serhiy Storchaka1a2b24f2016-07-07 17:35:15 +03007913_pickle_loads_impl(PyObject *module, PyObject *data, int fix_imports,
Antoine Pitrou91f43802019-05-26 17:10:09 +02007914 const char *encoding, const char *errors,
7915 PyObject *buffers)
Serhiy Storchaka531d1e52020-05-02 09:38:01 +03007916/*[clinic end generated code: output=82ac1e6b588e6d02 input=b3615540d0535087]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007917{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007918 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007919 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007920
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007921 if (unpickler == NULL)
7922 return NULL;
7923
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007924 if (_Unpickler_SetStringInput(unpickler, data) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007925 goto error;
7926
7927 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7928 goto error;
7929
Antoine Pitrou91f43802019-05-26 17:10:09 +02007930 if (_Unpickler_SetBuffers(unpickler, buffers) < 0)
7931 goto error;
7932
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007933 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007934
7935 result = load(unpickler);
7936 Py_DECREF(unpickler);
7937 return result;
7938
7939 error:
7940 Py_XDECREF(unpickler);
7941 return NULL;
7942}
7943
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007944static struct PyMethodDef pickle_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007945 _PICKLE_DUMP_METHODDEF
7946 _PICKLE_DUMPS_METHODDEF
7947 _PICKLE_LOAD_METHODDEF
7948 _PICKLE_LOADS_METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007949 {NULL, NULL} /* sentinel */
7950};
7951
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007952static int
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007953pickle_clear(PyObject *m)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007954{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007955 _Pickle_ClearState(_Pickle_GetState(m));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007956 return 0;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007957}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007958
Stefan Krahf483b0f2013-12-14 13:43:10 +01007959static void
7960pickle_free(PyObject *m)
7961{
7962 _Pickle_ClearState(_Pickle_GetState(m));
7963}
7964
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007965static int
7966pickle_traverse(PyObject *m, visitproc visit, void *arg)
7967{
7968 PickleState *st = _Pickle_GetState(m);
7969 Py_VISIT(st->PickleError);
7970 Py_VISIT(st->PicklingError);
7971 Py_VISIT(st->UnpicklingError);
7972 Py_VISIT(st->dispatch_table);
7973 Py_VISIT(st->extension_registry);
7974 Py_VISIT(st->extension_cache);
7975 Py_VISIT(st->inverted_registry);
7976 Py_VISIT(st->name_mapping_2to3);
7977 Py_VISIT(st->import_mapping_2to3);
7978 Py_VISIT(st->name_mapping_3to2);
7979 Py_VISIT(st->import_mapping_3to2);
7980 Py_VISIT(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03007981 Py_VISIT(st->getattr);
Hai Shi1f577ce2020-03-02 14:28:44 +08007982 Py_VISIT(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007983 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007984}
7985
7986static struct PyModuleDef _picklemodule = {
7987 PyModuleDef_HEAD_INIT,
Stefan Krahf483b0f2013-12-14 13:43:10 +01007988 "_pickle", /* m_name */
7989 pickle_module_doc, /* m_doc */
7990 sizeof(PickleState), /* m_size */
7991 pickle_methods, /* m_methods */
7992 NULL, /* m_reload */
7993 pickle_traverse, /* m_traverse */
7994 pickle_clear, /* m_clear */
7995 (freefunc)pickle_free /* m_free */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007996};
7997
7998PyMODINIT_FUNC
7999PyInit__pickle(void)
8000{
8001 PyObject *m;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008002 PickleState *st;
8003
8004 m = PyState_FindModule(&_picklemodule);
8005 if (m) {
8006 Py_INCREF(m);
8007 return m;
8008 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008009
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008010 if (PyType_Ready(&Pdata_Type) < 0)
8011 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00008012 if (PyType_Ready(&PicklerMemoProxyType) < 0)
8013 return NULL;
8014 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
8015 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008016
8017 /* Create the module and add the functions. */
8018 m = PyModule_Create(&_picklemodule);
8019 if (m == NULL)
8020 return NULL;
8021
Antoine Pitrou91f43802019-05-26 17:10:09 +02008022 /* Add types */
Dong-hee Na37fcbb62020-03-25 07:08:51 +09008023 if (PyModule_AddType(m, &Pickler_Type) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008024 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09008025 }
8026 if (PyModule_AddType(m, &Unpickler_Type) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008027 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09008028 }
8029 if (PyModule_AddType(m, &PyPickleBuffer_Type) < 0) {
Antoine Pitrou91f43802019-05-26 17:10:09 +02008030 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09008031 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008032
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008033 st = _Pickle_GetState(m);
8034
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008035 /* Initialize the exceptions. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008036 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
8037 if (st->PickleError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008038 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008039 st->PicklingError = \
8040 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
8041 if (st->PicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008042 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008043 st->UnpicklingError = \
8044 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
8045 if (st->UnpicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008046 return NULL;
8047
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008048 Py_INCREF(st->PickleError);
8049 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008050 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008051 Py_INCREF(st->PicklingError);
8052 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008053 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008054 Py_INCREF(st->UnpicklingError);
8055 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008056 return NULL;
8057
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08008058 if (_Pickle_InitState(st) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00008059 return NULL;
8060
8061 return m;
8062}