/* blob: 7121ac85cdbea1a32a00b7fb3e96debd245ee01d */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/*[clinic input]
module _pickle
class _pickle.Pickler "PicklerObject *" "&Pickler_Type"
class _pickle.PicklerMemoProxy "PicklerMemoProxyObject *" "&PicklerMemoProxyType"
class _pickle.Unpickler "UnpicklerObject *" "&Unpickler_Type"
class _pickle.UnpicklerMemoProxy "UnpicklerMemoProxyObject *" "&UnpicklerMemoProxyType"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=4b3e113468a58e6c]*/

/* Pickle protocol versions known to this module.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 4,
    DEFAULT_PROTOCOL = 3
};

/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each enumerator's value is the single character that identifies the
   opcode in a pickle stream. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95'
};

/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    FRAME_SIZE_TARGET = 64 * 1024,

    FRAME_HEADER_SIZE = 9
};

Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800120/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000121
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800122/* State of the pickle module, per PEP 3121. */
123typedef struct {
124 /* Exception classes for pickle. */
125 PyObject *PickleError;
126 PyObject *PicklingError;
127 PyObject *UnpicklingError;
Larry Hastings61272b72014-01-07 12:41:53 -0800128
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800129 /* copyreg.dispatch_table, {type_object: pickling_function} */
130 PyObject *dispatch_table;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000131
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800132 /* For the extension opcodes EXT1, EXT2 and EXT4. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000133
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800134 /* copyreg._extension_registry, {(module_name, function_name): code} */
135 PyObject *extension_registry;
136 /* copyreg._extension_cache, {code: object} */
137 PyObject *extension_cache;
138 /* copyreg._inverted_registry, {code: (module_name, function_name)} */
139 PyObject *inverted_registry;
140
141 /* Import mappings for compatibility with Python 2.x */
142
143 /* _compat_pickle.NAME_MAPPING,
144 {(oldmodule, oldname): (newmodule, newname)} */
145 PyObject *name_mapping_2to3;
146 /* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
147 PyObject *import_mapping_2to3;
148 /* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
149 PyObject *name_mapping_3to2;
150 PyObject *import_mapping_3to2;
151
152 /* codecs.encode, used for saving bytes in older protocols */
153 PyObject *codecs_encode;
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300154 /* builtins.getattr, used for saving nested names with protocol < 4 */
155 PyObject *getattr;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300156 /* functools.partial, used for implementing __newobj_ex__ with protocols
157 2 and 3 */
158 PyObject *partial;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800159} PickleState;
160
161/* Forward declaration of the _pickle module definition. */
162static struct PyModuleDef _picklemodule;
163
164/* Given a module object, get its per-module state. */
165static PickleState *
166_Pickle_GetState(PyObject *module)
167{
168 return (PickleState *)PyModule_GetState(module);
169}
170
171/* Find the module instance imported in the currently running sub-interpreter
172 and get its state. */
173static PickleState *
174_Pickle_GetGlobalState(void)
175{
176 return _Pickle_GetState(PyState_FindModule(&_picklemodule));
177}
178
179/* Clear the given pickle module state. */
180static void
181_Pickle_ClearState(PickleState *st)
182{
183 Py_CLEAR(st->PickleError);
184 Py_CLEAR(st->PicklingError);
185 Py_CLEAR(st->UnpicklingError);
186 Py_CLEAR(st->dispatch_table);
187 Py_CLEAR(st->extension_registry);
188 Py_CLEAR(st->extension_cache);
189 Py_CLEAR(st->inverted_registry);
190 Py_CLEAR(st->name_mapping_2to3);
191 Py_CLEAR(st->import_mapping_2to3);
192 Py_CLEAR(st->name_mapping_3to2);
193 Py_CLEAR(st->import_mapping_3to2);
194 Py_CLEAR(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300195 Py_CLEAR(st->getattr);
Victor Stinner9ba97df2015-11-17 12:15:07 +0100196 Py_CLEAR(st->partial);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800197}
198
199/* Initialize the given pickle module state. */
200static int
201_Pickle_InitState(PickleState *st)
202{
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300203 PyObject *builtins;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800204 PyObject *copyreg = NULL;
205 PyObject *compat_pickle = NULL;
206 PyObject *codecs = NULL;
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300207 PyObject *functools = NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800208
Serhiy Storchaka58e41342015-03-31 14:07:24 +0300209 builtins = PyEval_GetBuiltins();
210 if (builtins == NULL)
211 goto error;
212 st->getattr = PyDict_GetItemString(builtins, "getattr");
213 if (st->getattr == NULL)
214 goto error;
215 Py_INCREF(st->getattr);
216
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800217 copyreg = PyImport_ImportModule("copyreg");
218 if (!copyreg)
219 goto error;
220 st->dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
221 if (!st->dispatch_table)
222 goto error;
223 if (!PyDict_CheckExact(st->dispatch_table)) {
224 PyErr_Format(PyExc_RuntimeError,
225 "copyreg.dispatch_table should be a dict, not %.200s",
226 Py_TYPE(st->dispatch_table)->tp_name);
227 goto error;
228 }
229 st->extension_registry = \
230 PyObject_GetAttrString(copyreg, "_extension_registry");
231 if (!st->extension_registry)
232 goto error;
233 if (!PyDict_CheckExact(st->extension_registry)) {
234 PyErr_Format(PyExc_RuntimeError,
235 "copyreg._extension_registry should be a dict, "
236 "not %.200s", Py_TYPE(st->extension_registry)->tp_name);
237 goto error;
238 }
239 st->inverted_registry = \
240 PyObject_GetAttrString(copyreg, "_inverted_registry");
241 if (!st->inverted_registry)
242 goto error;
243 if (!PyDict_CheckExact(st->inverted_registry)) {
244 PyErr_Format(PyExc_RuntimeError,
245 "copyreg._inverted_registry should be a dict, "
246 "not %.200s", Py_TYPE(st->inverted_registry)->tp_name);
247 goto error;
248 }
249 st->extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
250 if (!st->extension_cache)
251 goto error;
252 if (!PyDict_CheckExact(st->extension_cache)) {
253 PyErr_Format(PyExc_RuntimeError,
254 "copyreg._extension_cache should be a dict, "
255 "not %.200s", Py_TYPE(st->extension_cache)->tp_name);
256 goto error;
257 }
258 Py_CLEAR(copyreg);
259
260 /* Load the 2.x -> 3.x stdlib module mapping tables */
261 compat_pickle = PyImport_ImportModule("_compat_pickle");
262 if (!compat_pickle)
263 goto error;
264 st->name_mapping_2to3 = \
265 PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
266 if (!st->name_mapping_2to3)
267 goto error;
268 if (!PyDict_CheckExact(st->name_mapping_2to3)) {
269 PyErr_Format(PyExc_RuntimeError,
270 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
271 Py_TYPE(st->name_mapping_2to3)->tp_name);
272 goto error;
273 }
274 st->import_mapping_2to3 = \
275 PyObject_GetAttrString(compat_pickle, "IMPORT_MAPPING");
276 if (!st->import_mapping_2to3)
277 goto error;
278 if (!PyDict_CheckExact(st->import_mapping_2to3)) {
279 PyErr_Format(PyExc_RuntimeError,
280 "_compat_pickle.IMPORT_MAPPING should be a dict, "
281 "not %.200s", Py_TYPE(st->import_mapping_2to3)->tp_name);
282 goto error;
283 }
284 /* ... and the 3.x -> 2.x mapping tables */
285 st->name_mapping_3to2 = \
286 PyObject_GetAttrString(compat_pickle, "REVERSE_NAME_MAPPING");
287 if (!st->name_mapping_3to2)
288 goto error;
289 if (!PyDict_CheckExact(st->name_mapping_3to2)) {
290 PyErr_Format(PyExc_RuntimeError,
291 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
292 "not %.200s", Py_TYPE(st->name_mapping_3to2)->tp_name);
293 goto error;
294 }
295 st->import_mapping_3to2 = \
296 PyObject_GetAttrString(compat_pickle, "REVERSE_IMPORT_MAPPING");
297 if (!st->import_mapping_3to2)
298 goto error;
299 if (!PyDict_CheckExact(st->import_mapping_3to2)) {
300 PyErr_Format(PyExc_RuntimeError,
301 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
302 "not %.200s", Py_TYPE(st->import_mapping_3to2)->tp_name);
303 goto error;
304 }
305 Py_CLEAR(compat_pickle);
306
307 codecs = PyImport_ImportModule("codecs");
308 if (codecs == NULL)
309 goto error;
310 st->codecs_encode = PyObject_GetAttrString(codecs, "encode");
311 if (st->codecs_encode == NULL) {
312 goto error;
313 }
314 if (!PyCallable_Check(st->codecs_encode)) {
315 PyErr_Format(PyExc_RuntimeError,
316 "codecs.encode should be a callable, not %.200s",
317 Py_TYPE(st->codecs_encode)->tp_name);
318 goto error;
319 }
320 Py_CLEAR(codecs);
321
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300322 functools = PyImport_ImportModule("functools");
323 if (!functools)
324 goto error;
325 st->partial = PyObject_GetAttrString(functools, "partial");
326 if (!st->partial)
327 goto error;
328 Py_CLEAR(functools);
329
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800330 return 0;
331
332 error:
333 Py_CLEAR(copyreg);
334 Py_CLEAR(compat_pickle);
335 Py_CLEAR(codecs);
Serhiy Storchaka0d554d72015-10-10 22:42:18 +0300336 Py_CLEAR(functools);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800337 _Pickle_ClearState(st);
338 return -1;
339}
340
341/* Helper for calling a function with a single argument quickly.
342
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800343 This function steals the reference of the given argument. */
344static PyObject *
345_Pickle_FastCall(PyObject *func, PyObject *obj)
346{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800347 PyObject *result;
Alexandre Vassalottib13e6bc2013-11-28 14:56:09 -0800348 PyObject *arg_tuple = PyTuple_New(1);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800349
Alexandre Vassalottib13e6bc2013-11-28 14:56:09 -0800350 /* Note: this function used to reuse the argument tuple. This used to give
351 a slight performance boost with older pickle implementations where many
352 unbuffered reads occurred (thus needing many function calls).
353
354 However, this optimization was removed because it was too complicated
355 to get right. It abused the C API for tuples to mutate them which led
356 to subtle reference counting and concurrency bugs. Furthermore, the
357 introduction of protocol 4 and the prefetching optimization via peek()
358 significantly reduced the number of function calls we do. Thus, the
359 benefits became marginal at best. */
360
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800361 if (arg_tuple == NULL) {
Alexandre Vassalottib13e6bc2013-11-28 14:56:09 -0800362 Py_DECREF(obj);
363 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800364 }
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800365 PyTuple_SET_ITEM(arg_tuple, 0, obj);
366 result = PyObject_Call(func, arg_tuple, NULL);
Alexandre Vassalottib13e6bc2013-11-28 14:56:09 -0800367 Py_CLEAR(arg_tuple);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -0800368 return result;
369}
370
371/*************************************************************************/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000372
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373/* Internal data type used as the unpickling stack. */
374typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000375 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000376 PyObject **data;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200377 int mark_set; /* is MARK set? */
378 Py_ssize_t fence; /* position of top MARK or 0 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000379 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000380} Pdata;
381
382static void
383Pdata_dealloc(Pdata *self)
384{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200385 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000386 while (--i >= 0) {
387 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000388 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000389 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000390 PyObject_Del(self);
391}
392
393static PyTypeObject Pdata_Type = {
394 PyVarObject_HEAD_INIT(NULL, 0)
395 "_pickle.Pdata", /*tp_name*/
396 sizeof(Pdata), /*tp_basicsize*/
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +0200397 sizeof(PyObject *), /*tp_itemsize*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000398 (destructor)Pdata_dealloc, /*tp_dealloc*/
399};
400
401static PyObject *
402Pdata_New(void)
403{
404 Pdata *self;
405
406 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
407 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000408 Py_SIZE(self) = 0;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200409 self->mark_set = 0;
410 self->fence = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000411 self->allocated = 8;
412 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000413 if (self->data)
414 return (PyObject *)self;
415 Py_DECREF(self);
416 return PyErr_NoMemory();
417}
418
419
420/* Retain only the initial clearto items. If clearto >= the current
421 * number of items, this is a (non-erroneous) NOP.
422 */
423static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200424Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000425{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200426 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000427
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200428 assert(clearto >= self->fence);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000429 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000430 return 0;
431
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000432 while (--i >= clearto) {
433 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000434 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000435 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000436 return 0;
437}
438
439static int
440Pdata_grow(Pdata *self)
441{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000442 PyObject **data = self->data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200443 size_t allocated = (size_t)self->allocated;
444 size_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000445
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000446 new_allocated = (allocated >> 3) + 6;
447 /* check for integer overflow */
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200448 if (new_allocated > (size_t)PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000449 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000450 new_allocated += allocated;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500451 PyMem_RESIZE(data, PyObject *, new_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000452 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000453 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000454
455 self->data = data;
Victor Stinnerf13c46c2014-08-17 21:05:55 +0200456 self->allocated = (Py_ssize_t)new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000457 return 0;
458
459 nomemory:
460 PyErr_NoMemory();
461 return -1;
462}
463
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200464static int
465Pdata_stack_underflow(Pdata *self)
466{
467 PickleState *st = _Pickle_GetGlobalState();
468 PyErr_SetString(st->UnpicklingError,
469 self->mark_set ?
470 "unexpected MARK found" :
471 "unpickling stack underflow");
472 return -1;
473}
474
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000475/* D is a Pdata*. Pop the topmost element and store it into V, which
476 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
477 * is raised and V is set to NULL.
478 */
479static PyObject *
480Pdata_pop(Pdata *self)
481{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200482 if (Py_SIZE(self) <= self->fence) {
483 Pdata_stack_underflow(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000484 return NULL;
485 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000486 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000487}
488#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
489
490static int
491Pdata_push(Pdata *self, PyObject *obj)
492{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000493 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000494 return -1;
495 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000496 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000497 return 0;
498}
499
500/* Push an object on stack, transferring its ownership to the stack. */
501#define PDATA_PUSH(D, O, ER) do { \
502 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
503
504/* Push an object on stack, adding a new reference to the object. */
505#define PDATA_APPEND(D, O, ER) do { \
506 Py_INCREF((O)); \
507 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
508
509static PyObject *
510Pdata_poptuple(Pdata *self, Py_ssize_t start)
511{
512 PyObject *tuple;
513 Py_ssize_t len, i, j;
514
Serhiy Storchaka59fb6342015-12-06 22:01:35 +0200515 if (start < self->fence) {
516 Pdata_stack_underflow(self);
517 return NULL;
518 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000519 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000520 tuple = PyTuple_New(len);
521 if (tuple == NULL)
522 return NULL;
523 for (i = start, j = 0; j < len; i++, j++)
524 PyTuple_SET_ITEM(tuple, j, self->data[i]);
525
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000526 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000527 return tuple;
528}
529
530static PyObject *
531Pdata_poplist(Pdata *self, Py_ssize_t start)
532{
533 PyObject *list;
534 Py_ssize_t len, i, j;
535
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000536 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000537 list = PyList_New(len);
538 if (list == NULL)
539 return NULL;
540 for (i = start, j = 0; j < len; i++, j++)
541 PyList_SET_ITEM(list, j, self->data[i]);
542
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000543 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000544 return list;
545}
546
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000547typedef struct {
548 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200549 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000550} PyMemoEntry;
551
552typedef struct {
553 Py_ssize_t mt_mask;
554 Py_ssize_t mt_used;
555 Py_ssize_t mt_allocated;
556 PyMemoEntry *mt_table;
557} PyMemoTable;
558
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000559typedef struct PicklerObject {
560 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000561 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000562 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000563 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000564 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100565 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000566
567 PyObject *write; /* write() method of the output stream. */
568 PyObject *output_buffer; /* Write into a local bytearray buffer before
569 flushing to the stream. */
570 Py_ssize_t output_len; /* Length of output_buffer. */
571 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000572 int proto; /* Pickle protocol number, >= 0 */
573 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100574 int framing; /* True when framing is enabled, proto >= 4 */
575 Py_ssize_t frame_start; /* Position in output_buffer where the
Martin Pantera90a4a92016-05-30 04:04:50 +0000576 current frame begins. -1 if there
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100577 is no frame currently open. */
578
579 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000580 int fast; /* Enable fast mode if set to a true value.
581 The fast mode disable the usage of memo,
582 therefore speeding the pickling process by
583 not generating superfluous PUT opcodes. It
584 should not be used if with self-referential
585 objects. */
586 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000587 int fix_imports; /* Indicate whether Pickler should fix
588 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000589 PyObject *fast_memo;
590} PicklerObject;
591
592typedef struct UnpicklerObject {
593 PyObject_HEAD
594 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000595
596 /* The unpickler memo is just an array of PyObject *s. Using a dict
597 is unnecessary, since the keys are contiguous ints. */
598 PyObject **memo;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100599 Py_ssize_t memo_size; /* Capacity of the memo array */
600 Py_ssize_t memo_len; /* Number of objects in the memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000601
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000602 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000603
604 Py_buffer buffer;
605 char *input_buffer;
606 char *input_line;
607 Py_ssize_t input_len;
608 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000609 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100610
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000611 PyObject *read; /* read() method of the input stream. */
612 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000613 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000614
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000615 char *encoding; /* Name of the encoding to be used for
616 decoding strings pickled using Python
617 2.x. The default value is "ASCII" */
618 char *errors; /* Name of errors handling scheme to used when
619 decoding strings. The default value is
620 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500621 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000622 objects. */
623 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
624 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000625 int proto; /* Protocol of the pickle loaded. */
626 int fix_imports; /* Indicate whether Unpickler should fix
627 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000628} UnpicklerObject;
629
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200630typedef struct {
631 PyObject_HEAD
632 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
633} PicklerMemoProxyObject;
634
635typedef struct {
636 PyObject_HEAD
637 UnpicklerObject *unpickler;
638} UnpicklerMemoProxyObject;
639
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640/* Forward declarations */
641static int save(PicklerObject *, PyObject *, int);
642static int save_reduce(PicklerObject *, PyObject *, PyObject *);
643static PyTypeObject Pickler_Type;
644static PyTypeObject Unpickler_Type;
645
Serhiy Storchaka3c1f0f12014-01-27 10:34:22 +0200646#include "clinic/_pickle.c.h"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000647
/*************************************************************************
 A custom hashtable mapping void* to Python ints. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to skip
 a bunch of unnecessary object creation. This makes a huge performance
 difference. */

/* Number of slots in a freshly created memo table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on each probe step. */
#define PERTURB_SHIFT 5


658static PyMemoTable *
659PyMemoTable_New(void)
660{
661 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
662 if (memo == NULL) {
663 PyErr_NoMemory();
664 return NULL;
665 }
666
667 memo->mt_used = 0;
668 memo->mt_allocated = MT_MINSIZE;
669 memo->mt_mask = MT_MINSIZE - 1;
670 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
671 if (memo->mt_table == NULL) {
672 PyMem_FREE(memo);
673 PyErr_NoMemory();
674 return NULL;
675 }
676 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
677
678 return memo;
679}
680
681static PyMemoTable *
682PyMemoTable_Copy(PyMemoTable *self)
683{
684 Py_ssize_t i;
685 PyMemoTable *new = PyMemoTable_New();
686 if (new == NULL)
687 return NULL;
688
689 new->mt_used = self->mt_used;
690 new->mt_allocated = self->mt_allocated;
691 new->mt_mask = self->mt_mask;
692 /* The table we get from _New() is probably smaller than we wanted.
693 Free it and allocate one that's the right size. */
694 PyMem_FREE(new->mt_table);
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500695 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000696 if (new->mt_table == NULL) {
697 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200698 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000699 return NULL;
700 }
701 for (i = 0; i < self->mt_allocated; i++) {
702 Py_XINCREF(self->mt_table[i].me_key);
703 }
704 memcpy(new->mt_table, self->mt_table,
705 sizeof(PyMemoEntry) * self->mt_allocated);
706
707 return new;
708}
709
710static Py_ssize_t
711PyMemoTable_Size(PyMemoTable *self)
712{
713 return self->mt_used;
714}
715
716static int
717PyMemoTable_Clear(PyMemoTable *self)
718{
719 Py_ssize_t i = self->mt_allocated;
720
721 while (--i >= 0) {
722 Py_XDECREF(self->mt_table[i].me_key);
723 }
724 self->mt_used = 0;
725 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
726 return 0;
727}
728
729static void
730PyMemoTable_Del(PyMemoTable *self)
731{
732 if (self == NULL)
733 return;
734 PyMemoTable_Clear(self);
735
736 PyMem_FREE(self->mt_table);
737 PyMem_FREE(self);
738}
739
740/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
741 can be considerably simpler than dictobject.c's lookdict(). */
742static PyMemoEntry *
743_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
744{
745 size_t i;
746 size_t perturb;
747 size_t mask = (size_t)self->mt_mask;
748 PyMemoEntry *table = self->mt_table;
749 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000750 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000751
752 i = hash & mask;
753 entry = &table[i];
754 if (entry->me_key == NULL || entry->me_key == key)
755 return entry;
756
757 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
758 i = (i << 2) + i + perturb + 1;
759 entry = &table[i & mask];
760 if (entry->me_key == NULL || entry->me_key == key)
761 return entry;
762 }
763 assert(0); /* Never reached */
764 return NULL;
765}
766
767/* Returns -1 on failure, 0 on success. */
768static int
769_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
770{
771 PyMemoEntry *oldtable = NULL;
772 PyMemoEntry *oldentry, *newentry;
773 Py_ssize_t new_size = MT_MINSIZE;
774 Py_ssize_t to_process;
775
776 assert(min_size > 0);
777
778 /* Find the smallest valid table size >= min_size. */
779 while (new_size < min_size && new_size > 0)
780 new_size <<= 1;
781 if (new_size <= 0) {
782 PyErr_NoMemory();
783 return -1;
784 }
785 /* new_size needs to be a power of two. */
786 assert((new_size & (new_size - 1)) == 0);
787
788 /* Allocate new table. */
789 oldtable = self->mt_table;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500790 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000791 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200792 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000793 PyErr_NoMemory();
794 return -1;
795 }
796 self->mt_allocated = new_size;
797 self->mt_mask = new_size - 1;
798 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
799
800 /* Copy entries from the old table. */
801 to_process = self->mt_used;
802 for (oldentry = oldtable; to_process > 0; oldentry++) {
803 if (oldentry->me_key != NULL) {
804 to_process--;
805 /* newentry is a pointer to a chunk of the new
806 mt_table, so we're setting the key:value pair
807 in-place. */
808 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
809 newentry->me_key = oldentry->me_key;
810 newentry->me_value = oldentry->me_value;
811 }
812 }
813
814 /* Deallocate the old table. */
815 PyMem_FREE(oldtable);
816 return 0;
817}
818
819/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200820static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821PyMemoTable_Get(PyMemoTable *self, PyObject *key)
822{
823 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
824 if (entry->me_key == NULL)
825 return NULL;
826 return &entry->me_value;
827}
828
829/* Returns -1 on failure, 0 on success. */
830static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200831PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832{
833 PyMemoEntry *entry;
834
835 assert(key != NULL);
836
837 entry = _PyMemoTable_Lookup(self, key);
838 if (entry->me_key != NULL) {
839 entry->me_value = value;
840 return 0;
841 }
842 Py_INCREF(key);
843 entry->me_key = key;
844 entry->me_value = value;
845 self->mt_used++;
846
847 /* If we added a key, we can safely resize. Otherwise just return!
848 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
849 *
850 * Quadrupling the size improves average table sparseness
851 * (reducing collisions) at the cost of some memory. It also halves
852 * the number of expensive resize operations in a growing memo table.
853 *
854 * Very large memo tables (over 50K items) use doubling instead.
855 * This may help applications with severe memory constraints.
856 */
857 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
858 return 0;
859 return _PyMemoTable_ResizeTable(self,
860 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
861}
862
863#undef MT_MINSIZE
864#undef PERTURB_SHIFT
865
866/*************************************************************************/
867
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000868
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869static int
870_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000871{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300872 Py_XSETREF(self->output_buffer,
Serhiy Storchaka4a1e70f2015-12-27 12:36:18 +0200873 PyBytes_FromStringAndSize(NULL, self->max_output_len));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000874 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000875 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000876 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100877 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000878 return 0;
879}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000880
/* Store `value` into out[0..7] as an 8-byte little-endian integer.  When
   size_t is narrower than 8 bytes the high-order bytes are written as 0. */
static void
_write_size64(char *out, size_t value)
{
    size_t pos = 0;

    Py_BUILD_ASSERT(sizeof(size_t) <= 8);

    /* Emit the least significant byte, then shift the next one down. */
    while (pos < sizeof(size_t)) {
        out[pos] = (unsigned char)(value & 0xff);
        value >>= 8;
        pos++;
    }
    /* Zero-pad up to the fixed 8-byte width. */
    while (pos < 8) {
        out[pos] = 0;
        pos++;
    }
}
895
896static void
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100897_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
898{
Antoine Pitrou8f2ee6e2013-11-23 21:05:08 +0100899 qdata[0] = FRAME;
900 _write_size64(qdata + 1, frame_len);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100901}
902
903static int
904_Pickler_CommitFrame(PicklerObject *self)
905{
906 size_t frame_len;
907 char *qdata;
908
909 if (!self->framing || self->frame_start == -1)
910 return 0;
911 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
912 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
913 _Pickler_WriteFrameHeader(self, qdata, frame_len);
914 self->frame_start = -1;
915 return 0;
916}
917
918static int
919_Pickler_OpcodeBoundary(PicklerObject *self)
920{
921 Py_ssize_t frame_len;
922
923 if (!self->framing || self->frame_start == -1)
924 return 0;
925 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
926 if (frame_len >= FRAME_SIZE_TARGET)
927 return _Pickler_CommitFrame(self);
928 else
929 return 0;
930}
931
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000932static PyObject *
933_Pickler_GetString(PicklerObject *self)
934{
935 PyObject *output_buffer = self->output_buffer;
936
937 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100938
939 if (_Pickler_CommitFrame(self))
940 return NULL;
941
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000942 self->output_buffer = NULL;
943 /* Resize down to exact size */
944 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
945 return NULL;
946 return output_buffer;
947}
948
949static int
950_Pickler_FlushToFile(PicklerObject *self)
951{
952 PyObject *output, *result;
953
954 assert(self->write != NULL);
955
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100956 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 output = _Pickler_GetString(self);
958 if (output == NULL)
959 return -1;
960
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -0800961 result = _Pickle_FastCall(self->write, output);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000962 Py_XDECREF(result);
963 return (result == NULL) ? -1 : 0;
964}
965
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200966static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100967_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000968{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100969 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000970 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100971 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000972
973 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100974 need_new_frame = (self->framing && self->frame_start == -1);
975
976 if (need_new_frame)
977 n = data_len + FRAME_HEADER_SIZE;
978 else
979 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000980
981 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100982 if (required > self->max_output_len) {
983 /* Make place in buffer for the pickle chunk */
984 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
985 PyErr_NoMemory();
986 return -1;
987 }
988 self->max_output_len = (self->output_len + n) / 2 * 3;
989 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
990 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000991 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000992 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100993 if (need_new_frame) {
994 /* Setup new frame */
995 Py_ssize_t frame_start = self->output_len;
996 self->frame_start = frame_start;
997 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
998 /* Write an invalid value, for debugging */
999 buffer[frame_start + i] = 0xFE;
1000 }
1001 self->output_len += FRAME_HEADER_SIZE;
1002 }
1003 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001004 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001005 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006 buffer[self->output_len + i] = s[i];
1007 }
1008 }
1009 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001010 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001011 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001012 self->output_len += data_len;
1013 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016static PicklerObject *
1017_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001018{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001019 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001020
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001021 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
1022 if (self == NULL)
1023 return NULL;
1024
1025 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01001026 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001027 self->write = NULL;
1028 self->proto = 0;
1029 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001030 self->framing = 0;
1031 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001032 self->fast = 0;
1033 self->fast_nesting = 0;
1034 self->fix_imports = 0;
1035 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001036 self->max_output_len = WRITE_BUF_SIZE;
1037 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001038
1039 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001040 self->output_buffer = PyBytes_FromStringAndSize(NULL,
1041 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +02001042
1043 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +02001044 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001045 return NULL;
1046 }
1047 return self;
1048}
1049
1050static int
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001051_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001052{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001053 long proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001054
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001055 if (protocol == NULL || protocol == Py_None) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001056 proto = DEFAULT_PROTOCOL;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001057 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001058 else {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001059 proto = PyLong_AsLong(protocol);
1060 if (proto < 0) {
1061 if (proto == -1 && PyErr_Occurred())
1062 return -1;
1063 proto = HIGHEST_PROTOCOL;
1064 }
1065 else if (proto > HIGHEST_PROTOCOL) {
1066 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
1067 HIGHEST_PROTOCOL);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001068 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001069 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001070 }
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08001071 self->proto = (int)proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001072 self->bin = proto > 0;
1073 self->fix_imports = fix_imports && proto < 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001074 return 0;
1075}
1076
1077/* Returns -1 (with an exception set) on failure, 0 on success. This may
1078 be called once on a freshly created Pickler. */
1079static int
1080_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
1081{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001082 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001083 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001084 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001085 if (self->write == NULL) {
1086 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1087 PyErr_SetString(PyExc_TypeError,
1088 "file must have a 'write' attribute");
1089 return -1;
1090 }
1091
1092 return 0;
1093}
1094
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001095/* Returns the size of the input on success, -1 on failure. This takes its
1096 own reference to `input`. */
1097static Py_ssize_t
1098_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
1099{
1100 if (self->buffer.buf != NULL)
1101 PyBuffer_Release(&self->buffer);
1102 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
1103 return -1;
1104 self->input_buffer = self->buffer.buf;
1105 self->input_len = self->buffer.len;
1106 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001107 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001108 return self->input_len;
1109}
1110
Antoine Pitrou04248a82010-10-12 20:51:21 +00001111static int
1112_Unpickler_SkipConsumed(UnpicklerObject *self)
1113{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001114 Py_ssize_t consumed;
1115 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001116
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001117 consumed = self->next_read_idx - self->prefetched_idx;
1118 if (consumed <= 0)
1119 return 0;
1120
1121 assert(self->peek); /* otherwise we did something wrong */
Martin Panter6245cb32016-04-15 02:14:19 +00001122 /* This makes a useless copy... */
Victor Stinnerb43ad1d2013-10-31 13:38:42 +01001123 r = PyObject_CallFunction(self->read, "n", consumed);
1124 if (r == NULL)
1125 return -1;
1126 Py_DECREF(r);
1127
1128 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001129 return 0;
1130}
1131
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001132static const Py_ssize_t READ_WHOLE_LINE = -1;
1133
1134/* If reading from a file, we need to only pull the bytes we need, since there
1135 may be multiple pickle objects arranged contiguously in the same input
1136 buffer.
1137
1138 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1139 bytes from the input stream/buffer.
1140
1141 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1142 failure; on success, returns the number of bytes read from the file.
1143
1144 On success, self->input_len will be 0; this is intentional so that when
1145 unpickling from a file, the "we've run out of data" code paths will trigger,
1146 causing the Unpickler to go back to the file for more data. Use the returned
1147 size to tell you how much data you can process. */
1148static Py_ssize_t
1149_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1150{
1151 PyObject *data;
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001152 Py_ssize_t read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001153
1154 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +02001155
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 if (_Unpickler_SkipConsumed(self) < 0)
1157 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001159 if (n == READ_WHOLE_LINE) {
Alexandre Vassalotti6bf41e52013-11-28 15:17:29 -08001160 PyObject *empty_tuple = PyTuple_New(0);
1161 data = PyObject_Call(self->readline, empty_tuple, NULL);
1162 Py_DECREF(empty_tuple);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001163 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001164 else {
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001165 PyObject *len;
1166 /* Prefetch some data without advancing the file pointer, if possible */
1167 if (self->peek && n < PREFETCH) {
1168 len = PyLong_FromSsize_t(PREFETCH);
1169 if (len == NULL)
1170 return -1;
1171 data = _Pickle_FastCall(self->peek, len);
1172 if (data == NULL) {
1173 if (!PyErr_ExceptionMatches(PyExc_NotImplementedError))
1174 return -1;
1175 /* peek() is probably not supported by the given file object */
1176 PyErr_Clear();
1177 Py_CLEAR(self->peek);
1178 }
1179 else {
1180 read_size = _Unpickler_SetStringInput(self, data);
1181 Py_DECREF(data);
1182 self->prefetched_idx = 0;
1183 if (n <= read_size)
1184 return n;
1185 }
1186 }
1187 len = PyLong_FromSsize_t(n);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001188 if (len == NULL)
1189 return -1;
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08001190 data = _Pickle_FastCall(self->read, len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001191 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001192 if (data == NULL)
1193 return -1;
1194
Serhiy Storchaka6fe39b72013-11-30 23:15:38 +02001195 read_size = _Unpickler_SetStringInput(self, data);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001196 Py_DECREF(data);
1197 return read_size;
1198}
1199
Victor Stinner19ed27e2016-05-20 11:42:37 +02001200/* Don't call it directly: use _Unpickler_Read() */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001201static Py_ssize_t
Victor Stinner19ed27e2016-05-20 11:42:37 +02001202_Unpickler_ReadImpl(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001203{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001204 Py_ssize_t num_read;
1205
Benjamin Peterson6aa15642015-09-27 01:16:03 -07001206 *s = NULL;
Benjamin Petersone48cf7e2015-09-26 00:08:34 -07001207 if (self->next_read_idx > PY_SSIZE_T_MAX - n) {
1208 PickleState *st = _Pickle_GetGlobalState();
1209 PyErr_SetString(st->UnpicklingError,
1210 "read would overflow (invalid bytecode)");
1211 return -1;
1212 }
Victor Stinner19ed27e2016-05-20 11:42:37 +02001213
1214 /* This case is handled by the _Unpickler_Read() macro for efficiency */
1215 assert(self->next_read_idx + n > self->input_len);
1216
Antoine Pitrou04248a82010-10-12 20:51:21 +00001217 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001218 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +00001219 return -1;
1220 }
Antoine Pitrou04248a82010-10-12 20:51:21 +00001221 num_read = _Unpickler_ReadFromFile(self, n);
1222 if (num_read < 0)
1223 return -1;
1224 if (num_read < n) {
1225 PyErr_Format(PyExc_EOFError, "Ran out of input");
1226 return -1;
1227 }
1228 *s = self->input_buffer;
1229 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230 return n;
1231}
1232
/* Read `n` bytes from the unpickler's data source, storing a pointer to
   them in `*s`.

   This should be used for all data reads, rather than accessing the
   unpickler's input buffer directly: it also copes with input streams, which
   the input buffer alone doesn't.

   When the current input buffer still holds `n` unread bytes, `*s` is
   pointed at them and next_read_idx is advanced by `n` right here.  In any
   other case the work is delegated to _Unpickler_ReadImpl().  The comparison
   is written as a subtraction so that it cannot overflow for a large `n`.

   Note that this is a macro: `self` and `n` are evaluated more than once.

   Returns -1 (with an exception set) on failure.  On success, return the
   number of chars read. */
#define _Unpickler_Read(self, s, n)                                 \
    (((n) > (self)->input_len - (self)->next_read_idx)              \
     ? _Unpickler_ReadImpl(self, (s), (n))                          \
     : (*(s) = (self)->input_buffer + (self)->next_read_idx,        \
        (self)->next_read_idx += (n),                               \
        (n)))
1252
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001253static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001254_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1255 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001256{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001257 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001258 if (input_line == NULL) {
1259 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001261 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001262
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001263 memcpy(input_line, line, len);
1264 input_line[len] = '\0';
1265 self->input_line = input_line;
1266 *result = self->input_line;
1267 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001268}
1269
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001270/* Read a line from the input stream/buffer. If we run off the end of the input
1271 before hitting \n, return the data we found.
1272
1273 Returns the number of chars read, or -1 on failure. */
1274static Py_ssize_t
1275_Unpickler_Readline(UnpicklerObject *self, char **result)
1276{
1277 Py_ssize_t i, num_read;
1278
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001279 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001280 if (self->input_buffer[i] == '\n') {
1281 char *line_start = self->input_buffer + self->next_read_idx;
1282 num_read = i - self->next_read_idx + 1;
1283 self->next_read_idx = i + 1;
1284 return _Unpickler_CopyLine(self, line_start, num_read, result);
1285 }
1286 }
1287 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001288 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1289 if (num_read < 0)
1290 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001291 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001292 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001293 }
Victor Stinner121aab42011-09-29 23:40:53 +02001294
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001295 /* If we get here, we've run off the end of the input string. Return the
1296 remaining string and let the caller figure it out. */
1297 *result = self->input_buffer + self->next_read_idx;
1298 num_read = i - self->next_read_idx;
1299 self->next_read_idx = i;
1300 return num_read;
1301}
1302
1303/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1304 will be modified in place. */
1305static int
1306_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1307{
1308 Py_ssize_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309
1310 assert(new_size > self->memo_size);
1311
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001312 PyMem_RESIZE(self->memo, PyObject *, new_size);
1313 if (self->memo == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001314 PyErr_NoMemory();
1315 return -1;
1316 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001317 for (i = self->memo_size; i < new_size; i++)
1318 self->memo[i] = NULL;
1319 self->memo_size = new_size;
1320 return 0;
1321}
1322
1323/* Returns NULL if idx is out of bounds. */
1324static PyObject *
1325_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1326{
1327 if (idx < 0 || idx >= self->memo_size)
1328 return NULL;
1329
1330 return self->memo[idx];
1331}
1332
1333/* Returns -1 (with an exception set) on failure, 0 on success.
1334 This takes its own reference to `value`. */
1335static int
1336_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1337{
1338 PyObject *old_item;
1339
1340 if (idx >= self->memo_size) {
1341 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1342 return -1;
1343 assert(idx < self->memo_size);
1344 }
1345 Py_INCREF(value);
1346 old_item = self->memo[idx];
1347 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001348 if (old_item != NULL) {
1349 Py_DECREF(old_item);
1350 }
1351 else {
1352 self->memo_len++;
1353 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001354 return 0;
1355}
1356
1357static PyObject **
1358_Unpickler_NewMemo(Py_ssize_t new_size)
1359{
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001360 PyObject **memo = PyMem_NEW(PyObject *, new_size);
Victor Stinner42024562013-07-12 00:53:57 +02001361 if (memo == NULL) {
1362 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001363 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001364 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001365 memset(memo, 0, new_size * sizeof(PyObject *));
1366 return memo;
1367}
1368
1369/* Free the unpickler's memo, taking care to decref any items left in it. */
1370static void
1371_Unpickler_MemoCleanup(UnpicklerObject *self)
1372{
1373 Py_ssize_t i;
1374 PyObject **memo = self->memo;
1375
1376 if (self->memo == NULL)
1377 return;
1378 self->memo = NULL;
1379 i = self->memo_size;
1380 while (--i >= 0) {
1381 Py_XDECREF(memo[i]);
1382 }
1383 PyMem_FREE(memo);
1384}
1385
1386static UnpicklerObject *
1387_Unpickler_New(void)
1388{
1389 UnpicklerObject *self;
1390
1391 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1392 if (self == NULL)
1393 return NULL;
1394
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001395 self->pers_func = NULL;
1396 self->input_buffer = NULL;
1397 self->input_line = NULL;
1398 self->input_len = 0;
1399 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001400 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001401 self->read = NULL;
1402 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001403 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001404 self->encoding = NULL;
1405 self->errors = NULL;
1406 self->marks = NULL;
1407 self->num_marks = 0;
1408 self->marks_size = 0;
1409 self->proto = 0;
1410 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001411 memset(&self->buffer, 0, sizeof(Py_buffer));
1412 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001413 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001414 self->memo = _Unpickler_NewMemo(self->memo_size);
1415 self->stack = (Pdata *)Pdata_New();
1416
1417 if (self->memo == NULL || self->stack == NULL) {
1418 Py_DECREF(self);
1419 return NULL;
1420 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001421
1422 return self;
1423}
1424
1425/* Returns -1 (with an exception set) on failure, 0 on success. This may
1426 be called once on a freshly created Pickler. */
1427static int
1428_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1429{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001430 _Py_IDENTIFIER(peek);
1431 _Py_IDENTIFIER(read);
1432 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001433
1434 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001435 if (self->peek == NULL) {
1436 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1437 PyErr_Clear();
1438 else
1439 return -1;
1440 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001441 self->read = _PyObject_GetAttrId(file, &PyId_read);
1442 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001443 if (self->readline == NULL || self->read == NULL) {
1444 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1445 PyErr_SetString(PyExc_TypeError,
1446 "file must have 'read' and 'readline' attributes");
1447 Py_CLEAR(self->read);
1448 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001449 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001450 return -1;
1451 }
1452 return 0;
1453}
1454
1455/* Returns -1 (with an exception set) on failure, 0 on success. This may
1456 be called once on a freshly created Pickler. */
1457static int
1458_Unpickler_SetInputEncoding(UnpicklerObject *self,
1459 const char *encoding,
1460 const char *errors)
1461{
1462 if (encoding == NULL)
1463 encoding = "ASCII";
1464 if (errors == NULL)
1465 errors = "strict";
1466
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001467 self->encoding = _PyMem_Strdup(encoding);
1468 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001469 if (self->encoding == NULL || self->errors == NULL) {
1470 PyErr_NoMemory();
1471 return -1;
1472 }
1473 return 0;
1474}
1475
1476/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001477static int
1478memo_get(PicklerObject *self, PyObject *key)
1479{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001480 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001481 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001482 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001483
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001484 value = PyMemoTable_Get(self->memo, key);
1485 if (value == NULL) {
1486 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001487 return -1;
1488 }
1489
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001490 if (!self->bin) {
1491 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001492 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1493 "%" PY_FORMAT_SIZE_T "d\n", *value);
1494 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001495 }
1496 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001497 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001498 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 len = 2;
1501 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001502 else if ((size_t)*value <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001503 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001504 pdata[1] = (unsigned char)(*value & 0xff);
1505 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001508 len = 5;
1509 }
1510 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001511 PickleState *st = _Pickle_GetGlobalState();
1512 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001513 "memo id too large for LONG_BINGET");
1514 return -1;
1515 }
1516 }
1517
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001518 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001519 return -1;
1520
1521 return 0;
1522}
1523
1524/* Store an object in the memo, assign it a new unique ID based on the number
1525 of objects currently stored in the memo and generate a PUT opcode. */
1526static int
1527memo_put(PicklerObject *self, PyObject *obj)
1528{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001529 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001530 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001531 Py_ssize_t idx;
1532
1533 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001534
1535 if (self->fast)
1536 return 0;
1537
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001538 idx = PyMemoTable_Size(self->memo);
1539 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1540 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001541
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001542 if (self->proto >= 4) {
1543 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1544 return -1;
1545 return 0;
1546 }
1547 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001548 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001549 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001550 "%" PY_FORMAT_SIZE_T "d\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001551 len = strlen(pdata);
1552 }
1553 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001554 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001555 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001556 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001557 len = 2;
1558 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03001559 else if ((size_t)idx <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001560 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001561 pdata[1] = (unsigned char)(idx & 0xff);
1562 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1563 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1564 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001565 len = 5;
1566 }
1567 else { /* unlikely */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08001568 PickleState *st = _Pickle_GetGlobalState();
1569 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001570 "memo id too large for LONG_BINPUT");
1571 return -1;
1572 }
1573 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001574 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001575 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001576
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001577 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001578}
1579
1580static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001581get_dotted_path(PyObject *obj, PyObject *name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001582 _Py_static_string(PyId_dot, ".");
1583 _Py_static_string(PyId_locals, "<locals>");
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001584 PyObject *dotted_path;
1585 Py_ssize_t i, n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001586
1587 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001588 if (dotted_path == NULL)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001589 return NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001590 n = PyList_GET_SIZE(dotted_path);
1591 assert(n >= 1);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001592 for (i = 0; i < n; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001593 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001594 PyObject *result = PyUnicode_RichCompare(
1595 subpath, _PyUnicode_FromId(&PyId_locals), Py_EQ);
1596 int is_equal = (result == Py_True);
1597 assert(PyBool_Check(result));
1598 Py_DECREF(result);
1599 if (is_equal) {
Antoine Pitrou6cd5eda2014-12-02 00:20:03 +01001600 if (obj == NULL)
1601 PyErr_Format(PyExc_AttributeError,
1602 "Can't pickle local object %R", name);
1603 else
1604 PyErr_Format(PyExc_AttributeError,
1605 "Can't pickle local attribute %R on %R", name, obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001606 Py_DECREF(dotted_path);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001607 return NULL;
1608 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001609 }
1610 return dotted_path;
1611}
1612
1613static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001614get_deep_attribute(PyObject *obj, PyObject *names, PyObject **pparent)
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001615{
1616 Py_ssize_t i, n;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001617 PyObject *parent = NULL;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001618
1619 assert(PyList_CheckExact(names));
1620 Py_INCREF(obj);
1621 n = PyList_GET_SIZE(names);
1622 for (i = 0; i < n; i++) {
1623 PyObject *name = PyList_GET_ITEM(names, i);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001624 Py_XDECREF(parent);
1625 parent = obj;
1626 obj = PyObject_GetAttr(parent, name);
1627 if (obj == NULL) {
1628 Py_DECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001629 return NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001630 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001631 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001632 if (pparent != NULL)
1633 *pparent = parent;
1634 else
1635 Py_XDECREF(parent);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001636 return obj;
1637}
1638
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001639static void
1640reformat_attribute_error(PyObject *obj, PyObject *name)
1641{
1642 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
1643 PyErr_Clear();
1644 PyErr_Format(PyExc_AttributeError,
1645 "Can't get attribute %R on %R", name, obj);
1646 }
1647}
1648
1649
1650static PyObject *
1651getattribute(PyObject *obj, PyObject *name, int allow_qualname)
1652{
1653 PyObject *dotted_path, *attr;
1654
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001655 if (allow_qualname) {
1656 dotted_path = get_dotted_path(obj, name);
1657 if (dotted_path == NULL)
1658 return NULL;
1659 attr = get_deep_attribute(obj, dotted_path, NULL);
1660 Py_DECREF(dotted_path);
1661 }
1662 else
1663 attr = PyObject_GetAttr(obj, name);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001664 if (attr == NULL)
1665 reformat_attribute_error(obj, name);
1666 return attr;
1667}
1668
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001669static PyObject *
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001670whichmodule(PyObject *global, PyObject *dotted_path)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001672 PyObject *module_name;
1673 PyObject *modules_dict;
1674 PyObject *module;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001675 Py_ssize_t i;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001676 _Py_IDENTIFIER(__module__);
1677 _Py_IDENTIFIER(modules);
1678 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001679
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001680 module_name = _PyObject_GetAttrId(global, &PyId___module__);
1681
1682 if (module_name == NULL) {
1683 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001685 PyErr_Clear();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001686 }
1687 else {
1688 /* In some rare cases (e.g., bound methods of extension types),
1689 __module__ can be None. If it is so, then search sys.modules for
1690 the module of global. */
1691 if (module_name != Py_None)
1692 return module_name;
1693 Py_CLEAR(module_name);
1694 }
1695 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001696
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001697 /* Fallback on walking sys.modules */
Victor Stinnerbb520202013-11-06 22:40:41 +01001698 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02001699 if (modules_dict == NULL) {
1700 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001701 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001702 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001703
1704 i = 0;
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001705 while (PyDict_Next(modules_dict, &i, &module_name, &module)) {
1706 PyObject *candidate;
1707 if (PyUnicode_Check(module_name) &&
1708 !PyUnicode_CompareWithASCIIString(module_name, "__main__"))
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001709 continue;
1710 if (module == Py_None)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001711 continue;
1712
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001713 candidate = get_deep_attribute(module, dotted_path, NULL);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001714 if (candidate == NULL) {
Serhiy Storchaka58e41342015-03-31 14:07:24 +03001715 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001716 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001717 PyErr_Clear();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001718 continue;
1719 }
1720
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001721 if (candidate == global) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001722 Py_INCREF(module_name);
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001723 Py_DECREF(candidate);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001724 return module_name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001725 }
Antoine Pitroufce60ea2014-10-23 22:47:50 +02001726 Py_DECREF(candidate);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001727 }
1728
1729 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001730 module_name = _PyUnicode_FromId(&PyId___main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001731 Py_INCREF(module_name);
1732 return module_name;
1733}
1734
1735/* fast_save_enter() and fast_save_leave() are guards against recursive
1736 objects when Pickler is used with the "fast mode" (i.e., with object
1737 memoization disabled). If the nesting of a list or dict object exceed
1738 FAST_NESTING_LIMIT, these guards will start keeping an internal
1739 reference to the seen list or dict objects and check whether these objects
1740 are recursive. These are not strictly necessary, since save() has a
1741 hard-coded recursion limit, but they give a nicer error message than the
1742 typical RuntimeError. */
1743static int
1744fast_save_enter(PicklerObject *self, PyObject *obj)
1745{
1746 /* if fast_nesting < 0, we're doing an error exit. */
1747 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1748 PyObject *key = NULL;
1749 if (self->fast_memo == NULL) {
1750 self->fast_memo = PyDict_New();
1751 if (self->fast_memo == NULL) {
1752 self->fast_nesting = -1;
1753 return 0;
1754 }
1755 }
1756 key = PyLong_FromVoidPtr(obj);
1757 if (key == NULL)
1758 return 0;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08001759 if (PyDict_GetItemWithError(self->fast_memo, key)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760 Py_DECREF(key);
1761 PyErr_Format(PyExc_ValueError,
1762 "fast mode: can't pickle cyclic objects "
1763 "including object type %.200s at %p",
1764 obj->ob_type->tp_name, obj);
1765 self->fast_nesting = -1;
1766 return 0;
1767 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08001768 if (PyErr_Occurred()) {
1769 return 0;
1770 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001771 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1772 Py_DECREF(key);
1773 self->fast_nesting = -1;
1774 return 0;
1775 }
1776 Py_DECREF(key);
1777 }
1778 return 1;
1779}
1780
1781static int
1782fast_save_leave(PicklerObject *self, PyObject *obj)
1783{
1784 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1785 PyObject *key = PyLong_FromVoidPtr(obj);
1786 if (key == NULL)
1787 return 0;
1788 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1789 Py_DECREF(key);
1790 return 0;
1791 }
1792 Py_DECREF(key);
1793 }
1794 return 1;
1795}
1796
1797static int
1798save_none(PicklerObject *self, PyObject *obj)
1799{
1800 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001801 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001802 return -1;
1803
1804 return 0;
1805}
1806
1807static int
1808save_bool(PicklerObject *self, PyObject *obj)
1809{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001810 if (self->proto >= 2) {
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08001811 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001812 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001813 return -1;
1814 }
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08001815 else {
1816 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1817 * so that unpicklers written before bools were introduced unpickle them
1818 * as ints, but unpicklers after can recognize that bools were intended.
1819 * Note that protocol 2 added direct ways to pickle bools.
1820 */
1821 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1822 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1823 return -1;
1824 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001825 return 0;
1826}
1827
1828static int
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001829save_long(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001830{
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001831 PyObject *repr = NULL;
1832 Py_ssize_t size;
1833 long val;
1834 int status = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001835
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001836 const char long_op = LONG;
1837
1838 val= PyLong_AsLong(obj);
1839 if (val == -1 && PyErr_Occurred()) {
1840 /* out of range for int pickling */
1841 PyErr_Clear();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001842 }
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001843 else if (self->bin &&
1844 (sizeof(long) <= 4 ||
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08001845 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
Larry Hastings61272b72014-01-07 12:41:53 -08001846 /* result fits in a signed 4-byte integer.
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08001847
1848 Note: we can't use -0x80000000L in the above condition because some
1849 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1850 before applying the unary minus when sizeof(long) <= 4. The
1851 resulting value stays unsigned which is commonly not what we want,
1852 so MSVC happily warns us about it. However, that result would have
1853 been fine because we guard for sizeof(long) <= 4 which turns the
1854 condition true in that particular case. */
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001855 char pdata[32];
1856 Py_ssize_t len = 0;
1857
1858 pdata[1] = (unsigned char)(val & 0xff);
1859 pdata[2] = (unsigned char)((val >> 8) & 0xff);
1860 pdata[3] = (unsigned char)((val >> 16) & 0xff);
1861 pdata[4] = (unsigned char)((val >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001862
1863 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1864 if (pdata[2] == 0) {
1865 pdata[0] = BININT1;
1866 len = 2;
1867 }
1868 else {
1869 pdata[0] = BININT2;
1870 len = 3;
1871 }
1872 }
1873 else {
1874 pdata[0] = BININT;
1875 len = 5;
1876 }
1877
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001878 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001879 return -1;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001880
1881 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001882 }
1883
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001884 if (self->proto >= 2) {
1885 /* Linear-time pickling. */
1886 size_t nbits;
1887 size_t nbytes;
1888 unsigned char *pdata;
1889 char header[5];
1890 int i;
1891 int sign = _PyLong_Sign(obj);
1892
1893 if (sign == 0) {
1894 header[0] = LONG1;
1895 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001896 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001897 goto error;
1898 return 0;
1899 }
1900 nbits = _PyLong_NumBits(obj);
1901 if (nbits == (size_t)-1 && PyErr_Occurred())
1902 goto error;
1903 /* How many bytes do we need? There are nbits >> 3 full
1904 * bytes of data, and nbits & 7 leftover bits. If there
1905 * are any leftover bits, then we clearly need another
1906 * byte. Wnat's not so obvious is that we *probably*
1907 * need another byte even if there aren't any leftovers:
1908 * the most-significant bit of the most-significant byte
1909 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001910 * opposite of the one we need. The exception is ints
1911 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001912 * its own 256's-complement, so has the right sign bit
1913 * even without the extra byte. That's a pain to check
1914 * for in advance, though, so we always grab an extra
1915 * byte at the start, and cut it back later if possible.
1916 */
1917 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001918 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001919 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001920 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001921 goto error;
1922 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001923 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001924 if (repr == NULL)
1925 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001926 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001927 i = _PyLong_AsByteArray((PyLongObject *)obj,
1928 pdata, nbytes,
1929 1 /* little endian */ , 1 /* signed */ );
1930 if (i < 0)
1931 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001932 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001933 * needed. This is so iff the MSB is all redundant sign
1934 * bits.
1935 */
1936 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001937 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001938 pdata[nbytes - 1] == 0xff &&
1939 (pdata[nbytes - 2] & 0x80) != 0) {
1940 nbytes--;
1941 }
1942
1943 if (nbytes < 256) {
1944 header[0] = LONG1;
1945 header[1] = (unsigned char)nbytes;
1946 size = 2;
1947 }
1948 else {
1949 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001950 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001951 for (i = 1; i < 5; i++) {
1952 header[i] = (unsigned char)(size & 0xff);
1953 size >>= 8;
1954 }
1955 size = 5;
1956 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001957 if (_Pickler_Write(self, header, size) < 0 ||
1958 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001959 goto error;
1960 }
1961 else {
1962 char *string;
1963
Mark Dickinson8dd05142009-01-20 20:43:58 +00001964 /* proto < 2: write the repr and newline. This is quadratic-time (in
1965 the number of digits), in both directions. We add a trailing 'L'
1966 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001967
1968 repr = PyObject_Repr(obj);
1969 if (repr == NULL)
1970 goto error;
1971
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001972 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 if (string == NULL)
1974 goto error;
1975
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001976 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1977 _Pickler_Write(self, string, size) < 0 ||
1978 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001979 goto error;
1980 }
1981
1982 if (0) {
1983 error:
1984 status = -1;
1985 }
1986 Py_XDECREF(repr);
1987
1988 return status;
1989}
1990
1991static int
1992save_float(PicklerObject *self, PyObject *obj)
1993{
1994 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1995
1996 if (self->bin) {
1997 char pdata[9];
1998 pdata[0] = BINFLOAT;
1999 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
2000 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002001 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002002 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02002003 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00002005 int result = -1;
2006 char *buf = NULL;
2007 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002009 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002010 goto done;
2011
Serhiy Storchakac86ca262015-02-15 14:18:32 +02002012 buf = PyOS_double_to_string(x, 'r', 0, Py_DTSF_ADD_DOT_0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00002013 if (!buf) {
2014 PyErr_NoMemory();
2015 goto done;
2016 }
2017
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002019 goto done;
2020
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002021 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00002022 goto done;
2023
2024 result = 0;
2025done:
2026 PyMem_Free(buf);
2027 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028 }
2029
2030 return 0;
2031}
2032
2033static int
2034save_bytes(PicklerObject *self, PyObject *obj)
2035{
2036 if (self->proto < 3) {
2037 /* Older pickle protocols do not have an opcode for pickling bytes
2038 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002039 the __reduce__ method) to permit bytes object unpickling.
2040
2041 Here we use a hack to be compatible with Python 2. Since in Python
2042 2 'bytes' is just an alias for 'str' (which has different
2043 parameters than the actual bytes object), we use codecs.encode
2044 to create the appropriate 'str' object when unpickled using
2045 Python 2 *and* the appropriate 'bytes' object when unpickled
2046 using Python 3. Again this is a hack and we don't need to do this
2047 with newer protocols. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002049 int status;
2050
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002051 if (PyBytes_GET_SIZE(obj) == 0) {
2052 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
2053 }
2054 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002055 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002056 PyObject *unicode_str =
2057 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
2058 PyBytes_GET_SIZE(obj),
2059 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002060 _Py_IDENTIFIER(latin1);
2061
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002062 if (unicode_str == NULL)
2063 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002064 reduce_value = Py_BuildValue("(O(OO))",
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08002065 st->codecs_encode, unicode_str,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002066 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002067 Py_DECREF(unicode_str);
2068 }
2069
2070 if (reduce_value == NULL)
2071 return -1;
2072
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002073 /* save_reduce() will memoize the object automatically. */
2074 status = save_reduce(self, reduce_value, obj);
2075 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002076 return status;
2077 }
2078 else {
2079 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002080 char header[9];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002081 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002082
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05002083 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002084 if (size < 0)
2085 return -1;
2086
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002087 if (size <= 0xff) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002088 header[0] = SHORT_BINBYTES;
2089 header[1] = (unsigned char)size;
2090 len = 2;
2091 }
Serhiy Storchaka67c719b2014-09-05 10:10:23 +03002092 else if ((size_t)size <= 0xffffffffUL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002093 header[0] = BINBYTES;
2094 header[1] = (unsigned char)(size & 0xff);
2095 header[2] = (unsigned char)((size >> 8) & 0xff);
2096 header[3] = (unsigned char)((size >> 16) & 0xff);
2097 header[4] = (unsigned char)((size >> 24) & 0xff);
2098 len = 5;
2099 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002100 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002101 header[0] = BINBYTES8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002102 _write_size64(header + 1, size);
Alexandre Vassalotti6e73ff12013-12-05 19:29:32 -08002103 len = 9;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002104 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002105 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002106 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02002107 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002108 return -1; /* string too large */
2109 }
2110
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002111 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002112 return -1;
2113
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002114 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002115 return -1;
2116
2117 if (memo_put(self, obj) < 0)
2118 return -1;
2119
2120 return 0;
2121 }
2122}
2123
2124/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2125 backslash and newline characters to \uXXXX escapes. */
2126static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002127raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002128{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002129 char *p;
Victor Stinner049e5092014-08-17 22:20:00 +02002130 Py_ssize_t i, size;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002131 void *data;
2132 unsigned int kind;
Victor Stinner358af132015-10-12 22:36:57 +02002133 _PyBytesWriter writer;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002134
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002135 if (PyUnicode_READY(obj))
2136 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002137
Victor Stinner358af132015-10-12 22:36:57 +02002138 _PyBytesWriter_Init(&writer);
2139
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002140 size = PyUnicode_GET_LENGTH(obj);
2141 data = PyUnicode_DATA(obj);
2142 kind = PyUnicode_KIND(obj);
Victor Stinner121aab42011-09-29 23:40:53 +02002143
Victor Stinner358af132015-10-12 22:36:57 +02002144 p = _PyBytesWriter_Alloc(&writer, size);
2145 if (p == NULL)
2146 goto error;
2147 writer.overallocate = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002148
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002149 for (i=0; i < size; i++) {
2150 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002151 /* Map 32-bit characters to '\Uxxxxxxxx' */
2152 if (ch >= 0x10000) {
Victor Stinner358af132015-10-12 22:36:57 +02002153 /* -1: substract 1 preallocated byte */
2154 p = _PyBytesWriter_Prepare(&writer, p, 10-1);
2155 if (p == NULL)
2156 goto error;
2157
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002158 *p++ = '\\';
2159 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002160 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2161 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2162 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2163 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2164 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2165 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2166 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2167 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002168 }
Victor Stinner358af132015-10-12 22:36:57 +02002169 /* Map 16-bit characters, '\\' and '\n' to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002170 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Victor Stinner358af132015-10-12 22:36:57 +02002171 /* -1: substract 1 preallocated byte */
2172 p = _PyBytesWriter_Prepare(&writer, p, 6-1);
2173 if (p == NULL)
2174 goto error;
2175
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002176 *p++ = '\\';
2177 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002178 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2179 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2180 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2181 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002182 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002183 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002184 else
2185 *p++ = (char) ch;
2186 }
Victor Stinner358af132015-10-12 22:36:57 +02002187
2188 return _PyBytesWriter_Finish(&writer, p);
2189
2190error:
2191 _PyBytesWriter_Dealloc(&writer);
2192 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002193}
2194
2195static int
Serhiy Storchakaef1585e2015-12-25 20:01:53 +02002196write_utf8(PicklerObject *self, const char *data, Py_ssize_t size)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002197{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002198 char header[9];
2199 Py_ssize_t len;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002200
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002201 assert(size >= 0);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002202 if (size <= 0xff && self->proto >= 4) {
2203 header[0] = SHORT_BINUNICODE;
2204 header[1] = (unsigned char)(size & 0xff);
2205 len = 2;
2206 }
Victor Stinnerf13c46c2014-08-17 21:05:55 +02002207 else if ((size_t)size <= 0xffffffffUL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002208 header[0] = BINUNICODE;
2209 header[1] = (unsigned char)(size & 0xff);
2210 header[2] = (unsigned char)((size >> 8) & 0xff);
2211 header[3] = (unsigned char)((size >> 16) & 0xff);
2212 header[4] = (unsigned char)((size >> 24) & 0xff);
2213 len = 5;
2214 }
2215 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002216 header[0] = BINUNICODE8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002217 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002218 len = 9;
2219 }
2220 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002221 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02002222 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02002223 return -1;
2224 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002225
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002226 if (_Pickler_Write(self, header, len) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002227 return -1;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002228 if (_Pickler_Write(self, data, size) < 0)
2229 return -1;
2230
2231 return 0;
2232}
2233
2234static int
2235write_unicode_binary(PicklerObject *self, PyObject *obj)
2236{
2237 PyObject *encoded = NULL;
2238 Py_ssize_t size;
2239 char *data;
2240 int r;
2241
2242 if (PyUnicode_READY(obj))
2243 return -1;
2244
2245 data = PyUnicode_AsUTF8AndSize(obj, &size);
2246 if (data != NULL)
2247 return write_utf8(self, data, size);
2248
2249 /* Issue #8383: for strings with lone surrogates, fallback on the
2250 "surrogatepass" error handler. */
2251 PyErr_Clear();
2252 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2253 if (encoded == NULL)
2254 return -1;
2255
2256 r = write_utf8(self, PyBytes_AS_STRING(encoded),
2257 PyBytes_GET_SIZE(encoded));
2258 Py_DECREF(encoded);
2259 return r;
2260}
2261
2262static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002263save_unicode(PicklerObject *self, PyObject *obj)
2264{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002265 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002266 if (write_unicode_binary(self, obj) < 0)
2267 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002268 }
2269 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002270 PyObject *encoded;
2271 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002272 const char unicode_op = UNICODE;
2273
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002274 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002275 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002276 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002277
Antoine Pitrou299978d2013-04-07 17:38:11 +02002278 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2279 Py_DECREF(encoded);
2280 return -1;
2281 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002282
2283 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002284 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2285 Py_DECREF(encoded);
2286 return -1;
2287 }
2288 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002290 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002291 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002292 }
2293 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002294 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002295
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002296 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002297}
2298
2299/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2300static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002301store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002302{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002303 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002304
2305 assert(PyTuple_Size(t) == len);
2306
2307 for (i = 0; i < len; i++) {
2308 PyObject *element = PyTuple_GET_ITEM(t, i);
2309
2310 if (element == NULL)
2311 return -1;
2312 if (save(self, element, 0) < 0)
2313 return -1;
2314 }
2315
2316 return 0;
2317}
2318
2319/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2320 * used across protocols to minimize the space needed to pickle them.
2321 * Tuples are also the only builtin immutable type that can be recursive
2322 * (a tuple can be reached from itself), and that requires some subtle
2323 * magic so that it works in all cases. IOW, this is a long routine.
2324 */
2325static int
2326save_tuple(PicklerObject *self, PyObject *obj)
2327{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002328 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002329
2330 const char mark_op = MARK;
2331 const char tuple_op = TUPLE;
2332 const char pop_op = POP;
2333 const char pop_mark_op = POP_MARK;
2334 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2335
2336 if ((len = PyTuple_Size(obj)) < 0)
2337 return -1;
2338
2339 if (len == 0) {
2340 char pdata[2];
2341
2342 if (self->proto) {
2343 pdata[0] = EMPTY_TUPLE;
2344 len = 1;
2345 }
2346 else {
2347 pdata[0] = MARK;
2348 pdata[1] = TUPLE;
2349 len = 2;
2350 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002351 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002352 return -1;
2353 return 0;
2354 }
2355
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002356 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002357 * saving the tuple elements, the tuple must be recursive, in
2358 * which case we'll pop everything we put on the stack, and fetch
2359 * its value from the memo.
2360 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002361 if (len <= 3 && self->proto >= 2) {
2362 /* Use TUPLE{1,2,3} opcodes. */
2363 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002364 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002365
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002366 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002367 /* pop the len elements */
2368 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002369 if (_Pickler_Write(self, &pop_op, 1) < 0)
2370 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002371 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002372 if (memo_get(self, obj) < 0)
2373 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002374
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002375 return 0;
2376 }
2377 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002378 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2379 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002380 }
2381 goto memoize;
2382 }
2383
2384 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2385 * Generate MARK e1 e2 ... TUPLE
2386 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002387 if (_Pickler_Write(self, &mark_op, 1) < 0)
2388 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002389
2390 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002391 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002392
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002393 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002394 /* pop the stack stuff we pushed */
2395 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002396 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2397 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002398 }
2399 else {
2400 /* Note that we pop one more than len, to remove
2401 * the MARK too.
2402 */
2403 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002404 if (_Pickler_Write(self, &pop_op, 1) < 0)
2405 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002406 }
2407 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002408 if (memo_get(self, obj) < 0)
2409 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002410
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002411 return 0;
2412 }
2413 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002414 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2415 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002416 }
2417
2418 memoize:
2419 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002420 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002421
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002422 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002423}
2424
2425/* iter is an iterator giving items, and we batch up chunks of
2426 * MARK item item ... item APPENDS
2427 * opcode sequences. Calling code should have arranged to first create an
2428 * empty list, or list-like object, for the APPENDS to operate on.
2429 * Returns 0 on success, <0 on error.
2430 */
2431static int
2432batch_list(PicklerObject *self, PyObject *iter)
2433{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002434 PyObject *obj = NULL;
2435 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002436 int i, n;
2437
2438 const char mark_op = MARK;
2439 const char append_op = APPEND;
2440 const char appends_op = APPENDS;
2441
2442 assert(iter != NULL);
2443
2444 /* XXX: I think this function could be made faster by avoiding the
2445 iterator interface and fetching objects directly from list using
2446 PyList_GET_ITEM.
2447 */
2448
2449 if (self->proto == 0) {
2450 /* APPENDS isn't available; do one at a time. */
2451 for (;;) {
2452 obj = PyIter_Next(iter);
2453 if (obj == NULL) {
2454 if (PyErr_Occurred())
2455 return -1;
2456 break;
2457 }
2458 i = save(self, obj, 0);
2459 Py_DECREF(obj);
2460 if (i < 0)
2461 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002462 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002463 return -1;
2464 }
2465 return 0;
2466 }
2467
2468 /* proto > 0: write in batches of BATCHSIZE. */
2469 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002470 /* Get first item */
2471 firstitem = PyIter_Next(iter);
2472 if (firstitem == NULL) {
2473 if (PyErr_Occurred())
2474 goto error;
2475
2476 /* nothing more to add */
2477 break;
2478 }
2479
2480 /* Try to get a second item */
2481 obj = PyIter_Next(iter);
2482 if (obj == NULL) {
2483 if (PyErr_Occurred())
2484 goto error;
2485
2486 /* Only one item to write */
2487 if (save(self, firstitem, 0) < 0)
2488 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002489 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002490 goto error;
2491 Py_CLEAR(firstitem);
2492 break;
2493 }
2494
2495 /* More than one item to write */
2496
2497 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002498 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002499 goto error;
2500
2501 if (save(self, firstitem, 0) < 0)
2502 goto error;
2503 Py_CLEAR(firstitem);
2504 n = 1;
2505
2506 /* Fetch and save up to BATCHSIZE items */
2507 while (obj) {
2508 if (save(self, obj, 0) < 0)
2509 goto error;
2510 Py_CLEAR(obj);
2511 n += 1;
2512
2513 if (n == BATCHSIZE)
2514 break;
2515
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002516 obj = PyIter_Next(iter);
2517 if (obj == NULL) {
2518 if (PyErr_Occurred())
2519 goto error;
2520 break;
2521 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002522 }
2523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002524 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002525 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002526
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002527 } while (n == BATCHSIZE);
2528 return 0;
2529
2530 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002531 Py_XDECREF(firstitem);
2532 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002533 return -1;
2534}
2535
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002536/* This is a variant of batch_list() above, specialized for lists (with no
2537 * support for list subclasses). Like batch_list(), we batch up chunks of
2538 * MARK item item ... item APPENDS
2539 * opcode sequences. Calling code should have arranged to first create an
2540 * empty list, or list-like object, for the APPENDS to operate on.
2541 * Returns 0 on success, -1 on error.
2542 *
2543 * This version is considerably faster than batch_list(), if less general.
2544 *
2545 * Note that this only works for protocols > 0.
2546 */
2547static int
2548batch_list_exact(PicklerObject *self, PyObject *obj)
2549{
2550 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002551 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002552
2553 const char append_op = APPEND;
2554 const char appends_op = APPENDS;
2555 const char mark_op = MARK;
2556
2557 assert(obj != NULL);
2558 assert(self->proto > 0);
2559 assert(PyList_CheckExact(obj));
2560
2561 if (PyList_GET_SIZE(obj) == 1) {
2562 item = PyList_GET_ITEM(obj, 0);
2563 if (save(self, item, 0) < 0)
2564 return -1;
2565 if (_Pickler_Write(self, &append_op, 1) < 0)
2566 return -1;
2567 return 0;
2568 }
2569
2570 /* Write in batches of BATCHSIZE. */
2571 total = 0;
2572 do {
2573 this_batch = 0;
2574 if (_Pickler_Write(self, &mark_op, 1) < 0)
2575 return -1;
2576 while (total < PyList_GET_SIZE(obj)) {
2577 item = PyList_GET_ITEM(obj, total);
2578 if (save(self, item, 0) < 0)
2579 return -1;
2580 total++;
2581 if (++this_batch == BATCHSIZE)
2582 break;
2583 }
2584 if (_Pickler_Write(self, &appends_op, 1) < 0)
2585 return -1;
2586
2587 } while (total < PyList_GET_SIZE(obj));
2588
2589 return 0;
2590}
2591
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002592static int
2593save_list(PicklerObject *self, PyObject *obj)
2594{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002595 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002596 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002597 int status = 0;
2598
2599 if (self->fast && !fast_save_enter(self, obj))
2600 goto error;
2601
2602 /* Create an empty list. */
2603 if (self->bin) {
2604 header[0] = EMPTY_LIST;
2605 len = 1;
2606 }
2607 else {
2608 header[0] = MARK;
2609 header[1] = LIST;
2610 len = 2;
2611 }
2612
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002613 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002614 goto error;
2615
2616 /* Get list length, and bow out early if empty. */
2617 if ((len = PyList_Size(obj)) < 0)
2618 goto error;
2619
2620 if (memo_put(self, obj) < 0)
2621 goto error;
2622
2623 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002624 /* Materialize the list elements. */
2625 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002626 if (Py_EnterRecursiveCall(" while pickling an object"))
2627 goto error;
2628 status = batch_list_exact(self, obj);
2629 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002630 } else {
2631 PyObject *iter = PyObject_GetIter(obj);
2632 if (iter == NULL)
2633 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002634
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002635 if (Py_EnterRecursiveCall(" while pickling an object")) {
2636 Py_DECREF(iter);
2637 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002638 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002639 status = batch_list(self, iter);
2640 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002641 Py_DECREF(iter);
2642 }
2643 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002644 if (0) {
2645 error:
2646 status = -1;
2647 }
2648
2649 if (self->fast && !fast_save_leave(self, obj))
2650 status = -1;
2651
2652 return status;
2653}
2654
2655/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2656 * MARK key value ... key value SETITEMS
2657 * opcode sequences. Calling code should have arranged to first create an
2658 * empty dict, or dict-like object, for the SETITEMS to operate on.
2659 * Returns 0 on success, <0 on error.
2660 *
2661 * This is very much like batch_list(). The difference between saving
2662 * elements directly, and picking apart two-tuples, is so long-winded at
2663 * the C level, though, that attempts to combine these routines were too
2664 * ugly to bear.
2665 */
2666static int
2667batch_dict(PicklerObject *self, PyObject *iter)
2668{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002669 PyObject *obj = NULL;
2670 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002671 int i, n;
2672
2673 const char mark_op = MARK;
2674 const char setitem_op = SETITEM;
2675 const char setitems_op = SETITEMS;
2676
2677 assert(iter != NULL);
2678
2679 if (self->proto == 0) {
2680 /* SETITEMS isn't available; do one at a time. */
2681 for (;;) {
2682 obj = PyIter_Next(iter);
2683 if (obj == NULL) {
2684 if (PyErr_Occurred())
2685 return -1;
2686 break;
2687 }
2688 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2689 PyErr_SetString(PyExc_TypeError, "dict items "
2690 "iterator must return 2-tuples");
2691 return -1;
2692 }
2693 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2694 if (i >= 0)
2695 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2696 Py_DECREF(obj);
2697 if (i < 0)
2698 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002699 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002700 return -1;
2701 }
2702 return 0;
2703 }
2704
2705 /* proto > 0: write in batches of BATCHSIZE. */
2706 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002707 /* Get first item */
2708 firstitem = PyIter_Next(iter);
2709 if (firstitem == NULL) {
2710 if (PyErr_Occurred())
2711 goto error;
2712
2713 /* nothing more to add */
2714 break;
2715 }
2716 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2717 PyErr_SetString(PyExc_TypeError, "dict items "
2718 "iterator must return 2-tuples");
2719 goto error;
2720 }
2721
2722 /* Try to get a second item */
2723 obj = PyIter_Next(iter);
2724 if (obj == NULL) {
2725 if (PyErr_Occurred())
2726 goto error;
2727
2728 /* Only one item to write */
2729 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2730 goto error;
2731 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2732 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002733 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002734 goto error;
2735 Py_CLEAR(firstitem);
2736 break;
2737 }
2738
2739 /* More than one item to write */
2740
2741 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002742 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002743 goto error;
2744
2745 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2746 goto error;
2747 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2748 goto error;
2749 Py_CLEAR(firstitem);
2750 n = 1;
2751
2752 /* Fetch and save up to BATCHSIZE items */
2753 while (obj) {
2754 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2755 PyErr_SetString(PyExc_TypeError, "dict items "
2756 "iterator must return 2-tuples");
2757 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002758 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002759 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2760 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2761 goto error;
2762 Py_CLEAR(obj);
2763 n += 1;
2764
2765 if (n == BATCHSIZE)
2766 break;
2767
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002768 obj = PyIter_Next(iter);
2769 if (obj == NULL) {
2770 if (PyErr_Occurred())
2771 goto error;
2772 break;
2773 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002774 }
2775
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002776 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002777 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002778
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002779 } while (n == BATCHSIZE);
2780 return 0;
2781
2782 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002783 Py_XDECREF(firstitem);
2784 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002785 return -1;
2786}
2787
Collin Winter5c9b02d2009-05-25 05:43:30 +00002788/* This is a variant of batch_dict() above that specializes for dicts, with no
2789 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2790 * MARK key value ... key value SETITEMS
2791 * opcode sequences. Calling code should have arranged to first create an
2792 * empty dict, or dict-like object, for the SETITEMS to operate on.
2793 * Returns 0 on success, -1 on error.
2794 *
2795 * Note that this currently doesn't work for protocol 0.
2796 */
2797static int
2798batch_dict_exact(PicklerObject *self, PyObject *obj)
2799{
2800 PyObject *key = NULL, *value = NULL;
2801 int i;
2802 Py_ssize_t dict_size, ppos = 0;
2803
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002804 const char mark_op = MARK;
2805 const char setitem_op = SETITEM;
2806 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002807
2808 assert(obj != NULL);
2809 assert(self->proto > 0);
2810
2811 dict_size = PyDict_Size(obj);
2812
2813 /* Special-case len(d) == 1 to save space. */
2814 if (dict_size == 1) {
2815 PyDict_Next(obj, &ppos, &key, &value);
2816 if (save(self, key, 0) < 0)
2817 return -1;
2818 if (save(self, value, 0) < 0)
2819 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002820 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002821 return -1;
2822 return 0;
2823 }
2824
2825 /* Write in batches of BATCHSIZE. */
2826 do {
2827 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002828 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002829 return -1;
2830 while (PyDict_Next(obj, &ppos, &key, &value)) {
2831 if (save(self, key, 0) < 0)
2832 return -1;
2833 if (save(self, value, 0) < 0)
2834 return -1;
2835 if (++i == BATCHSIZE)
2836 break;
2837 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002838 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002839 return -1;
2840 if (PyDict_Size(obj) != dict_size) {
2841 PyErr_Format(
2842 PyExc_RuntimeError,
2843 "dictionary changed size during iteration");
2844 return -1;
2845 }
2846
2847 } while (i == BATCHSIZE);
2848 return 0;
2849}
2850
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002851static int
2852save_dict(PicklerObject *self, PyObject *obj)
2853{
2854 PyObject *items, *iter;
2855 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002856 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002857 int status = 0;
2858
2859 if (self->fast && !fast_save_enter(self, obj))
2860 goto error;
2861
2862 /* Create an empty dict. */
2863 if (self->bin) {
2864 header[0] = EMPTY_DICT;
2865 len = 1;
2866 }
2867 else {
2868 header[0] = MARK;
2869 header[1] = DICT;
2870 len = 2;
2871 }
2872
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002873 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002874 goto error;
2875
2876 /* Get dict size, and bow out early if empty. */
2877 if ((len = PyDict_Size(obj)) < 0)
2878 goto error;
2879
2880 if (memo_put(self, obj) < 0)
2881 goto error;
2882
2883 if (len != 0) {
2884 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002885 if (PyDict_CheckExact(obj) && self->proto > 0) {
2886 /* We can take certain shortcuts if we know this is a dict and
2887 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002888 if (Py_EnterRecursiveCall(" while pickling an object"))
2889 goto error;
2890 status = batch_dict_exact(self, obj);
2891 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002892 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002893 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002894
2895 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002896 if (items == NULL)
2897 goto error;
2898 iter = PyObject_GetIter(items);
2899 Py_DECREF(items);
2900 if (iter == NULL)
2901 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002902 if (Py_EnterRecursiveCall(" while pickling an object")) {
2903 Py_DECREF(iter);
2904 goto error;
2905 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002906 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002907 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002908 Py_DECREF(iter);
2909 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002910 }
2911
2912 if (0) {
2913 error:
2914 status = -1;
2915 }
2916
2917 if (self->fast && !fast_save_leave(self, obj))
2918 status = -1;
2919
2920 return status;
2921}
2922
2923static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002924save_set(PicklerObject *self, PyObject *obj)
2925{
2926 PyObject *item;
2927 int i;
2928 Py_ssize_t set_size, ppos = 0;
2929 Py_hash_t hash;
2930
2931 const char empty_set_op = EMPTY_SET;
2932 const char mark_op = MARK;
2933 const char additems_op = ADDITEMS;
2934
2935 if (self->proto < 4) {
2936 PyObject *items;
2937 PyObject *reduce_value;
2938 int status;
2939
2940 items = PySequence_List(obj);
2941 if (items == NULL) {
2942 return -1;
2943 }
2944 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
2945 Py_DECREF(items);
2946 if (reduce_value == NULL) {
2947 return -1;
2948 }
2949 /* save_reduce() will memoize the object automatically. */
2950 status = save_reduce(self, reduce_value, obj);
2951 Py_DECREF(reduce_value);
2952 return status;
2953 }
2954
2955 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
2956 return -1;
2957
2958 if (memo_put(self, obj) < 0)
2959 return -1;
2960
2961 set_size = PySet_GET_SIZE(obj);
2962 if (set_size == 0)
2963 return 0; /* nothing to do */
2964
2965 /* Write in batches of BATCHSIZE. */
2966 do {
2967 i = 0;
2968 if (_Pickler_Write(self, &mark_op, 1) < 0)
2969 return -1;
2970 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
2971 if (save(self, item, 0) < 0)
2972 return -1;
2973 if (++i == BATCHSIZE)
2974 break;
2975 }
2976 if (_Pickler_Write(self, &additems_op, 1) < 0)
2977 return -1;
2978 if (PySet_GET_SIZE(obj) != set_size) {
2979 PyErr_Format(
2980 PyExc_RuntimeError,
2981 "set changed size during iteration");
2982 return -1;
2983 }
2984 } while (i == BATCHSIZE);
2985
2986 return 0;
2987}
2988
2989static int
2990save_frozenset(PicklerObject *self, PyObject *obj)
2991{
2992 PyObject *iter;
2993
2994 const char mark_op = MARK;
2995 const char frozenset_op = FROZENSET;
2996
2997 if (self->fast && !fast_save_enter(self, obj))
2998 return -1;
2999
3000 if (self->proto < 4) {
3001 PyObject *items;
3002 PyObject *reduce_value;
3003 int status;
3004
3005 items = PySequence_List(obj);
3006 if (items == NULL) {
3007 return -1;
3008 }
3009 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
3010 items);
3011 Py_DECREF(items);
3012 if (reduce_value == NULL) {
3013 return -1;
3014 }
3015 /* save_reduce() will memoize the object automatically. */
3016 status = save_reduce(self, reduce_value, obj);
3017 Py_DECREF(reduce_value);
3018 return status;
3019 }
3020
3021 if (_Pickler_Write(self, &mark_op, 1) < 0)
3022 return -1;
3023
3024 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003025 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01003026 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01003027 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003028 for (;;) {
3029 PyObject *item;
3030
3031 item = PyIter_Next(iter);
3032 if (item == NULL) {
3033 if (PyErr_Occurred()) {
3034 Py_DECREF(iter);
3035 return -1;
3036 }
3037 break;
3038 }
3039 if (save(self, item, 0) < 0) {
3040 Py_DECREF(item);
3041 Py_DECREF(iter);
3042 return -1;
3043 }
3044 Py_DECREF(item);
3045 }
3046 Py_DECREF(iter);
3047
3048 /* If the object is already in the memo, this means it is
3049 recursive. In this case, throw away everything we put on the
3050 stack, and fetch the object back from the memo. */
3051 if (PyMemoTable_Get(self->memo, obj)) {
3052 const char pop_mark_op = POP_MARK;
3053
3054 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
3055 return -1;
3056 if (memo_get(self, obj) < 0)
3057 return -1;
3058 return 0;
3059 }
3060
3061 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
3062 return -1;
3063 if (memo_put(self, obj) < 0)
3064 return -1;
3065
3066 return 0;
3067}
3068
3069static int
3070fix_imports(PyObject **module_name, PyObject **global_name)
3071{
3072 PyObject *key;
3073 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003074 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003075
3076 key = PyTuple_Pack(2, *module_name, *global_name);
3077 if (key == NULL)
3078 return -1;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003079 item = PyDict_GetItemWithError(st->name_mapping_3to2, key);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003080 Py_DECREF(key);
3081 if (item) {
3082 PyObject *fixed_module_name;
3083 PyObject *fixed_global_name;
3084
3085 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3086 PyErr_Format(PyExc_RuntimeError,
3087 "_compat_pickle.REVERSE_NAME_MAPPING values "
3088 "should be 2-tuples, not %.200s",
3089 Py_TYPE(item)->tp_name);
3090 return -1;
3091 }
3092 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3093 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3094 if (!PyUnicode_Check(fixed_module_name) ||
3095 !PyUnicode_Check(fixed_global_name)) {
3096 PyErr_Format(PyExc_RuntimeError,
3097 "_compat_pickle.REVERSE_NAME_MAPPING values "
3098 "should be pairs of str, not (%.200s, %.200s)",
3099 Py_TYPE(fixed_module_name)->tp_name,
3100 Py_TYPE(fixed_global_name)->tp_name);
3101 return -1;
3102 }
3103
3104 Py_CLEAR(*module_name);
3105 Py_CLEAR(*global_name);
3106 Py_INCREF(fixed_module_name);
3107 Py_INCREF(fixed_global_name);
3108 *module_name = fixed_module_name;
3109 *global_name = fixed_global_name;
Serhiy Storchakabfe18242015-03-31 13:12:37 +03003110 return 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003111 }
3112 else if (PyErr_Occurred()) {
3113 return -1;
3114 }
3115
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003116 item = PyDict_GetItemWithError(st->import_mapping_3to2, *module_name);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003117 if (item) {
3118 if (!PyUnicode_Check(item)) {
3119 PyErr_Format(PyExc_RuntimeError,
3120 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3121 "should be strings, not %.200s",
3122 Py_TYPE(item)->tp_name);
3123 return -1;
3124 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003125 Py_INCREF(item);
Serhiy Storchaka48842712016-04-06 09:45:48 +03003126 Py_XSETREF(*module_name, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003127 }
3128 else if (PyErr_Occurred()) {
3129 return -1;
3130 }
3131
3132 return 0;
3133}
3134
3135static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003136save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3137{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003138 PyObject *global_name = NULL;
3139 PyObject *module_name = NULL;
3140 PyObject *module = NULL;
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003141 PyObject *parent = NULL;
3142 PyObject *dotted_path = NULL;
3143 PyObject *lastname = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003144 PyObject *cls;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003145 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003146 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003147 _Py_IDENTIFIER(__name__);
3148 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003149
3150 const char global_op = GLOBAL;
3151
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003152 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003153 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003154 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003155 }
3156 else {
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003157 global_name = _PyObject_GetAttrId(obj, &PyId___qualname__);
3158 if (global_name == NULL) {
3159 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
3160 goto error;
3161 PyErr_Clear();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003162 }
3163 if (global_name == NULL) {
3164 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3165 if (global_name == NULL)
3166 goto error;
3167 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003168 }
3169
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003170 dotted_path = get_dotted_path(module, global_name);
3171 if (dotted_path == NULL)
3172 goto error;
3173 module_name = whichmodule(obj, dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003174 if (module_name == NULL)
3175 goto error;
3176
3177 /* XXX: Change to use the import C API directly with level=0 to disallow
3178 relative imports.
3179
3180 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3181 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3182 custom import functions (IMHO, this would be a nice security
3183 feature). The import C API would need to be extended to support the
3184 extra parameters of __import__ to fix that. */
3185 module = PyImport_Import(module_name);
3186 if (module == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003187 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003188 "Can't pickle %R: import of module %R failed",
3189 obj, module_name);
3190 goto error;
3191 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003192 lastname = PyList_GET_ITEM(dotted_path, PyList_GET_SIZE(dotted_path)-1);
3193 Py_INCREF(lastname);
3194 cls = get_deep_attribute(module, dotted_path, &parent);
3195 Py_CLEAR(dotted_path);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003196 if (cls == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003197 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003198 "Can't pickle %R: attribute lookup %S on %S failed",
3199 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003200 goto error;
3201 }
3202 if (cls != obj) {
3203 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003204 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003205 "Can't pickle %R: it's not the same object as %S.%S",
3206 obj, module_name, global_name);
3207 goto error;
3208 }
3209 Py_DECREF(cls);
3210
3211 if (self->proto >= 2) {
3212 /* See whether this is in the extension registry, and if
3213 * so generate an EXT opcode.
3214 */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003215 PyObject *extension_key;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003216 PyObject *code_obj; /* extension code as Python object */
3217 long code; /* extension code as C value */
3218 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003219 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003220
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003221 extension_key = PyTuple_Pack(2, module_name, global_name);
3222 if (extension_key == NULL) {
3223 goto error;
3224 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003225 code_obj = PyDict_GetItemWithError(st->extension_registry,
3226 extension_key);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003227 Py_DECREF(extension_key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003228 /* The object is not registered in the extension registry.
3229 This is the most likely code path. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003230 if (code_obj == NULL) {
3231 if (PyErr_Occurred()) {
3232 goto error;
3233 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003234 goto gen_global;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003235 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003236
3237 /* XXX: pickle.py doesn't check neither the type, nor the range
3238 of the value returned by the extension_registry. It should for
3239 consistency. */
3240
3241 /* Verify code_obj has the right type and value. */
3242 if (!PyLong_Check(code_obj)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003243 PyErr_Format(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003244 "Can't pickle %R: extension code %R isn't an integer",
3245 obj, code_obj);
3246 goto error;
3247 }
3248 code = PyLong_AS_LONG(code_obj);
3249 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003250 if (!PyErr_Occurred())
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003251 PyErr_Format(st->PicklingError, "Can't pickle %R: extension "
3252 "code %ld is out of range", obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003253 goto error;
3254 }
3255
3256 /* Generate an EXT opcode. */
3257 if (code <= 0xff) {
3258 pdata[0] = EXT1;
3259 pdata[1] = (unsigned char)code;
3260 n = 2;
3261 }
3262 else if (code <= 0xffff) {
3263 pdata[0] = EXT2;
3264 pdata[1] = (unsigned char)(code & 0xff);
3265 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3266 n = 3;
3267 }
3268 else {
3269 pdata[0] = EXT4;
3270 pdata[1] = (unsigned char)(code & 0xff);
3271 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3272 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3273 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3274 n = 5;
3275 }
3276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003277 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003278 goto error;
3279 }
3280 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003281 gen_global:
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003282 if (parent == module) {
3283 Py_INCREF(lastname);
3284 Py_DECREF(global_name);
3285 global_name = lastname;
3286 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003287 if (self->proto >= 4) {
3288 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003289
Christian Heimese8b1ba12013-11-23 21:13:39 +01003290 if (save(self, module_name, 0) < 0)
3291 goto error;
3292 if (save(self, global_name, 0) < 0)
3293 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003294
3295 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3296 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003297 }
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003298 else if (parent != module) {
3299 PickleState *st = _Pickle_GetGlobalState();
3300 PyObject *reduce_value = Py_BuildValue("(O(OO))",
3301 st->getattr, parent, lastname);
3302 status = save_reduce(self, reduce_value, NULL);
3303 Py_DECREF(reduce_value);
3304 if (status < 0)
3305 goto error;
3306 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003307 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003308 /* Generate a normal global opcode if we are using a pickle
3309 protocol < 4, or if the object is not registered in the
3310 extension registry. */
3311 PyObject *encoded;
3312 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003313
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003314 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003315 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003316
3317 /* For protocol < 3 and if the user didn't request against doing
3318 so, we convert module names to the old 2.x module names. */
3319 if (self->proto < 3 && self->fix_imports) {
3320 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003321 goto error;
3322 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003323 }
3324
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003325 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3326 both the module name and the global name using UTF-8. We do so
3327 only when we are using the pickle protocol newer than version
3328 3. This is to ensure compatibility with older Unpickler running
3329 on Python 2.x. */
3330 if (self->proto == 3) {
3331 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003332 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003333 else {
3334 unicode_encoder = PyUnicode_AsASCIIString;
3335 }
3336 encoded = unicode_encoder(module_name);
3337 if (encoded == NULL) {
3338 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003339 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003340 "can't pickle module identifier '%S' using "
3341 "pickle protocol %i",
3342 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003343 goto error;
3344 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003345 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3346 PyBytes_GET_SIZE(encoded)) < 0) {
3347 Py_DECREF(encoded);
3348 goto error;
3349 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003350 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003351 if(_Pickler_Write(self, "\n", 1) < 0)
3352 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003353
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003354 /* Save the name of the module. */
3355 encoded = unicode_encoder(global_name);
3356 if (encoded == NULL) {
3357 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003358 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003359 "can't pickle global identifier '%S' using "
3360 "pickle protocol %i",
3361 global_name, self->proto);
3362 goto error;
3363 }
3364 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3365 PyBytes_GET_SIZE(encoded)) < 0) {
3366 Py_DECREF(encoded);
3367 goto error;
3368 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003369 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003370 if (_Pickler_Write(self, "\n", 1) < 0)
3371 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003373 /* Memoize the object. */
3374 if (memo_put(self, obj) < 0)
3375 goto error;
3376 }
3377
3378 if (0) {
3379 error:
3380 status = -1;
3381 }
3382 Py_XDECREF(module_name);
3383 Py_XDECREF(global_name);
3384 Py_XDECREF(module);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03003385 Py_XDECREF(parent);
3386 Py_XDECREF(dotted_path);
3387 Py_XDECREF(lastname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003388
3389 return status;
3390}
3391
3392static int
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003393save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
3394{
3395 PyObject *reduce_value;
3396 int status;
3397
3398 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
3399 if (reduce_value == NULL) {
3400 return -1;
3401 }
3402 status = save_reduce(self, reduce_value, obj);
3403 Py_DECREF(reduce_value);
3404 return status;
3405}
3406
3407static int
3408save_type(PicklerObject *self, PyObject *obj)
3409{
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003410 if (obj == (PyObject *)&_PyNone_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003411 return save_singleton_type(self, obj, Py_None);
3412 }
3413 else if (obj == (PyObject *)&PyEllipsis_Type) {
3414 return save_singleton_type(self, obj, Py_Ellipsis);
3415 }
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08003416 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003417 return save_singleton_type(self, obj, Py_NotImplemented);
3418 }
3419 return save_global(self, obj, NULL);
3420}
3421
3422static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003423save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
3424{
3425 PyObject *pid = NULL;
3426 int status = 0;
3427
3428 const char persid_op = PERSID;
3429 const char binpersid_op = BINPERSID;
3430
3431 Py_INCREF(obj);
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08003432 pid = _Pickle_FastCall(func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003433 if (pid == NULL)
3434 return -1;
3435
3436 if (pid != Py_None) {
3437 if (self->bin) {
3438 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003439 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003440 goto error;
3441 }
3442 else {
3443 PyObject *pid_str = NULL;
3444 char *pid_ascii_bytes;
3445 Py_ssize_t size;
3446
3447 pid_str = PyObject_Str(pid);
3448 if (pid_str == NULL)
3449 goto error;
3450
3451 /* XXX: Should it check whether the persistent id only contains
3452 ASCII characters? And what if the pid contains embedded
3453 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003454 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003455 Py_DECREF(pid_str);
3456 if (pid_ascii_bytes == NULL)
3457 goto error;
3458
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003459 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3460 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
3461 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003462 goto error;
3463 }
3464 status = 1;
3465 }
3466
3467 if (0) {
3468 error:
3469 status = -1;
3470 }
3471 Py_XDECREF(pid);
3472
3473 return status;
3474}
3475
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003476static PyObject *
3477get_class(PyObject *obj)
3478{
3479 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003480 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003481
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003482 cls = _PyObject_GetAttrId(obj, &PyId___class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003483 if (cls == NULL) {
3484 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
3485 PyErr_Clear();
3486 cls = (PyObject *) Py_TYPE(obj);
3487 Py_INCREF(cls);
3488 }
3489 }
3490 return cls;
3491}
3492
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003493/* We're saving obj, and args is the 2-thru-5 tuple returned by the
3494 * appropriate __reduce__ method for obj.
3495 */
3496static int
3497save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3498{
3499 PyObject *callable;
3500 PyObject *argtup;
3501 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003502 PyObject *listitems = Py_None;
3503 PyObject *dictitems = Py_None;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003504 PickleState *st = _Pickle_GetGlobalState();
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003505 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003506 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003507
3508 const char reduce_op = REDUCE;
3509 const char build_op = BUILD;
3510 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003511 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003512
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003513 size = PyTuple_Size(args);
3514 if (size < 2 || size > 5) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003515 PyErr_SetString(st->PicklingError, "tuple returned by "
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003516 "__reduce__ must contain 2 through 5 elements");
3517 return -1;
3518 }
3519
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003520 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3521 &callable, &argtup, &state, &listitems, &dictitems))
3522 return -1;
3523
3524 if (!PyCallable_Check(callable)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003525 PyErr_SetString(st->PicklingError, "first item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003526 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003527 return -1;
3528 }
3529 if (!PyTuple_Check(argtup)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003530 PyErr_SetString(st->PicklingError, "second item of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003531 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003532 return -1;
3533 }
3534
3535 if (state == Py_None)
3536 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003537
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003538 if (listitems == Py_None)
3539 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003540 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003541 PyErr_Format(st->PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003542 "returned by __reduce__ must be an iterator, not %s",
3543 Py_TYPE(listitems)->tp_name);
3544 return -1;
3545 }
3546
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003547 if (dictitems == Py_None)
3548 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003549 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003550 PyErr_Format(st->PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003551 "returned by __reduce__ must be an iterator, not %s",
3552 Py_TYPE(dictitems)->tp_name);
3553 return -1;
3554 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003555
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003556 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003557 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003558 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003559
Victor Stinner804e05e2013-11-14 01:26:17 +01003560 name = _PyObject_GetAttrId(callable, &PyId___name__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003561 if (name == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003562 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003563 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003564 }
3565 PyErr_Clear();
3566 }
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02003567 else if (PyUnicode_Check(name)) {
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03003568 _Py_IDENTIFIER(__newobj_ex__);
3569 use_newobj_ex = PyUnicode_Compare(
3570 name, _PyUnicode_FromId(&PyId___newobj_ex__)) == 0;
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02003571 if (!use_newobj_ex) {
3572 _Py_IDENTIFIER(__newobj__);
3573 use_newobj = PyUnicode_Compare(
3574 name, _PyUnicode_FromId(&PyId___newobj__)) == 0;
3575 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003576 }
Serhiy Storchaka707b5cc2014-12-16 19:43:46 +02003577 Py_XDECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003578 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003579
3580 if (use_newobj_ex) {
3581 PyObject *cls;
3582 PyObject *args;
3583 PyObject *kwargs;
3584
3585 if (Py_SIZE(argtup) != 3) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003586 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003587 "length of the NEWOBJ_EX argument tuple must be "
3588 "exactly 3, not %zd", Py_SIZE(argtup));
3589 return -1;
3590 }
3591
3592 cls = PyTuple_GET_ITEM(argtup, 0);
3593 if (!PyType_Check(cls)) {
Larry Hastings61272b72014-01-07 12:41:53 -08003594 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003595 "first item from NEWOBJ_EX argument tuple must "
3596 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3597 return -1;
3598 }
3599 args = PyTuple_GET_ITEM(argtup, 1);
3600 if (!PyTuple_Check(args)) {
Larry Hastings61272b72014-01-07 12:41:53 -08003601 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003602 "second item from NEWOBJ_EX argument tuple must "
3603 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3604 return -1;
3605 }
3606 kwargs = PyTuple_GET_ITEM(argtup, 2);
3607 if (!PyDict_Check(kwargs)) {
Larry Hastings61272b72014-01-07 12:41:53 -08003608 PyErr_Format(st->PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003609 "third item from NEWOBJ_EX argument tuple must "
3610 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3611 return -1;
3612 }
3613
Serhiy Storchaka0d554d72015-10-10 22:42:18 +03003614 if (self->proto >= 4) {
3615 if (save(self, cls, 0) < 0 ||
3616 save(self, args, 0) < 0 ||
3617 save(self, kwargs, 0) < 0 ||
3618 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3619 return -1;
3620 }
3621 }
3622 else {
3623 PyObject *newargs;
3624 PyObject *cls_new;
3625 Py_ssize_t i;
3626 _Py_IDENTIFIER(__new__);
3627
3628 newargs = PyTuple_New(Py_SIZE(args) + 2);
3629 if (newargs == NULL)
3630 return -1;
3631
3632 cls_new = _PyObject_GetAttrId(cls, &PyId___new__);
3633 if (cls_new == NULL) {
3634 Py_DECREF(newargs);
3635 return -1;
3636 }
3637 PyTuple_SET_ITEM(newargs, 0, cls_new);
3638 Py_INCREF(cls);
3639 PyTuple_SET_ITEM(newargs, 1, cls);
3640 for (i = 0; i < Py_SIZE(args); i++) {
3641 PyObject *item = PyTuple_GET_ITEM(args, i);
3642 Py_INCREF(item);
3643 PyTuple_SET_ITEM(newargs, i + 2, item);
3644 }
3645
3646 callable = PyObject_Call(st->partial, newargs, kwargs);
3647 Py_DECREF(newargs);
3648 if (callable == NULL)
3649 return -1;
3650
3651 newargs = PyTuple_New(0);
3652 if (newargs == NULL) {
3653 Py_DECREF(callable);
3654 return -1;
3655 }
3656
3657 if (save(self, callable, 0) < 0 ||
3658 save(self, newargs, 0) < 0 ||
3659 _Pickler_Write(self, &reduce_op, 1) < 0) {
3660 Py_DECREF(newargs);
3661 Py_DECREF(callable);
3662 return -1;
3663 }
3664 Py_DECREF(newargs);
3665 Py_DECREF(callable);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003666 }
3667 }
3668 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003669 PyObject *cls;
3670 PyObject *newargtup;
3671 PyObject *obj_class;
3672 int p;
3673
3674 /* Sanity checks. */
3675 if (Py_SIZE(argtup) < 1) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003676 PyErr_SetString(st->PicklingError, "__newobj__ arglist is empty");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003677 return -1;
3678 }
3679
3680 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003681 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003682 PyErr_SetString(st->PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003683 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003684 return -1;
3685 }
3686
3687 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003688 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003689 p = obj_class != cls; /* true iff a problem */
3690 Py_DECREF(obj_class);
3691 if (p) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003692 PyErr_SetString(st->PicklingError, "args[0] from "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003693 "__newobj__ args has the wrong class");
3694 return -1;
3695 }
3696 }
3697 /* XXX: These calls save() are prone to infinite recursion. Imagine
3698 what happen if the value returned by the __reduce__() method of
3699 some extension type contains another object of the same type. Ouch!
3700
3701 Here is a quick example, that I ran into, to illustrate what I
3702 mean:
3703
3704 >>> import pickle, copyreg
3705 >>> copyreg.dispatch_table.pop(complex)
3706 >>> pickle.dumps(1+2j)
3707 Traceback (most recent call last):
3708 ...
Yury Selivanovf488fb42015-07-03 01:04:23 -04003709 RecursionError: maximum recursion depth exceeded
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003710
3711 Removing the complex class from copyreg.dispatch_table made the
3712 __reduce_ex__() method emit another complex object:
3713
3714 >>> (1+1j).__reduce_ex__(2)
3715 (<function __newobj__ at 0xb7b71c3c>,
3716 (<class 'complex'>, (1+1j)), None, None, None)
3717
3718 Thus when save() was called on newargstup (the 2nd item) recursion
3719 ensued. Of course, the bug was in the complex class which had a
3720 broken __getnewargs__() that emitted another complex object. But,
3721 the point, here, is it is quite easy to end up with a broken reduce
3722 function. */
3723
3724 /* Save the class and its __new__ arguments. */
3725 if (save(self, cls, 0) < 0)
3726 return -1;
3727
3728 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3729 if (newargtup == NULL)
3730 return -1;
3731
3732 p = save(self, newargtup, 0);
3733 Py_DECREF(newargtup);
3734 if (p < 0)
3735 return -1;
3736
3737 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003738 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003739 return -1;
3740 }
3741 else { /* Not using NEWOBJ. */
3742 if (save(self, callable, 0) < 0 ||
3743 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003744 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003745 return -1;
3746 }
3747
3748 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3749 the caller do not want to memoize the object. Not particularly useful,
3750 but that is to mimic the behavior save_reduce() in pickle.py when
3751 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003752 if (obj != NULL) {
3753 /* If the object is already in the memo, this means it is
3754 recursive. In this case, throw away everything we put on the
3755 stack, and fetch the object back from the memo. */
3756 if (PyMemoTable_Get(self->memo, obj)) {
3757 const char pop_op = POP;
3758
3759 if (_Pickler_Write(self, &pop_op, 1) < 0)
3760 return -1;
3761 if (memo_get(self, obj) < 0)
3762 return -1;
3763
3764 return 0;
3765 }
3766 else if (memo_put(self, obj) < 0)
3767 return -1;
3768 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003769
3770 if (listitems && batch_list(self, listitems) < 0)
3771 return -1;
3772
3773 if (dictitems && batch_dict(self, dictitems) < 0)
3774 return -1;
3775
3776 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003777 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003778 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003779 return -1;
3780 }
3781
3782 return 0;
3783}
3784
3785static int
3786save(PicklerObject *self, PyObject *obj, int pers_save)
3787{
3788 PyTypeObject *type;
3789 PyObject *reduce_func = NULL;
3790 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003791 int status = 0;
3792
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08003793 if (_Pickler_OpcodeBoundary(self) < 0)
3794 return -1;
3795
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003796 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003797 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003798
3799 /* The extra pers_save argument is necessary to avoid calling save_pers()
3800 on its returned object. */
3801 if (!pers_save && self->pers_func) {
3802 /* save_pers() returns:
3803 -1 to signal an error;
3804 0 if it did nothing successfully;
3805 1 if a persistent id was saved.
3806 */
3807 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3808 goto done;
3809 }
3810
3811 type = Py_TYPE(obj);
3812
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003813 /* The old cPickle had an optimization that used switch-case statement
3814 dispatching on the first letter of the type name. This has was removed
3815 since benchmarks shown that this optimization was actually slowing
3816 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003817
3818 /* Atom types; these aren't memoized, so don't check the memo. */
3819
3820 if (obj == Py_None) {
3821 status = save_none(self, obj);
3822 goto done;
3823 }
3824 else if (obj == Py_False || obj == Py_True) {
3825 status = save_bool(self, obj);
3826 goto done;
3827 }
3828 else if (type == &PyLong_Type) {
3829 status = save_long(self, obj);
3830 goto done;
3831 }
3832 else if (type == &PyFloat_Type) {
3833 status = save_float(self, obj);
3834 goto done;
3835 }
3836
3837 /* Check the memo to see if it has the object. If so, generate
3838 a GET (or BINGET) opcode, instead of pickling the object
3839 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003840 if (PyMemoTable_Get(self->memo, obj)) {
3841 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003842 goto error;
3843 goto done;
3844 }
3845
3846 if (type == &PyBytes_Type) {
3847 status = save_bytes(self, obj);
3848 goto done;
3849 }
3850 else if (type == &PyUnicode_Type) {
3851 status = save_unicode(self, obj);
3852 goto done;
3853 }
3854 else if (type == &PyDict_Type) {
3855 status = save_dict(self, obj);
3856 goto done;
3857 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003858 else if (type == &PySet_Type) {
3859 status = save_set(self, obj);
3860 goto done;
3861 }
3862 else if (type == &PyFrozenSet_Type) {
3863 status = save_frozenset(self, obj);
3864 goto done;
3865 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003866 else if (type == &PyList_Type) {
3867 status = save_list(self, obj);
3868 goto done;
3869 }
3870 else if (type == &PyTuple_Type) {
3871 status = save_tuple(self, obj);
3872 goto done;
3873 }
3874 else if (type == &PyType_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003875 status = save_type(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003876 goto done;
3877 }
3878 else if (type == &PyFunction_Type) {
3879 status = save_global(self, obj, NULL);
Alexandre Vassalottifc912852013-11-24 03:07:35 -08003880 goto done;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003881 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003882
3883 /* XXX: This part needs some unit tests. */
3884
3885 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003886 * self.dispatch_table, copyreg.dispatch_table, the object's
3887 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003888 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003889 if (self->dispatch_table == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003890 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003891 reduce_func = PyDict_GetItemWithError(st->dispatch_table,
3892 (PyObject *)type);
3893 if (reduce_func == NULL) {
3894 if (PyErr_Occurred()) {
3895 goto error;
3896 }
3897 } else {
3898 /* PyDict_GetItemWithError() returns a borrowed reference.
3899 Increase the reference count to be consistent with
3900 PyObject_GetItem and _PyObject_GetAttrId used below. */
3901 Py_INCREF(reduce_func);
3902 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003903 } else {
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08003904 reduce_func = PyObject_GetItem(self->dispatch_table,
3905 (PyObject *)type);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003906 if (reduce_func == NULL) {
3907 if (PyErr_ExceptionMatches(PyExc_KeyError))
3908 PyErr_Clear();
3909 else
3910 goto error;
3911 }
3912 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003913 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003914 Py_INCREF(obj);
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08003915 reduce_value = _Pickle_FastCall(reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003916 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003917 else if (PyType_IsSubtype(type, &PyType_Type)) {
3918 status = save_global(self, obj, NULL);
3919 goto done;
3920 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003921 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003922 _Py_IDENTIFIER(__reduce__);
3923 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003924
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003925
3926 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3927 automatically defined as __reduce__. While this is convenient, this
3928 make it impossible to know which method was actually called. Of
3929 course, this is not a big deal. But still, it would be nice to let
3930 the user know which method was called when something go
3931 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3932 don't actually have to check for a __reduce__ method. */
3933
3934 /* Check for a __reduce_ex__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003935 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003936 if (reduce_func != NULL) {
3937 PyObject *proto;
3938 proto = PyLong_FromLong(self->proto);
3939 if (proto != NULL) {
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08003940 reduce_value = _Pickle_FastCall(reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003941 }
3942 }
3943 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003944 PickleState *st = _Pickle_GetGlobalState();
3945
3946 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003947 PyErr_Clear();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003948 }
3949 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003950 goto error;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003951 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003952 /* Check for a __reduce__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003953 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003954 if (reduce_func != NULL) {
Alexandre Vassalotti6bf41e52013-11-28 15:17:29 -08003955 PyObject *empty_tuple = PyTuple_New(0);
3956 reduce_value = PyObject_Call(reduce_func, empty_tuple,
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003957 NULL);
Alexandre Vassalotti6bf41e52013-11-28 15:17:29 -08003958 Py_DECREF(empty_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003959 }
3960 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003961 PyErr_Format(st->PicklingError,
3962 "can't pickle '%.200s' object: %R",
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003963 type->tp_name, obj);
3964 goto error;
3965 }
3966 }
3967 }
3968
3969 if (reduce_value == NULL)
3970 goto error;
3971
3972 if (PyUnicode_Check(reduce_value)) {
3973 status = save_global(self, obj, reduce_value);
3974 goto done;
3975 }
3976
3977 if (!PyTuple_Check(reduce_value)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08003978 PickleState *st = _Pickle_GetGlobalState();
3979 PyErr_SetString(st->PicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003980 "__reduce__ must return a string or tuple");
3981 goto error;
3982 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003983
3984 status = save_reduce(self, reduce_value, obj);
3985
3986 if (0) {
3987 error:
3988 status = -1;
3989 }
3990 done:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08003991
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003992 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003993 Py_XDECREF(reduce_func);
3994 Py_XDECREF(reduce_value);
3995
3996 return status;
3997}
3998
3999static int
4000dump(PicklerObject *self, PyObject *obj)
4001{
4002 const char stop_op = STOP;
4003
4004 if (self->proto >= 2) {
4005 char header[2];
4006
4007 header[0] = PROTO;
4008 assert(self->proto >= 0 && self->proto < 256);
4009 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004010 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004011 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004012 if (self->proto >= 4)
4013 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004014 }
4015
4016 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004017 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004018 return -1;
4019
4020 return 0;
4021}
4022
Larry Hastings61272b72014-01-07 12:41:53 -08004023/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004024
4025_pickle.Pickler.clear_memo
4026
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004027Clears the pickler's "memo".
4028
4029The memo is the data structure that remembers which objects the
4030pickler has already seen, so that shared or recursive objects are
4031pickled by reference and not by value. This method is useful when
4032re-using picklers.
Larry Hastings61272b72014-01-07 12:41:53 -08004033[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004034
Larry Hastings3cceb382014-01-04 11:09:09 -08004035static PyObject *
4036_pickle_Pickler_clear_memo_impl(PicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004037/*[clinic end generated code: output=8665c8658aaa094b input=01bdad52f3d93e56]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004038{
4039 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004040 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004041
4042 Py_RETURN_NONE;
4043}
4044
Larry Hastings61272b72014-01-07 12:41:53 -08004045/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004046
4047_pickle.Pickler.dump
4048
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004049 obj: object
4050 /
4051
4052Write a pickled representation of the given object to the open file.
Larry Hastings61272b72014-01-07 12:41:53 -08004053[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004054
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004055static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004056_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
Larry Hastings581ee362014-01-28 05:00:08 -08004057/*[clinic end generated code: output=87ecad1261e02ac7 input=552eb1c0f52260d9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004058{
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004059 /* Check whether the Pickler was initialized correctly (issue3664).
4060 Developers often forget to call __init__() in their subclasses, which
4061 would trigger a segfault without this check. */
4062 if (self->write == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004063 PickleState *st = _Pickle_GetGlobalState();
4064 PyErr_Format(st->PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00004065 "Pickler.__init__() was not called by %s.__init__()",
4066 Py_TYPE(self)->tp_name);
4067 return NULL;
4068 }
4069
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004070 if (_Pickler_ClearBuffer(self) < 0)
4071 return NULL;
4072
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004073 if (dump(self, obj) < 0)
4074 return NULL;
4075
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004076 if (_Pickler_FlushToFile(self) < 0)
4077 return NULL;
4078
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004079 Py_RETURN_NONE;
4080}
4081
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004082/*[clinic input]
4083
4084_pickle.Pickler.__sizeof__ -> Py_ssize_t
4085
4086Returns size in memory, in bytes.
4087[clinic start generated code]*/
4088
4089static Py_ssize_t
4090_pickle_Pickler___sizeof___impl(PicklerObject *self)
4091/*[clinic end generated code: output=106edb3123f332e1 input=8cbbec9bd5540d42]*/
4092{
4093 Py_ssize_t res, s;
4094
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02004095 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004096 if (self->memo != NULL) {
4097 res += sizeof(PyMemoTable);
4098 res += self->memo->mt_allocated * sizeof(PyMemoEntry);
4099 }
4100 if (self->output_buffer != NULL) {
4101 s = _PySys_GetSizeOf(self->output_buffer);
4102 if (s == -1)
4103 return -1;
4104 res += s;
4105 }
4106 return res;
4107}
4108
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004109static struct PyMethodDef Pickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004110 _PICKLE_PICKLER_DUMP_METHODDEF
4111 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02004112 _PICKLE_PICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004113 {NULL, NULL} /* sentinel */
4114};
4115
4116static void
4117Pickler_dealloc(PicklerObject *self)
4118{
4119 PyObject_GC_UnTrack(self);
4120
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004121 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004122 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004123 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004124 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004125 Py_XDECREF(self->fast_memo);
4126
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004127 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004128
4129 Py_TYPE(self)->tp_free((PyObject *)self);
4130}
4131
4132static int
4133Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
4134{
4135 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004136 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004137 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004138 Py_VISIT(self->fast_memo);
4139 return 0;
4140}
4141
4142static int
4143Pickler_clear(PicklerObject *self)
4144{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004145 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004146 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004147 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004148 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004149 Py_CLEAR(self->fast_memo);
4150
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004151 if (self->memo != NULL) {
4152 PyMemoTable *memo = self->memo;
4153 self->memo = NULL;
4154 PyMemoTable_Del(memo);
4155 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004156 return 0;
4157}
4158
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004159
Larry Hastings61272b72014-01-07 12:41:53 -08004160/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004161
4162_pickle.Pickler.__init__
4163
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004164 file: object
4165 protocol: object = NULL
4166 fix_imports: bool = True
4167
4168This takes a binary file for writing a pickle data stream.
4169
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004170The optional *protocol* argument tells the pickler to use the given
4171protocol; supported protocols are 0, 1, 2, 3 and 4. The default
4172protocol is 3; a backward-incompatible protocol designed for Python 3.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004173
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004174Specifying a negative protocol version selects the highest protocol
4175version supported. The higher the protocol used, the more recent the
4176version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004177
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004178The *file* argument must have a write() method that accepts a single
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004179bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00004180writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004181this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004182
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004183If *fix_imports* is True and protocol is less than 3, pickle will try
4184to map the new Python 3 names to the old module names used in Python
41852, so that the pickle data stream is readable with Python 2.
Larry Hastings61272b72014-01-07 12:41:53 -08004186[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004187
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004188static int
Larry Hastings89964c42015-04-14 18:07:59 -04004189_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file,
4190 PyObject *protocol, int fix_imports)
Martin Panter2eb819f2015-11-02 04:04:57 +00004191/*[clinic end generated code: output=b5f31078dab17fb0 input=4faabdbc763c2389]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004192{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004193 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004194 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004195
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004196 /* In case of multiple __init__() calls, clear previous content. */
4197 if (self->write != NULL)
4198 (void)Pickler_clear(self);
4199
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004200 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004201 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004202
4203 if (_Pickler_SetOutputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004204 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004205
4206 /* memo and output_buffer may have already been created in _Pickler_New */
4207 if (self->memo == NULL) {
4208 self->memo = PyMemoTable_New();
4209 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004210 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004211 }
4212 self->output_len = 0;
4213 if (self->output_buffer == NULL) {
4214 self->max_output_len = WRITE_BUF_SIZE;
4215 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4216 self->max_output_len);
4217 if (self->output_buffer == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004218 return -1;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004219 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004220
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004221 self->fast = 0;
4222 self->fast_nesting = 0;
4223 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004224 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02004225 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
4226 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
4227 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004228 if (self->pers_func == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004229 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004230 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004231 self->dispatch_table = NULL;
4232 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
4233 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
4234 &PyId_dispatch_table);
4235 if (self->dispatch_table == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004236 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004237 }
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004238
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004239 return 0;
4240}
4241
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004242
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004243/* Define a proxy object for the Pickler's internal memo object. This is to
4244 * avoid breaking code like:
4245 * pickler.memo.clear()
4246 * and
4247 * pickler.memo = saved_memo
4248 * Is this a good idea? Not really, but we don't want to break code that uses
4249 * it. Note that we don't implement the entire mapping API here. This is
4250 * intentional, as these should be treated as black-box implementation details.
4251 */
4252
Larry Hastings61272b72014-01-07 12:41:53 -08004253/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004254_pickle.PicklerMemoProxy.clear
4255
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004256Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08004257[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004258
Larry Hastings3cceb382014-01-04 11:09:09 -08004259static PyObject *
4260_pickle_PicklerMemoProxy_clear_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004261/*[clinic end generated code: output=5fb9370d48ae8b05 input=ccc186dacd0f1405]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004262{
4263 if (self->pickler->memo)
4264 PyMemoTable_Clear(self->pickler->memo);
4265 Py_RETURN_NONE;
4266}
4267
Larry Hastings61272b72014-01-07 12:41:53 -08004268/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004269_pickle.PicklerMemoProxy.copy
4270
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004271Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08004272[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004273
Larry Hastings3cceb382014-01-04 11:09:09 -08004274static PyObject *
4275_pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004276/*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004277{
4278 Py_ssize_t i;
4279 PyMemoTable *memo;
4280 PyObject *new_memo = PyDict_New();
4281 if (new_memo == NULL)
4282 return NULL;
4283
4284 memo = self->pickler->memo;
4285 for (i = 0; i < memo->mt_allocated; ++i) {
4286 PyMemoEntry entry = memo->mt_table[i];
4287 if (entry.me_key != NULL) {
4288 int status;
4289 PyObject *key, *value;
4290
4291 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004292 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004293
4294 if (key == NULL || value == NULL) {
4295 Py_XDECREF(key);
4296 Py_XDECREF(value);
4297 goto error;
4298 }
4299 status = PyDict_SetItem(new_memo, key, value);
4300 Py_DECREF(key);
4301 Py_DECREF(value);
4302 if (status < 0)
4303 goto error;
4304 }
4305 }
4306 return new_memo;
4307
4308 error:
4309 Py_XDECREF(new_memo);
4310 return NULL;
4311}
4312
Larry Hastings61272b72014-01-07 12:41:53 -08004313/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004314_pickle.PicklerMemoProxy.__reduce__
4315
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004316Implement pickle support.
Larry Hastings61272b72014-01-07 12:41:53 -08004317[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004318
Larry Hastings3cceb382014-01-04 11:09:09 -08004319static PyObject *
4320_pickle_PicklerMemoProxy___reduce___impl(PicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08004321/*[clinic end generated code: output=bebba1168863ab1d input=2f7c540e24b7aae4]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004322{
4323 PyObject *reduce_value, *dict_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08004324 PyObject *contents = _pickle_PicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004325 if (contents == NULL)
4326 return NULL;
4327
4328 reduce_value = PyTuple_New(2);
4329 if (reduce_value == NULL) {
4330 Py_DECREF(contents);
4331 return NULL;
4332 }
4333 dict_args = PyTuple_New(1);
4334 if (dict_args == NULL) {
4335 Py_DECREF(contents);
4336 Py_DECREF(reduce_value);
4337 return NULL;
4338 }
4339 PyTuple_SET_ITEM(dict_args, 0, contents);
4340 Py_INCREF((PyObject *)&PyDict_Type);
4341 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4342 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4343 return reduce_value;
4344}
4345
4346static PyMethodDef picklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004347 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4348 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4349 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004350 {NULL, NULL} /* sentinel */
4351};
4352
4353static void
4354PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4355{
4356 PyObject_GC_UnTrack(self);
4357 Py_XDECREF(self->pickler);
4358 PyObject_GC_Del((PyObject *)self);
4359}
4360
4361static int
4362PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4363 visitproc visit, void *arg)
4364{
4365 Py_VISIT(self->pickler);
4366 return 0;
4367}
4368
4369static int
4370PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4371{
4372 Py_CLEAR(self->pickler);
4373 return 0;
4374}
4375
4376static PyTypeObject PicklerMemoProxyType = {
4377 PyVarObject_HEAD_INIT(NULL, 0)
4378 "_pickle.PicklerMemoProxy", /*tp_name*/
4379 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4380 0,
4381 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4382 0, /* tp_print */
4383 0, /* tp_getattr */
4384 0, /* tp_setattr */
4385 0, /* tp_compare */
4386 0, /* tp_repr */
4387 0, /* tp_as_number */
4388 0, /* tp_as_sequence */
4389 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00004390 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004391 0, /* tp_call */
4392 0, /* tp_str */
4393 PyObject_GenericGetAttr, /* tp_getattro */
4394 PyObject_GenericSetAttr, /* tp_setattro */
4395 0, /* tp_as_buffer */
4396 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4397 0, /* tp_doc */
4398 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4399 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4400 0, /* tp_richcompare */
4401 0, /* tp_weaklistoffset */
4402 0, /* tp_iter */
4403 0, /* tp_iternext */
4404 picklerproxy_methods, /* tp_methods */
4405};
4406
4407static PyObject *
4408PicklerMemoProxy_New(PicklerObject *pickler)
4409{
4410 PicklerMemoProxyObject *self;
4411
4412 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4413 if (self == NULL)
4414 return NULL;
4415 Py_INCREF(pickler);
4416 self->pickler = pickler;
4417 PyObject_GC_Track(self);
4418 return (PyObject *)self;
4419}
4420
4421/*****************************************************************************/
4422
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004423static PyObject *
4424Pickler_get_memo(PicklerObject *self)
4425{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004426 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004427}
4428
4429static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004430Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004431{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004432 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004433
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004434 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004435 PyErr_SetString(PyExc_TypeError,
4436 "attribute deletion is not supported");
4437 return -1;
4438 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004439
4440 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4441 PicklerObject *pickler =
4442 ((PicklerMemoProxyObject *)obj)->pickler;
4443
4444 new_memo = PyMemoTable_Copy(pickler->memo);
4445 if (new_memo == NULL)
4446 return -1;
4447 }
4448 else if (PyDict_Check(obj)) {
4449 Py_ssize_t i = 0;
4450 PyObject *key, *value;
4451
4452 new_memo = PyMemoTable_New();
4453 if (new_memo == NULL)
4454 return -1;
4455
4456 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004457 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004458 PyObject *memo_obj;
4459
4460 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
4461 PyErr_SetString(PyExc_TypeError,
4462 "'memo' values must be 2-item tuples");
4463 goto error;
4464 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004465 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004466 if (memo_id == -1 && PyErr_Occurred())
4467 goto error;
4468 memo_obj = PyTuple_GET_ITEM(value, 1);
4469 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4470 goto error;
4471 }
4472 }
4473 else {
4474 PyErr_Format(PyExc_TypeError,
Serhiy Storchakab6a9c972016-04-17 09:39:28 +03004475 "'memo' attribute must be a PicklerMemoProxy object"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004476 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004477 return -1;
4478 }
4479
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004480 PyMemoTable_Del(self->memo);
4481 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004482
4483 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004484
4485 error:
4486 if (new_memo)
4487 PyMemoTable_Del(new_memo);
4488 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004489}
4490
4491static PyObject *
4492Pickler_get_persid(PicklerObject *self)
4493{
4494 if (self->pers_func == NULL)
4495 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4496 else
4497 Py_INCREF(self->pers_func);
4498 return self->pers_func;
4499}
4500
4501static int
4502Pickler_set_persid(PicklerObject *self, PyObject *value)
4503{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004504 if (value == NULL) {
4505 PyErr_SetString(PyExc_TypeError,
4506 "attribute deletion is not supported");
4507 return -1;
4508 }
4509 if (!PyCallable_Check(value)) {
4510 PyErr_SetString(PyExc_TypeError,
4511 "persistent_id must be a callable taking one argument");
4512 return -1;
4513 }
4514
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004515 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03004516 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004517
4518 return 0;
4519}
4520
4521static PyMemberDef Pickler_members[] = {
4522 {"bin", T_INT, offsetof(PicklerObject, bin)},
4523 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004524 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004525 {NULL}
4526};
4527
4528static PyGetSetDef Pickler_getsets[] = {
4529 {"memo", (getter)Pickler_get_memo,
4530 (setter)Pickler_set_memo},
4531 {"persistent_id", (getter)Pickler_get_persid,
4532 (setter)Pickler_set_persid},
4533 {NULL}
4534};
4535
4536static PyTypeObject Pickler_Type = {
4537 PyVarObject_HEAD_INIT(NULL, 0)
4538 "_pickle.Pickler" , /*tp_name*/
4539 sizeof(PicklerObject), /*tp_basicsize*/
4540 0, /*tp_itemsize*/
4541 (destructor)Pickler_dealloc, /*tp_dealloc*/
4542 0, /*tp_print*/
4543 0, /*tp_getattr*/
4544 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00004545 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004546 0, /*tp_repr*/
4547 0, /*tp_as_number*/
4548 0, /*tp_as_sequence*/
4549 0, /*tp_as_mapping*/
4550 0, /*tp_hash*/
4551 0, /*tp_call*/
4552 0, /*tp_str*/
4553 0, /*tp_getattro*/
4554 0, /*tp_setattro*/
4555 0, /*tp_as_buffer*/
4556 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004557 _pickle_Pickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004558 (traverseproc)Pickler_traverse, /*tp_traverse*/
4559 (inquiry)Pickler_clear, /*tp_clear*/
4560 0, /*tp_richcompare*/
4561 0, /*tp_weaklistoffset*/
4562 0, /*tp_iter*/
4563 0, /*tp_iternext*/
4564 Pickler_methods, /*tp_methods*/
4565 Pickler_members, /*tp_members*/
4566 Pickler_getsets, /*tp_getset*/
4567 0, /*tp_base*/
4568 0, /*tp_dict*/
4569 0, /*tp_descr_get*/
4570 0, /*tp_descr_set*/
4571 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08004572 _pickle_Pickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004573 PyType_GenericAlloc, /*tp_alloc*/
4574 PyType_GenericNew, /*tp_new*/
4575 PyObject_GC_Del, /*tp_free*/
4576 0, /*tp_is_gc*/
4577};
4578
Victor Stinner121aab42011-09-29 23:40:53 +02004579/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004580
4581 XXX: It would be nice to able to avoid Python function call overhead, by
4582 using directly the C version of find_class(), when find_class() is not
4583 overridden by a subclass. Although, this could become rather hackish. A
4584 simpler optimization would be to call the C function when self is not a
4585 subclass instance. */
4586static PyObject *
4587find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4588{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004589 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004590
4591 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
4592 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004593}
4594
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004595static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004596marker(UnpicklerObject *self)
4597{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02004598 Py_ssize_t mark;
4599
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004600 if (self->num_marks < 1) {
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02004601 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004602 PyErr_SetString(st->UnpicklingError, "could not find MARK");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004603 return -1;
4604 }
4605
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02004606 mark = self->marks[--self->num_marks];
4607 self->stack->mark_set = self->num_marks != 0;
4608 self->stack->fence = self->num_marks ?
4609 self->marks[self->num_marks - 1] : 0;
4610 return mark;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004611}
4612
4613static int
4614load_none(UnpicklerObject *self)
4615{
4616 PDATA_APPEND(self->stack, Py_None, -1);
4617 return 0;
4618}
4619
4620static int
4621bad_readline(void)
4622{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004623 PickleState *st = _Pickle_GetGlobalState();
4624 PyErr_SetString(st->UnpicklingError, "pickle data was truncated");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625 return -1;
4626}
4627
4628static int
4629load_int(UnpicklerObject *self)
4630{
4631 PyObject *value;
4632 char *endptr, *s;
4633 Py_ssize_t len;
4634 long x;
4635
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004636 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004637 return -1;
4638 if (len < 2)
4639 return bad_readline();
4640
4641 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004642 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004643 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004644 x = strtol(s, &endptr, 0);
4645
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004646 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004647 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03004648 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004649 errno = 0;
4650 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004651 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004652 if (value == NULL) {
4653 PyErr_SetString(PyExc_ValueError,
4654 "could not convert string to int");
4655 return -1;
4656 }
4657 }
4658 else {
4659 if (len == 3 && (x == 0 || x == 1)) {
4660 if ((value = PyBool_FromLong(x)) == NULL)
4661 return -1;
4662 }
4663 else {
4664 if ((value = PyLong_FromLong(x)) == NULL)
4665 return -1;
4666 }
4667 }
4668
4669 PDATA_PUSH(self->stack, value, -1);
4670 return 0;
4671}
4672
4673static int
4674load_bool(UnpicklerObject *self, PyObject *boolean)
4675{
4676 assert(boolean == Py_True || boolean == Py_False);
4677 PDATA_APPEND(self->stack, boolean, -1);
4678 return 0;
4679}
4680
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004681/* s contains x bytes of an unsigned little-endian integer. Return its value
4682 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4683 */
4684static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004685calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004686{
4687 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004688 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004689 size_t x = 0;
4690
Serhiy Storchakae0606192015-09-29 22:10:07 +03004691 if (nbytes > (int)sizeof(size_t)) {
4692 /* Check for integer overflow. BINBYTES8 and BINUNICODE8 opcodes
4693 * have 64-bit size that can't be represented on 32-bit platform.
4694 */
4695 for (i = (int)sizeof(size_t); i < nbytes; i++) {
4696 if (s[i])
4697 return -1;
4698 }
4699 nbytes = (int)sizeof(size_t);
4700 }
4701 for (i = 0; i < nbytes; i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004702 x |= (size_t) s[i] << (8 * i);
4703 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004704
4705 if (x > PY_SSIZE_T_MAX)
4706 return -1;
4707 else
4708 return (Py_ssize_t) x;
4709}
4710
/* bytes holds nbytes bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is a
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that placing a byte with
 * its high bit set into bits 24..31 never shifts into the sign bit of a
 * 32-bit long, which would be undefined behaviour.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        ux |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: a set bit 31 means the value is ux - 2**32.  Computing it
     * as -(2**32 - 1 - ux) - 1 keeps every intermediate within the range
     * of a long of any width.
     */
    if (nbytes == 4 && (ux & 0x80000000UL)) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
4737
4738static int
4739load_binintx(UnpicklerObject *self, char *s, int size)
4740{
4741 PyObject *value;
4742 long x;
4743
4744 x = calc_binint(s, size);
4745
4746 if ((value = PyLong_FromLong(x)) == NULL)
4747 return -1;
4748
4749 PDATA_PUSH(self->stack, value, -1);
4750 return 0;
4751}
4752
4753static int
4754load_binint(UnpicklerObject *self)
4755{
4756 char *s;
4757
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004758 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 return -1;
4760
4761 return load_binintx(self, s, 4);
4762}
4763
4764static int
4765load_binint1(UnpicklerObject *self)
4766{
4767 char *s;
4768
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004769 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004770 return -1;
4771
4772 return load_binintx(self, s, 1);
4773}
4774
4775static int
4776load_binint2(UnpicklerObject *self)
4777{
4778 char *s;
4779
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004780 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004781 return -1;
4782
4783 return load_binintx(self, s, 2);
4784}
4785
4786static int
4787load_long(UnpicklerObject *self)
4788{
4789 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004790 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004791 Py_ssize_t len;
4792
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004793 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004794 return -1;
4795 if (len < 2)
4796 return bad_readline();
4797
Mark Dickinson8dd05142009-01-20 20:43:58 +00004798 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4799 the 'L' before calling PyLong_FromString. In order to maintain
4800 compatibility with Python 3.0.0, we don't actually *require*
4801 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004802 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004803 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004804 /* XXX: Should the base argument explicitly set to 10? */
4805 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004806 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004807 return -1;
4808
4809 PDATA_PUSH(self->stack, value, -1);
4810 return 0;
4811}
4812
4813/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4814 * data following.
4815 */
4816static int
4817load_counted_long(UnpicklerObject *self, int size)
4818{
4819 PyObject *value;
4820 char *nbytes;
4821 char *pdata;
4822
4823 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004824 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004825 return -1;
4826
4827 size = calc_binint(nbytes, size);
4828 if (size < 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004829 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004830 /* Corrupt or hostile pickle -- we never write one like this */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004831 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004832 "LONG pickle has negative byte count");
4833 return -1;
4834 }
4835
4836 if (size == 0)
4837 value = PyLong_FromLong(0L);
4838 else {
4839 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004840 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004841 return -1;
4842 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4843 1 /* little endian */ , 1 /* signed */ );
4844 }
4845 if (value == NULL)
4846 return -1;
4847 PDATA_PUSH(self->stack, value, -1);
4848 return 0;
4849}
4850
4851static int
4852load_float(UnpicklerObject *self)
4853{
4854 PyObject *value;
4855 char *endptr, *s;
4856 Py_ssize_t len;
4857 double d;
4858
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004860 return -1;
4861 if (len < 2)
4862 return bad_readline();
4863
4864 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004865 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4866 if (d == -1.0 && PyErr_Occurred())
4867 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004868 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004869 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4870 return -1;
4871 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004872 value = PyFloat_FromDouble(d);
4873 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874 return -1;
4875
4876 PDATA_PUSH(self->stack, value, -1);
4877 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004879
4880static int
4881load_binfloat(UnpicklerObject *self)
4882{
4883 PyObject *value;
4884 double x;
4885 char *s;
4886
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004887 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004888 return -1;
4889
4890 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4891 if (x == -1.0 && PyErr_Occurred())
4892 return -1;
4893
4894 if ((value = PyFloat_FromDouble(x)) == NULL)
4895 return -1;
4896
4897 PDATA_PUSH(self->stack, value, -1);
4898 return 0;
4899}
4900
4901static int
4902load_string(UnpicklerObject *self)
4903{
4904 PyObject *bytes;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004905 PyObject *obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004906 Py_ssize_t len;
4907 char *s, *p;
4908
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004909 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004911 /* Strip the newline */
4912 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004913 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004914 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004915 p = s + 1;
4916 len -= 2;
4917 }
4918 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08004919 PickleState *st = _Pickle_GetGlobalState();
4920 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004921 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004922 return -1;
4923 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004924 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004925
4926 /* Use the PyBytes API to decode the string, since that is what is used
4927 to encode, and then coerce the result to Unicode. */
4928 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929 if (bytes == NULL)
4930 return -1;
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004931
4932 /* Leave the Python 2.x strings as bytes if the *encoding* given to the
4933 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
4934 if (strcmp(self->encoding, "bytes") == 0) {
4935 obj = bytes;
4936 }
4937 else {
4938 obj = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4939 Py_DECREF(bytes);
4940 if (obj == NULL) {
4941 return -1;
4942 }
4943 }
4944
4945 PDATA_PUSH(self->stack, obj, -1);
4946 return 0;
4947}
4948
4949static int
4950load_counted_binstring(UnpicklerObject *self, int nbytes)
4951{
4952 PyObject *obj;
4953 Py_ssize_t size;
4954 char *s;
4955
4956 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004957 return -1;
4958
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08004959 size = calc_binsize(s, nbytes);
4960 if (size < 0) {
4961 PickleState *st = _Pickle_GetGlobalState();
4962 PyErr_Format(st->UnpicklingError,
4963 "BINSTRING exceeds system's maximum size of %zd bytes",
4964 PY_SSIZE_T_MAX);
4965 return -1;
4966 }
4967
4968 if (_Unpickler_Read(self, &s, size) < 0)
4969 return -1;
4970
4971 /* Convert Python 2.x strings to bytes if the *encoding* given to the
4972 Unpickler was 'bytes'. Otherwise, convert them to unicode. */
4973 if (strcmp(self->encoding, "bytes") == 0) {
4974 obj = PyBytes_FromStringAndSize(s, size);
4975 }
4976 else {
4977 obj = PyUnicode_Decode(s, size, self->encoding, self->errors);
4978 }
4979 if (obj == NULL) {
4980 return -1;
4981 }
4982
4983 PDATA_PUSH(self->stack, obj, -1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004984 return 0;
4985}
4986
4987static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004988load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989{
4990 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004991 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004992 char *s;
4993
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004994 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004995 return -1;
4996
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004997 size = calc_binsize(s, nbytes);
4998 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004999 PyErr_Format(PyExc_OverflowError,
5000 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005001 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005002 return -1;
5003 }
5004
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005005 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005006 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005007
5008 bytes = PyBytes_FromStringAndSize(s, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005009 if (bytes == NULL)
5010 return -1;
5011
5012 PDATA_PUSH(self->stack, bytes, -1);
5013 return 0;
5014}
5015
5016static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005017load_unicode(UnpicklerObject *self)
5018{
5019 PyObject *str;
5020 Py_ssize_t len;
5021 char *s;
5022
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005023 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005024 return -1;
5025 if (len < 1)
5026 return bad_readline();
5027
5028 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
5029 if (str == NULL)
5030 return -1;
5031
5032 PDATA_PUSH(self->stack, str, -1);
5033 return 0;
5034}
5035
5036static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005037load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005038{
5039 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005040 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005041 char *s;
5042
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005043 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005044 return -1;
5045
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005046 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005047 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005048 PyErr_Format(PyExc_OverflowError,
5049 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07005050 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005051 return -1;
5052 }
5053
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005054 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005055 return -1;
5056
Victor Stinner485fb562010-04-13 11:07:24 +00005057 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005058 if (str == NULL)
5059 return -1;
5060
5061 PDATA_PUSH(self->stack, str, -1);
5062 return 0;
5063}
5064
5065static int
Victor Stinner21b47112016-03-14 18:09:39 +01005066load_counted_tuple(UnpicklerObject *self, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005067{
5068 PyObject *tuple;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005069
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005070 if (Py_SIZE(self->stack) < len)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005071 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005072
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005073 tuple = Pdata_poptuple(self->stack, Py_SIZE(self->stack) - len);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005074 if (tuple == NULL)
5075 return -1;
5076 PDATA_PUSH(self->stack, tuple, -1);
5077 return 0;
5078}
5079
5080static int
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005081load_tuple(UnpicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005082{
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005083 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005084
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005085 if ((i = marker(self)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005086 return -1;
5087
Serhiy Storchakaa49de6b2015-11-25 15:01:53 +02005088 return load_counted_tuple(self, Py_SIZE(self->stack) - i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005089}
5090
5091static int
5092load_empty_list(UnpicklerObject *self)
5093{
5094 PyObject *list;
5095
5096 if ((list = PyList_New(0)) == NULL)
5097 return -1;
5098 PDATA_PUSH(self->stack, list, -1);
5099 return 0;
5100}
5101
5102static int
5103load_empty_dict(UnpicklerObject *self)
5104{
5105 PyObject *dict;
5106
5107 if ((dict = PyDict_New()) == NULL)
5108 return -1;
5109 PDATA_PUSH(self->stack, dict, -1);
5110 return 0;
5111}
5112
5113static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005114load_empty_set(UnpicklerObject *self)
5115{
5116 PyObject *set;
5117
5118 if ((set = PySet_New(NULL)) == NULL)
5119 return -1;
5120 PDATA_PUSH(self->stack, set, -1);
5121 return 0;
5122}
5123
5124static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005125load_list(UnpicklerObject *self)
5126{
5127 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005128 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005129
5130 if ((i = marker(self)) < 0)
5131 return -1;
5132
5133 list = Pdata_poplist(self->stack, i);
5134 if (list == NULL)
5135 return -1;
5136 PDATA_PUSH(self->stack, list, -1);
5137 return 0;
5138}
5139
5140static int
5141load_dict(UnpicklerObject *self)
5142{
5143 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005144 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005145
5146 if ((i = marker(self)) < 0)
5147 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005148 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005149
5150 if ((dict = PyDict_New()) == NULL)
5151 return -1;
5152
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005153 if ((j - i) % 2 != 0) {
5154 PickleState *st = _Pickle_GetGlobalState();
5155 PyErr_SetString(st->UnpicklingError, "odd number of items for DICT");
Serhiy Storchaka3ac53802015-12-07 11:32:00 +02005156 Py_DECREF(dict);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005157 return -1;
5158 }
5159
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005160 for (k = i + 1; k < j; k += 2) {
5161 key = self->stack->data[k - 1];
5162 value = self->stack->data[k];
5163 if (PyDict_SetItem(dict, key, value) < 0) {
5164 Py_DECREF(dict);
5165 return -1;
5166 }
5167 }
5168 Pdata_clear(self->stack, i);
5169 PDATA_PUSH(self->stack, dict, -1);
5170 return 0;
5171}
5172
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005173static int
5174load_frozenset(UnpicklerObject *self)
5175{
5176 PyObject *items;
5177 PyObject *frozenset;
5178 Py_ssize_t i;
5179
5180 if ((i = marker(self)) < 0)
5181 return -1;
5182
5183 items = Pdata_poptuple(self->stack, i);
5184 if (items == NULL)
5185 return -1;
5186
5187 frozenset = PyFrozenSet_New(items);
5188 Py_DECREF(items);
5189 if (frozenset == NULL)
5190 return -1;
5191
5192 PDATA_PUSH(self->stack, frozenset, -1);
5193 return 0;
5194}
5195
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005196static PyObject *
5197instantiate(PyObject *cls, PyObject *args)
5198{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005199 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005200 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005201 /* Caller must assure args are a tuple. Normally, args come from
5202 Pdata_poptuple which packs objects from the top of the stack
5203 into a newly created tuple. */
5204 assert(PyTuple_Check(args));
5205 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005206 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005207 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005208 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005209 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005210 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02005211
5212 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005213 }
5214 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005215}
5216
5217static int
5218load_obj(UnpicklerObject *self)
5219{
5220 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005221 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005222
5223 if ((i = marker(self)) < 0)
5224 return -1;
5225
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005226 if (Py_SIZE(self->stack) - i < 1)
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005227 return Pdata_stack_underflow(self->stack);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005228
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005229 args = Pdata_poptuple(self->stack, i + 1);
5230 if (args == NULL)
5231 return -1;
5232
5233 PDATA_POP(self->stack, cls);
5234 if (cls) {
5235 obj = instantiate(cls, args);
5236 Py_DECREF(cls);
5237 }
5238 Py_DECREF(args);
5239 if (obj == NULL)
5240 return -1;
5241
5242 PDATA_PUSH(self->stack, obj, -1);
5243 return 0;
5244}
5245
5246static int
5247load_inst(UnpicklerObject *self)
5248{
5249 PyObject *cls = NULL;
5250 PyObject *args = NULL;
5251 PyObject *obj = NULL;
5252 PyObject *module_name;
5253 PyObject *class_name;
5254 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005255 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005256 char *s;
5257
5258 if ((i = marker(self)) < 0)
5259 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005260 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005261 return -1;
5262 if (len < 2)
5263 return bad_readline();
5264
5265 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5266 identifiers are permitted in Python 3.0, since the INST opcode is only
5267 supported by older protocols on Python 2.x. */
5268 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5269 if (module_name == NULL)
5270 return -1;
5271
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005272 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005273 if (len < 2) {
5274 Py_DECREF(module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005275 return bad_readline();
Serhiy Storchakaca28eba2015-12-01 00:18:23 +02005276 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005277 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005278 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005279 cls = find_class(self, module_name, class_name);
5280 Py_DECREF(class_name);
5281 }
5282 }
5283 Py_DECREF(module_name);
5284
5285 if (cls == NULL)
5286 return -1;
5287
5288 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5289 obj = instantiate(cls, args);
5290 Py_DECREF(args);
5291 }
5292 Py_DECREF(cls);
5293
5294 if (obj == NULL)
5295 return -1;
5296
5297 PDATA_PUSH(self->stack, obj, -1);
5298 return 0;
5299}
5300
5301static int
5302load_newobj(UnpicklerObject *self)
5303{
5304 PyObject *args = NULL;
5305 PyObject *clsraw = NULL;
5306 PyTypeObject *cls; /* clsraw cast to its true type */
5307 PyObject *obj;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005308 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005309
5310 /* Stack is ... cls argtuple, and we want to call
5311 * cls.__new__(cls, *argtuple).
5312 */
5313 PDATA_POP(self->stack, args);
5314 if (args == NULL)
5315 goto error;
5316 if (!PyTuple_Check(args)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005317 PyErr_SetString(st->UnpicklingError,
5318 "NEWOBJ expected an arg " "tuple.");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005319 goto error;
5320 }
5321
5322 PDATA_POP(self->stack, clsraw);
5323 cls = (PyTypeObject *)clsraw;
5324 if (cls == NULL)
5325 goto error;
5326 if (!PyType_Check(cls)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005327 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005328 "isn't a type object");
5329 goto error;
5330 }
5331 if (cls->tp_new == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005332 PyErr_SetString(st->UnpicklingError, "NEWOBJ class argument "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005333 "has NULL tp_new");
5334 goto error;
5335 }
5336
5337 /* Call __new__. */
5338 obj = cls->tp_new(cls, args, NULL);
5339 if (obj == NULL)
5340 goto error;
5341
5342 Py_DECREF(args);
5343 Py_DECREF(clsraw);
5344 PDATA_PUSH(self->stack, obj, -1);
5345 return 0;
5346
5347 error:
5348 Py_XDECREF(args);
5349 Py_XDECREF(clsraw);
5350 return -1;
5351}
5352
5353static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005354load_newobj_ex(UnpicklerObject *self)
5355{
5356 PyObject *cls, *args, *kwargs;
5357 PyObject *obj;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005358 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005359
5360 PDATA_POP(self->stack, kwargs);
5361 if (kwargs == NULL) {
5362 return -1;
5363 }
5364 PDATA_POP(self->stack, args);
5365 if (args == NULL) {
5366 Py_DECREF(kwargs);
5367 return -1;
5368 }
5369 PDATA_POP(self->stack, cls);
5370 if (cls == NULL) {
5371 Py_DECREF(kwargs);
5372 Py_DECREF(args);
5373 return -1;
5374 }
Larry Hastings61272b72014-01-07 12:41:53 -08005375
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005376 if (!PyType_Check(cls)) {
5377 Py_DECREF(kwargs);
5378 Py_DECREF(args);
Larry Hastings61272b72014-01-07 12:41:53 -08005379 PyErr_Format(st->UnpicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005380 "NEWOBJ_EX class argument must be a type, not %.200s",
5381 Py_TYPE(cls)->tp_name);
Benjamin Peterson80f78a32015-07-02 16:18:38 -05005382 Py_DECREF(cls);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005383 return -1;
5384 }
5385
5386 if (((PyTypeObject *)cls)->tp_new == NULL) {
5387 Py_DECREF(kwargs);
5388 Py_DECREF(args);
5389 Py_DECREF(cls);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005390 PyErr_SetString(st->UnpicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005391 "NEWOBJ_EX class argument doesn't have __new__");
5392 return -1;
5393 }
5394 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5395 Py_DECREF(kwargs);
5396 Py_DECREF(args);
5397 Py_DECREF(cls);
5398 if (obj == NULL) {
5399 return -1;
5400 }
5401 PDATA_PUSH(self->stack, obj, -1);
5402 return 0;
5403}
5404
5405static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005406load_global(UnpicklerObject *self)
5407{
5408 PyObject *global = NULL;
5409 PyObject *module_name;
5410 PyObject *global_name;
5411 Py_ssize_t len;
5412 char *s;
5413
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005414 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005415 return -1;
5416 if (len < 2)
5417 return bad_readline();
5418 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5419 if (!module_name)
5420 return -1;
5421
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005422 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005423 if (len < 2) {
5424 Py_DECREF(module_name);
5425 return bad_readline();
5426 }
5427 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5428 if (global_name) {
5429 global = find_class(self, module_name, global_name);
5430 Py_DECREF(global_name);
5431 }
5432 }
5433 Py_DECREF(module_name);
5434
5435 if (global == NULL)
5436 return -1;
5437 PDATA_PUSH(self->stack, global, -1);
5438 return 0;
5439}
5440
5441static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005442load_stack_global(UnpicklerObject *self)
5443{
5444 PyObject *global;
5445 PyObject *module_name;
5446 PyObject *global_name;
5447
5448 PDATA_POP(self->stack, global_name);
5449 PDATA_POP(self->stack, module_name);
5450 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5451 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005452 PickleState *st = _Pickle_GetGlobalState();
5453 PyErr_SetString(st->UnpicklingError, "STACK_GLOBAL requires str");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005454 Py_XDECREF(global_name);
5455 Py_XDECREF(module_name);
5456 return -1;
5457 }
5458 global = find_class(self, module_name, global_name);
5459 Py_DECREF(global_name);
5460 Py_DECREF(module_name);
5461 if (global == NULL)
5462 return -1;
5463 PDATA_PUSH(self->stack, global, -1);
5464 return 0;
5465}
5466
5467static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005468load_persid(UnpicklerObject *self)
5469{
5470 PyObject *pid;
5471 Py_ssize_t len;
5472 char *s;
5473
5474 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005475 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005476 return -1;
Alexandre Vassalotti896414f2013-11-30 13:52:35 -08005477 if (len < 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005478 return bad_readline();
5479
5480 pid = PyBytes_FromStringAndSize(s, len - 1);
5481 if (pid == NULL)
5482 return -1;
5483
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08005484 /* This does not leak since _Pickle_FastCall() steals the reference
5485 to pid first. */
5486 pid = _Pickle_FastCall(self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005487 if (pid == NULL)
5488 return -1;
5489
5490 PDATA_PUSH(self->stack, pid, -1);
5491 return 0;
5492 }
5493 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005494 PickleState *st = _Pickle_GetGlobalState();
5495 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005496 "A load persistent id instruction was encountered,\n"
5497 "but no persistent_load function was specified.");
5498 return -1;
5499 }
5500}
5501
5502static int
5503load_binpersid(UnpicklerObject *self)
5504{
5505 PyObject *pid;
5506
5507 if (self->pers_func) {
5508 PDATA_POP(self->stack, pid);
5509 if (pid == NULL)
5510 return -1;
5511
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08005512 /* This does not leak since _Pickle_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005513 reference to pid first. */
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08005514 pid = _Pickle_FastCall(self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005515 if (pid == NULL)
5516 return -1;
5517
5518 PDATA_PUSH(self->stack, pid, -1);
5519 return 0;
5520 }
5521 else {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005522 PickleState *st = _Pickle_GetGlobalState();
5523 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005524 "A load persistent id instruction was encountered,\n"
5525 "but no persistent_load function was specified.");
5526 return -1;
5527 }
5528}
5529
5530static int
5531load_pop(UnpicklerObject *self)
5532{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005533 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005534
5535 /* Note that we split the (pickle.py) stack into two stacks,
5536 * an object stack and a mark stack. We have to be clever and
5537 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00005538 * mark stack first, and only signalling a stack underflow if
5539 * the object stack is empty and the mark stack doesn't match
5540 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005541 */
Collin Winter8ca69de2009-05-26 16:53:41 +00005542 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005543 self->num_marks--;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005544 self->stack->mark_set = self->num_marks != 0;
5545 self->stack->fence = self->num_marks ?
5546 self->marks[self->num_marks - 1] : 0;
5547 } else if (len <= self->stack->fence)
5548 return Pdata_stack_underflow(self->stack);
5549 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005550 len--;
5551 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005552 Py_SIZE(self->stack) = len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005553 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005554 return 0;
5555}
5556
5557static int
5558load_pop_mark(UnpicklerObject *self)
5559{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005560 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005561
5562 if ((i = marker(self)) < 0)
5563 return -1;
5564
5565 Pdata_clear(self->stack, i);
5566
5567 return 0;
5568}
5569
5570static int
5571load_dup(UnpicklerObject *self)
5572{
5573 PyObject *last;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005574 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005575
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005576 if (len <= self->stack->fence)
5577 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005578 last = self->stack->data[len - 1];
5579 PDATA_APPEND(self->stack, last, -1);
5580 return 0;
5581}
5582
5583static int
5584load_get(UnpicklerObject *self)
5585{
5586 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005587 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005588 Py_ssize_t len;
5589 char *s;
5590
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005591 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005592 return -1;
5593 if (len < 2)
5594 return bad_readline();
5595
5596 key = PyLong_FromString(s, NULL, 10);
5597 if (key == NULL)
5598 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005599 idx = PyLong_AsSsize_t(key);
5600 if (idx == -1 && PyErr_Occurred()) {
5601 Py_DECREF(key);
5602 return -1;
5603 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005604
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005605 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005606 if (value == NULL) {
5607 if (!PyErr_Occurred())
5608 PyErr_SetObject(PyExc_KeyError, key);
5609 Py_DECREF(key);
5610 return -1;
5611 }
5612 Py_DECREF(key);
5613
5614 PDATA_APPEND(self->stack, value, -1);
5615 return 0;
5616}
5617
5618static int
5619load_binget(UnpicklerObject *self)
5620{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005621 PyObject *value;
5622 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005623 char *s;
5624
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005625 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005626 return -1;
5627
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005628 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005629
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005630 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005631 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005632 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005633 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005634 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005635 Py_DECREF(key);
5636 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005637 return -1;
5638 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005639
5640 PDATA_APPEND(self->stack, value, -1);
5641 return 0;
5642}
5643
5644static int
5645load_long_binget(UnpicklerObject *self)
5646{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005647 PyObject *value;
5648 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005649 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005650
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005651 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005652 return -1;
5653
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005654 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005655
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005656 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005657 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005658 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005659 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005660 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005661 Py_DECREF(key);
5662 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005663 return -1;
5664 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005665
5666 PDATA_APPEND(self->stack, value, -1);
5667 return 0;
5668}
5669
5670/* Push an object from the extension registry (EXT[124]). nbytes is
5671 * the number of bytes following the opcode, holding the index (code) value.
5672 */
5673static int
5674load_extension(UnpicklerObject *self, int nbytes)
5675{
5676 char *codebytes; /* the nbytes bytes after the opcode */
5677 long code; /* calc_binint returns long */
5678 PyObject *py_code; /* code as a Python int */
5679 PyObject *obj; /* the object to push */
5680 PyObject *pair; /* (module_name, class_name) */
5681 PyObject *module_name, *class_name;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005682 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005683
5684 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005685 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005686 return -1;
5687 code = calc_binint(codebytes, nbytes);
5688 if (code <= 0) { /* note that 0 is forbidden */
5689 /* Corrupt or hostile pickle. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005690 PyErr_SetString(st->UnpicklingError, "EXT specifies code <= 0");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005691 return -1;
5692 }
5693
5694 /* Look for the code in the cache. */
5695 py_code = PyLong_FromLong(code);
5696 if (py_code == NULL)
5697 return -1;
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08005698 obj = PyDict_GetItemWithError(st->extension_cache, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005699 if (obj != NULL) {
5700 /* Bingo. */
5701 Py_DECREF(py_code);
5702 PDATA_APPEND(self->stack, obj, -1);
5703 return 0;
5704 }
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08005705 if (PyErr_Occurred()) {
5706 Py_DECREF(py_code);
5707 return -1;
5708 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005709
5710 /* Look up the (module_name, class_name) pair. */
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08005711 pair = PyDict_GetItemWithError(st->inverted_registry, py_code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005712 if (pair == NULL) {
5713 Py_DECREF(py_code);
Alexandre Vassalotti567eba12013-11-28 17:09:16 -08005714 if (!PyErr_Occurred()) {
5715 PyErr_Format(PyExc_ValueError, "unregistered extension "
5716 "code %ld", code);
5717 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005718 return -1;
5719 }
5720 /* Since the extension registry is manipulable via Python code,
5721 * confirm that pair is really a 2-tuple of strings.
5722 */
5723 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5724 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5725 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5726 Py_DECREF(py_code);
5727 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5728 "isn't a 2-tuple of strings", code);
5729 return -1;
5730 }
5731 /* Load the object. */
5732 obj = find_class(self, module_name, class_name);
5733 if (obj == NULL) {
5734 Py_DECREF(py_code);
5735 return -1;
5736 }
5737 /* Cache code -> obj. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005738 code = PyDict_SetItem(st->extension_cache, py_code, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005739 Py_DECREF(py_code);
5740 if (code < 0) {
5741 Py_DECREF(obj);
5742 return -1;
5743 }
5744 PDATA_PUSH(self->stack, obj, -1);
5745 return 0;
5746}
5747
5748static int
5749load_put(UnpicklerObject *self)
5750{
5751 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005752 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005753 Py_ssize_t len;
5754 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005756 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005757 return -1;
5758 if (len < 2)
5759 return bad_readline();
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005760 if (Py_SIZE(self->stack) <= self->stack->fence)
5761 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005762 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005763
5764 key = PyLong_FromString(s, NULL, 10);
5765 if (key == NULL)
5766 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005767 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005768 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005769 if (idx < 0) {
5770 if (!PyErr_Occurred())
5771 PyErr_SetString(PyExc_ValueError,
5772 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005773 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005774 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005775
5776 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005777}
5778
5779static int
5780load_binput(UnpicklerObject *self)
5781{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005782 PyObject *value;
5783 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005784 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005785
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005786 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005787 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005788
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005789 if (Py_SIZE(self->stack) <= self->stack->fence)
5790 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005791 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005792
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005793 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005794
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005795 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005796}
5797
5798static int
5799load_long_binput(UnpicklerObject *self)
5800{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005801 PyObject *value;
5802 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005803 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005804
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005805 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005806 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005807
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005808 if (Py_SIZE(self->stack) <= self->stack->fence)
5809 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005810 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005811
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005812 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005813 if (idx < 0) {
5814 PyErr_SetString(PyExc_ValueError,
5815 "negative LONG_BINPUT argument");
5816 return -1;
5817 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005818
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005819 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005820}
5821
5822static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005823load_memoize(UnpicklerObject *self)
5824{
5825 PyObject *value;
5826
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005827 if (Py_SIZE(self->stack) <= self->stack->fence)
5828 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005829 value = self->stack->data[Py_SIZE(self->stack) - 1];
5830
5831 return _Unpickler_MemoPut(self, self->memo_len, value);
5832}
5833
5834static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005835do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005836{
5837 PyObject *value;
5838 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005839 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005840
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005841 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005842 if (x > len || x <= self->stack->fence)
5843 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005844 if (len == x) /* nothing to do */
5845 return 0;
5846
5847 list = self->stack->data[x - 1];
5848
5849 if (PyList_Check(list)) {
5850 PyObject *slice;
5851 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005852 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005853
5854 slice = Pdata_poplist(self->stack, x);
5855 if (!slice)
5856 return -1;
5857 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005858 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005859 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005860 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005861 }
5862 else {
5863 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005864 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005865
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005866 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005867 if (append_func == NULL)
5868 return -1;
5869 for (i = x; i < len; i++) {
5870 PyObject *result;
5871
5872 value = self->stack->data[i];
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08005873 result = _Pickle_FastCall(append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005874 if (result == NULL) {
5875 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005876 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005877 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005878 return -1;
5879 }
5880 Py_DECREF(result);
5881 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005882 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005883 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005884 }
5885
5886 return 0;
5887}
5888
5889static int
5890load_append(UnpicklerObject *self)
5891{
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005892 if (Py_SIZE(self->stack) - 1 <= self->stack->fence)
5893 return Pdata_stack_underflow(self->stack);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005894 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005895}
5896
5897static int
5898load_appends(UnpicklerObject *self)
5899{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005900 Py_ssize_t i = marker(self);
5901 if (i < 0)
5902 return -1;
5903 return do_append(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005904}
5905
5906static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005907do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005908{
5909 PyObject *value, *key;
5910 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005911 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005912 int status = 0;
5913
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005914 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005915 if (x > len || x <= self->stack->fence)
5916 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005917 if (len == x) /* nothing to do */
5918 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005919 if ((len - x) % 2 != 0) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005920 PickleState *st = _Pickle_GetGlobalState();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005921 /* Currupt or hostile pickle -- we never write one like this. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08005922 PyErr_SetString(st->UnpicklingError,
5923 "odd number of items for SETITEMS");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005924 return -1;
5925 }
5926
5927 /* Here, dict does not actually need to be a PyDict; it could be anything
5928 that supports the __setitem__ attribute. */
5929 dict = self->stack->data[x - 1];
5930
5931 for (i = x + 1; i < len; i += 2) {
5932 key = self->stack->data[i - 1];
5933 value = self->stack->data[i];
5934 if (PyObject_SetItem(dict, key, value) < 0) {
5935 status = -1;
5936 break;
5937 }
5938 }
5939
5940 Pdata_clear(self->stack, x);
5941 return status;
5942}
5943
5944static int
5945load_setitem(UnpicklerObject *self)
5946{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005947 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005948}
5949
5950static int
5951load_setitems(UnpicklerObject *self)
5952{
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005953 Py_ssize_t i = marker(self);
5954 if (i < 0)
5955 return -1;
5956 return do_setitems(self, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005957}
5958
5959static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005960load_additems(UnpicklerObject *self)
5961{
5962 PyObject *set;
5963 Py_ssize_t mark, len, i;
5964
5965 mark = marker(self);
Serhiy Storchakae9b30742015-11-23 15:17:43 +02005966 if (mark < 0)
5967 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005968 len = Py_SIZE(self->stack);
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02005969 if (mark > len || mark <= self->stack->fence)
5970 return Pdata_stack_underflow(self->stack);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005971 if (len == mark) /* nothing to do */
5972 return 0;
5973
5974 set = self->stack->data[mark - 1];
5975
5976 if (PySet_Check(set)) {
5977 PyObject *items;
5978 int status;
5979
5980 items = Pdata_poptuple(self->stack, mark);
5981 if (items == NULL)
5982 return -1;
5983
5984 status = _PySet_Update(set, items);
5985 Py_DECREF(items);
5986 return status;
5987 }
5988 else {
5989 PyObject *add_func;
5990 _Py_IDENTIFIER(add);
5991
5992 add_func = _PyObject_GetAttrId(set, &PyId_add);
5993 if (add_func == NULL)
5994 return -1;
5995 for (i = mark; i < len; i++) {
5996 PyObject *result;
5997 PyObject *item;
5998
5999 item = self->stack->data[i];
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006000 result = _Pickle_FastCall(add_func, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006001 if (result == NULL) {
6002 Pdata_clear(self->stack, i + 1);
6003 Py_SIZE(self->stack) = mark;
6004 return -1;
6005 }
6006 Py_DECREF(result);
6007 }
6008 Py_SIZE(self->stack) = mark;
6009 }
6010
6011 return 0;
6012}
6013
6014static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006015load_build(UnpicklerObject *self)
6016{
6017 PyObject *state, *inst, *slotstate;
6018 PyObject *setstate;
6019 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006020 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006021
6022 /* Stack is ... instance, state. We want to leave instance at
6023 * the stack top, possibly mutated via instance.__setstate__(state).
6024 */
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006025 if (Py_SIZE(self->stack) - 2 < self->stack->fence)
6026 return Pdata_stack_underflow(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006027
6028 PDATA_POP(self->stack, state);
6029 if (state == NULL)
6030 return -1;
6031
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006032 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006033
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006034 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00006035 if (setstate == NULL) {
6036 if (PyErr_ExceptionMatches(PyExc_AttributeError))
6037 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00006038 else {
6039 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00006040 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00006041 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006042 }
6043 else {
6044 PyObject *result;
6045
6046 /* The explicit __setstate__ is responsible for everything. */
Alexandre Vassalotti20c28c12013-11-27 02:26:54 -08006047 result = _Pickle_FastCall(setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006048 Py_DECREF(setstate);
6049 if (result == NULL)
6050 return -1;
6051 Py_DECREF(result);
6052 return 0;
6053 }
6054
6055 /* A default __setstate__. First see whether state embeds a
6056 * slot state dict too (a proto 2 addition).
6057 */
6058 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
6059 PyObject *tmp = state;
6060
6061 state = PyTuple_GET_ITEM(tmp, 0);
6062 slotstate = PyTuple_GET_ITEM(tmp, 1);
6063 Py_INCREF(state);
6064 Py_INCREF(slotstate);
6065 Py_DECREF(tmp);
6066 }
6067 else
6068 slotstate = NULL;
6069
6070 /* Set inst.__dict__ from the state dict (if any). */
6071 if (state != Py_None) {
6072 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006073 PyObject *d_key, *d_value;
6074 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02006075 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006076
6077 if (!PyDict_Check(state)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006078 PickleState *st = _Pickle_GetGlobalState();
6079 PyErr_SetString(st->UnpicklingError, "state is not a dictionary");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006080 goto error;
6081 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006082 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006083 if (dict == NULL)
6084 goto error;
6085
Antoine Pitroua9f48a02009-05-02 21:41:14 +00006086 i = 0;
6087 while (PyDict_Next(state, &i, &d_key, &d_value)) {
6088 /* normally the keys for instance attributes are
6089 interned. we should try to do that here. */
6090 Py_INCREF(d_key);
6091 if (PyUnicode_CheckExact(d_key))
6092 PyUnicode_InternInPlace(&d_key);
6093 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
6094 Py_DECREF(d_key);
6095 goto error;
6096 }
6097 Py_DECREF(d_key);
6098 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006099 Py_DECREF(dict);
6100 }
6101
6102 /* Also set instance attributes from the slotstate dict (if any). */
6103 if (slotstate != NULL) {
6104 PyObject *d_key, *d_value;
6105 Py_ssize_t i;
6106
6107 if (!PyDict_Check(slotstate)) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006108 PickleState *st = _Pickle_GetGlobalState();
6109 PyErr_SetString(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006110 "slot state is not a dictionary");
6111 goto error;
6112 }
6113 i = 0;
6114 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
6115 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
6116 goto error;
6117 }
6118 }
6119
6120 if (0) {
6121 error:
6122 status = -1;
6123 }
6124
6125 Py_DECREF(state);
6126 Py_XDECREF(slotstate);
6127 return status;
6128}
6129
6130static int
6131load_mark(UnpicklerObject *self)
6132{
6133
6134 /* Note that we split the (pickle.py) stack into two stacks, an
6135 * object stack and a mark stack. Here we push a mark onto the
6136 * mark stack.
6137 */
6138
6139 if ((self->num_marks + 1) >= self->marks_size) {
6140 size_t alloc;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006141
6142 /* Use the size_t type to check for overflow. */
6143 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02006144 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00006145 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006146 PyErr_NoMemory();
6147 return -1;
6148 }
6149
6150 if (self->marks == NULL)
Benjamin Peterson59b08c12015-06-27 13:41:33 -05006151 self->marks = PyMem_NEW(Py_ssize_t, alloc);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006152 else
Benjamin Peterson59b08c12015-06-27 13:41:33 -05006153 PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
6154 if (self->marks == NULL) {
6155 self->marks_size = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006156 PyErr_NoMemory();
6157 return -1;
6158 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006159 self->marks_size = (Py_ssize_t)alloc;
6160 }
6161
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006162 self->stack->mark_set = 1;
6163 self->marks[self->num_marks++] = self->stack->fence = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006164
6165 return 0;
6166}
6167
6168static int
6169load_reduce(UnpicklerObject *self)
6170{
6171 PyObject *callable = NULL;
6172 PyObject *argtup = NULL;
6173 PyObject *obj = NULL;
6174
6175 PDATA_POP(self->stack, argtup);
6176 if (argtup == NULL)
6177 return -1;
6178 PDATA_POP(self->stack, callable);
6179 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00006180 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006181 Py_DECREF(callable);
6182 }
6183 Py_DECREF(argtup);
6184
6185 if (obj == NULL)
6186 return -1;
6187
6188 PDATA_PUSH(self->stack, obj, -1);
6189 return 0;
6190}
6191
6192/* Just raises an error if we don't know the protocol specified. PROTO
6193 * is the first opcode for protocols >= 2.
6194 */
6195static int
6196load_proto(UnpicklerObject *self)
6197{
6198 char *s;
6199 int i;
6200
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006201 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006202 return -1;
6203
6204 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006205 if (i <= HIGHEST_PROTOCOL) {
6206 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006207 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006208 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006209
6210 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
6211 return -1;
6212}
6213
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006214static int
6215load_frame(UnpicklerObject *self)
6216{
6217 char *s;
6218 Py_ssize_t frame_len;
6219
6220 if (_Unpickler_Read(self, &s, 8) < 0)
6221 return -1;
6222
6223 frame_len = calc_binsize(s, 8);
6224 if (frame_len < 0) {
6225 PyErr_Format(PyExc_OverflowError,
6226 "FRAME length exceeds system's maximum of %zd bytes",
6227 PY_SSIZE_T_MAX);
6228 return -1;
6229 }
6230
6231 if (_Unpickler_Read(self, &s, frame_len) < 0)
6232 return -1;
6233
6234 /* Rewind to start of frame */
6235 self->next_read_idx -= frame_len;
6236 return 0;
6237}
6238
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006239static PyObject *
6240load(UnpicklerObject *self)
6241{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006242 PyObject *value = NULL;
Christian Heimes27ea78b2014-01-27 01:03:53 +01006243 char *s = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006244
6245 self->num_marks = 0;
Serhiy Storchaka59fb6342015-12-06 22:01:35 +02006246 self->stack->mark_set = 0;
6247 self->stack->fence = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006248 self->proto = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006249 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006250 Pdata_clear(self->stack, 0);
6251
6252 /* Convenient macros for the dispatch while-switch loop just below. */
6253#define OP(opcode, load_func) \
6254 case opcode: if (load_func(self) < 0) break; continue;
6255
6256#define OP_ARG(opcode, load_func, arg) \
6257 case opcode: if (load_func(self, (arg)) < 0) break; continue;
6258
6259 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006260 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006261 break;
6262
6263 switch ((enum opcode)s[0]) {
6264 OP(NONE, load_none)
6265 OP(BININT, load_binint)
6266 OP(BININT1, load_binint1)
6267 OP(BININT2, load_binint2)
6268 OP(INT, load_int)
6269 OP(LONG, load_long)
6270 OP_ARG(LONG1, load_counted_long, 1)
6271 OP_ARG(LONG4, load_counted_long, 4)
6272 OP(FLOAT, load_float)
6273 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006274 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6275 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6276 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6277 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6278 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006279 OP(STRING, load_string)
6280 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006281 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6282 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6283 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006284 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6285 OP_ARG(TUPLE1, load_counted_tuple, 1)
6286 OP_ARG(TUPLE2, load_counted_tuple, 2)
6287 OP_ARG(TUPLE3, load_counted_tuple, 3)
6288 OP(TUPLE, load_tuple)
6289 OP(EMPTY_LIST, load_empty_list)
6290 OP(LIST, load_list)
6291 OP(EMPTY_DICT, load_empty_dict)
6292 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006293 OP(EMPTY_SET, load_empty_set)
6294 OP(ADDITEMS, load_additems)
6295 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006296 OP(OBJ, load_obj)
6297 OP(INST, load_inst)
6298 OP(NEWOBJ, load_newobj)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006299 OP(NEWOBJ_EX, load_newobj_ex)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006300 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006301 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006302 OP(APPEND, load_append)
6303 OP(APPENDS, load_appends)
6304 OP(BUILD, load_build)
6305 OP(DUP, load_dup)
6306 OP(BINGET, load_binget)
6307 OP(LONG_BINGET, load_long_binget)
6308 OP(GET, load_get)
6309 OP(MARK, load_mark)
6310 OP(BINPUT, load_binput)
6311 OP(LONG_BINPUT, load_long_binput)
6312 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006313 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006314 OP(POP, load_pop)
6315 OP(POP_MARK, load_pop_mark)
6316 OP(SETITEM, load_setitem)
6317 OP(SETITEMS, load_setitems)
6318 OP(PERSID, load_persid)
6319 OP(BINPERSID, load_binpersid)
6320 OP(REDUCE, load_reduce)
6321 OP(PROTO, load_proto)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006322 OP(FRAME, load_frame)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006323 OP_ARG(EXT1, load_extension, 1)
6324 OP_ARG(EXT2, load_extension, 2)
6325 OP_ARG(EXT4, load_extension, 4)
6326 OP_ARG(NEWTRUE, load_bool, Py_True)
6327 OP_ARG(NEWFALSE, load_bool, Py_False)
6328
6329 case STOP:
6330 break;
6331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006332 default:
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006333 if (s[0] == '\0') {
Benjamin Petersonadde86d2011-09-23 13:41:41 -04006334 PyErr_SetNone(PyExc_EOFError);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006335 }
6336 else {
6337 PickleState *st = _Pickle_GetGlobalState();
6338 PyErr_Format(st->UnpicklingError,
Benjamin Petersonadde86d2011-09-23 13:41:41 -04006339 "invalid load key, '%c'.", s[0]);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006340 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006341 return NULL;
6342 }
6343
6344 break; /* and we are done! */
6345 }
6346
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006347 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006348 return NULL;
6349 }
6350
Victor Stinner2ae57e32013-10-31 13:39:23 +01006351 if (_Unpickler_SkipConsumed(self) < 0)
6352 return NULL;
6353
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006354 PDATA_POP(self->stack, value);
6355 return value;
6356}
6357
Larry Hastings61272b72014-01-07 12:41:53 -08006358/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006359
6360_pickle.Unpickler.load
6361
6362Load a pickle.
6363
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006364Read a pickled object representation from the open file object given
6365in the constructor, and return the reconstituted object hierarchy
6366specified therein.
Larry Hastings61272b72014-01-07 12:41:53 -08006367[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006368
Larry Hastings3cceb382014-01-04 11:09:09 -08006369static PyObject *
Larry Hastingsc2047262014-01-25 20:43:29 -08006370_pickle_Unpickler_load_impl(UnpicklerObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006371/*[clinic end generated code: output=fdcc488aad675b14 input=acbb91a42fa9b7b9]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006372{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006373 UnpicklerObject *unpickler = (UnpicklerObject*)self;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006374
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006375 /* Check whether the Unpickler was initialized correctly. This prevents
6376 segfaulting if a subclass overridden __init__ with a function that does
6377 not call Unpickler.__init__(). Here, we simply ensure that self->read
6378 is not NULL. */
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006379 if (unpickler->read == NULL) {
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006380 PickleState *st = _Pickle_GetGlobalState();
6381 PyErr_Format(st->UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006382 "Unpickler.__init__() was not called by %s.__init__()",
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006383 Py_TYPE(unpickler)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006384 return NULL;
6385 }
6386
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006387 return load(unpickler);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006388}
6389
6390/* The name of find_class() is misleading. In newer pickle protocols, this
6391 function is used for loading any global (i.e., functions), not just
6392 classes. The name is kept only for backward compatibility. */
6393
Larry Hastings61272b72014-01-07 12:41:53 -08006394/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006395
6396_pickle.Unpickler.find_class
6397
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006398 module_name: object
6399 global_name: object
6400 /
6401
6402Return an object from a specified module.
6403
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006404If necessary, the module will be imported. Subclasses may override
6405this method (e.g. to restrict unpickling of arbitrary classes and
6406functions).
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006407
6408This method is called whenever a class or a function object is
6409needed. Both arguments passed are str objects.
Larry Hastings61272b72014-01-07 12:41:53 -08006410[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006411
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006412static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04006413_pickle_Unpickler_find_class_impl(UnpicklerObject *self,
6414 PyObject *module_name,
6415 PyObject *global_name)
6416/*[clinic end generated code: output=becc08d7f9ed41e3 input=e2e6a865de093ef4]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006417{
6418 PyObject *global;
6419 PyObject *modules_dict;
6420 PyObject *module;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006421 _Py_IDENTIFIER(modules);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006422
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006423 /* Try to map the old names used in Python 2.x to the new ones used in
6424 Python 3.x. We do this only with old pickle protocols and when the
6425 user has not disabled the feature. */
6426 if (self->proto < 3 && self->fix_imports) {
6427 PyObject *key;
6428 PyObject *item;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006429 PickleState *st = _Pickle_GetGlobalState();
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006430
6431 /* Check if the global (i.e., a function or a class) was renamed
6432 or moved to another module. */
6433 key = PyTuple_Pack(2, module_name, global_name);
6434 if (key == NULL)
6435 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08006436 item = PyDict_GetItemWithError(st->name_mapping_2to3, key);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006437 Py_DECREF(key);
6438 if (item) {
6439 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6440 PyErr_Format(PyExc_RuntimeError,
6441 "_compat_pickle.NAME_MAPPING values should be "
6442 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6443 return NULL;
6444 }
6445 module_name = PyTuple_GET_ITEM(item, 0);
6446 global_name = PyTuple_GET_ITEM(item, 1);
6447 if (!PyUnicode_Check(module_name) ||
6448 !PyUnicode_Check(global_name)) {
6449 PyErr_Format(PyExc_RuntimeError,
6450 "_compat_pickle.NAME_MAPPING values should be "
6451 "pairs of str, not (%.200s, %.200s)",
6452 Py_TYPE(module_name)->tp_name,
6453 Py_TYPE(global_name)->tp_name);
6454 return NULL;
6455 }
6456 }
6457 else if (PyErr_Occurred()) {
6458 return NULL;
6459 }
Serhiy Storchakabfe18242015-03-31 13:12:37 +03006460 else {
6461 /* Check if the module was renamed. */
6462 item = PyDict_GetItemWithError(st->import_mapping_2to3, module_name);
6463 if (item) {
6464 if (!PyUnicode_Check(item)) {
6465 PyErr_Format(PyExc_RuntimeError,
6466 "_compat_pickle.IMPORT_MAPPING values should be "
6467 "strings, not %.200s", Py_TYPE(item)->tp_name);
6468 return NULL;
6469 }
6470 module_name = item;
6471 }
6472 else if (PyErr_Occurred()) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006473 return NULL;
6474 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006475 }
6476 }
6477
Victor Stinnerbb520202013-11-06 22:40:41 +01006478 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02006479 if (modules_dict == NULL) {
6480 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006481 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02006482 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006483
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006484 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006485 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006486 if (PyErr_Occurred())
6487 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006488 module = PyImport_Import(module_name);
6489 if (module == NULL)
6490 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006491 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006492 Py_DECREF(module);
6493 }
Victor Stinner121aab42011-09-29 23:40:53 +02006494 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006495 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006496 }
6497 return global;
6498}
6499
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02006500/*[clinic input]
6501
6502_pickle.Unpickler.__sizeof__ -> Py_ssize_t
6503
6504Returns size in memory, in bytes.
6505[clinic start generated code]*/
6506
6507static Py_ssize_t
6508_pickle_Unpickler___sizeof___impl(UnpicklerObject *self)
6509/*[clinic end generated code: output=119d9d03ad4c7651 input=13333471fdeedf5e]*/
6510{
6511 Py_ssize_t res;
6512
Serhiy Storchaka5c4064e2015-12-19 20:05:25 +02006513 res = _PyObject_SIZE(Py_TYPE(self));
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02006514 if (self->memo != NULL)
6515 res += self->memo_size * sizeof(PyObject *);
6516 if (self->marks != NULL)
6517 res += self->marks_size * sizeof(Py_ssize_t);
6518 if (self->input_line != NULL)
6519 res += strlen(self->input_line) + 1;
6520 if (self->encoding != NULL)
6521 res += strlen(self->encoding) + 1;
6522 if (self->errors != NULL)
6523 res += strlen(self->errors) + 1;
6524 return res;
6525}
6526
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006527static struct PyMethodDef Unpickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006528 _PICKLE_UNPICKLER_LOAD_METHODDEF
6529 _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
Serhiy Storchaka5bbd2312014-12-16 19:39:08 +02006530 _PICKLE_UNPICKLER___SIZEOF___METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006531 {NULL, NULL} /* sentinel */
6532};
6533
6534static void
6535Unpickler_dealloc(UnpicklerObject *self)
6536{
6537 PyObject_GC_UnTrack((PyObject *)self);
6538 Py_XDECREF(self->readline);
6539 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006540 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006541 Py_XDECREF(self->stack);
6542 Py_XDECREF(self->pers_func);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006543 if (self->buffer.buf != NULL) {
6544 PyBuffer_Release(&self->buffer);
6545 self->buffer.buf = NULL;
6546 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006547
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006548 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006549 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006550 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006551 PyMem_Free(self->encoding);
6552 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006553
6554 Py_TYPE(self)->tp_free((PyObject *)self);
6555}
6556
6557static int
6558Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6559{
6560 Py_VISIT(self->readline);
6561 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006562 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006563 Py_VISIT(self->stack);
6564 Py_VISIT(self->pers_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006565 return 0;
6566}
6567
6568static int
6569Unpickler_clear(UnpicklerObject *self)
6570{
6571 Py_CLEAR(self->readline);
6572 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006573 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006574 Py_CLEAR(self->stack);
6575 Py_CLEAR(self->pers_func);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006576 if (self->buffer.buf != NULL) {
6577 PyBuffer_Release(&self->buffer);
6578 self->buffer.buf = NULL;
6579 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006580
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006581 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006582 PyMem_Free(self->marks);
6583 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006584 PyMem_Free(self->input_line);
6585 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006586 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006587 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006588 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006589 self->errors = NULL;
6590
6591 return 0;
6592}
6593
Larry Hastings61272b72014-01-07 12:41:53 -08006594/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006595
6596_pickle.Unpickler.__init__
6597
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006598 file: object
6599 *
6600 fix_imports: bool = True
6601 encoding: str = 'ASCII'
6602 errors: str = 'strict'
6603
6604This takes a binary file for reading a pickle data stream.
6605
6606The protocol version of the pickle is detected automatically, so no
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006607protocol argument is needed. Bytes past the pickled object's
6608representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006609
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006610The argument *file* must have two methods, a read() method that takes
6611an integer argument, and a readline() method that requires no
6612arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00006613binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006614other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006615
6616Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00006617which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006618generated by Python 2. If *fix_imports* is True, pickle will try to
6619map the old Python 2 names to the new names used in Python 3. The
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006620*encoding* and *errors* tell pickle how to decode 8-bit string
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08006621instances pickled by Python 2; these default to 'ASCII' and 'strict',
6622respectively. The *encoding* can be 'bytes' to read these 8-bit
6623string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08006624[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006625
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006626static int
Larry Hastings89964c42015-04-14 18:07:59 -04006627_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file,
6628 int fix_imports, const char *encoding,
6629 const char *errors)
Martin Panter46f50722016-05-26 05:35:26 +00006630/*[clinic end generated code: output=e2c8ce748edc57b0 input=f9b7da04f5f4f335]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006631{
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006632 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006633
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006634 /* In case of multiple __init__() calls, clear previous content. */
6635 if (self->read != NULL)
6636 (void)Unpickler_clear(self);
6637
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006638 if (_Unpickler_SetInputStream(self, file) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006639 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006640
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006641 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006642 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006643
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006644 self->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006645 if (self->fix_imports == -1)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006646 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006647
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006648 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006649 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
6650 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006651 if (self->pers_func == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006652 return 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006653 }
6654 else {
6655 self->pers_func = NULL;
6656 }
6657
6658 self->stack = (Pdata *)Pdata_New();
6659 if (self->stack == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006660 return 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006662 self->memo_size = 32;
6663 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006664 if (self->memo == NULL)
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006665 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006666
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006667 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00006668
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006669 return 0;
6670}
6671
Larry Hastingsb7ccb202014-01-18 23:50:21 -08006672
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006673/* Define a proxy object for the Unpickler's internal memo object. This is to
6674 * avoid breaking code like:
6675 * unpickler.memo.clear()
6676 * and
6677 * unpickler.memo = saved_memo
6678 * Is this a good idea? Not really, but we don't want to break code that uses
6679 * it. Note that we don't implement the entire mapping API here. This is
6680 * intentional, as these should be treated as black-box implementation details.
6681 *
6682 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02006683 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006684 */
6685
Larry Hastings61272b72014-01-07 12:41:53 -08006686/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006687_pickle.UnpicklerMemoProxy.clear
6688
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006689Remove all items from memo.
Larry Hastings61272b72014-01-07 12:41:53 -08006690[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006691
Larry Hastings3cceb382014-01-04 11:09:09 -08006692static PyObject *
6693_pickle_UnpicklerMemoProxy_clear_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006694/*[clinic end generated code: output=d20cd43f4ba1fb1f input=b1df7c52e7afd9bd]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006695{
6696 _Unpickler_MemoCleanup(self->unpickler);
6697 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6698 if (self->unpickler->memo == NULL)
6699 return NULL;
6700 Py_RETURN_NONE;
6701}
6702
Larry Hastings61272b72014-01-07 12:41:53 -08006703/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006704_pickle.UnpicklerMemoProxy.copy
6705
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006706Copy the memo to a new object.
Larry Hastings61272b72014-01-07 12:41:53 -08006707[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006708
Larry Hastings3cceb382014-01-04 11:09:09 -08006709static PyObject *
6710_pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006711/*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006712{
6713 Py_ssize_t i;
6714 PyObject *new_memo = PyDict_New();
6715 if (new_memo == NULL)
6716 return NULL;
6717
6718 for (i = 0; i < self->unpickler->memo_size; i++) {
6719 int status;
6720 PyObject *key, *value;
6721
6722 value = self->unpickler->memo[i];
6723 if (value == NULL)
6724 continue;
6725
6726 key = PyLong_FromSsize_t(i);
6727 if (key == NULL)
6728 goto error;
6729 status = PyDict_SetItem(new_memo, key, value);
6730 Py_DECREF(key);
6731 if (status < 0)
6732 goto error;
6733 }
6734 return new_memo;
6735
6736error:
6737 Py_DECREF(new_memo);
6738 return NULL;
6739}
6740
Larry Hastings61272b72014-01-07 12:41:53 -08006741/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006742_pickle.UnpicklerMemoProxy.__reduce__
6743
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006744Implement pickling support.
Larry Hastings61272b72014-01-07 12:41:53 -08006745[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006746
Larry Hastings3cceb382014-01-04 11:09:09 -08006747static PyObject *
6748_pickle_UnpicklerMemoProxy___reduce___impl(UnpicklerMemoProxyObject *self)
Larry Hastings581ee362014-01-28 05:00:08 -08006749/*[clinic end generated code: output=6da34ac048d94cca input=6920862413407199]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006750{
6751 PyObject *reduce_value;
6752 PyObject *constructor_args;
Larry Hastings3cceb382014-01-04 11:09:09 -08006753 PyObject *contents = _pickle_UnpicklerMemoProxy_copy_impl(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006754 if (contents == NULL)
6755 return NULL;
6756
6757 reduce_value = PyTuple_New(2);
6758 if (reduce_value == NULL) {
6759 Py_DECREF(contents);
6760 return NULL;
6761 }
6762 constructor_args = PyTuple_New(1);
6763 if (constructor_args == NULL) {
6764 Py_DECREF(contents);
6765 Py_DECREF(reduce_value);
6766 return NULL;
6767 }
6768 PyTuple_SET_ITEM(constructor_args, 0, contents);
6769 Py_INCREF((PyObject *)&PyDict_Type);
6770 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6771 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6772 return reduce_value;
6773}
6774
6775static PyMethodDef unpicklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006776 _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
6777 _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
6778 _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006779 {NULL, NULL} /* sentinel */
6780};
6781
6782static void
6783UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
6784{
6785 PyObject_GC_UnTrack(self);
6786 Py_XDECREF(self->unpickler);
6787 PyObject_GC_Del((PyObject *)self);
6788}
6789
6790static int
6791UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
6792 visitproc visit, void *arg)
6793{
6794 Py_VISIT(self->unpickler);
6795 return 0;
6796}
6797
6798static int
6799UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
6800{
6801 Py_CLEAR(self->unpickler);
6802 return 0;
6803}
6804
6805static PyTypeObject UnpicklerMemoProxyType = {
6806 PyVarObject_HEAD_INIT(NULL, 0)
6807 "_pickle.UnpicklerMemoProxy", /*tp_name*/
6808 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
6809 0,
6810 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
6811 0, /* tp_print */
6812 0, /* tp_getattr */
6813 0, /* tp_setattr */
6814 0, /* tp_compare */
6815 0, /* tp_repr */
6816 0, /* tp_as_number */
6817 0, /* tp_as_sequence */
6818 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00006819 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006820 0, /* tp_call */
6821 0, /* tp_str */
6822 PyObject_GenericGetAttr, /* tp_getattro */
6823 PyObject_GenericSetAttr, /* tp_setattro */
6824 0, /* tp_as_buffer */
6825 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6826 0, /* tp_doc */
6827 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
6828 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
6829 0, /* tp_richcompare */
6830 0, /* tp_weaklistoffset */
6831 0, /* tp_iter */
6832 0, /* tp_iternext */
6833 unpicklerproxy_methods, /* tp_methods */
6834};
6835
6836static PyObject *
6837UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6838{
6839 UnpicklerMemoProxyObject *self;
6840
6841 self = PyObject_GC_New(UnpicklerMemoProxyObject,
6842 &UnpicklerMemoProxyType);
6843 if (self == NULL)
6844 return NULL;
6845 Py_INCREF(unpickler);
6846 self->unpickler = unpickler;
6847 PyObject_GC_Track(self);
6848 return (PyObject *)self;
6849}
6850
6851/*****************************************************************************/
6852
6853
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006854static PyObject *
6855Unpickler_get_memo(UnpicklerObject *self)
6856{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006857 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006858}
6859
6860static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006861Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006862{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006863 PyObject **new_memo;
6864 Py_ssize_t new_memo_size = 0;
6865 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006867 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006868 PyErr_SetString(PyExc_TypeError,
6869 "attribute deletion is not supported");
6870 return -1;
6871 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006872
6873 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
6874 UnpicklerObject *unpickler =
6875 ((UnpicklerMemoProxyObject *)obj)->unpickler;
6876
6877 new_memo_size = unpickler->memo_size;
6878 new_memo = _Unpickler_NewMemo(new_memo_size);
6879 if (new_memo == NULL)
6880 return -1;
6881
6882 for (i = 0; i < new_memo_size; i++) {
6883 Py_XINCREF(unpickler->memo[i]);
6884 new_memo[i] = unpickler->memo[i];
6885 }
6886 }
6887 else if (PyDict_Check(obj)) {
6888 Py_ssize_t i = 0;
6889 PyObject *key, *value;
6890
6891 new_memo_size = PyDict_Size(obj);
6892 new_memo = _Unpickler_NewMemo(new_memo_size);
6893 if (new_memo == NULL)
6894 return -1;
6895
6896 while (PyDict_Next(obj, &i, &key, &value)) {
6897 Py_ssize_t idx;
6898 if (!PyLong_Check(key)) {
6899 PyErr_SetString(PyExc_TypeError,
6900 "memo key must be integers");
6901 goto error;
6902 }
6903 idx = PyLong_AsSsize_t(key);
6904 if (idx == -1 && PyErr_Occurred())
6905 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02006906 if (idx < 0) {
6907 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02006908 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02006909 goto error;
6910 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006911 if (_Unpickler_MemoPut(self, idx, value) < 0)
6912 goto error;
6913 }
6914 }
6915 else {
6916 PyErr_Format(PyExc_TypeError,
6917 "'memo' attribute must be an UnpicklerMemoProxy object"
6918 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006919 return -1;
6920 }
6921
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006922 _Unpickler_MemoCleanup(self);
6923 self->memo_size = new_memo_size;
6924 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006925
6926 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006927
6928 error:
6929 if (new_memo_size) {
6930 i = new_memo_size;
6931 while (--i >= 0) {
6932 Py_XDECREF(new_memo[i]);
6933 }
6934 PyMem_FREE(new_memo);
6935 }
6936 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006937}
6938
6939static PyObject *
6940Unpickler_get_persload(UnpicklerObject *self)
6941{
6942 if (self->pers_func == NULL)
6943 PyErr_SetString(PyExc_AttributeError, "persistent_load");
6944 else
6945 Py_INCREF(self->pers_func);
6946 return self->pers_func;
6947}
6948
6949static int
6950Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6951{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006952 if (value == NULL) {
6953 PyErr_SetString(PyExc_TypeError,
6954 "attribute deletion is not supported");
6955 return -1;
6956 }
6957 if (!PyCallable_Check(value)) {
6958 PyErr_SetString(PyExc_TypeError,
6959 "persistent_load must be a callable taking "
6960 "one argument");
6961 return -1;
6962 }
6963
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006964 Py_INCREF(value);
Serhiy Storchakaec397562016-04-06 09:50:03 +03006965 Py_XSETREF(self->pers_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006966
6967 return 0;
6968}
6969
6970static PyGetSetDef Unpickler_getsets[] = {
6971 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6972 {"persistent_load", (getter)Unpickler_get_persload,
6973 (setter)Unpickler_set_persload},
6974 {NULL}
6975};
6976
6977static PyTypeObject Unpickler_Type = {
6978 PyVarObject_HEAD_INIT(NULL, 0)
6979 "_pickle.Unpickler", /*tp_name*/
6980 sizeof(UnpicklerObject), /*tp_basicsize*/
6981 0, /*tp_itemsize*/
6982 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6983 0, /*tp_print*/
6984 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006985 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006986 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006987 0, /*tp_repr*/
6988 0, /*tp_as_number*/
6989 0, /*tp_as_sequence*/
6990 0, /*tp_as_mapping*/
6991 0, /*tp_hash*/
6992 0, /*tp_call*/
6993 0, /*tp_str*/
6994 0, /*tp_getattro*/
6995 0, /*tp_setattro*/
6996 0, /*tp_as_buffer*/
6997 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006998 _pickle_Unpickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006999 (traverseproc)Unpickler_traverse, /*tp_traverse*/
7000 (inquiry)Unpickler_clear, /*tp_clear*/
7001 0, /*tp_richcompare*/
7002 0, /*tp_weaklistoffset*/
7003 0, /*tp_iter*/
7004 0, /*tp_iternext*/
7005 Unpickler_methods, /*tp_methods*/
7006 0, /*tp_members*/
7007 Unpickler_getsets, /*tp_getset*/
7008 0, /*tp_base*/
7009 0, /*tp_dict*/
7010 0, /*tp_descr_get*/
7011 0, /*tp_descr_set*/
7012 0, /*tp_dictoffset*/
Larry Hastingsb7ccb202014-01-18 23:50:21 -08007013 _pickle_Unpickler___init__, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007014 PyType_GenericAlloc, /*tp_alloc*/
7015 PyType_GenericNew, /*tp_new*/
7016 PyObject_GC_Del, /*tp_free*/
7017 0, /*tp_is_gc*/
7018};
7019
Larry Hastings61272b72014-01-07 12:41:53 -08007020/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007021
7022_pickle.dump
7023
7024 obj: object
7025 file: object
7026 protocol: object = NULL
7027 *
7028 fix_imports: bool = True
7029
7030Write a pickled representation of obj to the open file object file.
7031
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007032This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may
7033be more efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007034
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007035The optional *protocol* argument tells the pickler to use the given
7036protocol supported protocols are 0, 1, 2, 3 and 4. The default
7037protocol is 3; a backward-incompatible protocol designed for Python 3.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007038
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007039Specifying a negative protocol version selects the highest protocol
7040version supported. The higher the protocol used, the more recent the
7041version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007042
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007043The *file* argument must have a write() method that accepts a single
7044bytes argument. It can thus be a file object opened for binary
Martin Panter7462b6492015-11-02 03:37:02 +00007045writing, an io.BytesIO instance, or any other custom object that meets
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007046this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007047
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007048If *fix_imports* is True and protocol is less than 3, pickle will try
7049to map the new Python 3 names to the old module names used in Python
70502, so that the pickle data stream is readable with Python 2.
Larry Hastings61272b72014-01-07 12:41:53 -08007051[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007052
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007053static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007054_pickle_dump_impl(PyModuleDef *module, PyObject *obj, PyObject *file,
7055 PyObject *protocol, int fix_imports)
Martin Panter2eb819f2015-11-02 04:04:57 +00007056/*[clinic end generated code: output=0de7dff89c406816 input=830f8a64cef6f042]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007057{
7058 PicklerObject *pickler = _Pickler_New();
7059
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007060 if (pickler == NULL)
7061 return NULL;
7062
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007063 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007064 goto error;
7065
7066 if (_Pickler_SetOutputStream(pickler, file) < 0)
7067 goto error;
7068
7069 if (dump(pickler, obj) < 0)
7070 goto error;
7071
7072 if (_Pickler_FlushToFile(pickler) < 0)
7073 goto error;
7074
7075 Py_DECREF(pickler);
7076 Py_RETURN_NONE;
7077
7078 error:
7079 Py_XDECREF(pickler);
7080 return NULL;
7081}
7082
Larry Hastings61272b72014-01-07 12:41:53 -08007083/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007084
7085_pickle.dumps
7086
7087 obj: object
7088 protocol: object = NULL
7089 *
7090 fix_imports: bool = True
7091
7092Return the pickled representation of the object as a bytes object.
7093
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007094The optional *protocol* argument tells the pickler to use the given
7095protocol; supported protocols are 0, 1, 2, 3 and 4. The default
7096protocol is 3; a backward-incompatible protocol designed for Python 3.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007097
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007098Specifying a negative protocol version selects the highest protocol
7099version supported. The higher the protocol used, the more recent the
7100version of Python needed to read the pickle produced.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007101
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007102If *fix_imports* is True and *protocol* is less than 3, pickle will
7103try to map the new Python 3 names to the old module names used in
7104Python 2, so that the pickle data stream is readable with Python 2.
Larry Hastings61272b72014-01-07 12:41:53 -08007105[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007106
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007107static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007108_pickle_dumps_impl(PyModuleDef *module, PyObject *obj, PyObject *protocol,
7109 int fix_imports)
7110/*[clinic end generated code: output=daa380db56fe07b9 input=293dbeda181580b7]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007111{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007112 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007113 PicklerObject *pickler = _Pickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007114
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007115 if (pickler == NULL)
7116 return NULL;
7117
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007118 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007119 goto error;
7120
7121 if (dump(pickler, obj) < 0)
7122 goto error;
7123
7124 result = _Pickler_GetString(pickler);
7125 Py_DECREF(pickler);
7126 return result;
7127
7128 error:
7129 Py_XDECREF(pickler);
7130 return NULL;
7131}
7132
Larry Hastings61272b72014-01-07 12:41:53 -08007133/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007134
7135_pickle.load
7136
7137 file: object
7138 *
7139 fix_imports: bool = True
7140 encoding: str = 'ASCII'
7141 errors: str = 'strict'
7142
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007143Read and return an object from the pickle data stored in a file.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007144
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007145This is equivalent to ``Unpickler(file).load()``, but may be more
7146efficient.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007147
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007148The protocol version of the pickle is detected automatically, so no
7149protocol argument is needed. Bytes past the pickled object's
7150representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007151
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007152The argument *file* must have two methods, a read() method that takes
7153an integer argument, and a readline() method that requires no
7154arguments. Both methods should return bytes. Thus *file* can be a
Martin Panter7462b6492015-11-02 03:37:02 +00007155binary file object opened for reading, an io.BytesIO object, or any
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007156other custom object that meets this interface.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007157
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007158Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007159which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007160generated by Python 2. If *fix_imports* is True, pickle will try to
7161map the old Python 2 names to the new names used in Python 3. The
7162*encoding* and *errors* tell pickle how to decode 8-bit string
7163instances pickled by Python 2; these default to 'ASCII' and 'strict',
7164respectively. The *encoding* can be 'bytes' to read these 8-bit
7165string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007166[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007167
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007168static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007169_pickle_load_impl(PyModuleDef *module, PyObject *file, int fix_imports,
7170 const char *encoding, const char *errors)
Martin Panter46f50722016-05-26 05:35:26 +00007171/*[clinic end generated code: output=798f1c57cb2b4eb1 input=01b44dd3fc07afa7]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007172{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007173 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007174 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007175
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007176 if (unpickler == NULL)
7177 return NULL;
7178
7179 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7180 goto error;
7181
7182 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7183 goto error;
7184
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007185 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007186
7187 result = load(unpickler);
7188 Py_DECREF(unpickler);
7189 return result;
7190
7191 error:
7192 Py_XDECREF(unpickler);
7193 return NULL;
7194}
7195
Larry Hastings61272b72014-01-07 12:41:53 -08007196/*[clinic input]
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007197
7198_pickle.loads
7199
7200 data: object
7201 *
7202 fix_imports: bool = True
7203 encoding: str = 'ASCII'
7204 errors: str = 'strict'
7205
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007206Read and return an object from the given pickle data.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007207
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007208The protocol version of the pickle is detected automatically, so no
7209protocol argument is needed. Bytes past the pickled object's
7210representation are ignored.
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007211
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007212Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
Martin Panter46f50722016-05-26 05:35:26 +00007213which are used to control compatibility support for pickle stream
Alexandre Vassalottid05c9ff2013-12-07 01:09:27 -08007214generated by Python 2. If *fix_imports* is True, pickle will try to
7215map the old Python 2 names to the new names used in Python 3. The
7216*encoding* and *errors* tell pickle how to decode 8-bit string
7217instances pickled by Python 2; these default to 'ASCII' and 'strict',
7218respectively. The *encoding* can be 'bytes' to read these 8-bit
7219string instances as bytes objects.
Larry Hastings61272b72014-01-07 12:41:53 -08007220[clinic start generated code]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007221
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007222static PyObject *
Larry Hastings89964c42015-04-14 18:07:59 -04007223_pickle_loads_impl(PyModuleDef *module, PyObject *data, int fix_imports,
7224 const char *encoding, const char *errors)
Martin Panter46f50722016-05-26 05:35:26 +00007225/*[clinic end generated code: output=61e9cdb01e36a736 input=70605948a719feb9]*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007226{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007227 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007228 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007229
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007230 if (unpickler == NULL)
7231 return NULL;
7232
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007233 if (_Unpickler_SetStringInput(unpickler, data) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007234 goto error;
7235
7236 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7237 goto error;
7238
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007239 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007240
7241 result = load(unpickler);
7242 Py_DECREF(unpickler);
7243 return result;
7244
7245 error:
7246 Py_XDECREF(unpickler);
7247 return NULL;
7248}
7249
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007250static struct PyMethodDef pickle_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007251 _PICKLE_DUMP_METHODDEF
7252 _PICKLE_DUMPS_METHODDEF
7253 _PICKLE_LOAD_METHODDEF
7254 _PICKLE_LOADS_METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007255 {NULL, NULL} /* sentinel */
7256};
7257
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007258static int
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007259pickle_clear(PyObject *m)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007260{
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007261 _Pickle_ClearState(_Pickle_GetState(m));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007262 return 0;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007263}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007264
Stefan Krahf483b0f2013-12-14 13:43:10 +01007265static void
7266pickle_free(PyObject *m)
7267{
7268 _Pickle_ClearState(_Pickle_GetState(m));
7269}
7270
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007271static int
7272pickle_traverse(PyObject *m, visitproc visit, void *arg)
7273{
7274 PickleState *st = _Pickle_GetState(m);
7275 Py_VISIT(st->PickleError);
7276 Py_VISIT(st->PicklingError);
7277 Py_VISIT(st->UnpicklingError);
7278 Py_VISIT(st->dispatch_table);
7279 Py_VISIT(st->extension_registry);
7280 Py_VISIT(st->extension_cache);
7281 Py_VISIT(st->inverted_registry);
7282 Py_VISIT(st->name_mapping_2to3);
7283 Py_VISIT(st->import_mapping_2to3);
7284 Py_VISIT(st->name_mapping_3to2);
7285 Py_VISIT(st->import_mapping_3to2);
7286 Py_VISIT(st->codecs_encode);
Serhiy Storchaka58e41342015-03-31 14:07:24 +03007287 Py_VISIT(st->getattr);
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007288 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007289}
7290
7291static struct PyModuleDef _picklemodule = {
7292 PyModuleDef_HEAD_INIT,
Stefan Krahf483b0f2013-12-14 13:43:10 +01007293 "_pickle", /* m_name */
7294 pickle_module_doc, /* m_doc */
7295 sizeof(PickleState), /* m_size */
7296 pickle_methods, /* m_methods */
7297 NULL, /* m_reload */
7298 pickle_traverse, /* m_traverse */
7299 pickle_clear, /* m_clear */
7300 (freefunc)pickle_free /* m_free */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007301};
7302
7303PyMODINIT_FUNC
7304PyInit__pickle(void)
7305{
7306 PyObject *m;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007307 PickleState *st;
7308
7309 m = PyState_FindModule(&_picklemodule);
7310 if (m) {
7311 Py_INCREF(m);
7312 return m;
7313 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007314
7315 if (PyType_Ready(&Unpickler_Type) < 0)
7316 return NULL;
7317 if (PyType_Ready(&Pickler_Type) < 0)
7318 return NULL;
7319 if (PyType_Ready(&Pdata_Type) < 0)
7320 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007321 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7322 return NULL;
7323 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7324 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007325
7326 /* Create the module and add the functions. */
7327 m = PyModule_Create(&_picklemodule);
7328 if (m == NULL)
7329 return NULL;
7330
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007331 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007332 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7333 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007334 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007335 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7336 return NULL;
7337
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007338 st = _Pickle_GetState(m);
7339
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007340 /* Initialize the exceptions. */
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007341 st->PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7342 if (st->PickleError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007343 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007344 st->PicklingError = \
7345 PyErr_NewException("_pickle.PicklingError", st->PickleError, NULL);
7346 if (st->PicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007347 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007348 st->UnpicklingError = \
7349 PyErr_NewException("_pickle.UnpicklingError", st->PickleError, NULL);
7350 if (st->UnpicklingError == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007351 return NULL;
7352
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007353 Py_INCREF(st->PickleError);
7354 if (PyModule_AddObject(m, "PickleError", st->PickleError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007355 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007356 Py_INCREF(st->PicklingError);
7357 if (PyModule_AddObject(m, "PicklingError", st->PicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007358 return NULL;
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007359 Py_INCREF(st->UnpicklingError);
7360 if (PyModule_AddObject(m, "UnpicklingError", st->UnpicklingError) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007361 return NULL;
7362
Alexandre Vassalotti23bdd832013-11-27 19:36:52 -08007363 if (_Pickle_InitState(st) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007364 return NULL;
7365
7366 return m;
7367}