blob: b63a7d92c54ff19f5cc04f54652d79c5ce5077c4 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.
   HIGHEST_PROTOCOL must be bumped whenever new opcodes are added to the
   pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 4
};
12
/* Pickle opcodes.  Each opcode is a single byte, written here as a character
   constant.  This list must be kept in sync with pickle.py; the extensive
   per-opcode documentation lives in pickletools.py. */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 additions. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95'
};
88
/* TRUE and FALSE are not opcodes.  They are the byte sequences used to
 * pickle bools before protocol 2: an unpickler written before bools existed
 * reads them back as ints, while a newer unpickler can recognize that a bool
 * was intended.  Protocol 2 added direct ways to pickle bools.
 *
 * Any earlier definition of either name is dropped first.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
98
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE;
       nothing will break if this gets out of synch with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth beyond which a Pickler running in "fast mode" starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler's write buffer when pickling to a stream.
       Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16,

    /* Payload size at which _Pickler_OpcodeBoundary() commits the
       current frame. */
    FRAME_SIZE_TARGET = 64 * 1024,

    /* Bytes reserved for a frame header: the FRAME opcode byte plus an
       8-byte length. */
    FRAME_HEADER_SIZE = 9
};
124
125/* Exception classes for pickle. These should override the ones defined in
126 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000127static PyObject *PickleError = NULL;
128static PyObject *PicklingError = NULL;
129static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000130
131/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000132static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000133/* For EXT[124] opcodes. */
134/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000139static PyObject *extension_cache = NULL;
140
141/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
142static PyObject *name_mapping_2to3 = NULL;
143/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
144static PyObject *import_mapping_2to3 = NULL;
145/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
146static PyObject *name_mapping_3to2 = NULL;
147static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000148
149/* XXX: Are these really nescessary? */
150/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000151static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000152/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000153static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000154
155static int
156stack_underflow(void)
157{
158 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
159 return -1;
160}
161
162/* Internal data type used as the unpickling stack. */
163typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000164 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000165 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000166 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000167} Pdata;
168
169static void
170Pdata_dealloc(Pdata *self)
171{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200172 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000173 while (--i >= 0) {
174 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000175 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000176 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000177 PyObject_Del(self);
178}
179
180static PyTypeObject Pdata_Type = {
181 PyVarObject_HEAD_INIT(NULL, 0)
182 "_pickle.Pdata", /*tp_name*/
183 sizeof(Pdata), /*tp_basicsize*/
184 0, /*tp_itemsize*/
185 (destructor)Pdata_dealloc, /*tp_dealloc*/
186};
187
188static PyObject *
189Pdata_New(void)
190{
191 Pdata *self;
192
193 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
194 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000195 Py_SIZE(self) = 0;
196 self->allocated = 8;
197 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000198 if (self->data)
199 return (PyObject *)self;
200 Py_DECREF(self);
201 return PyErr_NoMemory();
202}
203
204
205/* Retain only the initial clearto items. If clearto >= the current
206 * number of items, this is a (non-erroneous) NOP.
207 */
208static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200209Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000210{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200211 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000212
213 if (clearto < 0)
214 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000215 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000216 return 0;
217
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000218 while (--i >= clearto) {
219 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000220 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000221 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 return 0;
223}
224
225static int
226Pdata_grow(Pdata *self)
227{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000228 PyObject **data = self->data;
229 Py_ssize_t allocated = self->allocated;
230 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000231
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000232 new_allocated = (allocated >> 3) + 6;
233 /* check for integer overflow */
234 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000235 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000236 new_allocated += allocated;
237 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000238 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000239 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
240 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000241 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000242
243 self->data = data;
244 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000245 return 0;
246
247 nomemory:
248 PyErr_NoMemory();
249 return -1;
250}
251
252/* D is a Pdata*. Pop the topmost element and store it into V, which
253 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
254 * is raised and V is set to NULL.
255 */
256static PyObject *
257Pdata_pop(Pdata *self)
258{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000259 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000260 PyErr_SetString(UnpicklingError, "bad pickle data");
261 return NULL;
262 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000263 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000264}
265#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
266
267static int
268Pdata_push(Pdata *self, PyObject *obj)
269{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000270 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000271 return -1;
272 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000273 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000274 return 0;
275}
276
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER immediately, so the
   caller gets no chance to release the object; the macro therefore drops
   the reference itself before returning.  O is evaluated exactly once. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_XDECREF(pdata_push_obj_);                        \
            return (ER);                                        \
        }                                                       \
    } while (0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference added here is released again before the
   enclosing function returns ER, so the object's refcount is unchanged.
   O is evaluated exactly once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while (0)
285
286static PyObject *
287Pdata_poptuple(Pdata *self, Py_ssize_t start)
288{
289 PyObject *tuple;
290 Py_ssize_t len, i, j;
291
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000292 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000293 tuple = PyTuple_New(len);
294 if (tuple == NULL)
295 return NULL;
296 for (i = start, j = 0; j < len; i++, j++)
297 PyTuple_SET_ITEM(tuple, j, self->data[i]);
298
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000299 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000300 return tuple;
301}
302
303static PyObject *
304Pdata_poplist(Pdata *self, Py_ssize_t start)
305{
306 PyObject *list;
307 Py_ssize_t len, i, j;
308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000309 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000310 list = PyList_New(len);
311 if (list == NULL)
312 return NULL;
313 for (i = start, j = 0; j < len; i++, j++)
314 PyList_SET_ITEM(list, j, self->data[i]);
315
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000316 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000317 return list;
318}
319
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320typedef struct {
321 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200322 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323} PyMemoEntry;
324
325typedef struct {
326 Py_ssize_t mt_mask;
327 Py_ssize_t mt_used;
328 Py_ssize_t mt_allocated;
329 PyMemoEntry *mt_table;
330} PyMemoTable;
331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332typedef struct PicklerObject {
333 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000334 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000335 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000336 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000337 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100338 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000339 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000340
341 PyObject *write; /* write() method of the output stream. */
342 PyObject *output_buffer; /* Write into a local bytearray buffer before
343 flushing to the stream. */
344 Py_ssize_t output_len; /* Length of output_buffer. */
345 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000346 int proto; /* Pickle protocol number, >= 0 */
347 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100348 int framing; /* True when framing is enabled, proto >= 4 */
349 Py_ssize_t frame_start; /* Position in output_buffer where the
350 where the current frame begins. -1 if there
351 is no frame currently open. */
352
353 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000354 int fast; /* Enable fast mode if set to a true value.
355 The fast mode disable the usage of memo,
356 therefore speeding the pickling process by
357 not generating superfluous PUT opcodes. It
358 should not be used if with self-referential
359 objects. */
360 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000361 int fix_imports; /* Indicate whether Pickler should fix
362 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000363 PyObject *fast_memo;
364} PicklerObject;
365
366typedef struct UnpicklerObject {
367 PyObject_HEAD
368 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000369
370 /* The unpickler memo is just an array of PyObject *s. Using a dict
371 is unnecessary, since the keys are contiguous ints. */
372 PyObject **memo;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100373 Py_ssize_t memo_size; /* Capacity of the memo array */
374 Py_ssize_t memo_len; /* Number of objects in the memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000375
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000376 PyObject *arg;
377 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000378
379 Py_buffer buffer;
380 char *input_buffer;
381 char *input_line;
382 Py_ssize_t input_len;
383 Py_ssize_t next_read_idx;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100384 Py_ssize_t frame_end_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000385 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100386
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000387 PyObject *read; /* read() method of the input stream. */
388 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000389 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000390
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000391 char *encoding; /* Name of the encoding to be used for
392 decoding strings pickled using Python
393 2.x. The default value is "ASCII" */
394 char *errors; /* Name of errors handling scheme to used when
395 decoding strings. The default value is
396 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500397 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000398 objects. */
399 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
400 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000401 int proto; /* Protocol of the pickle loaded. */
402 int fix_imports; /* Indicate whether Unpickler should fix
403 the name of globals pickled by Python 2.x. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100404 int framing; /* True when framing is enabled, proto >= 4 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000405} UnpicklerObject;
406
407/* Forward declarations */
408static int save(PicklerObject *, PyObject *, int);
409static int save_reduce(PicklerObject *, PyObject *, PyObject *);
410static PyTypeObject Pickler_Type;
411static PyTypeObject Unpickler_Type;
412
413
/*************************************************************************
 A custom hashtable mapping void* to Python ints, used by the pickler for
 memoization.  Compared with a PyDict, a custom hashtable lets us skip a
 bunch of unnecessary object creation, which makes a huge performance
 difference. */

#define MT_MINSIZE 8        /* number of slots in a freshly created table */
#define PERTURB_SHIFT 5     /* bits shifted out of `perturb` per probe */
422
423
424static PyMemoTable *
425PyMemoTable_New(void)
426{
427 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
428 if (memo == NULL) {
429 PyErr_NoMemory();
430 return NULL;
431 }
432
433 memo->mt_used = 0;
434 memo->mt_allocated = MT_MINSIZE;
435 memo->mt_mask = MT_MINSIZE - 1;
436 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
437 if (memo->mt_table == NULL) {
438 PyMem_FREE(memo);
439 PyErr_NoMemory();
440 return NULL;
441 }
442 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
443
444 return memo;
445}
446
447static PyMemoTable *
448PyMemoTable_Copy(PyMemoTable *self)
449{
450 Py_ssize_t i;
451 PyMemoTable *new = PyMemoTable_New();
452 if (new == NULL)
453 return NULL;
454
455 new->mt_used = self->mt_used;
456 new->mt_allocated = self->mt_allocated;
457 new->mt_mask = self->mt_mask;
458 /* The table we get from _New() is probably smaller than we wanted.
459 Free it and allocate one that's the right size. */
460 PyMem_FREE(new->mt_table);
461 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
462 if (new->mt_table == NULL) {
463 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200464 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000465 return NULL;
466 }
467 for (i = 0; i < self->mt_allocated; i++) {
468 Py_XINCREF(self->mt_table[i].me_key);
469 }
470 memcpy(new->mt_table, self->mt_table,
471 sizeof(PyMemoEntry) * self->mt_allocated);
472
473 return new;
474}
475
476static Py_ssize_t
477PyMemoTable_Size(PyMemoTable *self)
478{
479 return self->mt_used;
480}
481
482static int
483PyMemoTable_Clear(PyMemoTable *self)
484{
485 Py_ssize_t i = self->mt_allocated;
486
487 while (--i >= 0) {
488 Py_XDECREF(self->mt_table[i].me_key);
489 }
490 self->mt_used = 0;
491 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
492 return 0;
493}
494
495static void
496PyMemoTable_Del(PyMemoTable *self)
497{
498 if (self == NULL)
499 return;
500 PyMemoTable_Clear(self);
501
502 PyMem_FREE(self->mt_table);
503 PyMem_FREE(self);
504}
505
506/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
507 can be considerably simpler than dictobject.c's lookdict(). */
508static PyMemoEntry *
509_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
510{
511 size_t i;
512 size_t perturb;
513 size_t mask = (size_t)self->mt_mask;
514 PyMemoEntry *table = self->mt_table;
515 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000516 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000517
518 i = hash & mask;
519 entry = &table[i];
520 if (entry->me_key == NULL || entry->me_key == key)
521 return entry;
522
523 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
524 i = (i << 2) + i + perturb + 1;
525 entry = &table[i & mask];
526 if (entry->me_key == NULL || entry->me_key == key)
527 return entry;
528 }
529 assert(0); /* Never reached */
530 return NULL;
531}
532
533/* Returns -1 on failure, 0 on success. */
534static int
535_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
536{
537 PyMemoEntry *oldtable = NULL;
538 PyMemoEntry *oldentry, *newentry;
539 Py_ssize_t new_size = MT_MINSIZE;
540 Py_ssize_t to_process;
541
542 assert(min_size > 0);
543
544 /* Find the smallest valid table size >= min_size. */
545 while (new_size < min_size && new_size > 0)
546 new_size <<= 1;
547 if (new_size <= 0) {
548 PyErr_NoMemory();
549 return -1;
550 }
551 /* new_size needs to be a power of two. */
552 assert((new_size & (new_size - 1)) == 0);
553
554 /* Allocate new table. */
555 oldtable = self->mt_table;
556 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
557 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200558 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000559 PyErr_NoMemory();
560 return -1;
561 }
562 self->mt_allocated = new_size;
563 self->mt_mask = new_size - 1;
564 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
565
566 /* Copy entries from the old table. */
567 to_process = self->mt_used;
568 for (oldentry = oldtable; to_process > 0; oldentry++) {
569 if (oldentry->me_key != NULL) {
570 to_process--;
571 /* newentry is a pointer to a chunk of the new
572 mt_table, so we're setting the key:value pair
573 in-place. */
574 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
575 newentry->me_key = oldentry->me_key;
576 newentry->me_value = oldentry->me_value;
577 }
578 }
579
580 /* Deallocate the old table. */
581 PyMem_FREE(oldtable);
582 return 0;
583}
584
585/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200586static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000587PyMemoTable_Get(PyMemoTable *self, PyObject *key)
588{
589 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
590 if (entry->me_key == NULL)
591 return NULL;
592 return &entry->me_value;
593}
594
595/* Returns -1 on failure, 0 on success. */
596static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200597PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000598{
599 PyMemoEntry *entry;
600
601 assert(key != NULL);
602
603 entry = _PyMemoTable_Lookup(self, key);
604 if (entry->me_key != NULL) {
605 entry->me_value = value;
606 return 0;
607 }
608 Py_INCREF(key);
609 entry->me_key = key;
610 entry->me_value = value;
611 self->mt_used++;
612
613 /* If we added a key, we can safely resize. Otherwise just return!
614 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
615 *
616 * Quadrupling the size improves average table sparseness
617 * (reducing collisions) at the cost of some memory. It also halves
618 * the number of expensive resize operations in a growing memo table.
619 *
620 * Very large memo tables (over 50K items) use doubling instead.
621 * This may help applications with severe memory constraints.
622 */
623 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
624 return 0;
625 return _PyMemoTable_ResizeTable(self,
626 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
627}
628
/* The hashtable tuning macros are private to the memo table code above. */
#undef PERTURB_SHIFT
#undef MT_MINSIZE
631
632/*************************************************************************/
633
/* Helpers managing the one-element argument tuple cached in (self)->arg,
   which is passed to called functions.  Caching has the performance
   advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj) creates the cached tuple if needed, releases the item
   it previously held and stores obj (stealing the reference) in slot 0.
   If the tuple cannot be created, obj is released and (self)->arg stays
   NULL.

   FREE_ARG_TUP(self) drops the cached tuple when something else also holds
   a reference to it.  (self)->arg must not be NULL.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

#define FREE_ARG_TUP(self) do {                                 \
        if (Py_REFCNT((self)->arg) > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
653
654/* A temporary cleaner API for fast single argument function call.
655
656 XXX: Does caching the argument tuple provides any real performance benefits?
657
658 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
659 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
660 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
661 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
662 (i.e, call PyTuple_New() and store the returned value in an array), to save
663 one second (wall clock time). Either ways, the loading time a pickle stream
664 large enough to generate this number of calls would be massively
665 overwhelmed by other factors, like I/O throughput, the GC traversal and
666 object allocation overhead. So, I really doubt these functions provide any
667 real benefits.
668
669 On the other hand, oprofile reports that pickle spends a lot of time in
670 these functions. But, that is probably more related to the function call
671 overhead, than the argument tuple allocation.
672
673 XXX: And, what is the reference behavior of these? Steal, borrow? At first
674 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000675 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000676static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000677_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000678{
679 PyObject *result = NULL;
680
681 ARG_TUP(self, arg);
682 if (self->arg) {
683 result = PyObject_Call(func, self->arg, NULL);
684 FREE_ARG_TUP(self);
685 }
686 return result;
687}
688
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000689static int
690_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000691{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000692 Py_CLEAR(self->output_buffer);
693 self->output_buffer =
694 PyBytes_FromStringAndSize(NULL, self->max_output_len);
695 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000696 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000697 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100698 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000699 return 0;
700}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000701
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100702static void
703_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
704{
705 qdata[0] = (unsigned char)FRAME;
706 qdata[1] = (unsigned char)(frame_len & 0xff);
707 qdata[2] = (unsigned char)((frame_len >> 8) & 0xff);
708 qdata[3] = (unsigned char)((frame_len >> 16) & 0xff);
709 qdata[4] = (unsigned char)((frame_len >> 24) & 0xff);
710 qdata[5] = (unsigned char)((frame_len >> 32) & 0xff);
711 qdata[6] = (unsigned char)((frame_len >> 40) & 0xff);
712 qdata[7] = (unsigned char)((frame_len >> 48) & 0xff);
713 qdata[8] = (unsigned char)((frame_len >> 56) & 0xff);
714}
715
716static int
717_Pickler_CommitFrame(PicklerObject *self)
718{
719 size_t frame_len;
720 char *qdata;
721
722 if (!self->framing || self->frame_start == -1)
723 return 0;
724 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
725 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
726 _Pickler_WriteFrameHeader(self, qdata, frame_len);
727 self->frame_start = -1;
728 return 0;
729}
730
731static int
732_Pickler_OpcodeBoundary(PicklerObject *self)
733{
734 Py_ssize_t frame_len;
735
736 if (!self->framing || self->frame_start == -1)
737 return 0;
738 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
739 if (frame_len >= FRAME_SIZE_TARGET)
740 return _Pickler_CommitFrame(self);
741 else
742 return 0;
743}
744
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000745static PyObject *
746_Pickler_GetString(PicklerObject *self)
747{
748 PyObject *output_buffer = self->output_buffer;
749
750 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100751
752 if (_Pickler_CommitFrame(self))
753 return NULL;
754
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000755 self->output_buffer = NULL;
756 /* Resize down to exact size */
757 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
758 return NULL;
759 return output_buffer;
760}
761
762static int
763_Pickler_FlushToFile(PicklerObject *self)
764{
765 PyObject *output, *result;
766
767 assert(self->write != NULL);
768
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100769 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000770 output = _Pickler_GetString(self);
771 if (output == NULL)
772 return -1;
773
774 result = _Pickler_FastCall(self, self->write, output);
775 Py_XDECREF(result);
776 return (result == NULL) ? -1 : 0;
777}
778
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200779static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100780_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000781{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100782 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000783 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100784 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000785
786 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100787 need_new_frame = (self->framing && self->frame_start == -1);
788
789 if (need_new_frame)
790 n = data_len + FRAME_HEADER_SIZE;
791 else
792 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000793
794 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100795 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
796 /* XXX This reallocates a new buffer every time, which is a bit
797 wasteful. */
798 if (_Pickler_FlushToFile(self) < 0)
799 return -1;
800 if (_Pickler_ClearBuffer(self) < 0)
801 return -1;
802 /* The previous frame was just committed by _Pickler_FlushToFile */
803 need_new_frame = self->framing;
804 if (need_new_frame)
805 n = data_len + FRAME_HEADER_SIZE;
806 else
807 n = data_len;
808 required = self->output_len + n;
809 }
810 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
811 /* For large pickle chunks, we write directly to the output
812 file instead of buffering. Note the buffer is empty at this
813 point (it was flushed above, since required >= n). */
814 PyObject *output, *result;
815 if (need_new_frame) {
816 char frame_header[FRAME_HEADER_SIZE];
817 _Pickler_WriteFrameHeader(self, frame_header, (size_t) data_len);
818 output = PyBytes_FromStringAndSize(frame_header, FRAME_HEADER_SIZE);
819 if (output == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 result = _Pickler_FastCall(self, self->write, output);
822 Py_XDECREF(result);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100823 if (result == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000824 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000825 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100826 /* XXX we could spare an intermediate copy and pass
827 a memoryview instead */
828 output = PyBytes_FromStringAndSize(s, data_len);
829 if (output == NULL)
830 return -1;
831 result = _Pickler_FastCall(self, self->write, output);
832 Py_XDECREF(result);
833 return (result == NULL) ? -1 : 0;
834 }
835 if (required > self->max_output_len) {
836 /* Make place in buffer for the pickle chunk */
837 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
838 PyErr_NoMemory();
839 return -1;
840 }
841 self->max_output_len = (self->output_len + n) / 2 * 3;
842 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
843 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000844 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000845 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100846 if (need_new_frame) {
847 /* Setup new frame */
848 Py_ssize_t frame_start = self->output_len;
849 self->frame_start = frame_start;
850 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
851 /* Write an invalid value, for debugging */
852 buffer[frame_start + i] = 0xFE;
853 }
854 self->output_len += FRAME_HEADER_SIZE;
855 }
856 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000857 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100858 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000859 buffer[self->output_len + i] = s[i];
860 }
861 }
862 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100863 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000864 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100865 self->output_len += data_len;
866 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000867}
868
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869static PicklerObject *
870_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000871{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000872 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000873
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000874 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
875 if (self == NULL)
876 return NULL;
877
878 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100879 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000880 self->arg = NULL;
881 self->write = NULL;
882 self->proto = 0;
883 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100884 self->framing = 0;
885 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000886 self->fast = 0;
887 self->fast_nesting = 0;
888 self->fix_imports = 0;
889 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890 self->max_output_len = WRITE_BUF_SIZE;
891 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200892
893 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000894 self->output_buffer = PyBytes_FromStringAndSize(NULL,
895 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200896
897 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200898 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000899 return NULL;
900 }
901 return self;
902}
903
904static int
905_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
906 PyObject *fix_imports_obj)
907{
908 long proto = 0;
909 int fix_imports;
910
911 if (proto_obj == NULL || proto_obj == Py_None)
912 proto = DEFAULT_PROTOCOL;
913 else {
914 proto = PyLong_AsLong(proto_obj);
915 if (proto == -1 && PyErr_Occurred())
916 return -1;
917 }
918 if (proto < 0)
919 proto = HIGHEST_PROTOCOL;
920 if (proto > HIGHEST_PROTOCOL) {
921 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
922 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000923 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000924 }
925 fix_imports = PyObject_IsTrue(fix_imports_obj);
926 if (fix_imports == -1)
927 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200928
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000929 self->proto = proto;
930 self->bin = proto > 0;
931 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000932
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000933 return 0;
934}
935
936/* Returns -1 (with an exception set) on failure, 0 on success. This may
937 be called once on a freshly created Pickler. */
938static int
939_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
940{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200941 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000942 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200943 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000944 if (self->write == NULL) {
945 if (PyErr_ExceptionMatches(PyExc_AttributeError))
946 PyErr_SetString(PyExc_TypeError,
947 "file must have a 'write' attribute");
948 return -1;
949 }
950
951 return 0;
952}
953
954/* See documentation for _Pickler_FastCall(). */
955static PyObject *
956_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
957{
958 PyObject *result = NULL;
959
960 ARG_TUP(self, arg);
961 if (self->arg) {
962 result = PyObject_Call(func, self->arg, NULL);
963 FREE_ARG_TUP(self);
964 }
965 return result;
966}
967
968/* Returns the size of the input on success, -1 on failure. This takes its
969 own reference to `input`. */
970static Py_ssize_t
971_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
972{
973 if (self->buffer.buf != NULL)
974 PyBuffer_Release(&self->buffer);
975 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
976 return -1;
977 self->input_buffer = self->buffer.buf;
978 self->input_len = self->buffer.len;
979 self->next_read_idx = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100980 self->frame_end_idx = -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000981 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000982 return self->input_len;
983}
984
Antoine Pitrou04248a82010-10-12 20:51:21 +0000985static int
986_Unpickler_SkipConsumed(UnpicklerObject *self)
987{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100988 Py_ssize_t consumed;
989 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000990
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100991 consumed = self->next_read_idx - self->prefetched_idx;
992 if (consumed <= 0)
993 return 0;
994
995 assert(self->peek); /* otherwise we did something wrong */
996 /* This makes an useless copy... */
997 r = PyObject_CallFunction(self->read, "n", consumed);
998 if (r == NULL)
999 return -1;
1000 Py_DECREF(r);
1001
1002 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001003 return 0;
1004}
1005
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006static const Py_ssize_t READ_WHOLE_LINE = -1;
1007
1008/* If reading from a file, we need to only pull the bytes we need, since there
1009 may be multiple pickle objects arranged contiguously in the same input
1010 buffer.
1011
1012 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
1013 bytes from the input stream/buffer.
1014
1015 Update the unpickler's input buffer with the newly-read data. Returns -1 on
1016 failure; on success, returns the number of bytes read from the file.
1017
1018 On success, self->input_len will be 0; this is intentional so that when
1019 unpickling from a file, the "we've run out of data" code paths will trigger,
1020 causing the Unpickler to go back to the file for more data. Use the returned
1021 size to tell you how much data you can process. */
1022static Py_ssize_t
1023_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
1024{
1025 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001026 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001027
1028 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +02001029
Antoine Pitrou04248a82010-10-12 20:51:21 +00001030 if (_Unpickler_SkipConsumed(self) < 0)
1031 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001032
1033 if (n == READ_WHOLE_LINE)
1034 data = PyObject_Call(self->readline, empty_tuple, NULL);
1035 else {
1036 PyObject *len = PyLong_FromSsize_t(n);
1037 if (len == NULL)
1038 return -1;
1039 data = _Unpickler_FastCall(self, self->read, len);
1040 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001041 if (data == NULL)
1042 return -1;
1043
Antoine Pitrou04248a82010-10-12 20:51:21 +00001044 /* Prefetch some data without advancing the file pointer, if possible */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001045 if (self->peek && !self->framing) {
Antoine Pitrou04248a82010-10-12 20:51:21 +00001046 PyObject *len, *prefetched;
1047 len = PyLong_FromSsize_t(PREFETCH);
1048 if (len == NULL) {
1049 Py_DECREF(data);
1050 return -1;
1051 }
1052 prefetched = _Unpickler_FastCall(self, self->peek, len);
1053 if (prefetched == NULL) {
1054 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
1055 /* peek() is probably not supported by the given file object */
1056 PyErr_Clear();
1057 Py_CLEAR(self->peek);
1058 }
1059 else {
1060 Py_DECREF(data);
1061 return -1;
1062 }
1063 }
1064 else {
1065 assert(PyBytes_Check(prefetched));
1066 prefetched_size = PyBytes_GET_SIZE(prefetched);
1067 PyBytes_ConcatAndDel(&data, prefetched);
1068 if (data == NULL)
1069 return -1;
1070 }
1071 }
1072
1073 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001074 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001075 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001076 return read_size;
1077}
1078
1079/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
1080
1081 This should be used for all data reads, rather than accessing the unpickler's
1082 input buffer directly. This method deals correctly with reading from input
1083 streams, which the input buffer doesn't deal with.
1084
1085 Note that when reading from a file-like object, self->next_read_idx won't
1086 be updated (it should remain at 0 for the entire unpickling process). You
1087 should use this function's return value to know how many bytes you can
1088 consume.
1089
1090 Returns -1 (with an exception set) on failure. On success, return the
1091 number of chars read. */
1092static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001093_Unpickler_ReadUnframed(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001094{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001095 Py_ssize_t num_read;
1096
Antoine Pitrou04248a82010-10-12 20:51:21 +00001097 if (self->next_read_idx + n <= self->input_len) {
1098 *s = self->input_buffer + self->next_read_idx;
1099 self->next_read_idx += n;
1100 return n;
1101 }
1102 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001103 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +00001104 return -1;
1105 }
Antoine Pitrou04248a82010-10-12 20:51:21 +00001106 num_read = _Unpickler_ReadFromFile(self, n);
1107 if (num_read < 0)
1108 return -1;
1109 if (num_read < n) {
1110 PyErr_Format(PyExc_EOFError, "Ran out of input");
1111 return -1;
1112 }
1113 *s = self->input_buffer;
1114 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001115 return n;
1116}
1117
1118static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001119_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
1120{
1121 if (self->framing &&
1122 (self->frame_end_idx == -1 ||
1123 self->frame_end_idx <= self->next_read_idx)) {
1124 /* Need to read new frame */
1125 char *dummy;
1126 unsigned char *frame_start;
1127 size_t frame_len;
1128 if (_Unpickler_ReadUnframed(self, &dummy, FRAME_HEADER_SIZE) < 0)
1129 return -1;
1130 frame_start = (unsigned char *) dummy;
1131 if (frame_start[0] != (unsigned char)FRAME) {
1132 PyErr_Format(UnpicklingError,
1133 "expected FRAME opcode, got 0x%x instead",
1134 frame_start[0]);
1135 return -1;
1136 }
1137 frame_len = (size_t) frame_start[1];
1138 frame_len |= (size_t) frame_start[2] << 8;
1139 frame_len |= (size_t) frame_start[3] << 16;
1140 frame_len |= (size_t) frame_start[4] << 24;
1141#if SIZEOF_SIZE_T >= 8
1142 frame_len |= (size_t) frame_start[5] << 32;
1143 frame_len |= (size_t) frame_start[6] << 40;
1144 frame_len |= (size_t) frame_start[7] << 48;
1145 frame_len |= (size_t) frame_start[8] << 56;
1146#else
1147 if (frame_start[5] || frame_start[6] ||
1148 frame_start[7] || frame_start[8]) {
1149 PyErr_Format(PyExc_OverflowError,
1150 "Frame size too large for 32-bit build");
1151 return -1;
1152 }
1153#endif
1154 if (frame_len > PY_SSIZE_T_MAX) {
1155 PyErr_Format(UnpicklingError, "Invalid frame length");
1156 return -1;
1157 }
1158 if (frame_len < n) {
1159 PyErr_Format(UnpicklingError, "Bad framing");
1160 return -1;
1161 }
1162 if (_Unpickler_ReadUnframed(self, &dummy /* unused */,
1163 frame_len) < 0)
1164 return -1;
1165 /* Rewind to start of frame */
1166 self->frame_end_idx = self->next_read_idx;
1167 self->next_read_idx -= frame_len;
1168 }
1169 if (self->framing) {
1170 /* Check for bad input */
1171 if (n + self->next_read_idx > self->frame_end_idx) {
1172 PyErr_Format(UnpicklingError, "Bad framing");
1173 return -1;
1174 }
1175 }
1176 return _Unpickler_ReadUnframed(self, s, n);
1177}
1178
1179static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001180_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1181 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001182{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001184 if (input_line == NULL) {
1185 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001186 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001187 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001188
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001189 memcpy(input_line, line, len);
1190 input_line[len] = '\0';
1191 self->input_line = input_line;
1192 *result = self->input_line;
1193 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001194}
1195
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001196/* Read a line from the input stream/buffer. If we run off the end of the input
1197 before hitting \n, return the data we found.
1198
1199 Returns the number of chars read, or -1 on failure. */
1200static Py_ssize_t
1201_Unpickler_Readline(UnpicklerObject *self, char **result)
1202{
1203 Py_ssize_t i, num_read;
1204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001205 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001206 if (self->input_buffer[i] == '\n') {
1207 char *line_start = self->input_buffer + self->next_read_idx;
1208 num_read = i - self->next_read_idx + 1;
1209 self->next_read_idx = i + 1;
1210 return _Unpickler_CopyLine(self, line_start, num_read, result);
1211 }
1212 }
1213 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001214 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1215 if (num_read < 0)
1216 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001217 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001218 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001219 }
Victor Stinner121aab42011-09-29 23:40:53 +02001220
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001221 /* If we get here, we've run off the end of the input string. Return the
1222 remaining string and let the caller figure it out. */
1223 *result = self->input_buffer + self->next_read_idx;
1224 num_read = i - self->next_read_idx;
1225 self->next_read_idx = i;
1226 return num_read;
1227}
1228
1229/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1230 will be modified in place. */
1231static int
1232_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1233{
1234 Py_ssize_t i;
1235 PyObject **memo;
1236
1237 assert(new_size > self->memo_size);
1238
1239 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1240 if (memo == NULL) {
1241 PyErr_NoMemory();
1242 return -1;
1243 }
1244 self->memo = memo;
1245 for (i = self->memo_size; i < new_size; i++)
1246 self->memo[i] = NULL;
1247 self->memo_size = new_size;
1248 return 0;
1249}
1250
1251/* Returns NULL if idx is out of bounds. */
1252static PyObject *
1253_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1254{
1255 if (idx < 0 || idx >= self->memo_size)
1256 return NULL;
1257
1258 return self->memo[idx];
1259}
1260
1261/* Returns -1 (with an exception set) on failure, 0 on success.
1262 This takes its own reference to `value`. */
1263static int
1264_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1265{
1266 PyObject *old_item;
1267
1268 if (idx >= self->memo_size) {
1269 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1270 return -1;
1271 assert(idx < self->memo_size);
1272 }
1273 Py_INCREF(value);
1274 old_item = self->memo[idx];
1275 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001276 if (old_item != NULL) {
1277 Py_DECREF(old_item);
1278 }
1279 else {
1280 self->memo_len++;
1281 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001282 return 0;
1283}
1284
1285static PyObject **
1286_Unpickler_NewMemo(Py_ssize_t new_size)
1287{
1288 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001289 if (memo == NULL) {
1290 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001291 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001292 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001293 memset(memo, 0, new_size * sizeof(PyObject *));
1294 return memo;
1295}
1296
1297/* Free the unpickler's memo, taking care to decref any items left in it. */
1298static void
1299_Unpickler_MemoCleanup(UnpicklerObject *self)
1300{
1301 Py_ssize_t i;
1302 PyObject **memo = self->memo;
1303
1304 if (self->memo == NULL)
1305 return;
1306 self->memo = NULL;
1307 i = self->memo_size;
1308 while (--i >= 0) {
1309 Py_XDECREF(memo[i]);
1310 }
1311 PyMem_FREE(memo);
1312}
1313
1314static UnpicklerObject *
1315_Unpickler_New(void)
1316{
1317 UnpicklerObject *self;
1318
1319 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1320 if (self == NULL)
1321 return NULL;
1322
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001323 self->arg = NULL;
1324 self->pers_func = NULL;
1325 self->input_buffer = NULL;
1326 self->input_line = NULL;
1327 self->input_len = 0;
1328 self->next_read_idx = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001329 self->frame_end_idx = -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001330 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001331 self->read = NULL;
1332 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001333 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001334 self->encoding = NULL;
1335 self->errors = NULL;
1336 self->marks = NULL;
1337 self->num_marks = 0;
1338 self->marks_size = 0;
1339 self->proto = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001340 self->framing = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001341 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001342 memset(&self->buffer, 0, sizeof(Py_buffer));
1343 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001344 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001345 self->memo = _Unpickler_NewMemo(self->memo_size);
1346 self->stack = (Pdata *)Pdata_New();
1347
1348 if (self->memo == NULL || self->stack == NULL) {
1349 Py_DECREF(self);
1350 return NULL;
1351 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001352
1353 return self;
1354}
1355
1356/* Returns -1 (with an exception set) on failure, 0 on success. This may
1357 be called once on a freshly created Pickler. */
1358static int
1359_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1360{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001361 _Py_IDENTIFIER(peek);
1362 _Py_IDENTIFIER(read);
1363 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001364
1365 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001366 if (self->peek == NULL) {
1367 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1368 PyErr_Clear();
1369 else
1370 return -1;
1371 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001372 self->read = _PyObject_GetAttrId(file, &PyId_read);
1373 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001374 if (self->readline == NULL || self->read == NULL) {
1375 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1376 PyErr_SetString(PyExc_TypeError,
1377 "file must have 'read' and 'readline' attributes");
1378 Py_CLEAR(self->read);
1379 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001380 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001381 return -1;
1382 }
1383 return 0;
1384}
1385
1386/* Returns -1 (with an exception set) on failure, 0 on success. This may
1387 be called once on a freshly created Pickler. */
1388static int
1389_Unpickler_SetInputEncoding(UnpicklerObject *self,
1390 const char *encoding,
1391 const char *errors)
1392{
1393 if (encoding == NULL)
1394 encoding = "ASCII";
1395 if (errors == NULL)
1396 errors = "strict";
1397
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001398 self->encoding = _PyMem_Strdup(encoding);
1399 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001400 if (self->encoding == NULL || self->errors == NULL) {
1401 PyErr_NoMemory();
1402 return -1;
1403 }
1404 return 0;
1405}
1406
1407/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001408static int
1409memo_get(PicklerObject *self, PyObject *key)
1410{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001411 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001412 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001413 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001414
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001415 value = PyMemoTable_Get(self->memo, key);
1416 if (value == NULL) {
1417 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001418 return -1;
1419 }
1420
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001421 if (!self->bin) {
1422 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001423 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1424 "%" PY_FORMAT_SIZE_T "d\n", *value);
1425 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001426 }
1427 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001428 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001429 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001430 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001431 len = 2;
1432 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001433 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001434 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001435 pdata[1] = (unsigned char)(*value & 0xff);
1436 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1437 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1438 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001439 len = 5;
1440 }
1441 else { /* unlikely */
1442 PyErr_SetString(PicklingError,
1443 "memo id too large for LONG_BINGET");
1444 return -1;
1445 }
1446 }
1447
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001448 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001449 return -1;
1450
1451 return 0;
1452}
1453
1454/* Store an object in the memo, assign it a new unique ID based on the number
1455 of objects currently stored in the memo and generate a PUT opcode. */
1456static int
1457memo_put(PicklerObject *self, PyObject *obj)
1458{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001459 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001460 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001461 Py_ssize_t idx;
1462
1463 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001464
1465 if (self->fast)
1466 return 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001467 if (_Pickler_OpcodeBoundary(self))
1468 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001469
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001470 idx = PyMemoTable_Size(self->memo);
1471 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1472 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001473
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001474 if (self->proto >= 4) {
1475 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1476 return -1;
1477 return 0;
1478 }
1479 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001480 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001481 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001482 "%" PY_FORMAT_SIZE_T "d\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001483 len = strlen(pdata);
1484 }
1485 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001486 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001487 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001488 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001489 len = 2;
1490 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001491 else if (idx <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001492 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001493 pdata[1] = (unsigned char)(idx & 0xff);
1494 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1495 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1496 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001497 len = 5;
1498 }
1499 else { /* unlikely */
1500 PyErr_SetString(PicklingError,
1501 "memo id too large for LONG_BINPUT");
1502 return -1;
1503 }
1504 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001505 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001506 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001507
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001508 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001509}
1510
1511static PyObject *
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001512getattribute(PyObject *obj, PyObject *name, int allow_qualname) {
1513 PyObject *dotted_path;
1514 Py_ssize_t i;
1515 _Py_static_string(PyId_dot, ".");
1516 _Py_static_string(PyId_locals, "<locals>");
1517
1518 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1519 if (dotted_path == NULL) {
1520 return NULL;
1521 }
1522 assert(Py_SIZE(dotted_path) >= 1);
1523 if (!allow_qualname && Py_SIZE(dotted_path) > 1) {
1524 PyErr_Format(PyExc_AttributeError,
1525 "Can't get qualified attribute %R on %R;"
1526 "use protocols >= 4 to enable support",
1527 name, obj);
1528 Py_DECREF(dotted_path);
1529 return NULL;
1530 }
1531 Py_INCREF(obj);
1532 for (i = 0; i < Py_SIZE(dotted_path); i++) {
1533 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1534 PyObject *tmp;
1535 PyObject *result = PyUnicode_RichCompare(
1536 subpath, _PyUnicode_FromId(&PyId_locals), Py_EQ);
1537 int is_equal = (result == Py_True);
1538 assert(PyBool_Check(result));
1539 Py_DECREF(result);
1540 if (is_equal) {
1541 PyErr_Format(PyExc_AttributeError,
1542 "Can't get local attribute %R on %R", name, obj);
1543 Py_DECREF(dotted_path);
1544 Py_DECREF(obj);
1545 return NULL;
1546 }
1547 tmp = PyObject_GetAttr(obj, subpath);
1548 Py_DECREF(obj);
1549 if (tmp == NULL) {
1550 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
1551 PyErr_Clear();
1552 PyErr_Format(PyExc_AttributeError,
1553 "Can't get attribute %R on %R", name, obj);
1554 }
1555 Py_DECREF(dotted_path);
1556 return NULL;
1557 }
1558 obj = tmp;
1559 }
1560 Py_DECREF(dotted_path);
1561 return obj;
1562}
1563
1564static PyObject *
1565whichmodule(PyObject *global, PyObject *global_name, int allow_qualname)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001566{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001567 PyObject *module_name;
1568 PyObject *modules_dict;
1569 PyObject *module;
1570 PyObject *obj;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001571 Py_ssize_t i, j;
1572 _Py_IDENTIFIER(__module__);
1573 _Py_IDENTIFIER(modules);
1574 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001575
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001576 module_name = _PyObject_GetAttrId(global, &PyId___module__);
1577
1578 if (module_name == NULL) {
1579 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001580 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001581 PyErr_Clear();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001582 }
1583 else {
1584 /* In some rare cases (e.g., bound methods of extension types),
1585 __module__ can be None. If it is so, then search sys.modules for
1586 the module of global. */
1587 if (module_name != Py_None)
1588 return module_name;
1589 Py_CLEAR(module_name);
1590 }
1591 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001592
Victor Stinnerbb520202013-11-06 22:40:41 +01001593 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02001594 if (modules_dict == NULL) {
1595 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001596 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001597 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001598
1599 i = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001600 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001601 PyObject *result = PyUnicode_RichCompare(
1602 module_name, _PyUnicode_FromId(&PyId___main__), Py_EQ);
1603 int is_equal = (result == Py_True);
1604 assert(PyBool_Check(result));
1605 Py_DECREF(result);
1606 if (is_equal)
1607 continue;
1608 if (module == Py_None)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001609 continue;
1610
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001611 obj = getattribute(module, global_name, allow_qualname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001612 if (obj == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001613 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001614 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001615 PyErr_Clear();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001616 continue;
1617 }
1618
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001619 if (obj == global) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001620 Py_DECREF(obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001621 Py_INCREF(module_name);
1622 return module_name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001624 Py_DECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001625 }
1626
1627 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001628 module_name = _PyUnicode_FromId(&PyId___main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001629 Py_INCREF(module_name);
1630 return module_name;
1631}
1632
1633/* fast_save_enter() and fast_save_leave() are guards against recursive
1634 objects when Pickler is used with the "fast mode" (i.e., with object
1635 memoization disabled). If the nesting of a list or dict object exceed
1636 FAST_NESTING_LIMIT, these guards will start keeping an internal
1637 reference to the seen list or dict objects and check whether these objects
1638 are recursive. These are not strictly necessary, since save() has a
1639 hard-coded recursion limit, but they give a nicer error message than the
1640 typical RuntimeError. */
1641static int
1642fast_save_enter(PicklerObject *self, PyObject *obj)
1643{
1644 /* if fast_nesting < 0, we're doing an error exit. */
1645 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1646 PyObject *key = NULL;
1647 if (self->fast_memo == NULL) {
1648 self->fast_memo = PyDict_New();
1649 if (self->fast_memo == NULL) {
1650 self->fast_nesting = -1;
1651 return 0;
1652 }
1653 }
1654 key = PyLong_FromVoidPtr(obj);
1655 if (key == NULL)
1656 return 0;
1657 if (PyDict_GetItem(self->fast_memo, key)) {
1658 Py_DECREF(key);
1659 PyErr_Format(PyExc_ValueError,
1660 "fast mode: can't pickle cyclic objects "
1661 "including object type %.200s at %p",
1662 obj->ob_type->tp_name, obj);
1663 self->fast_nesting = -1;
1664 return 0;
1665 }
1666 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1667 Py_DECREF(key);
1668 self->fast_nesting = -1;
1669 return 0;
1670 }
1671 Py_DECREF(key);
1672 }
1673 return 1;
1674}
1675
1676static int
1677fast_save_leave(PicklerObject *self, PyObject *obj)
1678{
1679 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1680 PyObject *key = PyLong_FromVoidPtr(obj);
1681 if (key == NULL)
1682 return 0;
1683 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1684 Py_DECREF(key);
1685 return 0;
1686 }
1687 Py_DECREF(key);
1688 }
1689 return 1;
1690}
1691
1692static int
1693save_none(PicklerObject *self, PyObject *obj)
1694{
1695 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001696 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001697 return -1;
1698
1699 return 0;
1700}
1701
1702static int
1703save_bool(PicklerObject *self, PyObject *obj)
1704{
1705 static const char *buf[2] = { FALSE, TRUE };
1706 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1707 int p = (obj == Py_True);
1708
1709 if (self->proto >= 2) {
1710 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001711 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001712 return -1;
1713 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001714 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001715 return -1;
1716
1717 return 0;
1718}
1719
1720static int
1721save_int(PicklerObject *self, long x)
1722{
1723 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001724 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001725
1726 if (!self->bin
1727#if SIZEOF_LONG > 4
1728 || x > 0x7fffffffL || x < -0x80000000L
1729#endif
1730 ) {
1731 /* Text-mode pickle, or long too big to fit in the 4-byte
1732 * signed BININT format: store as a string.
1733 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001734 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1735 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001736 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001737 return -1;
1738 }
1739 else {
1740 /* Binary pickle and x fits in a signed 4-byte int. */
1741 pdata[1] = (unsigned char)(x & 0xff);
1742 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1743 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1744 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1745
1746 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1747 if (pdata[2] == 0) {
1748 pdata[0] = BININT1;
1749 len = 2;
1750 }
1751 else {
1752 pdata[0] = BININT2;
1753 len = 3;
1754 }
1755 }
1756 else {
1757 pdata[0] = BININT;
1758 len = 5;
1759 }
1760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001761 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001762 return -1;
1763 }
1764
1765 return 0;
1766}
1767
/* Pickle a Python int object.

   Values that fit in a signed 32-bit integer are delegated to save_int().
   Larger values are written either as LONG1/LONG4 followed by a
   little-endian two's-complement byte string (protocol >= 2), or as LONG
   followed by the repr, an 'L' and a newline (older protocols).

   Returns 0 on success, -1 on failure with an exception set. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        /* When long is wider than 4 bytes the following `if` guards the
           return; otherwise the return is the direct body of the `else`. */
#if SIZEOF_LONG > 4
        if (val <= 0x7fffffffL && val >= -0x80000000L)
#endif
            return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (_Pickler_Write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is ints
         * of the form -(2**(8*j-1)) for j > 0.  Such an int is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > 0x7fffffffL) {
            PyErr_SetString(PyExc_OverflowError,
                            "int too large to pickle");
            goto error;
        }
        /* A bytes object is used as the scratch buffer; it is released
           through `repr` at the end of the function. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the int is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            header[0] = LONG4;
            /* `size` is used as a scratch copy of nbytes while the four
               little-endian length bytes are emitted, then reset to the
               header length. */
            size = (Py_ssize_t) nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2:  write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The `if (0)` body is only reachable through the `error` label, so the
       success path skips it and both paths share the cleanup below. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1894
1895static int
1896save_float(PicklerObject *self, PyObject *obj)
1897{
1898 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1899
1900 if (self->bin) {
1901 char pdata[9];
1902 pdata[0] = BINFLOAT;
1903 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1904 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001905 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001906 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001907 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001908 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001909 int result = -1;
1910 char *buf = NULL;
1911 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001912
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001913 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001914 goto done;
1915
Mark Dickinson3e09f432009-04-17 08:41:23 +00001916 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001917 if (!buf) {
1918 PyErr_NoMemory();
1919 goto done;
1920 }
1921
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001922 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001923 goto done;
1924
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001925 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001926 goto done;
1927
1928 result = 0;
1929done:
1930 PyMem_Free(buf);
1931 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001932 }
1933
1934 return 0;
1935}
1936
1937static int
1938save_bytes(PicklerObject *self, PyObject *obj)
1939{
1940 if (self->proto < 3) {
1941 /* Older pickle protocols do not have an opcode for pickling bytes
1942 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001943 the __reduce__ method) to permit bytes object unpickling.
1944
1945 Here we use a hack to be compatible with Python 2. Since in Python
1946 2 'bytes' is just an alias for 'str' (which has different
1947 parameters than the actual bytes object), we use codecs.encode
1948 to create the appropriate 'str' object when unpickled using
1949 Python 2 *and* the appropriate 'bytes' object when unpickled
1950 using Python 3. Again this is a hack and we don't need to do this
1951 with newer protocols. */
1952 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001953 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001954 int status;
1955
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001956 if (codecs_encode == NULL) {
1957 PyObject *codecs_module = PyImport_ImportModule("codecs");
1958 if (codecs_module == NULL) {
1959 return -1;
1960 }
1961 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1962 Py_DECREF(codecs_module);
1963 if (codecs_encode == NULL) {
1964 return -1;
1965 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001966 }
1967
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001968 if (PyBytes_GET_SIZE(obj) == 0) {
1969 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1970 }
1971 else {
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001972 PyObject *unicode_str =
1973 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1974 PyBytes_GET_SIZE(obj),
1975 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001976 _Py_IDENTIFIER(latin1);
1977
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001978 if (unicode_str == NULL)
1979 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001980 reduce_value = Py_BuildValue("(O(OO))",
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001981 codecs_encode, unicode_str,
1982 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001983 Py_DECREF(unicode_str);
1984 }
1985
1986 if (reduce_value == NULL)
1987 return -1;
1988
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001989 /* save_reduce() will memoize the object automatically. */
1990 status = save_reduce(self, reduce_value, obj);
1991 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992 return status;
1993 }
1994 else {
1995 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001996 char header[9];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001997 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001998
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001999 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002000 if (size < 0)
2001 return -1;
2002
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002003 if (size <= 0xff) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004 header[0] = SHORT_BINBYTES;
2005 header[1] = (unsigned char)size;
2006 len = 2;
2007 }
2008 else if (size <= 0xffffffffL) {
2009 header[0] = BINBYTES;
2010 header[1] = (unsigned char)(size & 0xff);
2011 header[2] = (unsigned char)((size >> 8) & 0xff);
2012 header[3] = (unsigned char)((size >> 16) & 0xff);
2013 header[4] = (unsigned char)((size >> 24) & 0xff);
2014 len = 5;
2015 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002016 else if (self->proto >= 4) {
2017 int i;
2018 header[0] = BINBYTES8;
2019 for (i = 0; i < 8; i++) {
2020 header[i+1] = (unsigned char)((size >> (8 * i)) & 0xff);
2021 }
2022 len = 8;
2023 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002024 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002025 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02002026 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return -1; /* string too large */
2028 }
2029
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002030 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002031 return -1;
2032
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002033 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 return -1;
2035
2036 if (memo_put(self, obj) < 0)
2037 return -1;
2038
2039 return 0;
2040 }
2041}
2042
2043/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
2044 backslash and newline characters to \uXXXX escapes. */
2045static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002046raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047{
2048 PyObject *repr, *result;
2049 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002050 Py_ssize_t i, size, expandsize;
2051 void *data;
2052 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002053
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002054 if (PyUnicode_READY(obj))
2055 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002056
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002057 size = PyUnicode_GET_LENGTH(obj);
2058 data = PyUnicode_DATA(obj);
2059 kind = PyUnicode_KIND(obj);
2060 if (kind == PyUnicode_4BYTE_KIND)
2061 expandsize = 10;
2062 else
2063 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02002064
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002065 if (size > PY_SSIZE_T_MAX / expandsize)
2066 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002067 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002068 if (repr == NULL)
2069 return NULL;
2070 if (size == 0)
2071 goto done;
2072
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002073 p = PyByteArray_AS_STRING(repr);
2074 for (i=0; i < size; i++) {
2075 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002076 /* Map 32-bit characters to '\Uxxxxxxxx' */
2077 if (ch >= 0x10000) {
2078 *p++ = '\\';
2079 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002080 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
2081 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
2082 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
2083 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
2084 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2085 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2086 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2087 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002088 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002089 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002090 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002091 *p++ = '\\';
2092 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02002093 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
2094 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
2095 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
2096 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002097 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002098 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002099 else
2100 *p++ = (char) ch;
2101 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002102 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002103
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002104done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002105 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002106 Py_DECREF(repr);
2107 return result;
2108}
2109
2110static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02002111write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
2112{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002113 char header[9];
2114 Py_ssize_t len;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002115
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002116 if (size <= 0xff && self->proto >= 4) {
2117 header[0] = SHORT_BINUNICODE;
2118 header[1] = (unsigned char)(size & 0xff);
2119 len = 2;
2120 }
2121 else if (size <= 0xffffffffUL) {
2122 header[0] = BINUNICODE;
2123 header[1] = (unsigned char)(size & 0xff);
2124 header[2] = (unsigned char)((size >> 8) & 0xff);
2125 header[3] = (unsigned char)((size >> 16) & 0xff);
2126 header[4] = (unsigned char)((size >> 24) & 0xff);
2127 len = 5;
2128 }
2129 else if (self->proto >= 4) {
2130 int i;
2131
2132 header[0] = BINUNICODE8;
2133 for (i = 0; i < 8; i++) {
2134 header[i+1] = (unsigned char)((size >> (8 * i)) & 0xff);
2135 }
2136 len = 9;
2137 }
2138 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002139 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02002140 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02002141 return -1;
2142 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002143
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002144 if (_Pickler_Write(self, header, len) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002145 return -1;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002146 if (_Pickler_Write(self, data, size) < 0)
2147 return -1;
2148
2149 return 0;
2150}
2151
2152static int
2153write_unicode_binary(PicklerObject *self, PyObject *obj)
2154{
2155 PyObject *encoded = NULL;
2156 Py_ssize_t size;
2157 char *data;
2158 int r;
2159
2160 if (PyUnicode_READY(obj))
2161 return -1;
2162
2163 data = PyUnicode_AsUTF8AndSize(obj, &size);
2164 if (data != NULL)
2165 return write_utf8(self, data, size);
2166
2167 /* Issue #8383: for strings with lone surrogates, fallback on the
2168 "surrogatepass" error handler. */
2169 PyErr_Clear();
2170 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2171 if (encoded == NULL)
2172 return -1;
2173
2174 r = write_utf8(self, PyBytes_AS_STRING(encoded),
2175 PyBytes_GET_SIZE(encoded));
2176 Py_DECREF(encoded);
2177 return r;
2178}
2179
2180static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002181save_unicode(PicklerObject *self, PyObject *obj)
2182{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002183 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002184 if (write_unicode_binary(self, obj) < 0)
2185 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002186 }
2187 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002188 PyObject *encoded;
2189 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002190 const char unicode_op = UNICODE;
2191
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002192 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002193 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002194 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002195
Antoine Pitrou299978d2013-04-07 17:38:11 +02002196 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2197 Py_DECREF(encoded);
2198 return -1;
2199 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002200
2201 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002202 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2203 Py_DECREF(encoded);
2204 return -1;
2205 }
2206 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002207
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002208 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002209 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002210 }
2211 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002212 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002213
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002214 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002215}
2216
2217/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2218static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002219store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002220{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002221 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002222
2223 assert(PyTuple_Size(t) == len);
2224
2225 for (i = 0; i < len; i++) {
2226 PyObject *element = PyTuple_GET_ITEM(t, i);
2227
2228 if (element == NULL)
2229 return -1;
2230 if (save(self, element, 0) < 0)
2231 return -1;
2232 }
2233
2234 return 0;
2235}
2236
/* Tuples are ubiquitous in the pickle protocols, so many techniques are
 * used across protocols to minimize the space needed to pickle them.
 * Tuples are also the only builtin immutable type that can be recursive
 * (a tuple can be reached from itself), and that requires some subtle
 * magic so that it works in all cases.  IOW, this is a long routine.
 *
 * Summary of what is written:
 *   - empty tuple: EMPTY_TUPLE (proto > 0) or MARK TUPLE (proto 0); the
 *     empty tuple is not memoized;
 *   - 1 to 3 items with proto >= 2: the items followed by TUPLE1/2/3;
 *   - otherwise: MARK, the items, TUPLE.
 * If the tuple is found in the memo after its items have been saved, the
 * items just written are popped again and a memo fetch is written instead.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
save_tuple(PicklerObject *self, PyObject *obj)
{
    Py_ssize_t len, i;

    const char mark_op = MARK;
    const char tuple_op = TUPLE;
    const char pop_op = POP;
    const char pop_mark_op = POP_MARK;
    /* Indexed by tuple length (0..3). */
    const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};

    if ((len = PyTuple_Size(obj)) < 0)
        return -1;

    if (len == 0) {
        char pdata[2];

        /* From here on `len` is reused as the number of bytes to write. */
        if (self->proto) {
            pdata[0] = EMPTY_TUPLE;
            len = 1;
        }
        else {
            pdata[0] = MARK;
            pdata[1] = TUPLE;
            len = 2;
        }
        if (_Pickler_Write(self, pdata, len) < 0)
            return -1;
        return 0;
    }

    /* The tuple isn't in the memo now.  If it shows up there after
     * saving the tuple elements, the tuple must be recursive, in
     * which case we'll pop everything we put on the stack, and fetch
     * its value from the memo.
     */
    if (len <= 3 && self->proto >= 2) {
        /* Use TUPLE{1,2,3} opcodes. */
        if (store_tuple_elements(self, obj, len) < 0)
            return -1;

        if (PyMemoTable_Get(self->memo, obj)) {
            /* pop the len elements */
            for (i = 0; i < len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
            /* fetch from memo */
            if (memo_get(self, obj) < 0)
                return -1;

            return 0;
        }
        else { /* Not recursive. */
            if (_Pickler_Write(self, len2opcode + len, 1) < 0)
                return -1;
        }
        goto memoize;
    }

    /* proto < 2 and len > 0, or proto >= 2 and len > 3.
     * Generate MARK e1 e2 ... TUPLE
     */
    if (_Pickler_Write(self, &mark_op, 1) < 0)
        return -1;

    if (store_tuple_elements(self, obj, len) < 0)
        return -1;

    if (PyMemoTable_Get(self->memo, obj)) {
        /* pop the stack stuff we pushed */
        if (self->bin) {
            if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
                return -1;
        }
        else {
            /* Note that we pop one more than len, to remove
             * the MARK too.
             */
            for (i = 0; i <= len; i++)
                if (_Pickler_Write(self, &pop_op, 1) < 0)
                    return -1;
        }
        /* fetch from memo */
        if (memo_get(self, obj) < 0)
            return -1;

        return 0;
    }
    else { /* Not recursive. */
        if (_Pickler_Write(self, &tuple_op, 1) < 0)
            return -1;
    }

    /* Both non-recursive paths end up here to record the tuple. */
  memoize:
    if (memo_put(self, obj) < 0)
        return -1;

    return 0;
}
2342
/* iter is an iterator giving items, and we batch up chunks of
 *     MARK item item ... item APPENDS
 * opcode sequences.  Calling code should have arranged to first create an
 * empty list, or list-like object, for the APPENDS to operate on.
 * Returns 0 on success, <0 on error.
 *
 * With protocol 0 every item is followed by its own APPEND instead.  With
 * later protocols a batch that turns out to hold a single item is also
 * written as item APPEND, without a MARK.
 */
static int
batch_list(PicklerObject *self, PyObject *iter)
{
    PyObject *obj = NULL;
    PyObject *firstitem = NULL;
    int i, n;

    const char mark_op = MARK;
    const char append_op = APPEND;
    const char appends_op = APPENDS;

    assert(iter != NULL);

    /* XXX: I think this function could be made faster by avoiding the
       iterator interface and fetching objects directly from list using
       PyList_GET_ITEM.
    */

    if (self->proto == 0) {
        /* APPENDS isn't available; do one at a time. */
        for (;;) {
            obj = PyIter_Next(iter);
            if (obj == NULL) {
                /* NULL means either exhaustion or an error. */
                if (PyErr_Occurred())
                    return -1;
                break;
            }
            i = save(self, obj, 0);
            /* The item is released before the result is checked, so no
               cleanup is needed on the error return. */
            Py_DECREF(obj);
            if (i < 0)
                return -1;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                return -1;
        }
        return 0;
    }

    /* proto > 0:  write in batches of BATCHSIZE. */
    do {
        /* Get first item */
        firstitem = PyIter_Next(iter);
        if (firstitem == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* nothing more to add */
            break;
        }

        /* Try to get a second item */
        obj = PyIter_Next(iter);
        if (obj == NULL) {
            if (PyErr_Occurred())
                goto error;

            /* Only one item to write */
            if (save(self, firstitem, 0) < 0)
                goto error;
            if (_Pickler_Write(self, &append_op, 1) < 0)
                goto error;
            Py_CLEAR(firstitem);
            break;
        }

        /* More than one item to write */

        /* Pump out MARK, items, APPENDS. */
        if (_Pickler_Write(self, &mark_op, 1) < 0)
            goto error;

        if (save(self, firstitem, 0) < 0)
            goto error;
        Py_CLEAR(firstitem);
        n = 1;

        /* Fetch and save up to BATCHSIZE items */
        while (obj) {
            if (save(self, obj, 0) < 0)
                goto error;
            Py_CLEAR(obj);
            n += 1;

            if (n == BATCHSIZE)
                break;

            obj = PyIter_Next(iter);
            if (obj == NULL) {
                if (PyErr_Occurred())
                    goto error;
                break;
            }
        }

        if (_Pickler_Write(self, &appends_op, 1) < 0)
            goto error;

    /* A full batch means the iterator may have more items; a short batch
       means it was exhausted. */
    } while (n == BATCHSIZE);
    return 0;

  error:
    /* Release whichever of the two look-ahead items is still owned. */
    Py_XDECREF(firstitem);
    Py_XDECREF(obj);
    return -1;
}
2453
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002454/* This is a variant of batch_list() above, specialized for lists (with no
2455 * support for list subclasses). Like batch_list(), we batch up chunks of
2456 * MARK item item ... item APPENDS
2457 * opcode sequences. Calling code should have arranged to first create an
2458 * empty list, or list-like object, for the APPENDS to operate on.
2459 * Returns 0 on success, -1 on error.
2460 *
2461 * This version is considerably faster than batch_list(), if less general.
2462 *
2463 * Note that this only works for protocols > 0.
2464 */
2465static int
2466batch_list_exact(PicklerObject *self, PyObject *obj)
2467{
2468 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002469 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002470
2471 const char append_op = APPEND;
2472 const char appends_op = APPENDS;
2473 const char mark_op = MARK;
2474
2475 assert(obj != NULL);
2476 assert(self->proto > 0);
2477 assert(PyList_CheckExact(obj));
2478
2479 if (PyList_GET_SIZE(obj) == 1) {
2480 item = PyList_GET_ITEM(obj, 0);
2481 if (save(self, item, 0) < 0)
2482 return -1;
2483 if (_Pickler_Write(self, &append_op, 1) < 0)
2484 return -1;
2485 return 0;
2486 }
2487
2488 /* Write in batches of BATCHSIZE. */
2489 total = 0;
2490 do {
2491 this_batch = 0;
2492 if (_Pickler_Write(self, &mark_op, 1) < 0)
2493 return -1;
2494 while (total < PyList_GET_SIZE(obj)) {
2495 item = PyList_GET_ITEM(obj, total);
2496 if (save(self, item, 0) < 0)
2497 return -1;
2498 total++;
2499 if (++this_batch == BATCHSIZE)
2500 break;
2501 }
2502 if (_Pickler_Write(self, &appends_op, 1) < 0)
2503 return -1;
2504
2505 } while (total < PyList_GET_SIZE(obj));
2506
2507 return 0;
2508}
2509
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002510static int
2511save_list(PicklerObject *self, PyObject *obj)
2512{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002513 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002514 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002515 int status = 0;
2516
2517 if (self->fast && !fast_save_enter(self, obj))
2518 goto error;
2519
2520 /* Create an empty list. */
2521 if (self->bin) {
2522 header[0] = EMPTY_LIST;
2523 len = 1;
2524 }
2525 else {
2526 header[0] = MARK;
2527 header[1] = LIST;
2528 len = 2;
2529 }
2530
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002531 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002532 goto error;
2533
2534 /* Get list length, and bow out early if empty. */
2535 if ((len = PyList_Size(obj)) < 0)
2536 goto error;
2537
2538 if (memo_put(self, obj) < 0)
2539 goto error;
2540
2541 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002542 /* Materialize the list elements. */
2543 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002544 if (Py_EnterRecursiveCall(" while pickling an object"))
2545 goto error;
2546 status = batch_list_exact(self, obj);
2547 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002548 } else {
2549 PyObject *iter = PyObject_GetIter(obj);
2550 if (iter == NULL)
2551 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002552
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002553 if (Py_EnterRecursiveCall(" while pickling an object")) {
2554 Py_DECREF(iter);
2555 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002556 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002557 status = batch_list(self, iter);
2558 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002559 Py_DECREF(iter);
2560 }
2561 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002562 if (0) {
2563 error:
2564 status = -1;
2565 }
2566
2567 if (self->fast && !fast_save_leave(self, obj))
2568 status = -1;
2569
2570 return status;
2571}
2572
2573/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2574 * MARK key value ... key value SETITEMS
2575 * opcode sequences. Calling code should have arranged to first create an
2576 * empty dict, or dict-like object, for the SETITEMS to operate on.
2577 * Returns 0 on success, <0 on error.
2578 *
2579 * This is very much like batch_list(). The difference between saving
2580 * elements directly, and picking apart two-tuples, is so long-winded at
2581 * the C level, though, that attempts to combine these routines were too
2582 * ugly to bear.
2583 */
2584static int
2585batch_dict(PicklerObject *self, PyObject *iter)
2586{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002587 PyObject *obj = NULL;
2588 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002589 int i, n;
2590
2591 const char mark_op = MARK;
2592 const char setitem_op = SETITEM;
2593 const char setitems_op = SETITEMS;
2594
2595 assert(iter != NULL);
2596
2597 if (self->proto == 0) {
2598 /* SETITEMS isn't available; do one at a time. */
2599 for (;;) {
2600 obj = PyIter_Next(iter);
2601 if (obj == NULL) {
2602 if (PyErr_Occurred())
2603 return -1;
2604 break;
2605 }
2606 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2607 PyErr_SetString(PyExc_TypeError, "dict items "
2608 "iterator must return 2-tuples");
2609 return -1;
2610 }
2611 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2612 if (i >= 0)
2613 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2614 Py_DECREF(obj);
2615 if (i < 0)
2616 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002617 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002618 return -1;
2619 }
2620 return 0;
2621 }
2622
2623 /* proto > 0: write in batches of BATCHSIZE. */
2624 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002625 /* Get first item */
2626 firstitem = PyIter_Next(iter);
2627 if (firstitem == NULL) {
2628 if (PyErr_Occurred())
2629 goto error;
2630
2631 /* nothing more to add */
2632 break;
2633 }
2634 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2635 PyErr_SetString(PyExc_TypeError, "dict items "
2636 "iterator must return 2-tuples");
2637 goto error;
2638 }
2639
2640 /* Try to get a second item */
2641 obj = PyIter_Next(iter);
2642 if (obj == NULL) {
2643 if (PyErr_Occurred())
2644 goto error;
2645
2646 /* Only one item to write */
2647 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2648 goto error;
2649 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2650 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002651 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002652 goto error;
2653 Py_CLEAR(firstitem);
2654 break;
2655 }
2656
2657 /* More than one item to write */
2658
2659 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002660 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002661 goto error;
2662
2663 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2664 goto error;
2665 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2666 goto error;
2667 Py_CLEAR(firstitem);
2668 n = 1;
2669
2670 /* Fetch and save up to BATCHSIZE items */
2671 while (obj) {
2672 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2673 PyErr_SetString(PyExc_TypeError, "dict items "
2674 "iterator must return 2-tuples");
2675 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002676 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002677 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2678 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2679 goto error;
2680 Py_CLEAR(obj);
2681 n += 1;
2682
2683 if (n == BATCHSIZE)
2684 break;
2685
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002686 obj = PyIter_Next(iter);
2687 if (obj == NULL) {
2688 if (PyErr_Occurred())
2689 goto error;
2690 break;
2691 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002692 }
2693
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002694 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002695 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002696
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002697 } while (n == BATCHSIZE);
2698 return 0;
2699
2700 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002701 Py_XDECREF(firstitem);
2702 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002703 return -1;
2704}
2705
Collin Winter5c9b02d2009-05-25 05:43:30 +00002706/* This is a variant of batch_dict() above that specializes for dicts, with no
2707 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2708 * MARK key value ... key value SETITEMS
2709 * opcode sequences. Calling code should have arranged to first create an
2710 * empty dict, or dict-like object, for the SETITEMS to operate on.
2711 * Returns 0 on success, -1 on error.
2712 *
2713 * Note that this currently doesn't work for protocol 0.
2714 */
2715static int
2716batch_dict_exact(PicklerObject *self, PyObject *obj)
2717{
2718 PyObject *key = NULL, *value = NULL;
2719 int i;
2720 Py_ssize_t dict_size, ppos = 0;
2721
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002722 const char mark_op = MARK;
2723 const char setitem_op = SETITEM;
2724 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002725
2726 assert(obj != NULL);
2727 assert(self->proto > 0);
2728
2729 dict_size = PyDict_Size(obj);
2730
2731 /* Special-case len(d) == 1 to save space. */
2732 if (dict_size == 1) {
2733 PyDict_Next(obj, &ppos, &key, &value);
2734 if (save(self, key, 0) < 0)
2735 return -1;
2736 if (save(self, value, 0) < 0)
2737 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002738 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002739 return -1;
2740 return 0;
2741 }
2742
2743 /* Write in batches of BATCHSIZE. */
2744 do {
2745 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002746 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002747 return -1;
2748 while (PyDict_Next(obj, &ppos, &key, &value)) {
2749 if (save(self, key, 0) < 0)
2750 return -1;
2751 if (save(self, value, 0) < 0)
2752 return -1;
2753 if (++i == BATCHSIZE)
2754 break;
2755 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002756 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002757 return -1;
2758 if (PyDict_Size(obj) != dict_size) {
2759 PyErr_Format(
2760 PyExc_RuntimeError,
2761 "dictionary changed size during iteration");
2762 return -1;
2763 }
2764
2765 } while (i == BATCHSIZE);
2766 return 0;
2767}
2768
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002769static int
2770save_dict(PicklerObject *self, PyObject *obj)
2771{
2772 PyObject *items, *iter;
2773 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002774 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002775 int status = 0;
2776
2777 if (self->fast && !fast_save_enter(self, obj))
2778 goto error;
2779
2780 /* Create an empty dict. */
2781 if (self->bin) {
2782 header[0] = EMPTY_DICT;
2783 len = 1;
2784 }
2785 else {
2786 header[0] = MARK;
2787 header[1] = DICT;
2788 len = 2;
2789 }
2790
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002791 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002792 goto error;
2793
2794 /* Get dict size, and bow out early if empty. */
2795 if ((len = PyDict_Size(obj)) < 0)
2796 goto error;
2797
2798 if (memo_put(self, obj) < 0)
2799 goto error;
2800
2801 if (len != 0) {
2802 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002803 if (PyDict_CheckExact(obj) && self->proto > 0) {
2804 /* We can take certain shortcuts if we know this is a dict and
2805 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002806 if (Py_EnterRecursiveCall(" while pickling an object"))
2807 goto error;
2808 status = batch_dict_exact(self, obj);
2809 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002810 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002811 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002812
2813 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002814 if (items == NULL)
2815 goto error;
2816 iter = PyObject_GetIter(items);
2817 Py_DECREF(items);
2818 if (iter == NULL)
2819 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002820 if (Py_EnterRecursiveCall(" while pickling an object")) {
2821 Py_DECREF(iter);
2822 goto error;
2823 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002824 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002825 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002826 Py_DECREF(iter);
2827 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002828 }
2829
2830 if (0) {
2831 error:
2832 status = -1;
2833 }
2834
2835 if (self->fast && !fast_save_leave(self, obj))
2836 status = -1;
2837
2838 return status;
2839}
2840
2841static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002842save_set(PicklerObject *self, PyObject *obj)
2843{
2844 PyObject *item;
2845 int i;
2846 Py_ssize_t set_size, ppos = 0;
2847 Py_hash_t hash;
2848
2849 const char empty_set_op = EMPTY_SET;
2850 const char mark_op = MARK;
2851 const char additems_op = ADDITEMS;
2852
2853 if (self->proto < 4) {
2854 PyObject *items;
2855 PyObject *reduce_value;
2856 int status;
2857
2858 items = PySequence_List(obj);
2859 if (items == NULL) {
2860 return -1;
2861 }
2862 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
2863 Py_DECREF(items);
2864 if (reduce_value == NULL) {
2865 return -1;
2866 }
2867 /* save_reduce() will memoize the object automatically. */
2868 status = save_reduce(self, reduce_value, obj);
2869 Py_DECREF(reduce_value);
2870 return status;
2871 }
2872
2873 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
2874 return -1;
2875
2876 if (memo_put(self, obj) < 0)
2877 return -1;
2878
2879 set_size = PySet_GET_SIZE(obj);
2880 if (set_size == 0)
2881 return 0; /* nothing to do */
2882
2883 /* Write in batches of BATCHSIZE. */
2884 do {
2885 i = 0;
2886 if (_Pickler_Write(self, &mark_op, 1) < 0)
2887 return -1;
2888 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
2889 if (save(self, item, 0) < 0)
2890 return -1;
2891 if (++i == BATCHSIZE)
2892 break;
2893 }
2894 if (_Pickler_Write(self, &additems_op, 1) < 0)
2895 return -1;
2896 if (PySet_GET_SIZE(obj) != set_size) {
2897 PyErr_Format(
2898 PyExc_RuntimeError,
2899 "set changed size during iteration");
2900 return -1;
2901 }
2902 } while (i == BATCHSIZE);
2903
2904 return 0;
2905}
2906
2907static int
2908save_frozenset(PicklerObject *self, PyObject *obj)
2909{
2910 PyObject *iter;
2911
2912 const char mark_op = MARK;
2913 const char frozenset_op = FROZENSET;
2914
2915 if (self->fast && !fast_save_enter(self, obj))
2916 return -1;
2917
2918 if (self->proto < 4) {
2919 PyObject *items;
2920 PyObject *reduce_value;
2921 int status;
2922
2923 items = PySequence_List(obj);
2924 if (items == NULL) {
2925 return -1;
2926 }
2927 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
2928 items);
2929 Py_DECREF(items);
2930 if (reduce_value == NULL) {
2931 return -1;
2932 }
2933 /* save_reduce() will memoize the object automatically. */
2934 status = save_reduce(self, reduce_value, obj);
2935 Py_DECREF(reduce_value);
2936 return status;
2937 }
2938
2939 if (_Pickler_Write(self, &mark_op, 1) < 0)
2940 return -1;
2941
2942 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01002943 if (iter == NULL) {
2944 return NULL;
2945 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002946 for (;;) {
2947 PyObject *item;
2948
2949 item = PyIter_Next(iter);
2950 if (item == NULL) {
2951 if (PyErr_Occurred()) {
2952 Py_DECREF(iter);
2953 return -1;
2954 }
2955 break;
2956 }
2957 if (save(self, item, 0) < 0) {
2958 Py_DECREF(item);
2959 Py_DECREF(iter);
2960 return -1;
2961 }
2962 Py_DECREF(item);
2963 }
2964 Py_DECREF(iter);
2965
2966 /* If the object is already in the memo, this means it is
2967 recursive. In this case, throw away everything we put on the
2968 stack, and fetch the object back from the memo. */
2969 if (PyMemoTable_Get(self->memo, obj)) {
2970 const char pop_mark_op = POP_MARK;
2971
2972 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2973 return -1;
2974 if (memo_get(self, obj) < 0)
2975 return -1;
2976 return 0;
2977 }
2978
2979 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
2980 return -1;
2981 if (memo_put(self, obj) < 0)
2982 return -1;
2983
2984 return 0;
2985}
2986
2987static int
2988fix_imports(PyObject **module_name, PyObject **global_name)
2989{
2990 PyObject *key;
2991 PyObject *item;
2992
2993 key = PyTuple_Pack(2, *module_name, *global_name);
2994 if (key == NULL)
2995 return -1;
2996 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2997 Py_DECREF(key);
2998 if (item) {
2999 PyObject *fixed_module_name;
3000 PyObject *fixed_global_name;
3001
3002 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
3003 PyErr_Format(PyExc_RuntimeError,
3004 "_compat_pickle.REVERSE_NAME_MAPPING values "
3005 "should be 2-tuples, not %.200s",
3006 Py_TYPE(item)->tp_name);
3007 return -1;
3008 }
3009 fixed_module_name = PyTuple_GET_ITEM(item, 0);
3010 fixed_global_name = PyTuple_GET_ITEM(item, 1);
3011 if (!PyUnicode_Check(fixed_module_name) ||
3012 !PyUnicode_Check(fixed_global_name)) {
3013 PyErr_Format(PyExc_RuntimeError,
3014 "_compat_pickle.REVERSE_NAME_MAPPING values "
3015 "should be pairs of str, not (%.200s, %.200s)",
3016 Py_TYPE(fixed_module_name)->tp_name,
3017 Py_TYPE(fixed_global_name)->tp_name);
3018 return -1;
3019 }
3020
3021 Py_CLEAR(*module_name);
3022 Py_CLEAR(*global_name);
3023 Py_INCREF(fixed_module_name);
3024 Py_INCREF(fixed_global_name);
3025 *module_name = fixed_module_name;
3026 *global_name = fixed_global_name;
3027 }
3028 else if (PyErr_Occurred()) {
3029 return -1;
3030 }
3031
3032 item = PyDict_GetItemWithError(import_mapping_3to2, *module_name);
3033 if (item) {
3034 if (!PyUnicode_Check(item)) {
3035 PyErr_Format(PyExc_RuntimeError,
3036 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
3037 "should be strings, not %.200s",
3038 Py_TYPE(item)->tp_name);
3039 return -1;
3040 }
3041 Py_CLEAR(*module_name);
3042 Py_INCREF(item);
3043 *module_name = item;
3044 }
3045 else if (PyErr_Occurred()) {
3046 return -1;
3047 }
3048
3049 return 0;
3050}
3051
3052static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003053save_global(PicklerObject *self, PyObject *obj, PyObject *name)
3054{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003055 PyObject *global_name = NULL;
3056 PyObject *module_name = NULL;
3057 PyObject *module = NULL;
3058 PyObject *cls;
3059 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003060 _Py_IDENTIFIER(__name__);
3061 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003062
3063 const char global_op = GLOBAL;
3064
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003065 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003066 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003067 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003068 }
3069 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003070 if (self->proto >= 4) {
3071 global_name = _PyObject_GetAttrId(obj, &PyId___qualname__);
3072 if (global_name == NULL) {
3073 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
3074 goto error;
3075 PyErr_Clear();
3076 }
3077 }
3078 if (global_name == NULL) {
3079 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
3080 if (global_name == NULL)
3081 goto error;
3082 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003083 }
3084
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003085 module_name = whichmodule(obj, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003086 if (module_name == NULL)
3087 goto error;
3088
3089 /* XXX: Change to use the import C API directly with level=0 to disallow
3090 relative imports.
3091
3092 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
3093 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
3094 custom import functions (IMHO, this would be a nice security
3095 feature). The import C API would need to be extended to support the
3096 extra parameters of __import__ to fix that. */
3097 module = PyImport_Import(module_name);
3098 if (module == NULL) {
3099 PyErr_Format(PicklingError,
3100 "Can't pickle %R: import of module %R failed",
3101 obj, module_name);
3102 goto error;
3103 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003104 cls = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003105 if (cls == NULL) {
3106 PyErr_Format(PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003107 "Can't pickle %R: attribute lookup %S on %S failed",
3108 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003109 goto error;
3110 }
3111 if (cls != obj) {
3112 Py_DECREF(cls);
3113 PyErr_Format(PicklingError,
3114 "Can't pickle %R: it's not the same object as %S.%S",
3115 obj, module_name, global_name);
3116 goto error;
3117 }
3118 Py_DECREF(cls);
3119
3120 if (self->proto >= 2) {
3121 /* See whether this is in the extension registry, and if
3122 * so generate an EXT opcode.
3123 */
3124 PyObject *code_obj; /* extension code as Python object */
3125 long code; /* extension code as C value */
3126 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003127 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003128
3129 PyTuple_SET_ITEM(two_tuple, 0, module_name);
3130 PyTuple_SET_ITEM(two_tuple, 1, global_name);
3131 code_obj = PyDict_GetItem(extension_registry, two_tuple);
3132 /* The object is not registered in the extension registry.
3133 This is the most likely code path. */
3134 if (code_obj == NULL)
3135 goto gen_global;
3136
3137 /* XXX: pickle.py doesn't check neither the type, nor the range
3138 of the value returned by the extension_registry. It should for
3139 consistency. */
3140
3141 /* Verify code_obj has the right type and value. */
3142 if (!PyLong_Check(code_obj)) {
3143 PyErr_Format(PicklingError,
3144 "Can't pickle %R: extension code %R isn't an integer",
3145 obj, code_obj);
3146 goto error;
3147 }
3148 code = PyLong_AS_LONG(code_obj);
3149 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003150 if (!PyErr_Occurred())
3151 PyErr_Format(PicklingError,
3152 "Can't pickle %R: extension code %ld is out of range",
3153 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003154 goto error;
3155 }
3156
3157 /* Generate an EXT opcode. */
3158 if (code <= 0xff) {
3159 pdata[0] = EXT1;
3160 pdata[1] = (unsigned char)code;
3161 n = 2;
3162 }
3163 else if (code <= 0xffff) {
3164 pdata[0] = EXT2;
3165 pdata[1] = (unsigned char)(code & 0xff);
3166 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3167 n = 3;
3168 }
3169 else {
3170 pdata[0] = EXT4;
3171 pdata[1] = (unsigned char)(code & 0xff);
3172 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3173 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3174 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3175 n = 5;
3176 }
3177
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003178 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003179 goto error;
3180 }
3181 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003182 gen_global:
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003183 if (self->proto >= 4) {
3184 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003185
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003186 save(self, module_name, 0);
3187 save(self, global_name, 0);
3188
3189 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3190 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003191 }
3192 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003193 /* Generate a normal global opcode if we are using a pickle
3194 protocol < 4, or if the object is not registered in the
3195 extension registry. */
3196 PyObject *encoded;
3197 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003198
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003199 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003200 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003201
3202 /* For protocol < 3 and if the user didn't request against doing
3203 so, we convert module names to the old 2.x module names. */
3204 if (self->proto < 3 && self->fix_imports) {
3205 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003206 goto error;
3207 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003208 }
3209
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003210 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3211 both the module name and the global name using UTF-8. We do so
3212 only when we are using the pickle protocol newer than version
3213 3. This is to ensure compatibility with older Unpickler running
3214 on Python 2.x. */
3215 if (self->proto == 3) {
3216 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003217 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003218 else {
3219 unicode_encoder = PyUnicode_AsASCIIString;
3220 }
3221 encoded = unicode_encoder(module_name);
3222 if (encoded == NULL) {
3223 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3224 PyErr_Format(PicklingError,
3225 "can't pickle module identifier '%S' using "
3226 "pickle protocol %i",
3227 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003228 goto error;
3229 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003230 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3231 PyBytes_GET_SIZE(encoded)) < 0) {
3232 Py_DECREF(encoded);
3233 goto error;
3234 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003235 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003236 if(_Pickler_Write(self, "\n", 1) < 0)
3237 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003238
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003239 /* Save the name of the module. */
3240 encoded = unicode_encoder(global_name);
3241 if (encoded == NULL) {
3242 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3243 PyErr_Format(PicklingError,
3244 "can't pickle global identifier '%S' using "
3245 "pickle protocol %i",
3246 global_name, self->proto);
3247 goto error;
3248 }
3249 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3250 PyBytes_GET_SIZE(encoded)) < 0) {
3251 Py_DECREF(encoded);
3252 goto error;
3253 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003254 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003255 if (_Pickler_Write(self, "\n", 1) < 0)
3256 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003257 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003258 /* Memoize the object. */
3259 if (memo_put(self, obj) < 0)
3260 goto error;
3261 }
3262
3263 if (0) {
3264 error:
3265 status = -1;
3266 }
3267 Py_XDECREF(module_name);
3268 Py_XDECREF(global_name);
3269 Py_XDECREF(module);
3270
3271 return status;
3272}
3273
3274static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003275save_ellipsis(PicklerObject *self, PyObject *obj)
3276{
Łukasz Langadbd78252012-03-12 22:59:11 +01003277 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003278 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01003279 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01003280 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003281 res = save_global(self, Py_Ellipsis, str);
3282 Py_DECREF(str);
3283 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003284}
3285
3286static int
3287save_notimplemented(PicklerObject *self, PyObject *obj)
3288{
Łukasz Langadbd78252012-03-12 22:59:11 +01003289 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003290 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01003291 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01003292 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003293 res = save_global(self, Py_NotImplemented, str);
3294 Py_DECREF(str);
3295 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003296}
3297
3298static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003299save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
3300{
3301 PyObject *pid = NULL;
3302 int status = 0;
3303
3304 const char persid_op = PERSID;
3305 const char binpersid_op = BINPERSID;
3306
3307 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003308 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003309 if (pid == NULL)
3310 return -1;
3311
3312 if (pid != Py_None) {
3313 if (self->bin) {
3314 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003315 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003316 goto error;
3317 }
3318 else {
3319 PyObject *pid_str = NULL;
3320 char *pid_ascii_bytes;
3321 Py_ssize_t size;
3322
3323 pid_str = PyObject_Str(pid);
3324 if (pid_str == NULL)
3325 goto error;
3326
3327 /* XXX: Should it check whether the persistent id only contains
3328 ASCII characters? And what if the pid contains embedded
3329 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003330 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003331 Py_DECREF(pid_str);
3332 if (pid_ascii_bytes == NULL)
3333 goto error;
3334
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003335 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3336 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
3337 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003338 goto error;
3339 }
3340 status = 1;
3341 }
3342
3343 if (0) {
3344 error:
3345 status = -1;
3346 }
3347 Py_XDECREF(pid);
3348
3349 return status;
3350}
3351
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003352static PyObject *
3353get_class(PyObject *obj)
3354{
3355 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003356 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003357
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003358 cls = _PyObject_GetAttrId(obj, &PyId___class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003359 if (cls == NULL) {
3360 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
3361 PyErr_Clear();
3362 cls = (PyObject *) Py_TYPE(obj);
3363 Py_INCREF(cls);
3364 }
3365 }
3366 return cls;
3367}
3368
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003369/* We're saving obj, and args is the 2-thru-5 tuple returned by the
3370 * appropriate __reduce__ method for obj.
3371 */
3372static int
3373save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3374{
3375 PyObject *callable;
3376 PyObject *argtup;
3377 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003378 PyObject *listitems = Py_None;
3379 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003380 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003381 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003382
3383 const char reduce_op = REDUCE;
3384 const char build_op = BUILD;
3385 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003386 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003387
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003388 size = PyTuple_Size(args);
3389 if (size < 2 || size > 5) {
3390 PyErr_SetString(PicklingError, "tuple returned by "
3391 "__reduce__ must contain 2 through 5 elements");
3392 return -1;
3393 }
3394
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003395 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3396 &callable, &argtup, &state, &listitems, &dictitems))
3397 return -1;
3398
3399 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003400 PyErr_SetString(PicklingError, "first item of the tuple "
3401 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003402 return -1;
3403 }
3404 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003405 PyErr_SetString(PicklingError, "second item of the tuple "
3406 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003407 return -1;
3408 }
3409
3410 if (state == Py_None)
3411 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003412
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003413 if (listitems == Py_None)
3414 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003415 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003416 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003417 "returned by __reduce__ must be an iterator, not %s",
3418 Py_TYPE(listitems)->tp_name);
3419 return -1;
3420 }
3421
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003422 if (dictitems == Py_None)
3423 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003424 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003425 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003426 "returned by __reduce__ must be an iterator, not %s",
3427 Py_TYPE(dictitems)->tp_name);
3428 return -1;
3429 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003430
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003431 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003432 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003433 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003434
Victor Stinner804e05e2013-11-14 01:26:17 +01003435 name = _PyObject_GetAttrId(callable, &PyId___name__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003436 if (name == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003437 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003438 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003439 }
3440 PyErr_Clear();
3441 }
3442 else if (self->proto >= 4) {
3443 _Py_IDENTIFIER(__newobj_ex__);
3444 use_newobj_ex = PyUnicode_Check(name) &&
3445 PyUnicode_Compare(
3446 name, _PyUnicode_FromId(&PyId___newobj_ex__)) == 0;
3447 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003448 }
3449 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003450 _Py_IDENTIFIER(__newobj__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003451 use_newobj = PyUnicode_Check(name) &&
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003452 PyUnicode_Compare(
3453 name, _PyUnicode_FromId(&PyId___newobj__)) == 0;
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003454 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003455 }
3456 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003457
3458 if (use_newobj_ex) {
3459 PyObject *cls;
3460 PyObject *args;
3461 PyObject *kwargs;
3462
3463 if (Py_SIZE(argtup) != 3) {
3464 PyErr_Format(PicklingError,
3465 "length of the NEWOBJ_EX argument tuple must be "
3466 "exactly 3, not %zd", Py_SIZE(argtup));
3467 return -1;
3468 }
3469
3470 cls = PyTuple_GET_ITEM(argtup, 0);
3471 if (!PyType_Check(cls)) {
3472 PyErr_Format(PicklingError,
3473 "first item from NEWOBJ_EX argument tuple must "
3474 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3475 return -1;
3476 }
3477 args = PyTuple_GET_ITEM(argtup, 1);
3478 if (!PyTuple_Check(args)) {
3479 PyErr_Format(PicklingError,
3480 "second item from NEWOBJ_EX argument tuple must "
3481 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3482 return -1;
3483 }
3484 kwargs = PyTuple_GET_ITEM(argtup, 2);
3485 if (!PyDict_Check(kwargs)) {
3486 PyErr_Format(PicklingError,
3487 "third item from NEWOBJ_EX argument tuple must "
3488 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3489 return -1;
3490 }
3491
3492 if (save(self, cls, 0) < 0 ||
3493 save(self, args, 0) < 0 ||
3494 save(self, kwargs, 0) < 0 ||
3495 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3496 return -1;
3497 }
3498 }
3499 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003500 PyObject *cls;
3501 PyObject *newargtup;
3502 PyObject *obj_class;
3503 int p;
3504
3505 /* Sanity checks. */
3506 if (Py_SIZE(argtup) < 1) {
3507 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3508 return -1;
3509 }
3510
3511 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003512 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003513 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003514 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003515 return -1;
3516 }
3517
3518 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003519 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003520 p = obj_class != cls; /* true iff a problem */
3521 Py_DECREF(obj_class);
3522 if (p) {
3523 PyErr_SetString(PicklingError, "args[0] from "
3524 "__newobj__ args has the wrong class");
3525 return -1;
3526 }
3527 }
3528 /* XXX: These calls save() are prone to infinite recursion. Imagine
3529 what happen if the value returned by the __reduce__() method of
3530 some extension type contains another object of the same type. Ouch!
3531
3532 Here is a quick example, that I ran into, to illustrate what I
3533 mean:
3534
3535 >>> import pickle, copyreg
3536 >>> copyreg.dispatch_table.pop(complex)
3537 >>> pickle.dumps(1+2j)
3538 Traceback (most recent call last):
3539 ...
3540 RuntimeError: maximum recursion depth exceeded
3541
3542 Removing the complex class from copyreg.dispatch_table made the
3543 __reduce_ex__() method emit another complex object:
3544
3545 >>> (1+1j).__reduce_ex__(2)
3546 (<function __newobj__ at 0xb7b71c3c>,
3547 (<class 'complex'>, (1+1j)), None, None, None)
3548
3549 Thus when save() was called on newargstup (the 2nd item) recursion
3550 ensued. Of course, the bug was in the complex class which had a
3551 broken __getnewargs__() that emitted another complex object. But,
3552 the point, here, is it is quite easy to end up with a broken reduce
3553 function. */
3554
3555 /* Save the class and its __new__ arguments. */
3556 if (save(self, cls, 0) < 0)
3557 return -1;
3558
3559 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3560 if (newargtup == NULL)
3561 return -1;
3562
3563 p = save(self, newargtup, 0);
3564 Py_DECREF(newargtup);
3565 if (p < 0)
3566 return -1;
3567
3568 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003569 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003570 return -1;
3571 }
3572 else { /* Not using NEWOBJ. */
3573 if (save(self, callable, 0) < 0 ||
3574 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003575 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003576 return -1;
3577 }
3578
3579 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3580 the caller do not want to memoize the object. Not particularly useful,
3581 but that is to mimic the behavior save_reduce() in pickle.py when
3582 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003583 if (obj != NULL) {
3584 /* If the object is already in the memo, this means it is
3585 recursive. In this case, throw away everything we put on the
3586 stack, and fetch the object back from the memo. */
3587 if (PyMemoTable_Get(self->memo, obj)) {
3588 const char pop_op = POP;
3589
3590 if (_Pickler_Write(self, &pop_op, 1) < 0)
3591 return -1;
3592 if (memo_get(self, obj) < 0)
3593 return -1;
3594
3595 return 0;
3596 }
3597 else if (memo_put(self, obj) < 0)
3598 return -1;
3599 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003600
3601 if (listitems && batch_list(self, listitems) < 0)
3602 return -1;
3603
3604 if (dictitems && batch_dict(self, dictitems) < 0)
3605 return -1;
3606
3607 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003608 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003609 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003610 return -1;
3611 }
3612
3613 return 0;
3614}
3615
3616static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003617save_method(PicklerObject *self, PyObject *obj)
3618{
3619 PyObject *method_self = PyCFunction_GET_SELF(obj);
3620
3621 if (method_self == NULL || PyModule_Check(method_self)) {
3622 return save_global(self, obj, NULL);
3623 }
3624 else {
3625 PyObject *builtins;
3626 PyObject *getattr;
3627 PyObject *reduce_value;
3628 int status = -1;
3629 _Py_IDENTIFIER(getattr);
3630
3631 builtins = PyEval_GetBuiltins();
3632 getattr = _PyDict_GetItemId(builtins, &PyId_getattr);
3633 reduce_value = \
3634 Py_BuildValue("O(Os)", getattr, method_self,
3635 ((PyCFunctionObject *)obj)->m_ml->ml_name);
3636 if (reduce_value != NULL) {
3637 status = save_reduce(self, reduce_value, obj);
3638 Py_DECREF(reduce_value);
3639 }
3640 return status;
3641 }
3642}
3643
3644static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003645save(PicklerObject *self, PyObject *obj, int pers_save)
3646{
3647 PyTypeObject *type;
3648 PyObject *reduce_func = NULL;
3649 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003650 int status = 0;
3651
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003652 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003653 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003654
3655 /* The extra pers_save argument is necessary to avoid calling save_pers()
3656 on its returned object. */
3657 if (!pers_save && self->pers_func) {
3658 /* save_pers() returns:
3659 -1 to signal an error;
3660 0 if it did nothing successfully;
3661 1 if a persistent id was saved.
3662 */
3663 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3664 goto done;
3665 }
3666
3667 type = Py_TYPE(obj);
3668
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003669 /* The old cPickle had an optimization that used switch-case statement
3670 dispatching on the first letter of the type name. This has was removed
3671 since benchmarks shown that this optimization was actually slowing
3672 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003673
3674 /* Atom types; these aren't memoized, so don't check the memo. */
3675
3676 if (obj == Py_None) {
3677 status = save_none(self, obj);
3678 goto done;
3679 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003680 else if (obj == Py_Ellipsis) {
3681 status = save_ellipsis(self, obj);
3682 goto done;
3683 }
3684 else if (obj == Py_NotImplemented) {
3685 status = save_notimplemented(self, obj);
3686 goto done;
3687 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003688 else if (obj == Py_False || obj == Py_True) {
3689 status = save_bool(self, obj);
3690 goto done;
3691 }
3692 else if (type == &PyLong_Type) {
3693 status = save_long(self, obj);
3694 goto done;
3695 }
3696 else if (type == &PyFloat_Type) {
3697 status = save_float(self, obj);
3698 goto done;
3699 }
3700
3701 /* Check the memo to see if it has the object. If so, generate
3702 a GET (or BINGET) opcode, instead of pickling the object
3703 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003704 if (PyMemoTable_Get(self->memo, obj)) {
3705 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003706 goto error;
3707 goto done;
3708 }
3709
3710 if (type == &PyBytes_Type) {
3711 status = save_bytes(self, obj);
3712 goto done;
3713 }
3714 else if (type == &PyUnicode_Type) {
3715 status = save_unicode(self, obj);
3716 goto done;
3717 }
3718 else if (type == &PyDict_Type) {
3719 status = save_dict(self, obj);
3720 goto done;
3721 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003722 else if (type == &PySet_Type) {
3723 status = save_set(self, obj);
3724 goto done;
3725 }
3726 else if (type == &PyFrozenSet_Type) {
3727 status = save_frozenset(self, obj);
3728 goto done;
3729 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003730 else if (type == &PyList_Type) {
3731 status = save_list(self, obj);
3732 goto done;
3733 }
3734 else if (type == &PyTuple_Type) {
3735 status = save_tuple(self, obj);
3736 goto done;
3737 }
3738 else if (type == &PyType_Type) {
3739 status = save_global(self, obj, NULL);
3740 goto done;
3741 }
3742 else if (type == &PyFunction_Type) {
3743 status = save_global(self, obj, NULL);
3744 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3745 /* fall back to reduce */
3746 PyErr_Clear();
3747 }
3748 else {
3749 goto done;
3750 }
3751 }
3752 else if (type == &PyCFunction_Type) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003753 status = save_method(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003754 goto done;
3755 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003756
3757 /* XXX: This part needs some unit tests. */
3758
3759 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003760 * self.dispatch_table, copyreg.dispatch_table, the object's
3761 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003762 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003763 if (self->dispatch_table == NULL) {
3764 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3765 /* PyDict_GetItem() unlike PyObject_GetItem() and
3766 PyObject_GetAttr() returns a borrowed ref */
3767 Py_XINCREF(reduce_func);
3768 } else {
3769 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3770 if (reduce_func == NULL) {
3771 if (PyErr_ExceptionMatches(PyExc_KeyError))
3772 PyErr_Clear();
3773 else
3774 goto error;
3775 }
3776 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003777 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003778 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003779 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003780 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003781 else if (PyType_IsSubtype(type, &PyType_Type)) {
3782 status = save_global(self, obj, NULL);
3783 goto done;
3784 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003785 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003786 _Py_IDENTIFIER(__reduce__);
3787 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003788
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003789
3790 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3791 automatically defined as __reduce__. While this is convenient, this
3792 make it impossible to know which method was actually called. Of
3793 course, this is not a big deal. But still, it would be nice to let
3794 the user know which method was called when something go
3795 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3796 don't actually have to check for a __reduce__ method. */
3797
3798 /* Check for a __reduce_ex__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003799 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003800 if (reduce_func != NULL) {
3801 PyObject *proto;
3802 proto = PyLong_FromLong(self->proto);
3803 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003804 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003805 }
3806 }
3807 else {
3808 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3809 PyErr_Clear();
3810 else
3811 goto error;
3812 /* Check for a __reduce__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003813 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003814 if (reduce_func != NULL) {
3815 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3816 }
3817 else {
3818 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3819 type->tp_name, obj);
3820 goto error;
3821 }
3822 }
3823 }
3824
3825 if (reduce_value == NULL)
3826 goto error;
3827
3828 if (PyUnicode_Check(reduce_value)) {
3829 status = save_global(self, obj, reduce_value);
3830 goto done;
3831 }
3832
3833 if (!PyTuple_Check(reduce_value)) {
3834 PyErr_SetString(PicklingError,
3835 "__reduce__ must return a string or tuple");
3836 goto error;
3837 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003838
3839 status = save_reduce(self, reduce_value, obj);
3840
3841 if (0) {
3842 error:
3843 status = -1;
3844 }
3845 done:
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003846 if (status == 0)
3847 status = _Pickler_OpcodeBoundary(self);
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003848 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003849 Py_XDECREF(reduce_func);
3850 Py_XDECREF(reduce_value);
3851
3852 return status;
3853}
3854
3855static int
3856dump(PicklerObject *self, PyObject *obj)
3857{
3858 const char stop_op = STOP;
3859
3860 if (self->proto >= 2) {
3861 char header[2];
3862
3863 header[0] = PROTO;
3864 assert(self->proto >= 0 && self->proto < 256);
3865 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003866 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003867 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003868 if (self->proto >= 4)
3869 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003870 }
3871
3872 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003873 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003874 return -1;
3875
3876 return 0;
3877}
3878
3879PyDoc_STRVAR(Pickler_clear_memo_doc,
3880"clear_memo() -> None. Clears the pickler's \"memo\"."
3881"\n"
3882"The memo is the data structure that remembers which objects the\n"
3883"pickler has already seen, so that shared or recursive objects are\n"
3884"pickled by reference and not by value. This method is useful when\n"
3885"re-using picklers.");
3886
3887static PyObject *
3888Pickler_clear_memo(PicklerObject *self)
3889{
3890 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003891 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003892
3893 Py_RETURN_NONE;
3894}
3895
3896PyDoc_STRVAR(Pickler_dump_doc,
3897"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3898
3899static PyObject *
3900Pickler_dump(PicklerObject *self, PyObject *args)
3901{
3902 PyObject *obj;
3903
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003904 /* Check whether the Pickler was initialized correctly (issue3664).
3905 Developers often forget to call __init__() in their subclasses, which
3906 would trigger a segfault without this check. */
3907 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003908 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003909 "Pickler.__init__() was not called by %s.__init__()",
3910 Py_TYPE(self)->tp_name);
3911 return NULL;
3912 }
3913
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003914 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3915 return NULL;
3916
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003917 if (_Pickler_ClearBuffer(self) < 0)
3918 return NULL;
3919
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003920 if (dump(self, obj) < 0)
3921 return NULL;
3922
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003923 if (_Pickler_FlushToFile(self) < 0)
3924 return NULL;
3925
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003926 Py_RETURN_NONE;
3927}
3928
3929static struct PyMethodDef Pickler_methods[] = {
3930 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3931 Pickler_dump_doc},
3932 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3933 Pickler_clear_memo_doc},
3934 {NULL, NULL} /* sentinel */
3935};
3936
3937static void
3938Pickler_dealloc(PicklerObject *self)
3939{
3940 PyObject_GC_UnTrack(self);
3941
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003942 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003943 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003944 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003945 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003946 Py_XDECREF(self->arg);
3947 Py_XDECREF(self->fast_memo);
3948
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003949 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003950
3951 Py_TYPE(self)->tp_free((PyObject *)self);
3952}
3953
3954static int
3955Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3956{
3957 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003958 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003959 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003960 Py_VISIT(self->arg);
3961 Py_VISIT(self->fast_memo);
3962 return 0;
3963}
3964
3965static int
3966Pickler_clear(PicklerObject *self)
3967{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003968 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003969 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003970 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003971 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003972 Py_CLEAR(self->arg);
3973 Py_CLEAR(self->fast_memo);
3974
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003975 if (self->memo != NULL) {
3976 PyMemoTable *memo = self->memo;
3977 self->memo = NULL;
3978 PyMemoTable_Del(memo);
3979 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003980 return 0;
3981}
3982
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003983
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003984PyDoc_STRVAR(Pickler_doc,
3985"Pickler(file, protocol=None)"
3986"\n"
3987"This takes a binary file for writing a pickle data stream.\n"
3988"\n"
3989"The optional protocol argument tells the pickler to use the\n"
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003990"given protocol; supported protocols are 0, 1, 2, 3 and 4. The\n"
3991"default protocol is 3; a backward-incompatible protocol designed for\n"
3992"Python 3.\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003993"\n"
3994"Specifying a negative protocol version selects the highest\n"
3995"protocol version supported. The higher the protocol used, the\n"
3996"more recent the version of Python needed to read the pickle\n"
3997"produced.\n"
3998"\n"
3999"The file argument must have a write() method that accepts a single\n"
4000"bytes argument. It can thus be a file object opened for binary\n"
4001"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004002"meets this interface.\n"
4003"\n"
4004"If fix_imports is True and protocol is less than 3, pickle will try to\n"
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004005"map the new Python 3 names to the old module names used in Python 2,\n"
4006"so that the pickle data stream is readable with Python 2.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004007
4008static int
4009Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
4010{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004011 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004012 PyObject *file;
4013 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004014 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004015 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004016 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004017
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004018 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004019 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004020 return -1;
4021
4022 /* In case of multiple __init__() calls, clear previous content. */
4023 if (self->write != NULL)
4024 (void)Pickler_clear(self);
4025
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004026 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
4027 return -1;
4028
4029 if (_Pickler_SetOutputStream(self, file) < 0)
4030 return -1;
4031
4032 /* memo and output_buffer may have already been created in _Pickler_New */
4033 if (self->memo == NULL) {
4034 self->memo = PyMemoTable_New();
4035 if (self->memo == NULL)
4036 return -1;
4037 }
4038 self->output_len = 0;
4039 if (self->output_buffer == NULL) {
4040 self->max_output_len = WRITE_BUF_SIZE;
4041 self->output_buffer = PyBytes_FromStringAndSize(NULL,
4042 self->max_output_len);
4043 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004044 return -1;
4045 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004046
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004047 self->arg = NULL;
4048 self->fast = 0;
4049 self->fast_nesting = 0;
4050 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004051 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02004052 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
4053 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
4054 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004055 if (self->pers_func == NULL)
4056 return -1;
4057 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004058 self->dispatch_table = NULL;
4059 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
4060 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
4061 &PyId_dispatch_table);
4062 if (self->dispatch_table == NULL)
4063 return -1;
4064 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004065 return 0;
4066}
4067
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004068/* Define a proxy object for the Pickler's internal memo object. This is to
4069 * avoid breaking code like:
4070 * pickler.memo.clear()
4071 * and
4072 * pickler.memo = saved_memo
4073 * Is this a good idea? Not really, but we don't want to break code that uses
4074 * it. Note that we don't implement the entire mapping API here. This is
4075 * intentional, as these should be treated as black-box implementation details.
4076 */
4077
4078typedef struct {
4079 PyObject_HEAD
4080 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
4081} PicklerMemoProxyObject;
4082
4083PyDoc_STRVAR(pmp_clear_doc,
4084"memo.clear() -> None. Remove all items from memo.");
4085
4086static PyObject *
4087pmp_clear(PicklerMemoProxyObject *self)
4088{
4089 if (self->pickler->memo)
4090 PyMemoTable_Clear(self->pickler->memo);
4091 Py_RETURN_NONE;
4092}
4093
4094PyDoc_STRVAR(pmp_copy_doc,
4095"memo.copy() -> new_memo. Copy the memo to a new object.");
4096
4097static PyObject *
4098pmp_copy(PicklerMemoProxyObject *self)
4099{
4100 Py_ssize_t i;
4101 PyMemoTable *memo;
4102 PyObject *new_memo = PyDict_New();
4103 if (new_memo == NULL)
4104 return NULL;
4105
4106 memo = self->pickler->memo;
4107 for (i = 0; i < memo->mt_allocated; ++i) {
4108 PyMemoEntry entry = memo->mt_table[i];
4109 if (entry.me_key != NULL) {
4110 int status;
4111 PyObject *key, *value;
4112
4113 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004114 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004115
4116 if (key == NULL || value == NULL) {
4117 Py_XDECREF(key);
4118 Py_XDECREF(value);
4119 goto error;
4120 }
4121 status = PyDict_SetItem(new_memo, key, value);
4122 Py_DECREF(key);
4123 Py_DECREF(value);
4124 if (status < 0)
4125 goto error;
4126 }
4127 }
4128 return new_memo;
4129
4130 error:
4131 Py_XDECREF(new_memo);
4132 return NULL;
4133}
4134
4135PyDoc_STRVAR(pmp_reduce_doc,
4136"memo.__reduce__(). Pickling support.");
4137
4138static PyObject *
4139pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
4140{
4141 PyObject *reduce_value, *dict_args;
4142 PyObject *contents = pmp_copy(self);
4143 if (contents == NULL)
4144 return NULL;
4145
4146 reduce_value = PyTuple_New(2);
4147 if (reduce_value == NULL) {
4148 Py_DECREF(contents);
4149 return NULL;
4150 }
4151 dict_args = PyTuple_New(1);
4152 if (dict_args == NULL) {
4153 Py_DECREF(contents);
4154 Py_DECREF(reduce_value);
4155 return NULL;
4156 }
4157 PyTuple_SET_ITEM(dict_args, 0, contents);
4158 Py_INCREF((PyObject *)&PyDict_Type);
4159 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4160 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4161 return reduce_value;
4162}
4163
4164static PyMethodDef picklerproxy_methods[] = {
4165 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
4166 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
4167 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
4168 {NULL, NULL} /* sentinel */
4169};
4170
4171static void
4172PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4173{
4174 PyObject_GC_UnTrack(self);
4175 Py_XDECREF(self->pickler);
4176 PyObject_GC_Del((PyObject *)self);
4177}
4178
4179static int
4180PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4181 visitproc visit, void *arg)
4182{
4183 Py_VISIT(self->pickler);
4184 return 0;
4185}
4186
4187static int
4188PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4189{
4190 Py_CLEAR(self->pickler);
4191 return 0;
4192}
4193
4194static PyTypeObject PicklerMemoProxyType = {
4195 PyVarObject_HEAD_INIT(NULL, 0)
4196 "_pickle.PicklerMemoProxy", /*tp_name*/
4197 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4198 0,
4199 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4200 0, /* tp_print */
4201 0, /* tp_getattr */
4202 0, /* tp_setattr */
4203 0, /* tp_compare */
4204 0, /* tp_repr */
4205 0, /* tp_as_number */
4206 0, /* tp_as_sequence */
4207 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00004208 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004209 0, /* tp_call */
4210 0, /* tp_str */
4211 PyObject_GenericGetAttr, /* tp_getattro */
4212 PyObject_GenericSetAttr, /* tp_setattro */
4213 0, /* tp_as_buffer */
4214 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4215 0, /* tp_doc */
4216 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4217 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4218 0, /* tp_richcompare */
4219 0, /* tp_weaklistoffset */
4220 0, /* tp_iter */
4221 0, /* tp_iternext */
4222 picklerproxy_methods, /* tp_methods */
4223};
4224
4225static PyObject *
4226PicklerMemoProxy_New(PicklerObject *pickler)
4227{
4228 PicklerMemoProxyObject *self;
4229
4230 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4231 if (self == NULL)
4232 return NULL;
4233 Py_INCREF(pickler);
4234 self->pickler = pickler;
4235 PyObject_GC_Track(self);
4236 return (PyObject *)self;
4237}
4238
4239/*****************************************************************************/
4240
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004241static PyObject *
4242Pickler_get_memo(PicklerObject *self)
4243{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004244 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004245}
4246
4247static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004248Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004249{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004250 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004251
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004252 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004253 PyErr_SetString(PyExc_TypeError,
4254 "attribute deletion is not supported");
4255 return -1;
4256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004257
4258 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4259 PicklerObject *pickler =
4260 ((PicklerMemoProxyObject *)obj)->pickler;
4261
4262 new_memo = PyMemoTable_Copy(pickler->memo);
4263 if (new_memo == NULL)
4264 return -1;
4265 }
4266 else if (PyDict_Check(obj)) {
4267 Py_ssize_t i = 0;
4268 PyObject *key, *value;
4269
4270 new_memo = PyMemoTable_New();
4271 if (new_memo == NULL)
4272 return -1;
4273
4274 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004275 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004276 PyObject *memo_obj;
4277
4278 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
4279 PyErr_SetString(PyExc_TypeError,
4280 "'memo' values must be 2-item tuples");
4281 goto error;
4282 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004283 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004284 if (memo_id == -1 && PyErr_Occurred())
4285 goto error;
4286 memo_obj = PyTuple_GET_ITEM(value, 1);
4287 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4288 goto error;
4289 }
4290 }
4291 else {
4292 PyErr_Format(PyExc_TypeError,
4293 "'memo' attribute must be an PicklerMemoProxy object"
4294 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004295 return -1;
4296 }
4297
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004298 PyMemoTable_Del(self->memo);
4299 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004300
4301 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004302
4303 error:
4304 if (new_memo)
4305 PyMemoTable_Del(new_memo);
4306 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004307}
4308
4309static PyObject *
4310Pickler_get_persid(PicklerObject *self)
4311{
4312 if (self->pers_func == NULL)
4313 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4314 else
4315 Py_INCREF(self->pers_func);
4316 return self->pers_func;
4317}
4318
4319static int
4320Pickler_set_persid(PicklerObject *self, PyObject *value)
4321{
4322 PyObject *tmp;
4323
4324 if (value == NULL) {
4325 PyErr_SetString(PyExc_TypeError,
4326 "attribute deletion is not supported");
4327 return -1;
4328 }
4329 if (!PyCallable_Check(value)) {
4330 PyErr_SetString(PyExc_TypeError,
4331 "persistent_id must be a callable taking one argument");
4332 return -1;
4333 }
4334
4335 tmp = self->pers_func;
4336 Py_INCREF(value);
4337 self->pers_func = value;
4338 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4339
4340 return 0;
4341}
4342
4343static PyMemberDef Pickler_members[] = {
4344 {"bin", T_INT, offsetof(PicklerObject, bin)},
4345 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004346 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004347 {NULL}
4348};
4349
4350static PyGetSetDef Pickler_getsets[] = {
4351 {"memo", (getter)Pickler_get_memo,
4352 (setter)Pickler_set_memo},
4353 {"persistent_id", (getter)Pickler_get_persid,
4354 (setter)Pickler_set_persid},
4355 {NULL}
4356};
4357
4358static PyTypeObject Pickler_Type = {
4359 PyVarObject_HEAD_INIT(NULL, 0)
4360 "_pickle.Pickler" , /*tp_name*/
4361 sizeof(PicklerObject), /*tp_basicsize*/
4362 0, /*tp_itemsize*/
4363 (destructor)Pickler_dealloc, /*tp_dealloc*/
4364 0, /*tp_print*/
4365 0, /*tp_getattr*/
4366 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00004367 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004368 0, /*tp_repr*/
4369 0, /*tp_as_number*/
4370 0, /*tp_as_sequence*/
4371 0, /*tp_as_mapping*/
4372 0, /*tp_hash*/
4373 0, /*tp_call*/
4374 0, /*tp_str*/
4375 0, /*tp_getattro*/
4376 0, /*tp_setattro*/
4377 0, /*tp_as_buffer*/
4378 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4379 Pickler_doc, /*tp_doc*/
4380 (traverseproc)Pickler_traverse, /*tp_traverse*/
4381 (inquiry)Pickler_clear, /*tp_clear*/
4382 0, /*tp_richcompare*/
4383 0, /*tp_weaklistoffset*/
4384 0, /*tp_iter*/
4385 0, /*tp_iternext*/
4386 Pickler_methods, /*tp_methods*/
4387 Pickler_members, /*tp_members*/
4388 Pickler_getsets, /*tp_getset*/
4389 0, /*tp_base*/
4390 0, /*tp_dict*/
4391 0, /*tp_descr_get*/
4392 0, /*tp_descr_set*/
4393 0, /*tp_dictoffset*/
4394 (initproc)Pickler_init, /*tp_init*/
4395 PyType_GenericAlloc, /*tp_alloc*/
4396 PyType_GenericNew, /*tp_new*/
4397 PyObject_GC_Del, /*tp_free*/
4398 0, /*tp_is_gc*/
4399};
4400
Victor Stinner121aab42011-09-29 23:40:53 +02004401/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004402
4403 XXX: It would be nice to able to avoid Python function call overhead, by
4404 using directly the C version of find_class(), when find_class() is not
4405 overridden by a subclass. Although, this could become rather hackish. A
4406 simpler optimization would be to call the C function when self is not a
4407 subclass instance. */
4408static PyObject *
4409find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4410{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004411 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004412
4413 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
4414 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004415}
4416
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004417static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004418marker(UnpicklerObject *self)
4419{
4420 if (self->num_marks < 1) {
4421 PyErr_SetString(UnpicklingError, "could not find MARK");
4422 return -1;
4423 }
4424
4425 return self->marks[--self->num_marks];
4426}
4427
4428static int
4429load_none(UnpicklerObject *self)
4430{
4431 PDATA_APPEND(self->stack, Py_None, -1);
4432 return 0;
4433}
4434
4435static int
4436bad_readline(void)
4437{
4438 PyErr_SetString(UnpicklingError, "pickle data was truncated");
4439 return -1;
4440}
4441
4442static int
4443load_int(UnpicklerObject *self)
4444{
4445 PyObject *value;
4446 char *endptr, *s;
4447 Py_ssize_t len;
4448 long x;
4449
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004450 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004451 return -1;
4452 if (len < 2)
4453 return bad_readline();
4454
4455 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004456 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004457 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004458 x = strtol(s, &endptr, 0);
4459
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004460 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004461 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03004462 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004463 errno = 0;
4464 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004465 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004466 if (value == NULL) {
4467 PyErr_SetString(PyExc_ValueError,
4468 "could not convert string to int");
4469 return -1;
4470 }
4471 }
4472 else {
4473 if (len == 3 && (x == 0 || x == 1)) {
4474 if ((value = PyBool_FromLong(x)) == NULL)
4475 return -1;
4476 }
4477 else {
4478 if ((value = PyLong_FromLong(x)) == NULL)
4479 return -1;
4480 }
4481 }
4482
4483 PDATA_PUSH(self->stack, value, -1);
4484 return 0;
4485}
4486
4487static int
4488load_bool(UnpicklerObject *self, PyObject *boolean)
4489{
4490 assert(boolean == Py_True || boolean == Py_False);
4491 PDATA_APPEND(self->stack, boolean, -1);
4492 return 0;
4493}
4494
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004495/* s contains x bytes of an unsigned little-endian integer. Return its value
4496 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4497 */
4498static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004499calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004500{
4501 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004502 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004503 size_t x = 0;
4504
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004505 for (i = 0; i < nbytes; i++) {
4506 x |= (size_t) s[i] << (8 * i);
4507 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004508
4509 if (x > PY_SSIZE_T_MAX)
4510 return -1;
4511 else
4512 return (Py_ssize_t) x;
4513}
4514
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a set bit into the sign
     * position of a signed long is undefined behaviour, which the 4-byte
     * case would hit on platforms where long is 32 bits wide.
     */
    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is x - 2**32; compute it
     * as -(2**32 - 1 - x) - 1 so that every intermediate fits in a long
     * whatever its width.
     */
    if (nbytes == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
4541
4542static int
4543load_binintx(UnpicklerObject *self, char *s, int size)
4544{
4545 PyObject *value;
4546 long x;
4547
4548 x = calc_binint(s, size);
4549
4550 if ((value = PyLong_FromLong(x)) == NULL)
4551 return -1;
4552
4553 PDATA_PUSH(self->stack, value, -1);
4554 return 0;
4555}
4556
4557static int
4558load_binint(UnpicklerObject *self)
4559{
4560 char *s;
4561
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004562 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004563 return -1;
4564
4565 return load_binintx(self, s, 4);
4566}
4567
4568static int
4569load_binint1(UnpicklerObject *self)
4570{
4571 char *s;
4572
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004573 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004574 return -1;
4575
4576 return load_binintx(self, s, 1);
4577}
4578
4579static int
4580load_binint2(UnpicklerObject *self)
4581{
4582 char *s;
4583
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004584 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004585 return -1;
4586
4587 return load_binintx(self, s, 2);
4588}
4589
4590static int
4591load_long(UnpicklerObject *self)
4592{
4593 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004594 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004595 Py_ssize_t len;
4596
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004597 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004598 return -1;
4599 if (len < 2)
4600 return bad_readline();
4601
Mark Dickinson8dd05142009-01-20 20:43:58 +00004602 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4603 the 'L' before calling PyLong_FromString. In order to maintain
4604 compatibility with Python 3.0.0, we don't actually *require*
4605 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004606 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004607 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004608 /* XXX: Should the base argument explicitly set to 10? */
4609 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004610 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004611 return -1;
4612
4613 PDATA_PUSH(self->stack, value, -1);
4614 return 0;
4615}
4616
4617/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4618 * data following.
4619 */
4620static int
4621load_counted_long(UnpicklerObject *self, int size)
4622{
4623 PyObject *value;
4624 char *nbytes;
4625 char *pdata;
4626
4627 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004628 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004629 return -1;
4630
4631 size = calc_binint(nbytes, size);
4632 if (size < 0) {
4633 /* Corrupt or hostile pickle -- we never write one like this */
4634 PyErr_SetString(UnpicklingError,
4635 "LONG pickle has negative byte count");
4636 return -1;
4637 }
4638
4639 if (size == 0)
4640 value = PyLong_FromLong(0L);
4641 else {
4642 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004643 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004644 return -1;
4645 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4646 1 /* little endian */ , 1 /* signed */ );
4647 }
4648 if (value == NULL)
4649 return -1;
4650 PDATA_PUSH(self->stack, value, -1);
4651 return 0;
4652}
4653
4654static int
4655load_float(UnpicklerObject *self)
4656{
4657 PyObject *value;
4658 char *endptr, *s;
4659 Py_ssize_t len;
4660 double d;
4661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004662 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004663 return -1;
4664 if (len < 2)
4665 return bad_readline();
4666
4667 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004668 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4669 if (d == -1.0 && PyErr_Occurred())
4670 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004671 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004672 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4673 return -1;
4674 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004675 value = PyFloat_FromDouble(d);
4676 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004677 return -1;
4678
4679 PDATA_PUSH(self->stack, value, -1);
4680 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004681}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004682
4683static int
4684load_binfloat(UnpicklerObject *self)
4685{
4686 PyObject *value;
4687 double x;
4688 char *s;
4689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004690 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004691 return -1;
4692
4693 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4694 if (x == -1.0 && PyErr_Occurred())
4695 return -1;
4696
4697 if ((value = PyFloat_FromDouble(x)) == NULL)
4698 return -1;
4699
4700 PDATA_PUSH(self->stack, value, -1);
4701 return 0;
4702}
4703
4704static int
4705load_string(UnpicklerObject *self)
4706{
4707 PyObject *bytes;
4708 PyObject *str = NULL;
4709 Py_ssize_t len;
4710 char *s, *p;
4711
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004712 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004713 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004714 /* Strip the newline */
4715 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004716 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004717 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004718 p = s + 1;
4719 len -= 2;
4720 }
4721 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004722 PyErr_SetString(UnpicklingError,
4723 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004724 return -1;
4725 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004726 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727
4728 /* Use the PyBytes API to decode the string, since that is what is used
4729 to encode, and then coerce the result to Unicode. */
4730 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004731 if (bytes == NULL)
4732 return -1;
4733 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4734 Py_DECREF(bytes);
4735 if (str == NULL)
4736 return -1;
4737
4738 PDATA_PUSH(self->stack, str, -1);
4739 return 0;
4740}
4741
4742static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004743load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744{
4745 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004746 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004747 char *s;
4748
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004749 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004750 return -1;
4751
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004752 size = calc_binsize(s, nbytes);
4753 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004754 PyErr_Format(PyExc_OverflowError,
4755 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004756 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757 return -1;
4758 }
4759
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004760 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004761 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004762
4763 bytes = PyBytes_FromStringAndSize(s, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764 if (bytes == NULL)
4765 return -1;
4766
4767 PDATA_PUSH(self->stack, bytes, -1);
4768 return 0;
4769}
4770
4771static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004772load_counted_binstring(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004773{
4774 PyObject *str;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004775 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004776 char *s;
4777
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004778 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779 return -1;
4780
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004781 size = calc_binsize(s, nbytes);
4782 if (size < 0) {
4783 PyErr_Format(UnpicklingError,
4784 "BINSTRING exceeds system's maximum size of %zd bytes",
4785 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004786 return -1;
4787 }
4788
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004789 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004790 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004791 /* Convert Python 2.x strings to unicode. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004792 str = PyUnicode_Decode(s, size, self->encoding, self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004793 if (str == NULL)
4794 return -1;
4795
4796 PDATA_PUSH(self->stack, str, -1);
4797 return 0;
4798}
4799
4800static int
4801load_unicode(UnpicklerObject *self)
4802{
4803 PyObject *str;
4804 Py_ssize_t len;
4805 char *s;
4806
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004807 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004808 return -1;
4809 if (len < 1)
4810 return bad_readline();
4811
4812 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4813 if (str == NULL)
4814 return -1;
4815
4816 PDATA_PUSH(self->stack, str, -1);
4817 return 0;
4818}
4819
4820static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004821load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004822{
4823 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004824 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004825 char *s;
4826
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004827 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004828 return -1;
4829
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004830 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004831 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004832 PyErr_Format(PyExc_OverflowError,
4833 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004834 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835 return -1;
4836 }
4837
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004838 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004839 return -1;
4840
Victor Stinner485fb562010-04-13 11:07:24 +00004841 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004842 if (str == NULL)
4843 return -1;
4844
4845 PDATA_PUSH(self->stack, str, -1);
4846 return 0;
4847}
4848
4849static int
4850load_tuple(UnpicklerObject *self)
4851{
4852 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004853 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004854
4855 if ((i = marker(self)) < 0)
4856 return -1;
4857
4858 tuple = Pdata_poptuple(self->stack, i);
4859 if (tuple == NULL)
4860 return -1;
4861 PDATA_PUSH(self->stack, tuple, -1);
4862 return 0;
4863}
4864
4865static int
4866load_counted_tuple(UnpicklerObject *self, int len)
4867{
4868 PyObject *tuple;
4869
4870 tuple = PyTuple_New(len);
4871 if (tuple == NULL)
4872 return -1;
4873
4874 while (--len >= 0) {
4875 PyObject *item;
4876
4877 PDATA_POP(self->stack, item);
4878 if (item == NULL)
4879 return -1;
4880 PyTuple_SET_ITEM(tuple, len, item);
4881 }
4882 PDATA_PUSH(self->stack, tuple, -1);
4883 return 0;
4884}
4885
4886static int
4887load_empty_list(UnpicklerObject *self)
4888{
4889 PyObject *list;
4890
4891 if ((list = PyList_New(0)) == NULL)
4892 return -1;
4893 PDATA_PUSH(self->stack, list, -1);
4894 return 0;
4895}
4896
4897static int
4898load_empty_dict(UnpicklerObject *self)
4899{
4900 PyObject *dict;
4901
4902 if ((dict = PyDict_New()) == NULL)
4903 return -1;
4904 PDATA_PUSH(self->stack, dict, -1);
4905 return 0;
4906}
4907
4908static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004909load_empty_set(UnpicklerObject *self)
4910{
4911 PyObject *set;
4912
4913 if ((set = PySet_New(NULL)) == NULL)
4914 return -1;
4915 PDATA_PUSH(self->stack, set, -1);
4916 return 0;
4917}
4918
4919static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004920load_list(UnpicklerObject *self)
4921{
4922 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004923 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004924
4925 if ((i = marker(self)) < 0)
4926 return -1;
4927
4928 list = Pdata_poplist(self->stack, i);
4929 if (list == NULL)
4930 return -1;
4931 PDATA_PUSH(self->stack, list, -1);
4932 return 0;
4933}
4934
4935static int
4936load_dict(UnpicklerObject *self)
4937{
4938 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004939 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004940
4941 if ((i = marker(self)) < 0)
4942 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004943 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944
4945 if ((dict = PyDict_New()) == NULL)
4946 return -1;
4947
4948 for (k = i + 1; k < j; k += 2) {
4949 key = self->stack->data[k - 1];
4950 value = self->stack->data[k];
4951 if (PyDict_SetItem(dict, key, value) < 0) {
4952 Py_DECREF(dict);
4953 return -1;
4954 }
4955 }
4956 Pdata_clear(self->stack, i);
4957 PDATA_PUSH(self->stack, dict, -1);
4958 return 0;
4959}
4960
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004961static int
4962load_frozenset(UnpicklerObject *self)
4963{
4964 PyObject *items;
4965 PyObject *frozenset;
4966 Py_ssize_t i;
4967
4968 if ((i = marker(self)) < 0)
4969 return -1;
4970
4971 items = Pdata_poptuple(self->stack, i);
4972 if (items == NULL)
4973 return -1;
4974
4975 frozenset = PyFrozenSet_New(items);
4976 Py_DECREF(items);
4977 if (frozenset == NULL)
4978 return -1;
4979
4980 PDATA_PUSH(self->stack, frozenset, -1);
4981 return 0;
4982}
4983
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004984static PyObject *
4985instantiate(PyObject *cls, PyObject *args)
4986{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004987 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004988 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004989 /* Caller must assure args are a tuple. Normally, args come from
4990 Pdata_poptuple which packs objects from the top of the stack
4991 into a newly created tuple. */
4992 assert(PyTuple_Check(args));
4993 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004994 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004995 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004996 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004997 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004998 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004999
5000 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005001 }
5002 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005003}
5004
5005static int
5006load_obj(UnpicklerObject *self)
5007{
5008 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005009 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010
5011 if ((i = marker(self)) < 0)
5012 return -1;
5013
5014 args = Pdata_poptuple(self->stack, i + 1);
5015 if (args == NULL)
5016 return -1;
5017
5018 PDATA_POP(self->stack, cls);
5019 if (cls) {
5020 obj = instantiate(cls, args);
5021 Py_DECREF(cls);
5022 }
5023 Py_DECREF(args);
5024 if (obj == NULL)
5025 return -1;
5026
5027 PDATA_PUSH(self->stack, obj, -1);
5028 return 0;
5029}
5030
5031static int
5032load_inst(UnpicklerObject *self)
5033{
5034 PyObject *cls = NULL;
5035 PyObject *args = NULL;
5036 PyObject *obj = NULL;
5037 PyObject *module_name;
5038 PyObject *class_name;
5039 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005040 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005041 char *s;
5042
5043 if ((i = marker(self)) < 0)
5044 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005045 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005046 return -1;
5047 if (len < 2)
5048 return bad_readline();
5049
5050 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
5051 identifiers are permitted in Python 3.0, since the INST opcode is only
5052 supported by older protocols on Python 2.x. */
5053 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5054 if (module_name == NULL)
5055 return -1;
5056
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005057 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005058 if (len < 2)
5059 return bad_readline();
5060 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005061 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005062 cls = find_class(self, module_name, class_name);
5063 Py_DECREF(class_name);
5064 }
5065 }
5066 Py_DECREF(module_name);
5067
5068 if (cls == NULL)
5069 return -1;
5070
5071 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5072 obj = instantiate(cls, args);
5073 Py_DECREF(args);
5074 }
5075 Py_DECREF(cls);
5076
5077 if (obj == NULL)
5078 return -1;
5079
5080 PDATA_PUSH(self->stack, obj, -1);
5081 return 0;
5082}
5083
5084static int
5085load_newobj(UnpicklerObject *self)
5086{
5087 PyObject *args = NULL;
5088 PyObject *clsraw = NULL;
5089 PyTypeObject *cls; /* clsraw cast to its true type */
5090 PyObject *obj;
5091
5092 /* Stack is ... cls argtuple, and we want to call
5093 * cls.__new__(cls, *argtuple).
5094 */
5095 PDATA_POP(self->stack, args);
5096 if (args == NULL)
5097 goto error;
5098 if (!PyTuple_Check(args)) {
5099 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
5100 goto error;
5101 }
5102
5103 PDATA_POP(self->stack, clsraw);
5104 cls = (PyTypeObject *)clsraw;
5105 if (cls == NULL)
5106 goto error;
5107 if (!PyType_Check(cls)) {
5108 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
5109 "isn't a type object");
5110 goto error;
5111 }
5112 if (cls->tp_new == NULL) {
5113 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
5114 "has NULL tp_new");
5115 goto error;
5116 }
5117
5118 /* Call __new__. */
5119 obj = cls->tp_new(cls, args, NULL);
5120 if (obj == NULL)
5121 goto error;
5122
5123 Py_DECREF(args);
5124 Py_DECREF(clsraw);
5125 PDATA_PUSH(self->stack, obj, -1);
5126 return 0;
5127
5128 error:
5129 Py_XDECREF(args);
5130 Py_XDECREF(clsraw);
5131 return -1;
5132}
5133
5134static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005135load_newobj_ex(UnpicklerObject *self)
5136{
5137 PyObject *cls, *args, *kwargs;
5138 PyObject *obj;
5139
5140 PDATA_POP(self->stack, kwargs);
5141 if (kwargs == NULL) {
5142 return -1;
5143 }
5144 PDATA_POP(self->stack, args);
5145 if (args == NULL) {
5146 Py_DECREF(kwargs);
5147 return -1;
5148 }
5149 PDATA_POP(self->stack, cls);
5150 if (cls == NULL) {
5151 Py_DECREF(kwargs);
5152 Py_DECREF(args);
5153 return -1;
5154 }
5155
5156 if (!PyType_Check(cls)) {
5157 Py_DECREF(kwargs);
5158 Py_DECREF(args);
5159 Py_DECREF(cls);
5160 PyErr_Format(UnpicklingError,
5161 "NEWOBJ_EX class argument must be a type, not %.200s",
5162 Py_TYPE(cls)->tp_name);
5163 return -1;
5164 }
5165
5166 if (((PyTypeObject *)cls)->tp_new == NULL) {
5167 Py_DECREF(kwargs);
5168 Py_DECREF(args);
5169 Py_DECREF(cls);
5170 PyErr_SetString(UnpicklingError,
5171 "NEWOBJ_EX class argument doesn't have __new__");
5172 return -1;
5173 }
5174 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5175 Py_DECREF(kwargs);
5176 Py_DECREF(args);
5177 Py_DECREF(cls);
5178 if (obj == NULL) {
5179 return -1;
5180 }
5181 PDATA_PUSH(self->stack, obj, -1);
5182 return 0;
5183}
5184
5185static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005186load_global(UnpicklerObject *self)
5187{
5188 PyObject *global = NULL;
5189 PyObject *module_name;
5190 PyObject *global_name;
5191 Py_ssize_t len;
5192 char *s;
5193
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005194 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005195 return -1;
5196 if (len < 2)
5197 return bad_readline();
5198 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5199 if (!module_name)
5200 return -1;
5201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005202 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005203 if (len < 2) {
5204 Py_DECREF(module_name);
5205 return bad_readline();
5206 }
5207 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5208 if (global_name) {
5209 global = find_class(self, module_name, global_name);
5210 Py_DECREF(global_name);
5211 }
5212 }
5213 Py_DECREF(module_name);
5214
5215 if (global == NULL)
5216 return -1;
5217 PDATA_PUSH(self->stack, global, -1);
5218 return 0;
5219}
5220
5221static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005222load_stack_global(UnpicklerObject *self)
5223{
5224 PyObject *global;
5225 PyObject *module_name;
5226 PyObject *global_name;
5227
5228 PDATA_POP(self->stack, global_name);
5229 PDATA_POP(self->stack, module_name);
5230 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5231 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5232 PyErr_SetString(UnpicklingError, "STACK_GLOBAL requires str");
5233 Py_XDECREF(global_name);
5234 Py_XDECREF(module_name);
5235 return -1;
5236 }
5237 global = find_class(self, module_name, global_name);
5238 Py_DECREF(global_name);
5239 Py_DECREF(module_name);
5240 if (global == NULL)
5241 return -1;
5242 PDATA_PUSH(self->stack, global, -1);
5243 return 0;
5244}
5245
5246static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005247load_persid(UnpicklerObject *self)
5248{
5249 PyObject *pid;
5250 Py_ssize_t len;
5251 char *s;
5252
5253 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005254 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005255 return -1;
5256 if (len < 2)
5257 return bad_readline();
5258
5259 pid = PyBytes_FromStringAndSize(s, len - 1);
5260 if (pid == NULL)
5261 return -1;
5262
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005263 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005264 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005265 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005266 if (pid == NULL)
5267 return -1;
5268
5269 PDATA_PUSH(self->stack, pid, -1);
5270 return 0;
5271 }
5272 else {
5273 PyErr_SetString(UnpicklingError,
5274 "A load persistent id instruction was encountered,\n"
5275 "but no persistent_load function was specified.");
5276 return -1;
5277 }
5278}
5279
5280static int
5281load_binpersid(UnpicklerObject *self)
5282{
5283 PyObject *pid;
5284
5285 if (self->pers_func) {
5286 PDATA_POP(self->stack, pid);
5287 if (pid == NULL)
5288 return -1;
5289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005290 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005291 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005292 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005293 if (pid == NULL)
5294 return -1;
5295
5296 PDATA_PUSH(self->stack, pid, -1);
5297 return 0;
5298 }
5299 else {
5300 PyErr_SetString(UnpicklingError,
5301 "A load persistent id instruction was encountered,\n"
5302 "but no persistent_load function was specified.");
5303 return -1;
5304 }
5305}
5306
5307static int
5308load_pop(UnpicklerObject *self)
5309{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005310 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005311
5312 /* Note that we split the (pickle.py) stack into two stacks,
5313 * an object stack and a mark stack. We have to be clever and
5314 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00005315 * mark stack first, and only signalling a stack underflow if
5316 * the object stack is empty and the mark stack doesn't match
5317 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005318 */
Collin Winter8ca69de2009-05-26 16:53:41 +00005319 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005320 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00005321 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005322 len--;
5323 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005324 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00005325 } else {
5326 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005327 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005328 return 0;
5329}
5330
5331static int
5332load_pop_mark(UnpicklerObject *self)
5333{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005334 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005335
5336 if ((i = marker(self)) < 0)
5337 return -1;
5338
5339 Pdata_clear(self->stack, i);
5340
5341 return 0;
5342}
5343
5344static int
5345load_dup(UnpicklerObject *self)
5346{
5347 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005348 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005349
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005350 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005351 return stack_underflow();
5352 last = self->stack->data[len - 1];
5353 PDATA_APPEND(self->stack, last, -1);
5354 return 0;
5355}
5356
5357static int
5358load_get(UnpicklerObject *self)
5359{
5360 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005361 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005362 Py_ssize_t len;
5363 char *s;
5364
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005365 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005366 return -1;
5367 if (len < 2)
5368 return bad_readline();
5369
5370 key = PyLong_FromString(s, NULL, 10);
5371 if (key == NULL)
5372 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005373 idx = PyLong_AsSsize_t(key);
5374 if (idx == -1 && PyErr_Occurred()) {
5375 Py_DECREF(key);
5376 return -1;
5377 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005378
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005379 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005380 if (value == NULL) {
5381 if (!PyErr_Occurred())
5382 PyErr_SetObject(PyExc_KeyError, key);
5383 Py_DECREF(key);
5384 return -1;
5385 }
5386 Py_DECREF(key);
5387
5388 PDATA_APPEND(self->stack, value, -1);
5389 return 0;
5390}
5391
5392static int
5393load_binget(UnpicklerObject *self)
5394{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005395 PyObject *value;
5396 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005397 char *s;
5398
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005399 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005400 return -1;
5401
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005402 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005403
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005404 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005405 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005406 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005407 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005408 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005409 Py_DECREF(key);
5410 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005411 return -1;
5412 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005413
5414 PDATA_APPEND(self->stack, value, -1);
5415 return 0;
5416}
5417
5418static int
5419load_long_binget(UnpicklerObject *self)
5420{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005421 PyObject *value;
5422 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005423 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005424
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005425 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005426 return -1;
5427
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005428 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005429
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005430 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005431 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005432 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005433 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005434 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005435 Py_DECREF(key);
5436 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005437 return -1;
5438 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005439
5440 PDATA_APPEND(self->stack, value, -1);
5441 return 0;
5442}
5443
5444/* Push an object from the extension registry (EXT[124]). nbytes is
5445 * the number of bytes following the opcode, holding the index (code) value.
5446 */
5447static int
5448load_extension(UnpicklerObject *self, int nbytes)
5449{
5450 char *codebytes; /* the nbytes bytes after the opcode */
5451 long code; /* calc_binint returns long */
5452 PyObject *py_code; /* code as a Python int */
5453 PyObject *obj; /* the object to push */
5454 PyObject *pair; /* (module_name, class_name) */
5455 PyObject *module_name, *class_name;
5456
5457 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005458 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005459 return -1;
5460 code = calc_binint(codebytes, nbytes);
5461 if (code <= 0) { /* note that 0 is forbidden */
5462 /* Corrupt or hostile pickle. */
5463 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
5464 return -1;
5465 }
5466
5467 /* Look for the code in the cache. */
5468 py_code = PyLong_FromLong(code);
5469 if (py_code == NULL)
5470 return -1;
5471 obj = PyDict_GetItem(extension_cache, py_code);
5472 if (obj != NULL) {
5473 /* Bingo. */
5474 Py_DECREF(py_code);
5475 PDATA_APPEND(self->stack, obj, -1);
5476 return 0;
5477 }
5478
5479 /* Look up the (module_name, class_name) pair. */
5480 pair = PyDict_GetItem(inverted_registry, py_code);
5481 if (pair == NULL) {
5482 Py_DECREF(py_code);
5483 PyErr_Format(PyExc_ValueError, "unregistered extension "
5484 "code %ld", code);
5485 return -1;
5486 }
5487 /* Since the extension registry is manipulable via Python code,
5488 * confirm that pair is really a 2-tuple of strings.
5489 */
5490 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5491 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5492 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5493 Py_DECREF(py_code);
5494 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5495 "isn't a 2-tuple of strings", code);
5496 return -1;
5497 }
5498 /* Load the object. */
5499 obj = find_class(self, module_name, class_name);
5500 if (obj == NULL) {
5501 Py_DECREF(py_code);
5502 return -1;
5503 }
5504 /* Cache code -> obj. */
5505 code = PyDict_SetItem(extension_cache, py_code, obj);
5506 Py_DECREF(py_code);
5507 if (code < 0) {
5508 Py_DECREF(obj);
5509 return -1;
5510 }
5511 PDATA_PUSH(self->stack, obj, -1);
5512 return 0;
5513}
5514
5515static int
5516load_put(UnpicklerObject *self)
5517{
5518 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005519 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005520 Py_ssize_t len;
5521 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005522
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005523 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005524 return -1;
5525 if (len < 2)
5526 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005527 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005528 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005529 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005530
5531 key = PyLong_FromString(s, NULL, 10);
5532 if (key == NULL)
5533 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005534 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005535 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005536 if (idx < 0) {
5537 if (!PyErr_Occurred())
5538 PyErr_SetString(PyExc_ValueError,
5539 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005540 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005541 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005542
5543 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005544}
5545
5546static int
5547load_binput(UnpicklerObject *self)
5548{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005549 PyObject *value;
5550 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005551 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005552
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005553 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005554 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005555
5556 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005557 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005558 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005559
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005560 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005561
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005562 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005563}
5564
5565static int
5566load_long_binput(UnpicklerObject *self)
5567{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005568 PyObject *value;
5569 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005570 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005571
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005572 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005573 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005574
5575 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005576 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005577 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005578
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005579 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005580 if (idx < 0) {
5581 PyErr_SetString(PyExc_ValueError,
5582 "negative LONG_BINPUT argument");
5583 return -1;
5584 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005585
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005586 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005587}
5588
5589static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005590load_memoize(UnpicklerObject *self)
5591{
5592 PyObject *value;
5593
5594 if (Py_SIZE(self->stack) <= 0)
5595 return stack_underflow();
5596 value = self->stack->data[Py_SIZE(self->stack) - 1];
5597
5598 return _Unpickler_MemoPut(self, self->memo_len, value);
5599}
5600
5601static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005602do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005603{
5604 PyObject *value;
5605 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005606 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005607
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005608 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005609 if (x > len || x <= 0)
5610 return stack_underflow();
5611 if (len == x) /* nothing to do */
5612 return 0;
5613
5614 list = self->stack->data[x - 1];
5615
5616 if (PyList_Check(list)) {
5617 PyObject *slice;
5618 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005619 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005620
5621 slice = Pdata_poplist(self->stack, x);
5622 if (!slice)
5623 return -1;
5624 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005625 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005626 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005627 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005628 }
5629 else {
5630 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005631 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005632
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005633 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005634 if (append_func == NULL)
5635 return -1;
5636 for (i = x; i < len; i++) {
5637 PyObject *result;
5638
5639 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005640 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005641 if (result == NULL) {
5642 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005643 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005644 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005645 return -1;
5646 }
5647 Py_DECREF(result);
5648 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005649 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005650 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005651 }
5652
5653 return 0;
5654}
5655
5656static int
5657load_append(UnpicklerObject *self)
5658{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005659 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005660}
5661
5662static int
5663load_appends(UnpicklerObject *self)
5664{
5665 return do_append(self, marker(self));
5666}
5667
5668static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005669do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005670{
5671 PyObject *value, *key;
5672 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005673 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005674 int status = 0;
5675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005676 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005677 if (x > len || x <= 0)
5678 return stack_underflow();
5679 if (len == x) /* nothing to do */
5680 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005681 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005682 /* Currupt or hostile pickle -- we never write one like this. */
5683 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5684 return -1;
5685 }
5686
5687 /* Here, dict does not actually need to be a PyDict; it could be anything
5688 that supports the __setitem__ attribute. */
5689 dict = self->stack->data[x - 1];
5690
5691 for (i = x + 1; i < len; i += 2) {
5692 key = self->stack->data[i - 1];
5693 value = self->stack->data[i];
5694 if (PyObject_SetItem(dict, key, value) < 0) {
5695 status = -1;
5696 break;
5697 }
5698 }
5699
5700 Pdata_clear(self->stack, x);
5701 return status;
5702}
5703
5704static int
5705load_setitem(UnpicklerObject *self)
5706{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005707 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005708}
5709
5710static int
5711load_setitems(UnpicklerObject *self)
5712{
5713 return do_setitems(self, marker(self));
5714}
5715
5716static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005717load_additems(UnpicklerObject *self)
5718{
5719 PyObject *set;
5720 Py_ssize_t mark, len, i;
5721
5722 mark = marker(self);
5723 len = Py_SIZE(self->stack);
5724 if (mark > len || mark <= 0)
5725 return stack_underflow();
5726 if (len == mark) /* nothing to do */
5727 return 0;
5728
5729 set = self->stack->data[mark - 1];
5730
5731 if (PySet_Check(set)) {
5732 PyObject *items;
5733 int status;
5734
5735 items = Pdata_poptuple(self->stack, mark);
5736 if (items == NULL)
5737 return -1;
5738
5739 status = _PySet_Update(set, items);
5740 Py_DECREF(items);
5741 return status;
5742 }
5743 else {
5744 PyObject *add_func;
5745 _Py_IDENTIFIER(add);
5746
5747 add_func = _PyObject_GetAttrId(set, &PyId_add);
5748 if (add_func == NULL)
5749 return -1;
5750 for (i = mark; i < len; i++) {
5751 PyObject *result;
5752 PyObject *item;
5753
5754 item = self->stack->data[i];
5755 result = _Unpickler_FastCall(self, add_func, item);
5756 if (result == NULL) {
5757 Pdata_clear(self->stack, i + 1);
5758 Py_SIZE(self->stack) = mark;
5759 return -1;
5760 }
5761 Py_DECREF(result);
5762 }
5763 Py_SIZE(self->stack) = mark;
5764 }
5765
5766 return 0;
5767}
5768
5769static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005770load_build(UnpicklerObject *self)
5771{
5772 PyObject *state, *inst, *slotstate;
5773 PyObject *setstate;
5774 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005775 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005776
5777 /* Stack is ... instance, state. We want to leave instance at
5778 * the stack top, possibly mutated via instance.__setstate__(state).
5779 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005780 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005781 return stack_underflow();
5782
5783 PDATA_POP(self->stack, state);
5784 if (state == NULL)
5785 return -1;
5786
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005787 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005788
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005789 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005790 if (setstate == NULL) {
5791 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5792 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005793 else {
5794 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005795 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005796 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005797 }
5798 else {
5799 PyObject *result;
5800
5801 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005802 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005803 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005804 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005805 Py_DECREF(setstate);
5806 if (result == NULL)
5807 return -1;
5808 Py_DECREF(result);
5809 return 0;
5810 }
5811
5812 /* A default __setstate__. First see whether state embeds a
5813 * slot state dict too (a proto 2 addition).
5814 */
5815 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5816 PyObject *tmp = state;
5817
5818 state = PyTuple_GET_ITEM(tmp, 0);
5819 slotstate = PyTuple_GET_ITEM(tmp, 1);
5820 Py_INCREF(state);
5821 Py_INCREF(slotstate);
5822 Py_DECREF(tmp);
5823 }
5824 else
5825 slotstate = NULL;
5826
5827 /* Set inst.__dict__ from the state dict (if any). */
5828 if (state != Py_None) {
5829 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005830 PyObject *d_key, *d_value;
5831 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005832 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005833
5834 if (!PyDict_Check(state)) {
5835 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5836 goto error;
5837 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005838 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005839 if (dict == NULL)
5840 goto error;
5841
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005842 i = 0;
5843 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5844 /* normally the keys for instance attributes are
5845 interned. we should try to do that here. */
5846 Py_INCREF(d_key);
5847 if (PyUnicode_CheckExact(d_key))
5848 PyUnicode_InternInPlace(&d_key);
5849 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5850 Py_DECREF(d_key);
5851 goto error;
5852 }
5853 Py_DECREF(d_key);
5854 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005855 Py_DECREF(dict);
5856 }
5857
5858 /* Also set instance attributes from the slotstate dict (if any). */
5859 if (slotstate != NULL) {
5860 PyObject *d_key, *d_value;
5861 Py_ssize_t i;
5862
5863 if (!PyDict_Check(slotstate)) {
5864 PyErr_SetString(UnpicklingError,
5865 "slot state is not a dictionary");
5866 goto error;
5867 }
5868 i = 0;
5869 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5870 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5871 goto error;
5872 }
5873 }
5874
5875 if (0) {
5876 error:
5877 status = -1;
5878 }
5879
5880 Py_DECREF(state);
5881 Py_XDECREF(slotstate);
5882 return status;
5883}
5884
5885static int
5886load_mark(UnpicklerObject *self)
5887{
5888
5889 /* Note that we split the (pickle.py) stack into two stacks, an
5890 * object stack and a mark stack. Here we push a mark onto the
5891 * mark stack.
5892 */
5893
5894 if ((self->num_marks + 1) >= self->marks_size) {
5895 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005896 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005897
5898 /* Use the size_t type to check for overflow. */
5899 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005900 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005901 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005902 PyErr_NoMemory();
5903 return -1;
5904 }
5905
5906 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005907 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005908 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005909 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5910 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005911 if (marks == NULL) {
5912 PyErr_NoMemory();
5913 return -1;
5914 }
5915 self->marks = marks;
5916 self->marks_size = (Py_ssize_t)alloc;
5917 }
5918
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005919 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005920
5921 return 0;
5922}
5923
5924static int
5925load_reduce(UnpicklerObject *self)
5926{
5927 PyObject *callable = NULL;
5928 PyObject *argtup = NULL;
5929 PyObject *obj = NULL;
5930
5931 PDATA_POP(self->stack, argtup);
5932 if (argtup == NULL)
5933 return -1;
5934 PDATA_POP(self->stack, callable);
5935 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005936 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005937 Py_DECREF(callable);
5938 }
5939 Py_DECREF(argtup);
5940
5941 if (obj == NULL)
5942 return -1;
5943
5944 PDATA_PUSH(self->stack, obj, -1);
5945 return 0;
5946}
5947
5948/* Just raises an error if we don't know the protocol specified. PROTO
5949 * is the first opcode for protocols >= 2.
5950 */
5951static int
5952load_proto(UnpicklerObject *self)
5953{
5954 char *s;
5955 int i;
5956
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005957 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005958 return -1;
5959
5960 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005961 if (i <= HIGHEST_PROTOCOL) {
5962 self->proto = i;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005963 self->framing = (self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005964 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005965 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005966
5967 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5968 return -1;
5969}
5970
5971static PyObject *
5972load(UnpicklerObject *self)
5973{
5974 PyObject *err;
5975 PyObject *value = NULL;
5976 char *s;
5977
5978 self->num_marks = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005979 self->proto = 0;
5980 self->framing = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005981 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005982 Pdata_clear(self->stack, 0);
5983
5984 /* Convenient macros for the dispatch while-switch loop just below. */
5985#define OP(opcode, load_func) \
5986 case opcode: if (load_func(self) < 0) break; continue;
5987
5988#define OP_ARG(opcode, load_func, arg) \
5989 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5990
5991 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005992 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005993 break;
5994
5995 switch ((enum opcode)s[0]) {
5996 OP(NONE, load_none)
5997 OP(BININT, load_binint)
5998 OP(BININT1, load_binint1)
5999 OP(BININT2, load_binint2)
6000 OP(INT, load_int)
6001 OP(LONG, load_long)
6002 OP_ARG(LONG1, load_counted_long, 1)
6003 OP_ARG(LONG4, load_counted_long, 4)
6004 OP(FLOAT, load_float)
6005 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006006 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
6007 OP_ARG(BINBYTES, load_counted_binbytes, 4)
6008 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
6009 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
6010 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006011 OP(STRING, load_string)
6012 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006013 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
6014 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
6015 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006016 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
6017 OP_ARG(TUPLE1, load_counted_tuple, 1)
6018 OP_ARG(TUPLE2, load_counted_tuple, 2)
6019 OP_ARG(TUPLE3, load_counted_tuple, 3)
6020 OP(TUPLE, load_tuple)
6021 OP(EMPTY_LIST, load_empty_list)
6022 OP(LIST, load_list)
6023 OP(EMPTY_DICT, load_empty_dict)
6024 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006025 OP(EMPTY_SET, load_empty_set)
6026 OP(ADDITEMS, load_additems)
6027 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006028 OP(OBJ, load_obj)
6029 OP(INST, load_inst)
6030 OP(NEWOBJ, load_newobj)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006031 OP(NEWOBJ_EX, load_newobj_ex)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006032 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006033 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006034 OP(APPEND, load_append)
6035 OP(APPENDS, load_appends)
6036 OP(BUILD, load_build)
6037 OP(DUP, load_dup)
6038 OP(BINGET, load_binget)
6039 OP(LONG_BINGET, load_long_binget)
6040 OP(GET, load_get)
6041 OP(MARK, load_mark)
6042 OP(BINPUT, load_binput)
6043 OP(LONG_BINPUT, load_long_binput)
6044 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006045 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006046 OP(POP, load_pop)
6047 OP(POP_MARK, load_pop_mark)
6048 OP(SETITEM, load_setitem)
6049 OP(SETITEMS, load_setitems)
6050 OP(PERSID, load_persid)
6051 OP(BINPERSID, load_binpersid)
6052 OP(REDUCE, load_reduce)
6053 OP(PROTO, load_proto)
6054 OP_ARG(EXT1, load_extension, 1)
6055 OP_ARG(EXT2, load_extension, 2)
6056 OP_ARG(EXT4, load_extension, 4)
6057 OP_ARG(NEWTRUE, load_bool, Py_True)
6058 OP_ARG(NEWFALSE, load_bool, Py_False)
6059
6060 case STOP:
6061 break;
6062
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006063 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04006064 if (s[0] == '\0')
6065 PyErr_SetNone(PyExc_EOFError);
6066 else
6067 PyErr_Format(UnpicklingError,
6068 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006069 return NULL;
6070 }
6071
6072 break; /* and we are done! */
6073 }
6074
6075 /* XXX: It is not clear what this is actually for. */
6076 if ((err = PyErr_Occurred())) {
6077 if (err == PyExc_EOFError) {
6078 PyErr_SetNone(PyExc_EOFError);
6079 }
6080 return NULL;
6081 }
6082
Victor Stinner2ae57e32013-10-31 13:39:23 +01006083 if (_Unpickler_SkipConsumed(self) < 0)
6084 return NULL;
6085
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006086 PDATA_POP(self->stack, value);
6087 return value;
6088}
6089
6090PyDoc_STRVAR(Unpickler_load_doc,
6091"load() -> object. Load a pickle."
6092"\n"
6093"Read a pickled object representation from the open file object given in\n"
6094"the constructor, and return the reconstituted object hierarchy specified\n"
6095"therein.\n");
6096
6097static PyObject *
6098Unpickler_load(UnpicklerObject *self)
6099{
6100 /* Check whether the Unpickler was initialized correctly. This prevents
6101 segfaulting if a subclass overridden __init__ with a function that does
6102 not call Unpickler.__init__(). Here, we simply ensure that self->read
6103 is not NULL. */
6104 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02006105 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006106 "Unpickler.__init__() was not called by %s.__init__()",
6107 Py_TYPE(self)->tp_name);
6108 return NULL;
6109 }
6110
6111 return load(self);
6112}
6113
6114/* The name of find_class() is misleading. In newer pickle protocols, this
6115 function is used for loading any global (i.e., functions), not just
6116 classes. The name is kept only for backward compatibility. */
6117
6118PyDoc_STRVAR(Unpickler_find_class_doc,
6119"find_class(module_name, global_name) -> object.\n"
6120"\n"
6121"Return an object from a specified module, importing the module if\n"
6122"necessary. Subclasses may override this method (e.g. to restrict\n"
6123"unpickling of arbitrary classes and functions).\n"
6124"\n"
6125"This method is called whenever a class or a function object is\n"
6126"needed. Both arguments passed are str objects.\n");
6127
6128static PyObject *
6129Unpickler_find_class(UnpicklerObject *self, PyObject *args)
6130{
6131 PyObject *global;
6132 PyObject *modules_dict;
6133 PyObject *module;
6134 PyObject *module_name, *global_name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006135 _Py_IDENTIFIER(modules);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006136
6137 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
6138 &module_name, &global_name))
6139 return NULL;
6140
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006141 /* Try to map the old names used in Python 2.x to the new ones used in
6142 Python 3.x. We do this only with old pickle protocols and when the
6143 user has not disabled the feature. */
6144 if (self->proto < 3 && self->fix_imports) {
6145 PyObject *key;
6146 PyObject *item;
6147
6148 /* Check if the global (i.e., a function or a class) was renamed
6149 or moved to another module. */
6150 key = PyTuple_Pack(2, module_name, global_name);
6151 if (key == NULL)
6152 return NULL;
6153 item = PyDict_GetItemWithError(name_mapping_2to3, key);
6154 Py_DECREF(key);
6155 if (item) {
6156 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6157 PyErr_Format(PyExc_RuntimeError,
6158 "_compat_pickle.NAME_MAPPING values should be "
6159 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6160 return NULL;
6161 }
6162 module_name = PyTuple_GET_ITEM(item, 0);
6163 global_name = PyTuple_GET_ITEM(item, 1);
6164 if (!PyUnicode_Check(module_name) ||
6165 !PyUnicode_Check(global_name)) {
6166 PyErr_Format(PyExc_RuntimeError,
6167 "_compat_pickle.NAME_MAPPING values should be "
6168 "pairs of str, not (%.200s, %.200s)",
6169 Py_TYPE(module_name)->tp_name,
6170 Py_TYPE(global_name)->tp_name);
6171 return NULL;
6172 }
6173 }
6174 else if (PyErr_Occurred()) {
6175 return NULL;
6176 }
6177
6178 /* Check if the module was renamed. */
6179 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
6180 if (item) {
6181 if (!PyUnicode_Check(item)) {
6182 PyErr_Format(PyExc_RuntimeError,
6183 "_compat_pickle.IMPORT_MAPPING values should be "
6184 "strings, not %.200s", Py_TYPE(item)->tp_name);
6185 return NULL;
6186 }
6187 module_name = item;
6188 }
6189 else if (PyErr_Occurred()) {
6190 return NULL;
6191 }
6192 }
6193
Victor Stinnerbb520202013-11-06 22:40:41 +01006194 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02006195 if (modules_dict == NULL) {
6196 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006197 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02006198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006199
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006200 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006201 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006202 if (PyErr_Occurred())
6203 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006204 module = PyImport_Import(module_name);
6205 if (module == NULL)
6206 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006207 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006208 Py_DECREF(module);
6209 }
Victor Stinner121aab42011-09-29 23:40:53 +02006210 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006211 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006212 }
6213 return global;
6214}
6215
6216static struct PyMethodDef Unpickler_methods[] = {
6217 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
6218 Unpickler_load_doc},
6219 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
6220 Unpickler_find_class_doc},
6221 {NULL, NULL} /* sentinel */
6222};
6223
6224static void
6225Unpickler_dealloc(UnpicklerObject *self)
6226{
6227 PyObject_GC_UnTrack((PyObject *)self);
6228 Py_XDECREF(self->readline);
6229 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006230 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006231 Py_XDECREF(self->stack);
6232 Py_XDECREF(self->pers_func);
6233 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006234 if (self->buffer.buf != NULL) {
6235 PyBuffer_Release(&self->buffer);
6236 self->buffer.buf = NULL;
6237 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006238
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006239 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006240 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006241 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006242 PyMem_Free(self->encoding);
6243 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006244
6245 Py_TYPE(self)->tp_free((PyObject *)self);
6246}
6247
6248static int
6249Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6250{
6251 Py_VISIT(self->readline);
6252 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006253 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006254 Py_VISIT(self->stack);
6255 Py_VISIT(self->pers_func);
6256 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006257 return 0;
6258}
6259
6260static int
6261Unpickler_clear(UnpicklerObject *self)
6262{
6263 Py_CLEAR(self->readline);
6264 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006265 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006266 Py_CLEAR(self->stack);
6267 Py_CLEAR(self->pers_func);
6268 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006269 if (self->buffer.buf != NULL) {
6270 PyBuffer_Release(&self->buffer);
6271 self->buffer.buf = NULL;
6272 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006273
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006274 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006275 PyMem_Free(self->marks);
6276 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006277 PyMem_Free(self->input_line);
6278 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006279 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006280 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006281 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006282 self->errors = NULL;
6283
6284 return 0;
6285}
6286
6287PyDoc_STRVAR(Unpickler_doc,
6288"Unpickler(file, *, encoding='ASCII', errors='strict')"
6289"\n"
6290"This takes a binary file for reading a pickle data stream.\n"
6291"\n"
6292"The protocol version of the pickle is detected automatically, so no\n"
6293"proto argument is needed.\n"
6294"\n"
6295"The file-like object must have two methods, a read() method\n"
6296"that takes an integer argument, and a readline() method that\n"
6297"requires no arguments. Both methods should return bytes.\n"
6298"Thus file-like object can be a binary file object opened for\n"
6299"reading, a BytesIO object, or any other custom object that\n"
6300"meets this interface.\n"
6301"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006302"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
6303"which are used to control compatiblity support for pickle stream\n"
6304"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
6305"map the old Python 2.x names to the new names used in Python 3.x. The\n"
6306"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
6307"instances pickled by Python 2.x; these default to 'ASCII' and\n"
6308"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006309
6310static int
6311Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
6312{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006313 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006314 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006315 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006316 char *encoding = NULL;
6317 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006318 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006319
6320 /* XXX: That is an horrible error message. But, I don't know how to do
6321 better... */
6322 if (Py_SIZE(args) != 1) {
6323 PyErr_Format(PyExc_TypeError,
6324 "%s takes exactly one positional argument (%zd given)",
6325 Py_TYPE(self)->tp_name, Py_SIZE(args));
6326 return -1;
6327 }
6328
6329 /* Arguments parsing needs to be done in the __init__() method to allow
6330 subclasses to define their own __init__() method, which may (or may
6331 not) support Unpickler arguments. However, this means we need to be
6332 extra careful in the other Unpickler methods, since a subclass could
6333 forget to call Unpickler.__init__() thus breaking our internal
6334 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006335 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006336 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006337 return -1;
6338
6339 /* In case of multiple __init__() calls, clear previous content. */
6340 if (self->read != NULL)
6341 (void)Unpickler_clear(self);
6342
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006343 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006344 return -1;
6345
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006346 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006347 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006348
6349 self->fix_imports = PyObject_IsTrue(fix_imports);
6350 if (self->fix_imports == -1)
6351 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006352
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006353 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006354 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
6355 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006356 if (self->pers_func == NULL)
6357 return -1;
6358 }
6359 else {
6360 self->pers_func = NULL;
6361 }
6362
6363 self->stack = (Pdata *)Pdata_New();
6364 if (self->stack == NULL)
6365 return -1;
6366
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006367 self->memo_size = 32;
6368 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006369 if (self->memo == NULL)
6370 return -1;
6371
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00006372 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006373 self->proto = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006374 self->framing = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00006375
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006376 return 0;
6377}
6378
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006379/* Define a proxy object for the Unpickler's internal memo object. This is to
6380 * avoid breaking code like:
6381 * unpickler.memo.clear()
6382 * and
6383 * unpickler.memo = saved_memo
6384 * Is this a good idea? Not really, but we don't want to break code that uses
6385 * it. Note that we don't implement the entire mapping API here. This is
6386 * intentional, as these should be treated as black-box implementation details.
6387 *
6388 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02006389 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006390 */
6391
6392typedef struct {
6393 PyObject_HEAD
6394 UnpicklerObject *unpickler;
6395} UnpicklerMemoProxyObject;
6396
6397PyDoc_STRVAR(ump_clear_doc,
6398"memo.clear() -> None. Remove all items from memo.");
6399
6400static PyObject *
6401ump_clear(UnpicklerMemoProxyObject *self)
6402{
6403 _Unpickler_MemoCleanup(self->unpickler);
6404 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6405 if (self->unpickler->memo == NULL)
6406 return NULL;
6407 Py_RETURN_NONE;
6408}
6409
6410PyDoc_STRVAR(ump_copy_doc,
6411"memo.copy() -> new_memo. Copy the memo to a new object.");
6412
6413static PyObject *
6414ump_copy(UnpicklerMemoProxyObject *self)
6415{
6416 Py_ssize_t i;
6417 PyObject *new_memo = PyDict_New();
6418 if (new_memo == NULL)
6419 return NULL;
6420
6421 for (i = 0; i < self->unpickler->memo_size; i++) {
6422 int status;
6423 PyObject *key, *value;
6424
6425 value = self->unpickler->memo[i];
6426 if (value == NULL)
6427 continue;
6428
6429 key = PyLong_FromSsize_t(i);
6430 if (key == NULL)
6431 goto error;
6432 status = PyDict_SetItem(new_memo, key, value);
6433 Py_DECREF(key);
6434 if (status < 0)
6435 goto error;
6436 }
6437 return new_memo;
6438
6439error:
6440 Py_DECREF(new_memo);
6441 return NULL;
6442}
6443
6444PyDoc_STRVAR(ump_reduce_doc,
6445"memo.__reduce__(). Pickling support.");
6446
6447static PyObject *
6448ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
6449{
6450 PyObject *reduce_value;
6451 PyObject *constructor_args;
6452 PyObject *contents = ump_copy(self);
6453 if (contents == NULL)
6454 return NULL;
6455
6456 reduce_value = PyTuple_New(2);
6457 if (reduce_value == NULL) {
6458 Py_DECREF(contents);
6459 return NULL;
6460 }
6461 constructor_args = PyTuple_New(1);
6462 if (constructor_args == NULL) {
6463 Py_DECREF(contents);
6464 Py_DECREF(reduce_value);
6465 return NULL;
6466 }
6467 PyTuple_SET_ITEM(constructor_args, 0, contents);
6468 Py_INCREF((PyObject *)&PyDict_Type);
6469 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6470 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6471 return reduce_value;
6472}
6473
6474static PyMethodDef unpicklerproxy_methods[] = {
6475 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
6476 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
6477 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
6478 {NULL, NULL} /* sentinel */
6479};
6480
6481static void
6482UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
6483{
6484 PyObject_GC_UnTrack(self);
6485 Py_XDECREF(self->unpickler);
6486 PyObject_GC_Del((PyObject *)self);
6487}
6488
6489static int
6490UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
6491 visitproc visit, void *arg)
6492{
6493 Py_VISIT(self->unpickler);
6494 return 0;
6495}
6496
6497static int
6498UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
6499{
6500 Py_CLEAR(self->unpickler);
6501 return 0;
6502}
6503
6504static PyTypeObject UnpicklerMemoProxyType = {
6505 PyVarObject_HEAD_INIT(NULL, 0)
6506 "_pickle.UnpicklerMemoProxy", /*tp_name*/
6507 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
6508 0,
6509 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
6510 0, /* tp_print */
6511 0, /* tp_getattr */
6512 0, /* tp_setattr */
6513 0, /* tp_compare */
6514 0, /* tp_repr */
6515 0, /* tp_as_number */
6516 0, /* tp_as_sequence */
6517 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00006518 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006519 0, /* tp_call */
6520 0, /* tp_str */
6521 PyObject_GenericGetAttr, /* tp_getattro */
6522 PyObject_GenericSetAttr, /* tp_setattro */
6523 0, /* tp_as_buffer */
6524 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6525 0, /* tp_doc */
6526 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
6527 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
6528 0, /* tp_richcompare */
6529 0, /* tp_weaklistoffset */
6530 0, /* tp_iter */
6531 0, /* tp_iternext */
6532 unpicklerproxy_methods, /* tp_methods */
6533};
6534
6535static PyObject *
6536UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6537{
6538 UnpicklerMemoProxyObject *self;
6539
6540 self = PyObject_GC_New(UnpicklerMemoProxyObject,
6541 &UnpicklerMemoProxyType);
6542 if (self == NULL)
6543 return NULL;
6544 Py_INCREF(unpickler);
6545 self->unpickler = unpickler;
6546 PyObject_GC_Track(self);
6547 return (PyObject *)self;
6548}
6549
6550/*****************************************************************************/
6551
6552
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006553static PyObject *
6554Unpickler_get_memo(UnpicklerObject *self)
6555{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006556 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006557}
6558
6559static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006560Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006561{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006562 PyObject **new_memo;
6563 Py_ssize_t new_memo_size = 0;
6564 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006565
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006566 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006567 PyErr_SetString(PyExc_TypeError,
6568 "attribute deletion is not supported");
6569 return -1;
6570 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006571
6572 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
6573 UnpicklerObject *unpickler =
6574 ((UnpicklerMemoProxyObject *)obj)->unpickler;
6575
6576 new_memo_size = unpickler->memo_size;
6577 new_memo = _Unpickler_NewMemo(new_memo_size);
6578 if (new_memo == NULL)
6579 return -1;
6580
6581 for (i = 0; i < new_memo_size; i++) {
6582 Py_XINCREF(unpickler->memo[i]);
6583 new_memo[i] = unpickler->memo[i];
6584 }
6585 }
6586 else if (PyDict_Check(obj)) {
6587 Py_ssize_t i = 0;
6588 PyObject *key, *value;
6589
6590 new_memo_size = PyDict_Size(obj);
6591 new_memo = _Unpickler_NewMemo(new_memo_size);
6592 if (new_memo == NULL)
6593 return -1;
6594
6595 while (PyDict_Next(obj, &i, &key, &value)) {
6596 Py_ssize_t idx;
6597 if (!PyLong_Check(key)) {
6598 PyErr_SetString(PyExc_TypeError,
6599 "memo key must be integers");
6600 goto error;
6601 }
6602 idx = PyLong_AsSsize_t(key);
6603 if (idx == -1 && PyErr_Occurred())
6604 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02006605 if (idx < 0) {
6606 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02006607 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02006608 goto error;
6609 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006610 if (_Unpickler_MemoPut(self, idx, value) < 0)
6611 goto error;
6612 }
6613 }
6614 else {
6615 PyErr_Format(PyExc_TypeError,
6616 "'memo' attribute must be an UnpicklerMemoProxy object"
6617 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006618 return -1;
6619 }
6620
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006621 _Unpickler_MemoCleanup(self);
6622 self->memo_size = new_memo_size;
6623 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006624
6625 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006626
6627 error:
6628 if (new_memo_size) {
6629 i = new_memo_size;
6630 while (--i >= 0) {
6631 Py_XDECREF(new_memo[i]);
6632 }
6633 PyMem_FREE(new_memo);
6634 }
6635 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006636}
6637
6638static PyObject *
6639Unpickler_get_persload(UnpicklerObject *self)
6640{
6641 if (self->pers_func == NULL)
6642 PyErr_SetString(PyExc_AttributeError, "persistent_load");
6643 else
6644 Py_INCREF(self->pers_func);
6645 return self->pers_func;
6646}
6647
6648static int
6649Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6650{
6651 PyObject *tmp;
6652
6653 if (value == NULL) {
6654 PyErr_SetString(PyExc_TypeError,
6655 "attribute deletion is not supported");
6656 return -1;
6657 }
6658 if (!PyCallable_Check(value)) {
6659 PyErr_SetString(PyExc_TypeError,
6660 "persistent_load must be a callable taking "
6661 "one argument");
6662 return -1;
6663 }
6664
6665 tmp = self->pers_func;
6666 Py_INCREF(value);
6667 self->pers_func = value;
6668 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6669
6670 return 0;
6671}
6672
6673static PyGetSetDef Unpickler_getsets[] = {
6674 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6675 {"persistent_load", (getter)Unpickler_get_persload,
6676 (setter)Unpickler_set_persload},
6677 {NULL}
6678};
6679
6680static PyTypeObject Unpickler_Type = {
6681 PyVarObject_HEAD_INIT(NULL, 0)
6682 "_pickle.Unpickler", /*tp_name*/
6683 sizeof(UnpicklerObject), /*tp_basicsize*/
6684 0, /*tp_itemsize*/
6685 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6686 0, /*tp_print*/
6687 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006688 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006689 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006690 0, /*tp_repr*/
6691 0, /*tp_as_number*/
6692 0, /*tp_as_sequence*/
6693 0, /*tp_as_mapping*/
6694 0, /*tp_hash*/
6695 0, /*tp_call*/
6696 0, /*tp_str*/
6697 0, /*tp_getattro*/
6698 0, /*tp_setattro*/
6699 0, /*tp_as_buffer*/
6700 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6701 Unpickler_doc, /*tp_doc*/
6702 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6703 (inquiry)Unpickler_clear, /*tp_clear*/
6704 0, /*tp_richcompare*/
6705 0, /*tp_weaklistoffset*/
6706 0, /*tp_iter*/
6707 0, /*tp_iternext*/
6708 Unpickler_methods, /*tp_methods*/
6709 0, /*tp_members*/
6710 Unpickler_getsets, /*tp_getset*/
6711 0, /*tp_base*/
6712 0, /*tp_dict*/
6713 0, /*tp_descr_get*/
6714 0, /*tp_descr_set*/
6715 0, /*tp_dictoffset*/
6716 (initproc)Unpickler_init, /*tp_init*/
6717 PyType_GenericAlloc, /*tp_alloc*/
6718 PyType_GenericNew, /*tp_new*/
6719 PyObject_GC_Del, /*tp_free*/
6720 0, /*tp_is_gc*/
6721};
6722
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006723PyDoc_STRVAR(pickle_dump_doc,
6724"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6725"\n"
6726"Write a pickled representation of obj to the open file object file. This\n"
6727"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6728"efficient.\n"
6729"\n"
6730"The optional protocol argument tells the pickler to use the given protocol;\n"
6731"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6732"backward-incompatible protocol designed for Python 3.0.\n"
6733"\n"
6734"Specifying a negative protocol version selects the highest protocol version\n"
6735"supported. The higher the protocol used, the more recent the version of\n"
6736"Python needed to read the pickle produced.\n"
6737"\n"
6738"The file argument must have a write() method that accepts a single bytes\n"
6739"argument. It can thus be a file object opened for binary writing, a\n"
6740"io.BytesIO instance, or any other custom object that meets this interface.\n"
6741"\n"
6742"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6743"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6744"so that the pickle data stream is readable with Python 2.x.\n");
6745
6746static PyObject *
6747pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6748{
6749 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6750 PyObject *obj;
6751 PyObject *file;
6752 PyObject *proto = NULL;
6753 PyObject *fix_imports = Py_True;
6754 PicklerObject *pickler;
6755
6756 /* fix_imports is a keyword-only argument. */
6757 if (Py_SIZE(args) > 3) {
6758 PyErr_Format(PyExc_TypeError,
6759 "pickle.dump() takes at most 3 positional "
6760 "argument (%zd given)", Py_SIZE(args));
6761 return NULL;
6762 }
6763
6764 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6765 &obj, &file, &proto, &fix_imports))
6766 return NULL;
6767
6768 pickler = _Pickler_New();
6769 if (pickler == NULL)
6770 return NULL;
6771
6772 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6773 goto error;
6774
6775 if (_Pickler_SetOutputStream(pickler, file) < 0)
6776 goto error;
6777
6778 if (dump(pickler, obj) < 0)
6779 goto error;
6780
6781 if (_Pickler_FlushToFile(pickler) < 0)
6782 goto error;
6783
6784 Py_DECREF(pickler);
6785 Py_RETURN_NONE;
6786
6787 error:
6788 Py_XDECREF(pickler);
6789 return NULL;
6790}
6791
6792PyDoc_STRVAR(pickle_dumps_doc,
6793"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6794"\n"
6795"Return the pickled representation of the object as a bytes\n"
6796"object, instead of writing it to a file.\n"
6797"\n"
6798"The optional protocol argument tells the pickler to use the given protocol;\n"
6799"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6800"backward-incompatible protocol designed for Python 3.0.\n"
6801"\n"
6802"Specifying a negative protocol version selects the highest protocol version\n"
6803"supported. The higher the protocol used, the more recent the version of\n"
6804"Python needed to read the pickle produced.\n"
6805"\n"
6806"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6807"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6808"so that the pickle data stream is readable with Python 2.x.\n");
6809
6810static PyObject *
6811pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6812{
6813 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6814 PyObject *obj;
6815 PyObject *proto = NULL;
6816 PyObject *result;
6817 PyObject *fix_imports = Py_True;
6818 PicklerObject *pickler;
6819
6820 /* fix_imports is a keyword-only argument. */
6821 if (Py_SIZE(args) > 2) {
6822 PyErr_Format(PyExc_TypeError,
6823 "pickle.dumps() takes at most 2 positional "
6824 "argument (%zd given)", Py_SIZE(args));
6825 return NULL;
6826 }
6827
6828 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6829 &obj, &proto, &fix_imports))
6830 return NULL;
6831
6832 pickler = _Pickler_New();
6833 if (pickler == NULL)
6834 return NULL;
6835
6836 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6837 goto error;
6838
6839 if (dump(pickler, obj) < 0)
6840 goto error;
6841
6842 result = _Pickler_GetString(pickler);
6843 Py_DECREF(pickler);
6844 return result;
6845
6846 error:
6847 Py_XDECREF(pickler);
6848 return NULL;
6849}
6850
6851PyDoc_STRVAR(pickle_load_doc,
6852"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6853"\n"
6854"Read a pickled object representation from the open file object file and\n"
6855"return the reconstituted object hierarchy specified therein. This is\n"
6856"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6857"\n"
6858"The protocol version of the pickle is detected automatically, so no protocol\n"
6859"argument is needed. Bytes past the pickled object's representation are\n"
6860"ignored.\n"
6861"\n"
6862"The argument file must have two methods, a read() method that takes an\n"
6863"integer argument, and a readline() method that requires no arguments. Both\n"
6864"methods should return bytes. Thus *file* can be a binary file object opened\n"
6865"for reading, a BytesIO object, or any other custom object that meets this\n"
6866"interface.\n"
6867"\n"
6868"Optional keyword arguments are fix_imports, encoding and errors,\n"
6869"which are used to control compatiblity support for pickle stream generated\n"
6870"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6871"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6872"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6873"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6874
6875static PyObject *
6876pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6877{
6878 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6879 PyObject *file;
6880 PyObject *fix_imports = Py_True;
6881 PyObject *result;
6882 char *encoding = NULL;
6883 char *errors = NULL;
6884 UnpicklerObject *unpickler;
6885
6886 /* fix_imports, encoding and errors are a keyword-only argument. */
6887 if (Py_SIZE(args) != 1) {
6888 PyErr_Format(PyExc_TypeError,
6889 "pickle.load() takes exactly one positional "
6890 "argument (%zd given)", Py_SIZE(args));
6891 return NULL;
6892 }
6893
6894 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6895 &file, &fix_imports, &encoding, &errors))
6896 return NULL;
6897
6898 unpickler = _Unpickler_New();
6899 if (unpickler == NULL)
6900 return NULL;
6901
6902 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6903 goto error;
6904
6905 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6906 goto error;
6907
6908 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6909 if (unpickler->fix_imports == -1)
6910 goto error;
6911
6912 result = load(unpickler);
6913 Py_DECREF(unpickler);
6914 return result;
6915
6916 error:
6917 Py_XDECREF(unpickler);
6918 return NULL;
6919}
6920
6921PyDoc_STRVAR(pickle_loads_doc,
6922"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6923"\n"
6924"Read a pickled object hierarchy from a bytes object and return the\n"
6925"reconstituted object hierarchy specified therein\n"
6926"\n"
6927"The protocol version of the pickle is detected automatically, so no protocol\n"
6928"argument is needed. Bytes past the pickled object's representation are\n"
6929"ignored.\n"
6930"\n"
6931"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6932"are used to control compatiblity support for pickle stream generated\n"
6933"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6934"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6935"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6936"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6937
6938static PyObject *
6939pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6940{
6941 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6942 PyObject *input;
6943 PyObject *fix_imports = Py_True;
6944 PyObject *result;
6945 char *encoding = NULL;
6946 char *errors = NULL;
6947 UnpicklerObject *unpickler;
6948
6949 /* fix_imports, encoding and errors are a keyword-only argument. */
6950 if (Py_SIZE(args) != 1) {
6951 PyErr_Format(PyExc_TypeError,
6952 "pickle.loads() takes exactly one positional "
6953 "argument (%zd given)", Py_SIZE(args));
6954 return NULL;
6955 }
6956
6957 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6958 &input, &fix_imports, &encoding, &errors))
6959 return NULL;
6960
6961 unpickler = _Unpickler_New();
6962 if (unpickler == NULL)
6963 return NULL;
6964
6965 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6966 goto error;
6967
6968 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6969 goto error;
6970
6971 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6972 if (unpickler->fix_imports == -1)
6973 goto error;
6974
6975 result = load(unpickler);
6976 Py_DECREF(unpickler);
6977 return result;
6978
6979 error:
6980 Py_XDECREF(unpickler);
6981 return NULL;
6982}
6983
6984
6985static struct PyMethodDef pickle_methods[] = {
6986 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6987 pickle_dump_doc},
6988 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6989 pickle_dumps_doc},
6990 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6991 pickle_load_doc},
6992 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6993 pickle_loads_doc},
6994 {NULL, NULL} /* sentinel */
6995};
6996
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006997static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006998initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006999{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007000 PyObject *copyreg = NULL;
7001 PyObject *compat_pickle = NULL;
7002
7003 /* XXX: We should ensure that the types of the dictionaries imported are
7004 exactly PyDict objects. Otherwise, it is possible to crash the pickle
7005 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007006
7007 copyreg = PyImport_ImportModule("copyreg");
7008 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007009 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007010 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
7011 if (!dispatch_table)
7012 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007013 extension_registry = \
7014 PyObject_GetAttrString(copyreg, "_extension_registry");
7015 if (!extension_registry)
7016 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007017 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
7018 if (!inverted_registry)
7019 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007020 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
7021 if (!extension_cache)
7022 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007023 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007024
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007025 /* Load the 2.x -> 3.x stdlib module mapping tables */
7026 compat_pickle = PyImport_ImportModule("_compat_pickle");
7027 if (!compat_pickle)
7028 goto error;
7029 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
7030 if (!name_mapping_2to3)
7031 goto error;
7032 if (!PyDict_CheckExact(name_mapping_2to3)) {
7033 PyErr_Format(PyExc_RuntimeError,
7034 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
7035 Py_TYPE(name_mapping_2to3)->tp_name);
7036 goto error;
7037 }
7038 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
7039 "IMPORT_MAPPING");
7040 if (!import_mapping_2to3)
7041 goto error;
7042 if (!PyDict_CheckExact(import_mapping_2to3)) {
7043 PyErr_Format(PyExc_RuntimeError,
7044 "_compat_pickle.IMPORT_MAPPING should be a dict, "
7045 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
7046 goto error;
7047 }
7048 /* ... and the 3.x -> 2.x mapping tables */
7049 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
7050 "REVERSE_NAME_MAPPING");
7051 if (!name_mapping_3to2)
7052 goto error;
7053 if (!PyDict_CheckExact(name_mapping_3to2)) {
7054 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02007055 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007056 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
7057 goto error;
7058 }
7059 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
7060 "REVERSE_IMPORT_MAPPING");
7061 if (!import_mapping_3to2)
7062 goto error;
7063 if (!PyDict_CheckExact(import_mapping_3to2)) {
7064 PyErr_Format(PyExc_RuntimeError,
7065 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
7066 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
7067 goto error;
7068 }
7069 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007070
7071 empty_tuple = PyTuple_New(0);
7072 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007073 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007074 two_tuple = PyTuple_New(2);
7075 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007076 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007077 /* We use this temp container with no regard to refcounts, or to
7078 * keeping containees alive. Exempt from GC, because we don't
7079 * want anything looking at two_tuple() by magic.
7080 */
7081 PyObject_GC_UnTrack(two_tuple);
7082
7083 return 0;
7084
7085 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007086 Py_CLEAR(copyreg);
7087 Py_CLEAR(dispatch_table);
7088 Py_CLEAR(extension_registry);
7089 Py_CLEAR(inverted_registry);
7090 Py_CLEAR(extension_cache);
7091 Py_CLEAR(compat_pickle);
7092 Py_CLEAR(name_mapping_2to3);
7093 Py_CLEAR(import_mapping_2to3);
7094 Py_CLEAR(name_mapping_3to2);
7095 Py_CLEAR(import_mapping_3to2);
7096 Py_CLEAR(empty_tuple);
7097 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007098 return -1;
7099}
7100
7101static struct PyModuleDef _picklemodule = {
7102 PyModuleDef_HEAD_INIT,
7103 "_pickle",
7104 pickle_module_doc,
7105 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007106 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007107 NULL,
7108 NULL,
7109 NULL,
7110 NULL
7111};
7112
7113PyMODINIT_FUNC
7114PyInit__pickle(void)
7115{
7116 PyObject *m;
7117
7118 if (PyType_Ready(&Unpickler_Type) < 0)
7119 return NULL;
7120 if (PyType_Ready(&Pickler_Type) < 0)
7121 return NULL;
7122 if (PyType_Ready(&Pdata_Type) < 0)
7123 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007124 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7125 return NULL;
7126 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7127 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007128
7129 /* Create the module and add the functions. */
7130 m = PyModule_Create(&_picklemodule);
7131 if (m == NULL)
7132 return NULL;
7133
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007134 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007135 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7136 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007137 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007138 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7139 return NULL;
7140
7141 /* Initialize the exceptions. */
7142 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7143 if (PickleError == NULL)
7144 return NULL;
7145 PicklingError = \
7146 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
7147 if (PicklingError == NULL)
7148 return NULL;
7149 UnpicklingError = \
7150 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
7151 if (UnpicklingError == NULL)
7152 return NULL;
7153
7154 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
7155 return NULL;
7156 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
7157 return NULL;
7158 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
7159 return NULL;
7160
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007161 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007162 return NULL;
7163
7164 return m;
7165}