blob: 75b0441f93706ede8d22798cf6a6970491d71a59 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.  HIGHEST_PROTOCOL must be
   bumped whenever new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes.  This list must be kept in step with pickle.py; the
   extensive documentation for each opcode lives in pickletools.py. */
enum opcode {
    /* Protocols 0 and 1. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the byte sequences used to pickle bools before
 * protocol 2.  Unpicklers written before bools existed load them as ints,
 * while later unpicklers can recognize that bools were intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before writing an
       APPENDS/SETITEMS opcode.  Keep in sync with
       pickle.Pickler._BATCHSIZE; nothing breaks if the two diverge, but
       it is unclear that doing so would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
Victor Stinnerbb520202013-11-06 22:40:41 +0100139_Py_IDENTIFIER(modules);
140
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000141static int
142stack_underflow(void)
143{
144 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
145 return -1;
146}
147
148/* Internal data type used as the unpickling stack. */
149typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000152 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000153} Pdata;
154
155static void
156Pdata_dealloc(Pdata *self)
157{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200158 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000159 while (--i >= 0) {
160 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000162 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000163 PyObject_Del(self);
164}
165
166static PyTypeObject Pdata_Type = {
167 PyVarObject_HEAD_INIT(NULL, 0)
168 "_pickle.Pdata", /*tp_name*/
169 sizeof(Pdata), /*tp_basicsize*/
170 0, /*tp_itemsize*/
171 (destructor)Pdata_dealloc, /*tp_dealloc*/
172};
173
174static PyObject *
175Pdata_New(void)
176{
177 Pdata *self;
178
179 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
180 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000181 Py_SIZE(self) = 0;
182 self->allocated = 8;
183 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000184 if (self->data)
185 return (PyObject *)self;
186 Py_DECREF(self);
187 return PyErr_NoMemory();
188}
189
190
191/* Retain only the initial clearto items. If clearto >= the current
192 * number of items, this is a (non-erroneous) NOP.
193 */
194static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200197 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000198
199 if (clearto < 0)
200 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000201 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000202 return 0;
203
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000204 while (--i >= clearto) {
205 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000207 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000208 return 0;
209}
210
211static int
212Pdata_grow(Pdata *self)
213{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000214 PyObject **data = self->data;
215 Py_ssize_t allocated = self->allocated;
216 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000217
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000218 new_allocated = (allocated >> 3) + 6;
219 /* check for integer overflow */
220 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000221 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000222 new_allocated += allocated;
223 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000224 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000225 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
226 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000227 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000228
229 self->data = data;
230 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000231 return 0;
232
233 nomemory:
234 PyErr_NoMemory();
235 return -1;
236}
237
238/* D is a Pdata*. Pop the topmost element and store it into V, which
239 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
240 * is raised and V is set to NULL.
241 */
242static PyObject *
243Pdata_pop(Pdata *self)
244{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000245 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000246 PyErr_SetString(UnpicklingError, "bad pickle data");
247 return NULL;
248 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000249 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000250}
251#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
252
253static int
254Pdata_push(Pdata *self, PyObject *obj)
255{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000256 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000257 return -1;
258 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000259 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000260 return 0;
261}
262
/* Push O onto the stack D, handing the caller's reference over to the stack.
   On failure the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push O onto the stack D after taking a new reference to it.
   On failure the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
271
272static PyObject *
273Pdata_poptuple(Pdata *self, Py_ssize_t start)
274{
275 PyObject *tuple;
276 Py_ssize_t len, i, j;
277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000278 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000279 tuple = PyTuple_New(len);
280 if (tuple == NULL)
281 return NULL;
282 for (i = start, j = 0; j < len; i++, j++)
283 PyTuple_SET_ITEM(tuple, j, self->data[i]);
284
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000285 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000286 return tuple;
287}
288
289static PyObject *
290Pdata_poplist(Pdata *self, Py_ssize_t start)
291{
292 PyObject *list;
293 Py_ssize_t len, i, j;
294
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000295 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000296 list = PyList_New(len);
297 if (list == NULL)
298 return NULL;
299 for (i = start, j = 0; j < len; i++, j++)
300 PyList_SET_ITEM(list, j, self->data[i]);
301
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000302 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000303 return list;
304}
305
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000306typedef struct {
307 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200308 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000309} PyMemoEntry;
310
311typedef struct {
312 Py_ssize_t mt_mask;
313 Py_ssize_t mt_used;
314 Py_ssize_t mt_allocated;
315 PyMemoEntry *mt_table;
316} PyMemoTable;
317
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000318typedef struct PicklerObject {
319 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000322 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000323 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100324 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000325 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000326
327 PyObject *write; /* write() method of the output stream. */
328 PyObject *output_buffer; /* Write into a local bytearray buffer before
329 flushing to the stream. */
330 Py_ssize_t output_len; /* Length of output_buffer. */
331 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int proto; /* Pickle protocol number, >= 0 */
333 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200334 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000335 int fast; /* Enable fast mode if set to a true value.
336 The fast mode disable the usage of memo,
337 therefore speeding the pickling process by
338 not generating superfluous PUT opcodes. It
339 should not be used if with self-referential
340 objects. */
341 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000342 int fix_imports; /* Indicate whether Pickler should fix
343 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000344 PyObject *fast_memo;
345} PicklerObject;
346
347typedef struct UnpicklerObject {
348 PyObject_HEAD
349 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000350
351 /* The unpickler memo is just an array of PyObject *s. Using a dict
352 is unnecessary, since the keys are contiguous ints. */
353 PyObject **memo;
354 Py_ssize_t memo_size;
355
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000356 PyObject *arg;
357 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000358
359 Py_buffer buffer;
360 char *input_buffer;
361 char *input_line;
362 Py_ssize_t input_len;
363 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365 PyObject *read; /* read() method of the input stream. */
366 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000367 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000368
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000369 char *encoding; /* Name of the encoding to be used for
370 decoding strings pickled using Python
371 2.x. The default value is "ASCII" */
372 char *errors; /* Name of errors handling scheme to used when
373 decoding strings. The default value is
374 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500375 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000376 objects. */
377 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
378 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000379 int proto; /* Protocol of the pickle loaded. */
380 int fix_imports; /* Indicate whether Unpickler should fix
381 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000382} UnpicklerObject;
383
384/* Forward declarations */
385static int save(PicklerObject *, PyObject *, int);
386static int save_reduce(PicklerObject *, PyObject *, PyObject *);
387static PyTypeObject Pickler_Type;
388static PyTypeObject Unpickler_Type;
389
390
/*************************************************************************
 A custom hashtable mapping void* to Python ints.  The pickler uses it for
 memoization.  Using a custom hashtable rather than PyDict lets us skip a
 lot of unnecessary object creation, which makes a huge performance
 difference. */

#define MT_MINSIZE 8        /* slots in a freshly created table */
#define PERTURB_SHIFT 5     /* right shift applied to perturb on each probe */
399
400
401static PyMemoTable *
402PyMemoTable_New(void)
403{
404 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
405 if (memo == NULL) {
406 PyErr_NoMemory();
407 return NULL;
408 }
409
410 memo->mt_used = 0;
411 memo->mt_allocated = MT_MINSIZE;
412 memo->mt_mask = MT_MINSIZE - 1;
413 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
414 if (memo->mt_table == NULL) {
415 PyMem_FREE(memo);
416 PyErr_NoMemory();
417 return NULL;
418 }
419 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
420
421 return memo;
422}
423
424static PyMemoTable *
425PyMemoTable_Copy(PyMemoTable *self)
426{
427 Py_ssize_t i;
428 PyMemoTable *new = PyMemoTable_New();
429 if (new == NULL)
430 return NULL;
431
432 new->mt_used = self->mt_used;
433 new->mt_allocated = self->mt_allocated;
434 new->mt_mask = self->mt_mask;
435 /* The table we get from _New() is probably smaller than we wanted.
436 Free it and allocate one that's the right size. */
437 PyMem_FREE(new->mt_table);
438 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
439 if (new->mt_table == NULL) {
440 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200441 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000442 return NULL;
443 }
444 for (i = 0; i < self->mt_allocated; i++) {
445 Py_XINCREF(self->mt_table[i].me_key);
446 }
447 memcpy(new->mt_table, self->mt_table,
448 sizeof(PyMemoEntry) * self->mt_allocated);
449
450 return new;
451}
452
453static Py_ssize_t
454PyMemoTable_Size(PyMemoTable *self)
455{
456 return self->mt_used;
457}
458
459static int
460PyMemoTable_Clear(PyMemoTable *self)
461{
462 Py_ssize_t i = self->mt_allocated;
463
464 while (--i >= 0) {
465 Py_XDECREF(self->mt_table[i].me_key);
466 }
467 self->mt_used = 0;
468 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
469 return 0;
470}
471
472static void
473PyMemoTable_Del(PyMemoTable *self)
474{
475 if (self == NULL)
476 return;
477 PyMemoTable_Clear(self);
478
479 PyMem_FREE(self->mt_table);
480 PyMem_FREE(self);
481}
482
483/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
484 can be considerably simpler than dictobject.c's lookdict(). */
485static PyMemoEntry *
486_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
487{
488 size_t i;
489 size_t perturb;
490 size_t mask = (size_t)self->mt_mask;
491 PyMemoEntry *table = self->mt_table;
492 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000493 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000494
495 i = hash & mask;
496 entry = &table[i];
497 if (entry->me_key == NULL || entry->me_key == key)
498 return entry;
499
500 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
501 i = (i << 2) + i + perturb + 1;
502 entry = &table[i & mask];
503 if (entry->me_key == NULL || entry->me_key == key)
504 return entry;
505 }
506 assert(0); /* Never reached */
507 return NULL;
508}
509
510/* Returns -1 on failure, 0 on success. */
511static int
512_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
513{
514 PyMemoEntry *oldtable = NULL;
515 PyMemoEntry *oldentry, *newentry;
516 Py_ssize_t new_size = MT_MINSIZE;
517 Py_ssize_t to_process;
518
519 assert(min_size > 0);
520
521 /* Find the smallest valid table size >= min_size. */
522 while (new_size < min_size && new_size > 0)
523 new_size <<= 1;
524 if (new_size <= 0) {
525 PyErr_NoMemory();
526 return -1;
527 }
528 /* new_size needs to be a power of two. */
529 assert((new_size & (new_size - 1)) == 0);
530
531 /* Allocate new table. */
532 oldtable = self->mt_table;
533 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
534 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200535 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000536 PyErr_NoMemory();
537 return -1;
538 }
539 self->mt_allocated = new_size;
540 self->mt_mask = new_size - 1;
541 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
542
543 /* Copy entries from the old table. */
544 to_process = self->mt_used;
545 for (oldentry = oldtable; to_process > 0; oldentry++) {
546 if (oldentry->me_key != NULL) {
547 to_process--;
548 /* newentry is a pointer to a chunk of the new
549 mt_table, so we're setting the key:value pair
550 in-place. */
551 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
552 newentry->me_key = oldentry->me_key;
553 newentry->me_value = oldentry->me_value;
554 }
555 }
556
557 /* Deallocate the old table. */
558 PyMem_FREE(oldtable);
559 return 0;
560}
561
562/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200563static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000564PyMemoTable_Get(PyMemoTable *self, PyObject *key)
565{
566 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
567 if (entry->me_key == NULL)
568 return NULL;
569 return &entry->me_value;
570}
571
572/* Returns -1 on failure, 0 on success. */
573static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200574PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000575{
576 PyMemoEntry *entry;
577
578 assert(key != NULL);
579
580 entry = _PyMemoTable_Lookup(self, key);
581 if (entry->me_key != NULL) {
582 entry->me_value = value;
583 return 0;
584 }
585 Py_INCREF(key);
586 entry->me_key = key;
587 entry->me_value = value;
588 self->mt_used++;
589
590 /* If we added a key, we can safely resize. Otherwise just return!
591 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
592 *
593 * Quadrupling the size improves average table sparseness
594 * (reducing collisions) at the cost of some memory. It also halves
595 * the number of expensive resize operations in a growing memo table.
596 *
597 * Very large memo tables (over 50K items) use doubling instead.
598 * This may help applications with severe memory constraints.
599 */
600 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
601 return 0;
602 return _PyMemoTable_ResizeTable(self,
603 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
604}
605
/* The hashtable tuning macros are private to the memo table code above. */
#undef PERTURB_SHIFT
#undef MT_MINSIZE
608
609/*************************************************************************/
610
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj) stores `obj` in slot 0 of the cached 1-tuple
   self->arg, creating the tuple when there is none and releasing whatever
   object was in the slot before.  The reference to `obj` is consumed: if
   the tuple cannot be created, `obj` is released and self->arg stays NULL.

   FREE_ARG_TUP(self) drops the cached tuple when something else also holds
   a reference to it (reference count above 1), so that the next ARG_TUP()
   creates a fresh one.  It requires self->arg to be non-NULL.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
                     _Unpickler_FastCall().  */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
630
631/* A temporary cleaner API for fast single argument function call.
632
633 XXX: Does caching the argument tuple provides any real performance benefits?
634
635 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
636 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
637 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
638 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
639 (i.e, call PyTuple_New() and store the returned value in an array), to save
640 one second (wall clock time). Either ways, the loading time a pickle stream
641 large enough to generate this number of calls would be massively
642 overwhelmed by other factors, like I/O throughput, the GC traversal and
643 object allocation overhead. So, I really doubt these functions provide any
644 real benefits.
645
646 On the other hand, oprofile reports that pickle spends a lot of time in
647 these functions. But, that is probably more related to the function call
648 overhead, than the argument tuple allocation.
649
650 XXX: And, what is the reference behavior of these? Steal, borrow? At first
651 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000652 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000653static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000654_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000655{
656 PyObject *result = NULL;
657
658 ARG_TUP(self, arg);
659 if (self->arg) {
660 result = PyObject_Call(func, self->arg, NULL);
661 FREE_ARG_TUP(self);
662 }
663 return result;
664}
665
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000666static int
667_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000668{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000669 Py_CLEAR(self->output_buffer);
670 self->output_buffer =
671 PyBytes_FromStringAndSize(NULL, self->max_output_len);
672 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674 self->output_len = 0;
675 return 0;
676}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000677
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000678static PyObject *
679_Pickler_GetString(PicklerObject *self)
680{
681 PyObject *output_buffer = self->output_buffer;
682
683 assert(self->output_buffer != NULL);
684 self->output_buffer = NULL;
685 /* Resize down to exact size */
686 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
687 return NULL;
688 return output_buffer;
689}
690
691static int
692_Pickler_FlushToFile(PicklerObject *self)
693{
694 PyObject *output, *result;
695
696 assert(self->write != NULL);
697
698 output = _Pickler_GetString(self);
699 if (output == NULL)
700 return -1;
701
702 result = _Pickler_FastCall(self, self->write, output);
703 Py_XDECREF(result);
704 return (result == NULL) ? -1 : 0;
705}
706
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200707static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000708_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
709{
710 Py_ssize_t i, required;
711 char *buffer;
712
713 assert(s != NULL);
714
715 required = self->output_len + n;
716 if (required > self->max_output_len) {
717 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
718 /* XXX This reallocates a new buffer every time, which is a bit
719 wasteful. */
720 if (_Pickler_FlushToFile(self) < 0)
721 return -1;
722 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000723 return -1;
724 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000725 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
726 /* we already flushed above, so the buffer is empty */
727 PyObject *result;
728 /* XXX we could spare an intermediate copy and pass
729 a memoryview instead */
730 PyObject *output = PyBytes_FromStringAndSize(s, n);
731 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000733 result = _Pickler_FastCall(self, self->write, output);
734 Py_XDECREF(result);
735 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000736 }
737 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000738 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
739 PyErr_NoMemory();
740 return -1;
741 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200742 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
744 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000745 }
746 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000747 buffer = PyBytes_AS_STRING(self->output_buffer);
748 if (n < 8) {
749 /* This is faster than memcpy when the string is short. */
750 for (i = 0; i < n; i++) {
751 buffer[self->output_len + i] = s[i];
752 }
753 }
754 else {
755 memcpy(buffer + self->output_len, s, n);
756 }
757 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000758 return n;
759}
760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000761static PicklerObject *
762_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000763{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000764 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000765
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000766 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
767 if (self == NULL)
768 return NULL;
769
770 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100771 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000772 self->arg = NULL;
773 self->write = NULL;
774 self->proto = 0;
775 self->bin = 0;
776 self->fast = 0;
777 self->fast_nesting = 0;
778 self->fix_imports = 0;
779 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000780 self->max_output_len = WRITE_BUF_SIZE;
781 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200782
783 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000784 self->output_buffer = PyBytes_FromStringAndSize(NULL,
785 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200786
787 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200788 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000789 return NULL;
790 }
791 return self;
792}
793
794static int
795_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
796 PyObject *fix_imports_obj)
797{
798 long proto = 0;
799 int fix_imports;
800
801 if (proto_obj == NULL || proto_obj == Py_None)
802 proto = DEFAULT_PROTOCOL;
803 else {
804 proto = PyLong_AsLong(proto_obj);
805 if (proto == -1 && PyErr_Occurred())
806 return -1;
807 }
808 if (proto < 0)
809 proto = HIGHEST_PROTOCOL;
810 if (proto > HIGHEST_PROTOCOL) {
811 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
812 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000813 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000814 }
815 fix_imports = PyObject_IsTrue(fix_imports_obj);
816 if (fix_imports == -1)
817 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200818
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000819 self->proto = proto;
820 self->bin = proto > 0;
821 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000822
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000823 return 0;
824}
825
826/* Returns -1 (with an exception set) on failure, 0 on success. This may
827 be called once on a freshly created Pickler. */
828static int
829_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
830{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200831 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200833 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000834 if (self->write == NULL) {
835 if (PyErr_ExceptionMatches(PyExc_AttributeError))
836 PyErr_SetString(PyExc_TypeError,
837 "file must have a 'write' attribute");
838 return -1;
839 }
840
841 return 0;
842}
843
844/* See documentation for _Pickler_FastCall(). */
845static PyObject *
846_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
847{
848 PyObject *result = NULL;
849
850 ARG_TUP(self, arg);
851 if (self->arg) {
852 result = PyObject_Call(func, self->arg, NULL);
853 FREE_ARG_TUP(self);
854 }
855 return result;
856}
857
858/* Returns the size of the input on success, -1 on failure. This takes its
859 own reference to `input`. */
860static Py_ssize_t
861_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
862{
863 if (self->buffer.buf != NULL)
864 PyBuffer_Release(&self->buffer);
865 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
866 return -1;
867 self->input_buffer = self->buffer.buf;
868 self->input_len = self->buffer.len;
869 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000870 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000871 return self->input_len;
872}
873
Antoine Pitrou04248a82010-10-12 20:51:21 +0000874static int
875_Unpickler_SkipConsumed(UnpicklerObject *self)
876{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100877 Py_ssize_t consumed;
878 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000879
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100880 consumed = self->next_read_idx - self->prefetched_idx;
881 if (consumed <= 0)
882 return 0;
883
884 assert(self->peek); /* otherwise we did something wrong */
885 /* This makes an useless copy... */
886 r = PyObject_CallFunction(self->read, "n", consumed);
887 if (r == NULL)
888 return -1;
889 Py_DECREF(r);
890
891 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000892 return 0;
893}
894
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000895static const Py_ssize_t READ_WHOLE_LINE = -1;
896
897/* If reading from a file, we need to only pull the bytes we need, since there
898 may be multiple pickle objects arranged contiguously in the same input
899 buffer.
900
901 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
902 bytes from the input stream/buffer.
903
904 Update the unpickler's input buffer with the newly-read data. Returns -1 on
905 failure; on success, returns the number of bytes read from the file.
906
907 On success, self->input_len will be 0; this is intentional so that when
908 unpickling from a file, the "we've run out of data" code paths will trigger,
909 causing the Unpickler to go back to the file for more data. Use the returned
910 size to tell you how much data you can process. */
911static Py_ssize_t
912_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
913{
914 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000915 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000916
917 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200918
Antoine Pitrou04248a82010-10-12 20:51:21 +0000919 if (_Unpickler_SkipConsumed(self) < 0)
920 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000921
922 if (n == READ_WHOLE_LINE)
923 data = PyObject_Call(self->readline, empty_tuple, NULL);
924 else {
925 PyObject *len = PyLong_FromSsize_t(n);
926 if (len == NULL)
927 return -1;
928 data = _Unpickler_FastCall(self, self->read, len);
929 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000930 if (data == NULL)
931 return -1;
932
Antoine Pitrou04248a82010-10-12 20:51:21 +0000933 /* Prefetch some data without advancing the file pointer, if possible */
934 if (self->peek) {
935 PyObject *len, *prefetched;
936 len = PyLong_FromSsize_t(PREFETCH);
937 if (len == NULL) {
938 Py_DECREF(data);
939 return -1;
940 }
941 prefetched = _Unpickler_FastCall(self, self->peek, len);
942 if (prefetched == NULL) {
943 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
944 /* peek() is probably not supported by the given file object */
945 PyErr_Clear();
946 Py_CLEAR(self->peek);
947 }
948 else {
949 Py_DECREF(data);
950 return -1;
951 }
952 }
953 else {
954 assert(PyBytes_Check(prefetched));
955 prefetched_size = PyBytes_GET_SIZE(prefetched);
956 PyBytes_ConcatAndDel(&data, prefetched);
957 if (data == NULL)
958 return -1;
959 }
960 }
961
962 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000963 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000964 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000965 return read_size;
966}
967
968/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
969
970 This should be used for all data reads, rather than accessing the unpickler's
971 input buffer directly. This method deals correctly with reading from input
972 streams, which the input buffer doesn't deal with.
973
974 Note that when reading from a file-like object, self->next_read_idx won't
975 be updated (it should remain at 0 for the entire unpickling process). You
976 should use this function's return value to know how many bytes you can
977 consume.
978
979 Returns -1 (with an exception set) on failure. On success, return the
980 number of chars read. */
981static Py_ssize_t
982_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
983{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000984 Py_ssize_t num_read;
985
Antoine Pitrou04248a82010-10-12 20:51:21 +0000986 if (self->next_read_idx + n <= self->input_len) {
987 *s = self->input_buffer + self->next_read_idx;
988 self->next_read_idx += n;
989 return n;
990 }
991 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000992 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000993 return -1;
994 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000995 num_read = _Unpickler_ReadFromFile(self, n);
996 if (num_read < 0)
997 return -1;
998 if (num_read < n) {
999 PyErr_Format(PyExc_EOFError, "Ran out of input");
1000 return -1;
1001 }
1002 *s = self->input_buffer;
1003 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004 return n;
1005}
1006
1007static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001008_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1009 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001010{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001011 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001012 if (input_line == NULL) {
1013 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001015 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001017 memcpy(input_line, line, len);
1018 input_line[len] = '\0';
1019 self->input_line = input_line;
1020 *result = self->input_line;
1021 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001022}
1023
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001024/* Read a line from the input stream/buffer. If we run off the end of the input
1025 before hitting \n, return the data we found.
1026
1027 Returns the number of chars read, or -1 on failure. */
1028static Py_ssize_t
1029_Unpickler_Readline(UnpicklerObject *self, char **result)
1030{
1031 Py_ssize_t i, num_read;
1032
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001033 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 if (self->input_buffer[i] == '\n') {
1035 char *line_start = self->input_buffer + self->next_read_idx;
1036 num_read = i - self->next_read_idx + 1;
1037 self->next_read_idx = i + 1;
1038 return _Unpickler_CopyLine(self, line_start, num_read, result);
1039 }
1040 }
1041 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001042 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1043 if (num_read < 0)
1044 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001045 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001046 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001047 }
Victor Stinner121aab42011-09-29 23:40:53 +02001048
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001049 /* If we get here, we've run off the end of the input string. Return the
1050 remaining string and let the caller figure it out. */
1051 *result = self->input_buffer + self->next_read_idx;
1052 num_read = i - self->next_read_idx;
1053 self->next_read_idx = i;
1054 return num_read;
1055}
1056
1057/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1058 will be modified in place. */
1059static int
1060_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1061{
1062 Py_ssize_t i;
1063 PyObject **memo;
1064
1065 assert(new_size > self->memo_size);
1066
1067 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1068 if (memo == NULL) {
1069 PyErr_NoMemory();
1070 return -1;
1071 }
1072 self->memo = memo;
1073 for (i = self->memo_size; i < new_size; i++)
1074 self->memo[i] = NULL;
1075 self->memo_size = new_size;
1076 return 0;
1077}
1078
1079/* Returns NULL if idx is out of bounds. */
1080static PyObject *
1081_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1082{
1083 if (idx < 0 || idx >= self->memo_size)
1084 return NULL;
1085
1086 return self->memo[idx];
1087}
1088
1089/* Returns -1 (with an exception set) on failure, 0 on success.
1090 This takes its own reference to `value`. */
1091static int
1092_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1093{
1094 PyObject *old_item;
1095
1096 if (idx >= self->memo_size) {
1097 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1098 return -1;
1099 assert(idx < self->memo_size);
1100 }
1101 Py_INCREF(value);
1102 old_item = self->memo[idx];
1103 self->memo[idx] = value;
1104 Py_XDECREF(old_item);
1105 return 0;
1106}
1107
1108static PyObject **
1109_Unpickler_NewMemo(Py_ssize_t new_size)
1110{
1111 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001112 if (memo == NULL) {
1113 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001114 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001115 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001116 memset(memo, 0, new_size * sizeof(PyObject *));
1117 return memo;
1118}
1119
1120/* Free the unpickler's memo, taking care to decref any items left in it. */
1121static void
1122_Unpickler_MemoCleanup(UnpicklerObject *self)
1123{
1124 Py_ssize_t i;
1125 PyObject **memo = self->memo;
1126
1127 if (self->memo == NULL)
1128 return;
1129 self->memo = NULL;
1130 i = self->memo_size;
1131 while (--i >= 0) {
1132 Py_XDECREF(memo[i]);
1133 }
1134 PyMem_FREE(memo);
1135}
1136
1137static UnpicklerObject *
1138_Unpickler_New(void)
1139{
1140 UnpicklerObject *self;
1141
1142 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1143 if (self == NULL)
1144 return NULL;
1145
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001146 self->arg = NULL;
1147 self->pers_func = NULL;
1148 self->input_buffer = NULL;
1149 self->input_line = NULL;
1150 self->input_len = 0;
1151 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001152 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001153 self->read = NULL;
1154 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001155 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001156 self->encoding = NULL;
1157 self->errors = NULL;
1158 self->marks = NULL;
1159 self->num_marks = 0;
1160 self->marks_size = 0;
1161 self->proto = 0;
1162 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001163 memset(&self->buffer, 0, sizeof(Py_buffer));
1164 self->memo_size = 32;
1165 self->memo = _Unpickler_NewMemo(self->memo_size);
1166 self->stack = (Pdata *)Pdata_New();
1167
1168 if (self->memo == NULL || self->stack == NULL) {
1169 Py_DECREF(self);
1170 return NULL;
1171 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001172
1173 return self;
1174}
1175
1176/* Returns -1 (with an exception set) on failure, 0 on success. This may
1177 be called once on a freshly created Pickler. */
1178static int
1179_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1180{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001181 _Py_IDENTIFIER(peek);
1182 _Py_IDENTIFIER(read);
1183 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001184
1185 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001186 if (self->peek == NULL) {
1187 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1188 PyErr_Clear();
1189 else
1190 return -1;
1191 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001192 self->read = _PyObject_GetAttrId(file, &PyId_read);
1193 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001194 if (self->readline == NULL || self->read == NULL) {
1195 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1196 PyErr_SetString(PyExc_TypeError,
1197 "file must have 'read' and 'readline' attributes");
1198 Py_CLEAR(self->read);
1199 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001200 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001201 return -1;
1202 }
1203 return 0;
1204}
1205
1206/* Returns -1 (with an exception set) on failure, 0 on success. This may
1207 be called once on a freshly created Pickler. */
1208static int
1209_Unpickler_SetInputEncoding(UnpicklerObject *self,
1210 const char *encoding,
1211 const char *errors)
1212{
1213 if (encoding == NULL)
1214 encoding = "ASCII";
1215 if (errors == NULL)
1216 errors = "strict";
1217
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001218 self->encoding = _PyMem_Strdup(encoding);
1219 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001220 if (self->encoding == NULL || self->errors == NULL) {
1221 PyErr_NoMemory();
1222 return -1;
1223 }
1224 return 0;
1225}
1226
1227/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001228static int
1229memo_get(PicklerObject *self, PyObject *key)
1230{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001231 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001233 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001234
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001235 value = PyMemoTable_Get(self->memo, key);
1236 if (value == NULL) {
1237 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001238 return -1;
1239 }
1240
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001241 if (!self->bin) {
1242 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001243 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1244 "%" PY_FORMAT_SIZE_T "d\n", *value);
1245 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001246 }
1247 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001248 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001249 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001250 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001251 len = 2;
1252 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001253 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001254 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001255 pdata[1] = (unsigned char)(*value & 0xff);
1256 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1257 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1258 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001259 len = 5;
1260 }
1261 else { /* unlikely */
1262 PyErr_SetString(PicklingError,
1263 "memo id too large for LONG_BINGET");
1264 return -1;
1265 }
1266 }
1267
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001268 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001269 return -1;
1270
1271 return 0;
1272}
1273
1274/* Store an object in the memo, assign it a new unique ID based on the number
1275 of objects currently stored in the memo and generate a PUT opcode. */
1276static int
1277memo_put(PicklerObject *self, PyObject *obj)
1278{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001279 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001281 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001282 int status = 0;
1283
1284 if (self->fast)
1285 return 0;
1286
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001287 x = PyMemoTable_Size(self->memo);
1288 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001289 goto error;
1290
1291 if (!self->bin) {
1292 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001293 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1294 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001295 len = strlen(pdata);
1296 }
1297 else {
1298 if (x < 256) {
1299 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001300 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001301 len = 2;
1302 }
1303 else if (x <= 0xffffffffL) {
1304 pdata[0] = LONG_BINPUT;
1305 pdata[1] = (unsigned char)(x & 0xff);
1306 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1307 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1308 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1309 len = 5;
1310 }
1311 else { /* unlikely */
1312 PyErr_SetString(PicklingError,
1313 "memo id too large for LONG_BINPUT");
1314 return -1;
1315 }
1316 }
1317
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001318 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001319 goto error;
1320
1321 if (0) {
1322 error:
1323 status = -1;
1324 }
1325
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001326 return status;
1327}
1328
1329static PyObject *
1330whichmodule(PyObject *global, PyObject *global_name)
1331{
1332 Py_ssize_t i, j;
1333 static PyObject *module_str = NULL;
1334 static PyObject *main_str = NULL;
1335 PyObject *module_name;
1336 PyObject *modules_dict;
1337 PyObject *module;
1338 PyObject *obj;
1339
1340 if (module_str == NULL) {
1341 module_str = PyUnicode_InternFromString("__module__");
1342 if (module_str == NULL)
1343 return NULL;
1344 main_str = PyUnicode_InternFromString("__main__");
1345 if (main_str == NULL)
1346 return NULL;
1347 }
1348
1349 module_name = PyObject_GetAttr(global, module_str);
1350
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001351 /* In some rare cases (e.g., bound methods of extension types),
1352 __module__ can be None. If it is so, then search sys.modules
1353 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001354 if (module_name == Py_None) {
1355 Py_DECREF(module_name);
1356 goto search;
1357 }
1358
1359 if (module_name) {
1360 return module_name;
1361 }
1362 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1363 PyErr_Clear();
1364 else
1365 return NULL;
1366
1367 search:
Victor Stinnerbb520202013-11-06 22:40:41 +01001368 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02001369 if (modules_dict == NULL) {
1370 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001371 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001373
1374 i = 0;
1375 module_name = NULL;
1376 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001377 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001378 continue;
1379
1380 obj = PyObject_GetAttr(module, global_name);
1381 if (obj == NULL) {
1382 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1383 PyErr_Clear();
1384 else
1385 return NULL;
1386 continue;
1387 }
1388
1389 if (obj != global) {
1390 Py_DECREF(obj);
1391 continue;
1392 }
1393
1394 Py_DECREF(obj);
1395 break;
1396 }
1397
1398 /* If no module is found, use __main__. */
1399 if (!j) {
1400 module_name = main_str;
1401 }
1402
1403 Py_INCREF(module_name);
1404 return module_name;
1405}
1406
1407/* fast_save_enter() and fast_save_leave() are guards against recursive
1408 objects when Pickler is used with the "fast mode" (i.e., with object
1409 memoization disabled). If the nesting of a list or dict object exceed
1410 FAST_NESTING_LIMIT, these guards will start keeping an internal
1411 reference to the seen list or dict objects and check whether these objects
1412 are recursive. These are not strictly necessary, since save() has a
1413 hard-coded recursion limit, but they give a nicer error message than the
1414 typical RuntimeError. */
1415static int
1416fast_save_enter(PicklerObject *self, PyObject *obj)
1417{
1418 /* if fast_nesting < 0, we're doing an error exit. */
1419 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1420 PyObject *key = NULL;
1421 if (self->fast_memo == NULL) {
1422 self->fast_memo = PyDict_New();
1423 if (self->fast_memo == NULL) {
1424 self->fast_nesting = -1;
1425 return 0;
1426 }
1427 }
1428 key = PyLong_FromVoidPtr(obj);
1429 if (key == NULL)
1430 return 0;
1431 if (PyDict_GetItem(self->fast_memo, key)) {
1432 Py_DECREF(key);
1433 PyErr_Format(PyExc_ValueError,
1434 "fast mode: can't pickle cyclic objects "
1435 "including object type %.200s at %p",
1436 obj->ob_type->tp_name, obj);
1437 self->fast_nesting = -1;
1438 return 0;
1439 }
1440 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1441 Py_DECREF(key);
1442 self->fast_nesting = -1;
1443 return 0;
1444 }
1445 Py_DECREF(key);
1446 }
1447 return 1;
1448}
1449
1450static int
1451fast_save_leave(PicklerObject *self, PyObject *obj)
1452{
1453 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1454 PyObject *key = PyLong_FromVoidPtr(obj);
1455 if (key == NULL)
1456 return 0;
1457 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1458 Py_DECREF(key);
1459 return 0;
1460 }
1461 Py_DECREF(key);
1462 }
1463 return 1;
1464}
1465
1466static int
1467save_none(PicklerObject *self, PyObject *obj)
1468{
1469 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001470 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001471 return -1;
1472
1473 return 0;
1474}
1475
1476static int
1477save_bool(PicklerObject *self, PyObject *obj)
1478{
1479 static const char *buf[2] = { FALSE, TRUE };
1480 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1481 int p = (obj == Py_True);
1482
1483 if (self->proto >= 2) {
1484 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001485 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001486 return -1;
1487 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001488 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001489 return -1;
1490
1491 return 0;
1492}
1493
1494static int
1495save_int(PicklerObject *self, long x)
1496{
1497 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001498 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499
1500 if (!self->bin
1501#if SIZEOF_LONG > 4
1502 || x > 0x7fffffffL || x < -0x80000000L
1503#endif
1504 ) {
1505 /* Text-mode pickle, or long too big to fit in the 4-byte
1506 * signed BININT format: store as a string.
1507 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001508 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1509 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001510 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001511 return -1;
1512 }
1513 else {
1514 /* Binary pickle and x fits in a signed 4-byte int. */
1515 pdata[1] = (unsigned char)(x & 0xff);
1516 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1517 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1518 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1519
1520 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1521 if (pdata[2] == 0) {
1522 pdata[0] = BININT1;
1523 len = 2;
1524 }
1525 else {
1526 pdata[0] = BININT2;
1527 len = 3;
1528 }
1529 }
1530 else {
1531 pdata[0] = BININT;
1532 len = 5;
1533 }
1534
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001535 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001536 return -1;
1537 }
1538
1539 return 0;
1540}
1541
1542static int
1543save_long(PicklerObject *self, PyObject *obj)
1544{
1545 PyObject *repr = NULL;
1546 Py_ssize_t size;
1547 long val = PyLong_AsLong(obj);
1548 int status = 0;
1549
1550 const char long_op = LONG;
1551
1552 if (val == -1 && PyErr_Occurred()) {
1553 /* out of range for int pickling */
1554 PyErr_Clear();
1555 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001556 else
1557#if SIZEOF_LONG > 4
1558 if (val <= 0x7fffffffL && val >= -0x80000000L)
1559#endif
1560 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001561
1562 if (self->proto >= 2) {
1563 /* Linear-time pickling. */
1564 size_t nbits;
1565 size_t nbytes;
1566 unsigned char *pdata;
1567 char header[5];
1568 int i;
1569 int sign = _PyLong_Sign(obj);
1570
1571 if (sign == 0) {
1572 header[0] = LONG1;
1573 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001574 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001575 goto error;
1576 return 0;
1577 }
1578 nbits = _PyLong_NumBits(obj);
1579 if (nbits == (size_t)-1 && PyErr_Occurred())
1580 goto error;
1581 /* How many bytes do we need? There are nbits >> 3 full
1582 * bytes of data, and nbits & 7 leftover bits. If there
1583 * are any leftover bits, then we clearly need another
1584 * byte. Wnat's not so obvious is that we *probably*
1585 * need another byte even if there aren't any leftovers:
1586 * the most-significant bit of the most-significant byte
1587 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001588 * opposite of the one we need. The exception is ints
1589 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001590 * its own 256's-complement, so has the right sign bit
1591 * even without the extra byte. That's a pain to check
1592 * for in advance, though, so we always grab an extra
1593 * byte at the start, and cut it back later if possible.
1594 */
1595 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001596 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001597 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001598 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001599 goto error;
1600 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001601 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001602 if (repr == NULL)
1603 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001604 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001605 i = _PyLong_AsByteArray((PyLongObject *)obj,
1606 pdata, nbytes,
1607 1 /* little endian */ , 1 /* signed */ );
1608 if (i < 0)
1609 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001610 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001611 * needed. This is so iff the MSB is all redundant sign
1612 * bits.
1613 */
1614 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001615 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001616 pdata[nbytes - 1] == 0xff &&
1617 (pdata[nbytes - 2] & 0x80) != 0) {
1618 nbytes--;
1619 }
1620
1621 if (nbytes < 256) {
1622 header[0] = LONG1;
1623 header[1] = (unsigned char)nbytes;
1624 size = 2;
1625 }
1626 else {
1627 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001628 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001629 for (i = 1; i < 5; i++) {
1630 header[i] = (unsigned char)(size & 0xff);
1631 size >>= 8;
1632 }
1633 size = 5;
1634 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001635 if (_Pickler_Write(self, header, size) < 0 ||
1636 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001637 goto error;
1638 }
1639 else {
1640 char *string;
1641
Mark Dickinson8dd05142009-01-20 20:43:58 +00001642 /* proto < 2: write the repr and newline. This is quadratic-time (in
1643 the number of digits), in both directions. We add a trailing 'L'
1644 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645
1646 repr = PyObject_Repr(obj);
1647 if (repr == NULL)
1648 goto error;
1649
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001650 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001651 if (string == NULL)
1652 goto error;
1653
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001654 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1655 _Pickler_Write(self, string, size) < 0 ||
1656 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001657 goto error;
1658 }
1659
1660 if (0) {
1661 error:
1662 status = -1;
1663 }
1664 Py_XDECREF(repr);
1665
1666 return status;
1667}
1668
1669static int
1670save_float(PicklerObject *self, PyObject *obj)
1671{
1672 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1673
1674 if (self->bin) {
1675 char pdata[9];
1676 pdata[0] = BINFLOAT;
1677 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1678 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001679 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001680 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001681 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001682 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001683 int result = -1;
1684 char *buf = NULL;
1685 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001686
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001687 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001688 goto done;
1689
Mark Dickinson3e09f432009-04-17 08:41:23 +00001690 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001691 if (!buf) {
1692 PyErr_NoMemory();
1693 goto done;
1694 }
1695
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001696 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001697 goto done;
1698
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001699 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001700 goto done;
1701
1702 result = 0;
1703done:
1704 PyMem_Free(buf);
1705 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001706 }
1707
1708 return 0;
1709}
1710
1711static int
1712save_bytes(PicklerObject *self, PyObject *obj)
1713{
1714 if (self->proto < 3) {
1715 /* Older pickle protocols do not have an opcode for pickling bytes
1716 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001717 the __reduce__ method) to permit bytes object unpickling.
1718
1719 Here we use a hack to be compatible with Python 2. Since in Python
1720 2 'bytes' is just an alias for 'str' (which has different
1721 parameters than the actual bytes object), we use codecs.encode
1722 to create the appropriate 'str' object when unpickled using
1723 Python 2 *and* the appropriate 'bytes' object when unpickled
1724 using Python 3. Again this is a hack and we don't need to do this
1725 with newer protocols. */
1726 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001727 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001728 int status;
1729
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001730 if (codecs_encode == NULL) {
1731 PyObject *codecs_module = PyImport_ImportModule("codecs");
1732 if (codecs_module == NULL) {
1733 return -1;
1734 }
1735 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1736 Py_DECREF(codecs_module);
1737 if (codecs_encode == NULL) {
1738 return -1;
1739 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001740 }
1741
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001742 if (PyBytes_GET_SIZE(obj) == 0) {
1743 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1744 }
1745 else {
1746 static PyObject *latin1 = NULL;
1747 PyObject *unicode_str =
1748 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1749 PyBytes_GET_SIZE(obj),
1750 "strict");
1751 if (unicode_str == NULL)
1752 return -1;
1753 if (latin1 == NULL) {
1754 latin1 = PyUnicode_InternFromString("latin1");
Christian Heimes82e6b942013-06-29 21:37:34 +02001755 if (latin1 == NULL) {
1756 Py_DECREF(unicode_str);
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001757 return -1;
Christian Heimes82e6b942013-06-29 21:37:34 +02001758 }
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001759 }
1760 reduce_value = Py_BuildValue("(O(OO))",
1761 codecs_encode, unicode_str, latin1);
1762 Py_DECREF(unicode_str);
1763 }
1764
1765 if (reduce_value == NULL)
1766 return -1;
1767
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001768 /* save_reduce() will memoize the object automatically. */
1769 status = save_reduce(self, reduce_value, obj);
1770 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001771 return status;
1772 }
1773 else {
1774 Py_ssize_t size;
1775 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001776 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001777
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001778 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001779 if (size < 0)
1780 return -1;
1781
1782 if (size < 256) {
1783 header[0] = SHORT_BINBYTES;
1784 header[1] = (unsigned char)size;
1785 len = 2;
1786 }
1787 else if (size <= 0xffffffffL) {
1788 header[0] = BINBYTES;
1789 header[1] = (unsigned char)(size & 0xff);
1790 header[2] = (unsigned char)((size >> 8) & 0xff);
1791 header[3] = (unsigned char)((size >> 16) & 0xff);
1792 header[4] = (unsigned char)((size >> 24) & 0xff);
1793 len = 5;
1794 }
1795 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001796 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001797 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001798 return -1; /* string too large */
1799 }
1800
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001801 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001802 return -1;
1803
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001804 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001805 return -1;
1806
1807 if (memo_put(self, obj) < 0)
1808 return -1;
1809
1810 return 0;
1811 }
1812}
1813
1814/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1815 backslash and newline characters to \uXXXX escapes. */
1816static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001817raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001818{
1819 PyObject *repr, *result;
1820 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001821 Py_ssize_t i, size, expandsize;
1822 void *data;
1823 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001824
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001825 if (PyUnicode_READY(obj))
1826 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001827
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001828 size = PyUnicode_GET_LENGTH(obj);
1829 data = PyUnicode_DATA(obj);
1830 kind = PyUnicode_KIND(obj);
1831 if (kind == PyUnicode_4BYTE_KIND)
1832 expandsize = 10;
1833 else
1834 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001835
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001836 if (size > PY_SSIZE_T_MAX / expandsize)
1837 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001838 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001839 if (repr == NULL)
1840 return NULL;
1841 if (size == 0)
1842 goto done;
1843
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001844 p = PyByteArray_AS_STRING(repr);
1845 for (i=0; i < size; i++) {
1846 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001847 /* Map 32-bit characters to '\Uxxxxxxxx' */
1848 if (ch >= 0x10000) {
1849 *p++ = '\\';
1850 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001851 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1852 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1853 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1854 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1855 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1856 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1857 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1858 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001859 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001860 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001861 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001862 *p++ = '\\';
1863 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001864 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1865 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1866 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1867 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001868 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001869 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001870 else
1871 *p++ = (char) ch;
1872 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001873 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001874
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001875done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001876 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001877 Py_DECREF(repr);
1878 return result;
1879}
1880
1881static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02001882write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
1883{
1884 char pdata[5];
1885
1886#if SIZEOF_SIZE_T > 4
1887 if (size > 0xffffffffUL) {
1888 /* string too large */
1889 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02001890 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02001891 return -1;
1892 }
1893#endif
1894
1895 pdata[0] = BINUNICODE;
1896 pdata[1] = (unsigned char)(size & 0xff);
1897 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1898 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1899 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1900
1901 if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
1902 return -1;
1903
1904 if (_Pickler_Write(self, data, size) < 0)
1905 return -1;
1906
1907 return 0;
1908}
1909
1910static int
1911write_unicode_binary(PicklerObject *self, PyObject *obj)
1912{
1913 PyObject *encoded = NULL;
1914 Py_ssize_t size;
1915 char *data;
1916 int r;
1917
1918 if (PyUnicode_READY(obj))
1919 return -1;
1920
1921 data = PyUnicode_AsUTF8AndSize(obj, &size);
1922 if (data != NULL)
1923 return write_utf8(self, data, size);
1924
1925 /* Issue #8383: for strings with lone surrogates, fallback on the
1926 "surrogatepass" error handler. */
1927 PyErr_Clear();
1928 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
1929 if (encoded == NULL)
1930 return -1;
1931
1932 r = write_utf8(self, PyBytes_AS_STRING(encoded),
1933 PyBytes_GET_SIZE(encoded));
1934 Py_DECREF(encoded);
1935 return r;
1936}
1937
1938static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001939save_unicode(PicklerObject *self, PyObject *obj)
1940{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001941 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001942 if (write_unicode_binary(self, obj) < 0)
1943 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001944 }
1945 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001946 PyObject *encoded;
1947 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001948 const char unicode_op = UNICODE;
1949
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001950 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001951 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001952 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001953
Antoine Pitrou299978d2013-04-07 17:38:11 +02001954 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
1955 Py_DECREF(encoded);
1956 return -1;
1957 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001958
1959 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02001960 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
1961 Py_DECREF(encoded);
1962 return -1;
1963 }
1964 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001965
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001966 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001967 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001968 }
1969 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001970 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001971
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973}
1974
1975/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1976static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001977store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001979 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001980
1981 assert(PyTuple_Size(t) == len);
1982
1983 for (i = 0; i < len; i++) {
1984 PyObject *element = PyTuple_GET_ITEM(t, i);
1985
1986 if (element == NULL)
1987 return -1;
1988 if (save(self, element, 0) < 0)
1989 return -1;
1990 }
1991
1992 return 0;
1993}
1994
1995/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1996 * used across protocols to minimize the space needed to pickle them.
1997 * Tuples are also the only builtin immutable type that can be recursive
1998 * (a tuple can be reached from itself), and that requires some subtle
1999 * magic so that it works in all cases. IOW, this is a long routine.
2000 */
2001static int
2002save_tuple(PicklerObject *self, PyObject *obj)
2003{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002004 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005
2006 const char mark_op = MARK;
2007 const char tuple_op = TUPLE;
2008 const char pop_op = POP;
2009 const char pop_mark_op = POP_MARK;
2010 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2011
2012 if ((len = PyTuple_Size(obj)) < 0)
2013 return -1;
2014
2015 if (len == 0) {
2016 char pdata[2];
2017
2018 if (self->proto) {
2019 pdata[0] = EMPTY_TUPLE;
2020 len = 1;
2021 }
2022 else {
2023 pdata[0] = MARK;
2024 pdata[1] = TUPLE;
2025 len = 2;
2026 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002027 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028 return -1;
2029 return 0;
2030 }
2031
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002032 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002033 * saving the tuple elements, the tuple must be recursive, in
2034 * which case we'll pop everything we put on the stack, and fetch
2035 * its value from the memo.
2036 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002037 if (len <= 3 && self->proto >= 2) {
2038 /* Use TUPLE{1,2,3} opcodes. */
2039 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002040 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002041
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002042 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002043 /* pop the len elements */
2044 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002045 if (_Pickler_Write(self, &pop_op, 1) < 0)
2046 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002048 if (memo_get(self, obj) < 0)
2049 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002050
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002051 return 0;
2052 }
2053 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002054 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2055 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002056 }
2057 goto memoize;
2058 }
2059
2060 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2061 * Generate MARK e1 e2 ... TUPLE
2062 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002063 if (_Pickler_Write(self, &mark_op, 1) < 0)
2064 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002065
2066 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002067 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002068
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002069 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002070 /* pop the stack stuff we pushed */
2071 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002072 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2073 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002074 }
2075 else {
2076 /* Note that we pop one more than len, to remove
2077 * the MARK too.
2078 */
2079 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002080 if (_Pickler_Write(self, &pop_op, 1) < 0)
2081 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002082 }
2083 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002084 if (memo_get(self, obj) < 0)
2085 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002086
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002087 return 0;
2088 }
2089 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002090 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2091 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002092 }
2093
2094 memoize:
2095 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002096 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002097
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002098 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002099}
2100
2101/* iter is an iterator giving items, and we batch up chunks of
2102 * MARK item item ... item APPENDS
2103 * opcode sequences. Calling code should have arranged to first create an
2104 * empty list, or list-like object, for the APPENDS to operate on.
2105 * Returns 0 on success, <0 on error.
2106 */
2107static int
2108batch_list(PicklerObject *self, PyObject *iter)
2109{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002110 PyObject *obj = NULL;
2111 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002112 int i, n;
2113
2114 const char mark_op = MARK;
2115 const char append_op = APPEND;
2116 const char appends_op = APPENDS;
2117
2118 assert(iter != NULL);
2119
2120 /* XXX: I think this function could be made faster by avoiding the
2121 iterator interface and fetching objects directly from list using
2122 PyList_GET_ITEM.
2123 */
2124
2125 if (self->proto == 0) {
2126 /* APPENDS isn't available; do one at a time. */
2127 for (;;) {
2128 obj = PyIter_Next(iter);
2129 if (obj == NULL) {
2130 if (PyErr_Occurred())
2131 return -1;
2132 break;
2133 }
2134 i = save(self, obj, 0);
2135 Py_DECREF(obj);
2136 if (i < 0)
2137 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002138 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002139 return -1;
2140 }
2141 return 0;
2142 }
2143
2144 /* proto > 0: write in batches of BATCHSIZE. */
2145 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002146 /* Get first item */
2147 firstitem = PyIter_Next(iter);
2148 if (firstitem == NULL) {
2149 if (PyErr_Occurred())
2150 goto error;
2151
2152 /* nothing more to add */
2153 break;
2154 }
2155
2156 /* Try to get a second item */
2157 obj = PyIter_Next(iter);
2158 if (obj == NULL) {
2159 if (PyErr_Occurred())
2160 goto error;
2161
2162 /* Only one item to write */
2163 if (save(self, firstitem, 0) < 0)
2164 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002165 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002166 goto error;
2167 Py_CLEAR(firstitem);
2168 break;
2169 }
2170
2171 /* More than one item to write */
2172
2173 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002174 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002175 goto error;
2176
2177 if (save(self, firstitem, 0) < 0)
2178 goto error;
2179 Py_CLEAR(firstitem);
2180 n = 1;
2181
2182 /* Fetch and save up to BATCHSIZE items */
2183 while (obj) {
2184 if (save(self, obj, 0) < 0)
2185 goto error;
2186 Py_CLEAR(obj);
2187 n += 1;
2188
2189 if (n == BATCHSIZE)
2190 break;
2191
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002192 obj = PyIter_Next(iter);
2193 if (obj == NULL) {
2194 if (PyErr_Occurred())
2195 goto error;
2196 break;
2197 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002198 }
2199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002200 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002201 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002202
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002203 } while (n == BATCHSIZE);
2204 return 0;
2205
2206 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002207 Py_XDECREF(firstitem);
2208 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002209 return -1;
2210}
2211
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002212/* This is a variant of batch_list() above, specialized for lists (with no
2213 * support for list subclasses). Like batch_list(), we batch up chunks of
2214 * MARK item item ... item APPENDS
2215 * opcode sequences. Calling code should have arranged to first create an
2216 * empty list, or list-like object, for the APPENDS to operate on.
2217 * Returns 0 on success, -1 on error.
2218 *
2219 * This version is considerably faster than batch_list(), if less general.
2220 *
2221 * Note that this only works for protocols > 0.
2222 */
2223static int
2224batch_list_exact(PicklerObject *self, PyObject *obj)
2225{
2226 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002227 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002228
2229 const char append_op = APPEND;
2230 const char appends_op = APPENDS;
2231 const char mark_op = MARK;
2232
2233 assert(obj != NULL);
2234 assert(self->proto > 0);
2235 assert(PyList_CheckExact(obj));
2236
2237 if (PyList_GET_SIZE(obj) == 1) {
2238 item = PyList_GET_ITEM(obj, 0);
2239 if (save(self, item, 0) < 0)
2240 return -1;
2241 if (_Pickler_Write(self, &append_op, 1) < 0)
2242 return -1;
2243 return 0;
2244 }
2245
2246 /* Write in batches of BATCHSIZE. */
2247 total = 0;
2248 do {
2249 this_batch = 0;
2250 if (_Pickler_Write(self, &mark_op, 1) < 0)
2251 return -1;
2252 while (total < PyList_GET_SIZE(obj)) {
2253 item = PyList_GET_ITEM(obj, total);
2254 if (save(self, item, 0) < 0)
2255 return -1;
2256 total++;
2257 if (++this_batch == BATCHSIZE)
2258 break;
2259 }
2260 if (_Pickler_Write(self, &appends_op, 1) < 0)
2261 return -1;
2262
2263 } while (total < PyList_GET_SIZE(obj));
2264
2265 return 0;
2266}
2267
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002268static int
2269save_list(PicklerObject *self, PyObject *obj)
2270{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002271 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002273 int status = 0;
2274
2275 if (self->fast && !fast_save_enter(self, obj))
2276 goto error;
2277
2278 /* Create an empty list. */
2279 if (self->bin) {
2280 header[0] = EMPTY_LIST;
2281 len = 1;
2282 }
2283 else {
2284 header[0] = MARK;
2285 header[1] = LIST;
2286 len = 2;
2287 }
2288
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002289 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002290 goto error;
2291
2292 /* Get list length, and bow out early if empty. */
2293 if ((len = PyList_Size(obj)) < 0)
2294 goto error;
2295
2296 if (memo_put(self, obj) < 0)
2297 goto error;
2298
2299 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002300 /* Materialize the list elements. */
2301 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002302 if (Py_EnterRecursiveCall(" while pickling an object"))
2303 goto error;
2304 status = batch_list_exact(self, obj);
2305 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002306 } else {
2307 PyObject *iter = PyObject_GetIter(obj);
2308 if (iter == NULL)
2309 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002310
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002311 if (Py_EnterRecursiveCall(" while pickling an object")) {
2312 Py_DECREF(iter);
2313 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002314 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002315 status = batch_list(self, iter);
2316 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002317 Py_DECREF(iter);
2318 }
2319 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002320 if (0) {
2321 error:
2322 status = -1;
2323 }
2324
2325 if (self->fast && !fast_save_leave(self, obj))
2326 status = -1;
2327
2328 return status;
2329}
2330
2331/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2332 * MARK key value ... key value SETITEMS
2333 * opcode sequences. Calling code should have arranged to first create an
2334 * empty dict, or dict-like object, for the SETITEMS to operate on.
2335 * Returns 0 on success, <0 on error.
2336 *
2337 * This is very much like batch_list(). The difference between saving
2338 * elements directly, and picking apart two-tuples, is so long-winded at
2339 * the C level, though, that attempts to combine these routines were too
2340 * ugly to bear.
2341 */
2342static int
2343batch_dict(PicklerObject *self, PyObject *iter)
2344{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002345 PyObject *obj = NULL;
2346 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002347 int i, n;
2348
2349 const char mark_op = MARK;
2350 const char setitem_op = SETITEM;
2351 const char setitems_op = SETITEMS;
2352
2353 assert(iter != NULL);
2354
2355 if (self->proto == 0) {
2356 /* SETITEMS isn't available; do one at a time. */
2357 for (;;) {
2358 obj = PyIter_Next(iter);
2359 if (obj == NULL) {
2360 if (PyErr_Occurred())
2361 return -1;
2362 break;
2363 }
2364 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2365 PyErr_SetString(PyExc_TypeError, "dict items "
2366 "iterator must return 2-tuples");
2367 return -1;
2368 }
2369 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2370 if (i >= 0)
2371 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2372 Py_DECREF(obj);
2373 if (i < 0)
2374 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002375 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002376 return -1;
2377 }
2378 return 0;
2379 }
2380
2381 /* proto > 0: write in batches of BATCHSIZE. */
2382 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002383 /* Get first item */
2384 firstitem = PyIter_Next(iter);
2385 if (firstitem == NULL) {
2386 if (PyErr_Occurred())
2387 goto error;
2388
2389 /* nothing more to add */
2390 break;
2391 }
2392 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2393 PyErr_SetString(PyExc_TypeError, "dict items "
2394 "iterator must return 2-tuples");
2395 goto error;
2396 }
2397
2398 /* Try to get a second item */
2399 obj = PyIter_Next(iter);
2400 if (obj == NULL) {
2401 if (PyErr_Occurred())
2402 goto error;
2403
2404 /* Only one item to write */
2405 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2406 goto error;
2407 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2408 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002409 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002410 goto error;
2411 Py_CLEAR(firstitem);
2412 break;
2413 }
2414
2415 /* More than one item to write */
2416
2417 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002418 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002419 goto error;
2420
2421 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2422 goto error;
2423 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2424 goto error;
2425 Py_CLEAR(firstitem);
2426 n = 1;
2427
2428 /* Fetch and save up to BATCHSIZE items */
2429 while (obj) {
2430 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2431 PyErr_SetString(PyExc_TypeError, "dict items "
2432 "iterator must return 2-tuples");
2433 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002434 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002435 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2436 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2437 goto error;
2438 Py_CLEAR(obj);
2439 n += 1;
2440
2441 if (n == BATCHSIZE)
2442 break;
2443
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002444 obj = PyIter_Next(iter);
2445 if (obj == NULL) {
2446 if (PyErr_Occurred())
2447 goto error;
2448 break;
2449 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002450 }
2451
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002452 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002453 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002454
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002455 } while (n == BATCHSIZE);
2456 return 0;
2457
2458 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002459 Py_XDECREF(firstitem);
2460 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002461 return -1;
2462}
2463
Collin Winter5c9b02d2009-05-25 05:43:30 +00002464/* This is a variant of batch_dict() above that specializes for dicts, with no
2465 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2466 * MARK key value ... key value SETITEMS
2467 * opcode sequences. Calling code should have arranged to first create an
2468 * empty dict, or dict-like object, for the SETITEMS to operate on.
2469 * Returns 0 on success, -1 on error.
2470 *
2471 * Note that this currently doesn't work for protocol 0.
2472 */
2473static int
2474batch_dict_exact(PicklerObject *self, PyObject *obj)
2475{
2476 PyObject *key = NULL, *value = NULL;
2477 int i;
2478 Py_ssize_t dict_size, ppos = 0;
2479
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002480 const char mark_op = MARK;
2481 const char setitem_op = SETITEM;
2482 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002483
2484 assert(obj != NULL);
2485 assert(self->proto > 0);
2486
2487 dict_size = PyDict_Size(obj);
2488
2489 /* Special-case len(d) == 1 to save space. */
2490 if (dict_size == 1) {
2491 PyDict_Next(obj, &ppos, &key, &value);
2492 if (save(self, key, 0) < 0)
2493 return -1;
2494 if (save(self, value, 0) < 0)
2495 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002496 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002497 return -1;
2498 return 0;
2499 }
2500
2501 /* Write in batches of BATCHSIZE. */
2502 do {
2503 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002504 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002505 return -1;
2506 while (PyDict_Next(obj, &ppos, &key, &value)) {
2507 if (save(self, key, 0) < 0)
2508 return -1;
2509 if (save(self, value, 0) < 0)
2510 return -1;
2511 if (++i == BATCHSIZE)
2512 break;
2513 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002514 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002515 return -1;
2516 if (PyDict_Size(obj) != dict_size) {
2517 PyErr_Format(
2518 PyExc_RuntimeError,
2519 "dictionary changed size during iteration");
2520 return -1;
2521 }
2522
2523 } while (i == BATCHSIZE);
2524 return 0;
2525}
2526
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002527static int
2528save_dict(PicklerObject *self, PyObject *obj)
2529{
2530 PyObject *items, *iter;
2531 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002532 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002533 int status = 0;
2534
2535 if (self->fast && !fast_save_enter(self, obj))
2536 goto error;
2537
2538 /* Create an empty dict. */
2539 if (self->bin) {
2540 header[0] = EMPTY_DICT;
2541 len = 1;
2542 }
2543 else {
2544 header[0] = MARK;
2545 header[1] = DICT;
2546 len = 2;
2547 }
2548
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002549 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002550 goto error;
2551
2552 /* Get dict size, and bow out early if empty. */
2553 if ((len = PyDict_Size(obj)) < 0)
2554 goto error;
2555
2556 if (memo_put(self, obj) < 0)
2557 goto error;
2558
2559 if (len != 0) {
2560 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002561 if (PyDict_CheckExact(obj) && self->proto > 0) {
2562 /* We can take certain shortcuts if we know this is a dict and
2563 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002564 if (Py_EnterRecursiveCall(" while pickling an object"))
2565 goto error;
2566 status = batch_dict_exact(self, obj);
2567 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002568 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002569 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002570
2571 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002572 if (items == NULL)
2573 goto error;
2574 iter = PyObject_GetIter(items);
2575 Py_DECREF(items);
2576 if (iter == NULL)
2577 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002578 if (Py_EnterRecursiveCall(" while pickling an object")) {
2579 Py_DECREF(iter);
2580 goto error;
2581 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002582 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002583 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002584 Py_DECREF(iter);
2585 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002586 }
2587
2588 if (0) {
2589 error:
2590 status = -1;
2591 }
2592
2593 if (self->fast && !fast_save_leave(self, obj))
2594 status = -1;
2595
2596 return status;
2597}
2598
2599static int
2600save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2601{
2602 static PyObject *name_str = NULL;
2603 PyObject *global_name = NULL;
2604 PyObject *module_name = NULL;
2605 PyObject *module = NULL;
2606 PyObject *cls;
2607 int status = 0;
2608
2609 const char global_op = GLOBAL;
2610
2611 if (name_str == NULL) {
2612 name_str = PyUnicode_InternFromString("__name__");
2613 if (name_str == NULL)
2614 goto error;
2615 }
2616
2617 if (name) {
2618 global_name = name;
2619 Py_INCREF(global_name);
2620 }
2621 else {
2622 global_name = PyObject_GetAttr(obj, name_str);
2623 if (global_name == NULL)
2624 goto error;
2625 }
2626
2627 module_name = whichmodule(obj, global_name);
2628 if (module_name == NULL)
2629 goto error;
2630
2631 /* XXX: Change to use the import C API directly with level=0 to disallow
2632 relative imports.
2633
2634 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2635 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2636 custom import functions (IMHO, this would be a nice security
2637 feature). The import C API would need to be extended to support the
2638 extra parameters of __import__ to fix that. */
2639 module = PyImport_Import(module_name);
2640 if (module == NULL) {
2641 PyErr_Format(PicklingError,
2642 "Can't pickle %R: import of module %R failed",
2643 obj, module_name);
2644 goto error;
2645 }
2646 cls = PyObject_GetAttr(module, global_name);
2647 if (cls == NULL) {
2648 PyErr_Format(PicklingError,
2649 "Can't pickle %R: attribute lookup %S.%S failed",
2650 obj, module_name, global_name);
2651 goto error;
2652 }
2653 if (cls != obj) {
2654 Py_DECREF(cls);
2655 PyErr_Format(PicklingError,
2656 "Can't pickle %R: it's not the same object as %S.%S",
2657 obj, module_name, global_name);
2658 goto error;
2659 }
2660 Py_DECREF(cls);
2661
2662 if (self->proto >= 2) {
2663 /* See whether this is in the extension registry, and if
2664 * so generate an EXT opcode.
2665 */
2666 PyObject *code_obj; /* extension code as Python object */
2667 long code; /* extension code as C value */
2668 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002669 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002670
2671 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2672 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2673 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2674 /* The object is not registered in the extension registry.
2675 This is the most likely code path. */
2676 if (code_obj == NULL)
2677 goto gen_global;
2678
2679 /* XXX: pickle.py doesn't check neither the type, nor the range
2680 of the value returned by the extension_registry. It should for
2681 consistency. */
2682
2683 /* Verify code_obj has the right type and value. */
2684 if (!PyLong_Check(code_obj)) {
2685 PyErr_Format(PicklingError,
2686 "Can't pickle %R: extension code %R isn't an integer",
2687 obj, code_obj);
2688 goto error;
2689 }
2690 code = PyLong_AS_LONG(code_obj);
2691 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002692 if (!PyErr_Occurred())
2693 PyErr_Format(PicklingError,
2694 "Can't pickle %R: extension code %ld is out of range",
2695 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002696 goto error;
2697 }
2698
2699 /* Generate an EXT opcode. */
2700 if (code <= 0xff) {
2701 pdata[0] = EXT1;
2702 pdata[1] = (unsigned char)code;
2703 n = 2;
2704 }
2705 else if (code <= 0xffff) {
2706 pdata[0] = EXT2;
2707 pdata[1] = (unsigned char)(code & 0xff);
2708 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2709 n = 3;
2710 }
2711 else {
2712 pdata[0] = EXT4;
2713 pdata[1] = (unsigned char)(code & 0xff);
2714 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2715 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2716 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2717 n = 5;
2718 }
2719
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002720 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002721 goto error;
2722 }
2723 else {
2724 /* Generate a normal global opcode if we are using a pickle
2725 protocol <= 2, or if the object is not registered in the
2726 extension registry. */
2727 PyObject *encoded;
2728 PyObject *(*unicode_encoder)(PyObject *);
2729
2730 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002731 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002732 goto error;
2733
2734 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2735 the module name and the global name using UTF-8. We do so only when
2736 we are using the pickle protocol newer than version 3. This is to
2737 ensure compatibility with older Unpickler running on Python 2.x. */
2738 if (self->proto >= 3) {
2739 unicode_encoder = PyUnicode_AsUTF8String;
2740 }
2741 else {
2742 unicode_encoder = PyUnicode_AsASCIIString;
2743 }
2744
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002745 /* For protocol < 3 and if the user didn't request against doing so,
2746 we convert module names to the old 2.x module names. */
2747 if (self->fix_imports) {
2748 PyObject *key;
2749 PyObject *item;
2750
2751 key = PyTuple_Pack(2, module_name, global_name);
2752 if (key == NULL)
2753 goto error;
2754 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2755 Py_DECREF(key);
2756 if (item) {
2757 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2758 PyErr_Format(PyExc_RuntimeError,
2759 "_compat_pickle.REVERSE_NAME_MAPPING values "
2760 "should be 2-tuples, not %.200s",
2761 Py_TYPE(item)->tp_name);
2762 goto error;
2763 }
2764 Py_CLEAR(module_name);
2765 Py_CLEAR(global_name);
2766 module_name = PyTuple_GET_ITEM(item, 0);
2767 global_name = PyTuple_GET_ITEM(item, 1);
2768 if (!PyUnicode_Check(module_name) ||
2769 !PyUnicode_Check(global_name)) {
2770 PyErr_Format(PyExc_RuntimeError,
2771 "_compat_pickle.REVERSE_NAME_MAPPING values "
2772 "should be pairs of str, not (%.200s, %.200s)",
2773 Py_TYPE(module_name)->tp_name,
2774 Py_TYPE(global_name)->tp_name);
2775 goto error;
2776 }
2777 Py_INCREF(module_name);
2778 Py_INCREF(global_name);
2779 }
2780 else if (PyErr_Occurred()) {
2781 goto error;
2782 }
2783
2784 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2785 if (item) {
2786 if (!PyUnicode_Check(item)) {
2787 PyErr_Format(PyExc_RuntimeError,
2788 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2789 "should be strings, not %.200s",
2790 Py_TYPE(item)->tp_name);
2791 goto error;
2792 }
2793 Py_CLEAR(module_name);
2794 module_name = item;
2795 Py_INCREF(module_name);
2796 }
2797 else if (PyErr_Occurred()) {
2798 goto error;
2799 }
2800 }
2801
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002802 /* Save the name of the module. */
2803 encoded = unicode_encoder(module_name);
2804 if (encoded == NULL) {
2805 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2806 PyErr_Format(PicklingError,
2807 "can't pickle module identifier '%S' using "
2808 "pickle protocol %i", module_name, self->proto);
2809 goto error;
2810 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002811 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002812 PyBytes_GET_SIZE(encoded)) < 0) {
2813 Py_DECREF(encoded);
2814 goto error;
2815 }
2816 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002817 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002818 goto error;
2819
2820 /* Save the name of the module. */
2821 encoded = unicode_encoder(global_name);
2822 if (encoded == NULL) {
2823 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2824 PyErr_Format(PicklingError,
2825 "can't pickle global identifier '%S' using "
2826 "pickle protocol %i", global_name, self->proto);
2827 goto error;
2828 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002829 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002830 PyBytes_GET_SIZE(encoded)) < 0) {
2831 Py_DECREF(encoded);
2832 goto error;
2833 }
2834 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002835 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002836 goto error;
2837
2838 /* Memoize the object. */
2839 if (memo_put(self, obj) < 0)
2840 goto error;
2841 }
2842
2843 if (0) {
2844 error:
2845 status = -1;
2846 }
2847 Py_XDECREF(module_name);
2848 Py_XDECREF(global_name);
2849 Py_XDECREF(module);
2850
2851 return status;
2852}
2853
2854static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002855save_ellipsis(PicklerObject *self, PyObject *obj)
2856{
Łukasz Langadbd78252012-03-12 22:59:11 +01002857 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002858 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002859 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002860 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002861 res = save_global(self, Py_Ellipsis, str);
2862 Py_DECREF(str);
2863 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002864}
2865
2866static int
2867save_notimplemented(PicklerObject *self, PyObject *obj)
2868{
Łukasz Langadbd78252012-03-12 22:59:11 +01002869 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002870 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002871 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002872 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002873 res = save_global(self, Py_NotImplemented, str);
2874 Py_DECREF(str);
2875 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002876}
2877
2878static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002879save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2880{
2881 PyObject *pid = NULL;
2882 int status = 0;
2883
2884 const char persid_op = PERSID;
2885 const char binpersid_op = BINPERSID;
2886
2887 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002888 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002889 if (pid == NULL)
2890 return -1;
2891
2892 if (pid != Py_None) {
2893 if (self->bin) {
2894 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002895 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002896 goto error;
2897 }
2898 else {
2899 PyObject *pid_str = NULL;
2900 char *pid_ascii_bytes;
2901 Py_ssize_t size;
2902
2903 pid_str = PyObject_Str(pid);
2904 if (pid_str == NULL)
2905 goto error;
2906
2907 /* XXX: Should it check whether the persistent id only contains
2908 ASCII characters? And what if the pid contains embedded
2909 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002910 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002911 Py_DECREF(pid_str);
2912 if (pid_ascii_bytes == NULL)
2913 goto error;
2914
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002915 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2916 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2917 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002918 goto error;
2919 }
2920 status = 1;
2921 }
2922
2923 if (0) {
2924 error:
2925 status = -1;
2926 }
2927 Py_XDECREF(pid);
2928
2929 return status;
2930}
2931
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002932static PyObject *
2933get_class(PyObject *obj)
2934{
2935 PyObject *cls;
2936 static PyObject *str_class;
2937
2938 if (str_class == NULL) {
2939 str_class = PyUnicode_InternFromString("__class__");
2940 if (str_class == NULL)
2941 return NULL;
2942 }
2943 cls = PyObject_GetAttr(obj, str_class);
2944 if (cls == NULL) {
2945 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2946 PyErr_Clear();
2947 cls = (PyObject *) Py_TYPE(obj);
2948 Py_INCREF(cls);
2949 }
2950 }
2951 return cls;
2952}
2953
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002954/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2955 * appropriate __reduce__ method for obj.
2956 */
2957static int
2958save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2959{
2960 PyObject *callable;
2961 PyObject *argtup;
2962 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002963 PyObject *listitems = Py_None;
2964 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002965 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002966
2967 int use_newobj = self->proto >= 2;
2968
2969 const char reduce_op = REDUCE;
2970 const char build_op = BUILD;
2971 const char newobj_op = NEWOBJ;
2972
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002973 size = PyTuple_Size(args);
2974 if (size < 2 || size > 5) {
2975 PyErr_SetString(PicklingError, "tuple returned by "
2976 "__reduce__ must contain 2 through 5 elements");
2977 return -1;
2978 }
2979
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002980 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2981 &callable, &argtup, &state, &listitems, &dictitems))
2982 return -1;
2983
2984 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002985 PyErr_SetString(PicklingError, "first item of the tuple "
2986 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002987 return -1;
2988 }
2989 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002990 PyErr_SetString(PicklingError, "second item of the tuple "
2991 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002992 return -1;
2993 }
2994
2995 if (state == Py_None)
2996 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002997
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002998 if (listitems == Py_None)
2999 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003000 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003001 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003002 "returned by __reduce__ must be an iterator, not %s",
3003 Py_TYPE(listitems)->tp_name);
3004 return -1;
3005 }
3006
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003007 if (dictitems == Py_None)
3008 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003009 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003010 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003011 "returned by __reduce__ must be an iterator, not %s",
3012 Py_TYPE(dictitems)->tp_name);
3013 return -1;
3014 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003015
3016 /* Protocol 2 special case: if callable's name is __newobj__, use
3017 NEWOBJ. */
3018 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003019 static PyObject *newobj_str = NULL, *name_str = NULL;
3020 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003021
3022 if (newobj_str == NULL) {
3023 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003024 name_str = PyUnicode_InternFromString("__name__");
3025 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003026 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003027 }
3028
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003029 name = PyObject_GetAttr(callable, name_str);
3030 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003031 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3032 PyErr_Clear();
3033 else
3034 return -1;
3035 use_newobj = 0;
3036 }
3037 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003038 use_newobj = PyUnicode_Check(name) &&
3039 PyUnicode_Compare(name, newobj_str) == 0;
3040 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003041 }
3042 }
3043 if (use_newobj) {
3044 PyObject *cls;
3045 PyObject *newargtup;
3046 PyObject *obj_class;
3047 int p;
3048
3049 /* Sanity checks. */
3050 if (Py_SIZE(argtup) < 1) {
3051 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3052 return -1;
3053 }
3054
3055 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003056 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003057 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003058 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003059 return -1;
3060 }
3061
3062 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003063 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003064 p = obj_class != cls; /* true iff a problem */
3065 Py_DECREF(obj_class);
3066 if (p) {
3067 PyErr_SetString(PicklingError, "args[0] from "
3068 "__newobj__ args has the wrong class");
3069 return -1;
3070 }
3071 }
3072 /* XXX: These calls save() are prone to infinite recursion. Imagine
3073 what happen if the value returned by the __reduce__() method of
3074 some extension type contains another object of the same type. Ouch!
3075
3076 Here is a quick example, that I ran into, to illustrate what I
3077 mean:
3078
3079 >>> import pickle, copyreg
3080 >>> copyreg.dispatch_table.pop(complex)
3081 >>> pickle.dumps(1+2j)
3082 Traceback (most recent call last):
3083 ...
3084 RuntimeError: maximum recursion depth exceeded
3085
3086 Removing the complex class from copyreg.dispatch_table made the
3087 __reduce_ex__() method emit another complex object:
3088
3089 >>> (1+1j).__reduce_ex__(2)
3090 (<function __newobj__ at 0xb7b71c3c>,
3091 (<class 'complex'>, (1+1j)), None, None, None)
3092
3093 Thus when save() was called on newargstup (the 2nd item) recursion
3094 ensued. Of course, the bug was in the complex class which had a
3095 broken __getnewargs__() that emitted another complex object. But,
3096 the point, here, is it is quite easy to end up with a broken reduce
3097 function. */
3098
3099 /* Save the class and its __new__ arguments. */
3100 if (save(self, cls, 0) < 0)
3101 return -1;
3102
3103 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3104 if (newargtup == NULL)
3105 return -1;
3106
3107 p = save(self, newargtup, 0);
3108 Py_DECREF(newargtup);
3109 if (p < 0)
3110 return -1;
3111
3112 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003113 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003114 return -1;
3115 }
3116 else { /* Not using NEWOBJ. */
3117 if (save(self, callable, 0) < 0 ||
3118 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003119 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003120 return -1;
3121 }
3122
3123 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3124 the caller do not want to memoize the object. Not particularly useful,
3125 but that is to mimic the behavior save_reduce() in pickle.py when
3126 obj is None. */
3127 if (obj && memo_put(self, obj) < 0)
3128 return -1;
3129
3130 if (listitems && batch_list(self, listitems) < 0)
3131 return -1;
3132
3133 if (dictitems && batch_dict(self, dictitems) < 0)
3134 return -1;
3135
3136 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003137 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003138 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003139 return -1;
3140 }
3141
3142 return 0;
3143}
3144
3145static int
3146save(PicklerObject *self, PyObject *obj, int pers_save)
3147{
3148 PyTypeObject *type;
3149 PyObject *reduce_func = NULL;
3150 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003151 int status = 0;
3152
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003153 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003154 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003155
3156 /* The extra pers_save argument is necessary to avoid calling save_pers()
3157 on its returned object. */
3158 if (!pers_save && self->pers_func) {
3159 /* save_pers() returns:
3160 -1 to signal an error;
3161 0 if it did nothing successfully;
3162 1 if a persistent id was saved.
3163 */
3164 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3165 goto done;
3166 }
3167
3168 type = Py_TYPE(obj);
3169
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003170 /* The old cPickle had an optimization that used switch-case statement
3171 dispatching on the first letter of the type name. This has was removed
3172 since benchmarks shown that this optimization was actually slowing
3173 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003174
3175 /* Atom types; these aren't memoized, so don't check the memo. */
3176
3177 if (obj == Py_None) {
3178 status = save_none(self, obj);
3179 goto done;
3180 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003181 else if (obj == Py_Ellipsis) {
3182 status = save_ellipsis(self, obj);
3183 goto done;
3184 }
3185 else if (obj == Py_NotImplemented) {
3186 status = save_notimplemented(self, obj);
3187 goto done;
3188 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003189 else if (obj == Py_False || obj == Py_True) {
3190 status = save_bool(self, obj);
3191 goto done;
3192 }
3193 else if (type == &PyLong_Type) {
3194 status = save_long(self, obj);
3195 goto done;
3196 }
3197 else if (type == &PyFloat_Type) {
3198 status = save_float(self, obj);
3199 goto done;
3200 }
3201
3202 /* Check the memo to see if it has the object. If so, generate
3203 a GET (or BINGET) opcode, instead of pickling the object
3204 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003205 if (PyMemoTable_Get(self->memo, obj)) {
3206 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003207 goto error;
3208 goto done;
3209 }
3210
3211 if (type == &PyBytes_Type) {
3212 status = save_bytes(self, obj);
3213 goto done;
3214 }
3215 else if (type == &PyUnicode_Type) {
3216 status = save_unicode(self, obj);
3217 goto done;
3218 }
3219 else if (type == &PyDict_Type) {
3220 status = save_dict(self, obj);
3221 goto done;
3222 }
3223 else if (type == &PyList_Type) {
3224 status = save_list(self, obj);
3225 goto done;
3226 }
3227 else if (type == &PyTuple_Type) {
3228 status = save_tuple(self, obj);
3229 goto done;
3230 }
3231 else if (type == &PyType_Type) {
3232 status = save_global(self, obj, NULL);
3233 goto done;
3234 }
3235 else if (type == &PyFunction_Type) {
3236 status = save_global(self, obj, NULL);
3237 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3238 /* fall back to reduce */
3239 PyErr_Clear();
3240 }
3241 else {
3242 goto done;
3243 }
3244 }
3245 else if (type == &PyCFunction_Type) {
3246 status = save_global(self, obj, NULL);
3247 goto done;
3248 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003249
3250 /* XXX: This part needs some unit tests. */
3251
3252 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003253 * self.dispatch_table, copyreg.dispatch_table, the object's
3254 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003255 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003256 if (self->dispatch_table == NULL) {
3257 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3258 /* PyDict_GetItem() unlike PyObject_GetItem() and
3259 PyObject_GetAttr() returns a borrowed ref */
3260 Py_XINCREF(reduce_func);
3261 } else {
3262 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3263 if (reduce_func == NULL) {
3264 if (PyErr_ExceptionMatches(PyExc_KeyError))
3265 PyErr_Clear();
3266 else
3267 goto error;
3268 }
3269 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003270 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003271 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003272 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003273 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003274 else if (PyType_IsSubtype(type, &PyType_Type)) {
3275 status = save_global(self, obj, NULL);
3276 goto done;
3277 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003278 else {
3279 static PyObject *reduce_str = NULL;
3280 static PyObject *reduce_ex_str = NULL;
3281
3282 /* Cache the name of the reduce methods. */
3283 if (reduce_str == NULL) {
3284 reduce_str = PyUnicode_InternFromString("__reduce__");
3285 if (reduce_str == NULL)
3286 goto error;
3287 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3288 if (reduce_ex_str == NULL)
3289 goto error;
3290 }
3291
3292 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3293 automatically defined as __reduce__. While this is convenient, this
3294 make it impossible to know which method was actually called. Of
3295 course, this is not a big deal. But still, it would be nice to let
3296 the user know which method was called when something go
3297 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3298 don't actually have to check for a __reduce__ method. */
3299
3300 /* Check for a __reduce_ex__ method. */
3301 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3302 if (reduce_func != NULL) {
3303 PyObject *proto;
3304 proto = PyLong_FromLong(self->proto);
3305 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003306 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003307 }
3308 }
3309 else {
3310 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3311 PyErr_Clear();
3312 else
3313 goto error;
3314 /* Check for a __reduce__ method. */
3315 reduce_func = PyObject_GetAttr(obj, reduce_str);
3316 if (reduce_func != NULL) {
3317 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3318 }
3319 else {
3320 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3321 type->tp_name, obj);
3322 goto error;
3323 }
3324 }
3325 }
3326
3327 if (reduce_value == NULL)
3328 goto error;
3329
3330 if (PyUnicode_Check(reduce_value)) {
3331 status = save_global(self, obj, reduce_value);
3332 goto done;
3333 }
3334
3335 if (!PyTuple_Check(reduce_value)) {
3336 PyErr_SetString(PicklingError,
3337 "__reduce__ must return a string or tuple");
3338 goto error;
3339 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003340
3341 status = save_reduce(self, reduce_value, obj);
3342
3343 if (0) {
3344 error:
3345 status = -1;
3346 }
3347 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003348 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003349 Py_XDECREF(reduce_func);
3350 Py_XDECREF(reduce_value);
3351
3352 return status;
3353}
3354
3355static int
3356dump(PicklerObject *self, PyObject *obj)
3357{
3358 const char stop_op = STOP;
3359
3360 if (self->proto >= 2) {
3361 char header[2];
3362
3363 header[0] = PROTO;
3364 assert(self->proto >= 0 && self->proto < 256);
3365 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003366 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367 return -1;
3368 }
3369
3370 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003371 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003372 return -1;
3373
3374 return 0;
3375}
3376
3377PyDoc_STRVAR(Pickler_clear_memo_doc,
3378"clear_memo() -> None. Clears the pickler's \"memo\"."
3379"\n"
3380"The memo is the data structure that remembers which objects the\n"
3381"pickler has already seen, so that shared or recursive objects are\n"
3382"pickled by reference and not by value. This method is useful when\n"
3383"re-using picklers.");
3384
3385static PyObject *
3386Pickler_clear_memo(PicklerObject *self)
3387{
3388 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003389 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003390
3391 Py_RETURN_NONE;
3392}
3393
3394PyDoc_STRVAR(Pickler_dump_doc,
3395"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3396
3397static PyObject *
3398Pickler_dump(PicklerObject *self, PyObject *args)
3399{
3400 PyObject *obj;
3401
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003402 /* Check whether the Pickler was initialized correctly (issue3664).
3403 Developers often forget to call __init__() in their subclasses, which
3404 would trigger a segfault without this check. */
3405 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003406 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003407 "Pickler.__init__() was not called by %s.__init__()",
3408 Py_TYPE(self)->tp_name);
3409 return NULL;
3410 }
3411
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003412 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3413 return NULL;
3414
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003415 if (_Pickler_ClearBuffer(self) < 0)
3416 return NULL;
3417
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003418 if (dump(self, obj) < 0)
3419 return NULL;
3420
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003421 if (_Pickler_FlushToFile(self) < 0)
3422 return NULL;
3423
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003424 Py_RETURN_NONE;
3425}
3426
3427static struct PyMethodDef Pickler_methods[] = {
3428 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3429 Pickler_dump_doc},
3430 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3431 Pickler_clear_memo_doc},
3432 {NULL, NULL} /* sentinel */
3433};
3434
3435static void
3436Pickler_dealloc(PicklerObject *self)
3437{
3438 PyObject_GC_UnTrack(self);
3439
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003440 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003441 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003442 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003443 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003444 Py_XDECREF(self->arg);
3445 Py_XDECREF(self->fast_memo);
3446
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003447 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003448
3449 Py_TYPE(self)->tp_free((PyObject *)self);
3450}
3451
3452static int
3453Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3454{
3455 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003456 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003457 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003458 Py_VISIT(self->arg);
3459 Py_VISIT(self->fast_memo);
3460 return 0;
3461}
3462
3463static int
3464Pickler_clear(PicklerObject *self)
3465{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003466 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003467 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003468 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003469 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003470 Py_CLEAR(self->arg);
3471 Py_CLEAR(self->fast_memo);
3472
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003473 if (self->memo != NULL) {
3474 PyMemoTable *memo = self->memo;
3475 self->memo = NULL;
3476 PyMemoTable_Del(memo);
3477 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003478 return 0;
3479}
3480
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003481
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003482PyDoc_STRVAR(Pickler_doc,
3483"Pickler(file, protocol=None)"
3484"\n"
3485"This takes a binary file for writing a pickle data stream.\n"
3486"\n"
3487"The optional protocol argument tells the pickler to use the\n"
3488"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3489"protocol is 3; a backward-incompatible protocol designed for\n"
3490"Python 3.0.\n"
3491"\n"
3492"Specifying a negative protocol version selects the highest\n"
3493"protocol version supported. The higher the protocol used, the\n"
3494"more recent the version of Python needed to read the pickle\n"
3495"produced.\n"
3496"\n"
3497"The file argument must have a write() method that accepts a single\n"
3498"bytes argument. It can thus be a file object opened for binary\n"
3499"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003500"meets this interface.\n"
3501"\n"
3502"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3503"map the new Python 3.x names to the old module names used in Python\n"
3504"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003505
3506static int
3507Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3508{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003509 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003510 PyObject *file;
3511 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003512 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003513 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003514 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003515
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003516 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003517 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003518 return -1;
3519
3520 /* In case of multiple __init__() calls, clear previous content. */
3521 if (self->write != NULL)
3522 (void)Pickler_clear(self);
3523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003524 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3525 return -1;
3526
3527 if (_Pickler_SetOutputStream(self, file) < 0)
3528 return -1;
3529
3530 /* memo and output_buffer may have already been created in _Pickler_New */
3531 if (self->memo == NULL) {
3532 self->memo = PyMemoTable_New();
3533 if (self->memo == NULL)
3534 return -1;
3535 }
3536 self->output_len = 0;
3537 if (self->output_buffer == NULL) {
3538 self->max_output_len = WRITE_BUF_SIZE;
3539 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3540 self->max_output_len);
3541 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003542 return -1;
3543 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003544
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003545 self->arg = NULL;
3546 self->fast = 0;
3547 self->fast_nesting = 0;
3548 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003549 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003550 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3551 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3552 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003553 if (self->pers_func == NULL)
3554 return -1;
3555 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003556 self->dispatch_table = NULL;
3557 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3558 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3559 &PyId_dispatch_table);
3560 if (self->dispatch_table == NULL)
3561 return -1;
3562 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003563 return 0;
3564}
3565
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003566/* Define a proxy object for the Pickler's internal memo object. This is to
3567 * avoid breaking code like:
3568 * pickler.memo.clear()
3569 * and
3570 * pickler.memo = saved_memo
3571 * Is this a good idea? Not really, but we don't want to break code that uses
3572 * it. Note that we don't implement the entire mapping API here. This is
3573 * intentional, as these should be treated as black-box implementation details.
3574 */
3575
3576typedef struct {
3577 PyObject_HEAD
3578 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3579} PicklerMemoProxyObject;
3580
3581PyDoc_STRVAR(pmp_clear_doc,
3582"memo.clear() -> None. Remove all items from memo.");
3583
3584static PyObject *
3585pmp_clear(PicklerMemoProxyObject *self)
3586{
3587 if (self->pickler->memo)
3588 PyMemoTable_Clear(self->pickler->memo);
3589 Py_RETURN_NONE;
3590}
3591
3592PyDoc_STRVAR(pmp_copy_doc,
3593"memo.copy() -> new_memo. Copy the memo to a new object.");
3594
3595static PyObject *
3596pmp_copy(PicklerMemoProxyObject *self)
3597{
3598 Py_ssize_t i;
3599 PyMemoTable *memo;
3600 PyObject *new_memo = PyDict_New();
3601 if (new_memo == NULL)
3602 return NULL;
3603
3604 memo = self->pickler->memo;
3605 for (i = 0; i < memo->mt_allocated; ++i) {
3606 PyMemoEntry entry = memo->mt_table[i];
3607 if (entry.me_key != NULL) {
3608 int status;
3609 PyObject *key, *value;
3610
3611 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003612 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003613
3614 if (key == NULL || value == NULL) {
3615 Py_XDECREF(key);
3616 Py_XDECREF(value);
3617 goto error;
3618 }
3619 status = PyDict_SetItem(new_memo, key, value);
3620 Py_DECREF(key);
3621 Py_DECREF(value);
3622 if (status < 0)
3623 goto error;
3624 }
3625 }
3626 return new_memo;
3627
3628 error:
3629 Py_XDECREF(new_memo);
3630 return NULL;
3631}
3632
3633PyDoc_STRVAR(pmp_reduce_doc,
3634"memo.__reduce__(). Pickling support.");
3635
3636static PyObject *
3637pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3638{
3639 PyObject *reduce_value, *dict_args;
3640 PyObject *contents = pmp_copy(self);
3641 if (contents == NULL)
3642 return NULL;
3643
3644 reduce_value = PyTuple_New(2);
3645 if (reduce_value == NULL) {
3646 Py_DECREF(contents);
3647 return NULL;
3648 }
3649 dict_args = PyTuple_New(1);
3650 if (dict_args == NULL) {
3651 Py_DECREF(contents);
3652 Py_DECREF(reduce_value);
3653 return NULL;
3654 }
3655 PyTuple_SET_ITEM(dict_args, 0, contents);
3656 Py_INCREF((PyObject *)&PyDict_Type);
3657 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3658 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3659 return reduce_value;
3660}
3661
3662static PyMethodDef picklerproxy_methods[] = {
3663 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3664 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3665 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3666 {NULL, NULL} /* sentinel */
3667};
3668
3669static void
3670PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3671{
3672 PyObject_GC_UnTrack(self);
3673 Py_XDECREF(self->pickler);
3674 PyObject_GC_Del((PyObject *)self);
3675}
3676
3677static int
3678PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3679 visitproc visit, void *arg)
3680{
3681 Py_VISIT(self->pickler);
3682 return 0;
3683}
3684
3685static int
3686PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3687{
3688 Py_CLEAR(self->pickler);
3689 return 0;
3690}
3691
3692static PyTypeObject PicklerMemoProxyType = {
3693 PyVarObject_HEAD_INIT(NULL, 0)
3694 "_pickle.PicklerMemoProxy", /*tp_name*/
3695 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3696 0,
3697 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3698 0, /* tp_print */
3699 0, /* tp_getattr */
3700 0, /* tp_setattr */
3701 0, /* tp_compare */
3702 0, /* tp_repr */
3703 0, /* tp_as_number */
3704 0, /* tp_as_sequence */
3705 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003706 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003707 0, /* tp_call */
3708 0, /* tp_str */
3709 PyObject_GenericGetAttr, /* tp_getattro */
3710 PyObject_GenericSetAttr, /* tp_setattro */
3711 0, /* tp_as_buffer */
3712 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3713 0, /* tp_doc */
3714 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3715 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3716 0, /* tp_richcompare */
3717 0, /* tp_weaklistoffset */
3718 0, /* tp_iter */
3719 0, /* tp_iternext */
3720 picklerproxy_methods, /* tp_methods */
3721};
3722
3723static PyObject *
3724PicklerMemoProxy_New(PicklerObject *pickler)
3725{
3726 PicklerMemoProxyObject *self;
3727
3728 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3729 if (self == NULL)
3730 return NULL;
3731 Py_INCREF(pickler);
3732 self->pickler = pickler;
3733 PyObject_GC_Track(self);
3734 return (PyObject *)self;
3735}
3736
3737/*****************************************************************************/
3738
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003739static PyObject *
3740Pickler_get_memo(PicklerObject *self)
3741{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003742 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003743}
3744
3745static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003746Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003747{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003748 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003749
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003750 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003751 PyErr_SetString(PyExc_TypeError,
3752 "attribute deletion is not supported");
3753 return -1;
3754 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003755
3756 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3757 PicklerObject *pickler =
3758 ((PicklerMemoProxyObject *)obj)->pickler;
3759
3760 new_memo = PyMemoTable_Copy(pickler->memo);
3761 if (new_memo == NULL)
3762 return -1;
3763 }
3764 else if (PyDict_Check(obj)) {
3765 Py_ssize_t i = 0;
3766 PyObject *key, *value;
3767
3768 new_memo = PyMemoTable_New();
3769 if (new_memo == NULL)
3770 return -1;
3771
3772 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003773 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003774 PyObject *memo_obj;
3775
3776 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3777 PyErr_SetString(PyExc_TypeError,
3778 "'memo' values must be 2-item tuples");
3779 goto error;
3780 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003781 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003782 if (memo_id == -1 && PyErr_Occurred())
3783 goto error;
3784 memo_obj = PyTuple_GET_ITEM(value, 1);
3785 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3786 goto error;
3787 }
3788 }
3789 else {
3790 PyErr_Format(PyExc_TypeError,
3791 "'memo' attribute must be an PicklerMemoProxy object"
3792 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003793 return -1;
3794 }
3795
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003796 PyMemoTable_Del(self->memo);
3797 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003798
3799 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003800
3801 error:
3802 if (new_memo)
3803 PyMemoTable_Del(new_memo);
3804 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003805}
3806
3807static PyObject *
3808Pickler_get_persid(PicklerObject *self)
3809{
3810 if (self->pers_func == NULL)
3811 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3812 else
3813 Py_INCREF(self->pers_func);
3814 return self->pers_func;
3815}
3816
3817static int
3818Pickler_set_persid(PicklerObject *self, PyObject *value)
3819{
3820 PyObject *tmp;
3821
3822 if (value == NULL) {
3823 PyErr_SetString(PyExc_TypeError,
3824 "attribute deletion is not supported");
3825 return -1;
3826 }
3827 if (!PyCallable_Check(value)) {
3828 PyErr_SetString(PyExc_TypeError,
3829 "persistent_id must be a callable taking one argument");
3830 return -1;
3831 }
3832
3833 tmp = self->pers_func;
3834 Py_INCREF(value);
3835 self->pers_func = value;
3836 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3837
3838 return 0;
3839}
3840
3841static PyMemberDef Pickler_members[] = {
3842 {"bin", T_INT, offsetof(PicklerObject, bin)},
3843 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003844 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003845 {NULL}
3846};
3847
3848static PyGetSetDef Pickler_getsets[] = {
3849 {"memo", (getter)Pickler_get_memo,
3850 (setter)Pickler_set_memo},
3851 {"persistent_id", (getter)Pickler_get_persid,
3852 (setter)Pickler_set_persid},
3853 {NULL}
3854};
3855
3856static PyTypeObject Pickler_Type = {
3857 PyVarObject_HEAD_INIT(NULL, 0)
3858 "_pickle.Pickler" , /*tp_name*/
3859 sizeof(PicklerObject), /*tp_basicsize*/
3860 0, /*tp_itemsize*/
3861 (destructor)Pickler_dealloc, /*tp_dealloc*/
3862 0, /*tp_print*/
3863 0, /*tp_getattr*/
3864 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003865 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003866 0, /*tp_repr*/
3867 0, /*tp_as_number*/
3868 0, /*tp_as_sequence*/
3869 0, /*tp_as_mapping*/
3870 0, /*tp_hash*/
3871 0, /*tp_call*/
3872 0, /*tp_str*/
3873 0, /*tp_getattro*/
3874 0, /*tp_setattro*/
3875 0, /*tp_as_buffer*/
3876 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3877 Pickler_doc, /*tp_doc*/
3878 (traverseproc)Pickler_traverse, /*tp_traverse*/
3879 (inquiry)Pickler_clear, /*tp_clear*/
3880 0, /*tp_richcompare*/
3881 0, /*tp_weaklistoffset*/
3882 0, /*tp_iter*/
3883 0, /*tp_iternext*/
3884 Pickler_methods, /*tp_methods*/
3885 Pickler_members, /*tp_members*/
3886 Pickler_getsets, /*tp_getset*/
3887 0, /*tp_base*/
3888 0, /*tp_dict*/
3889 0, /*tp_descr_get*/
3890 0, /*tp_descr_set*/
3891 0, /*tp_dictoffset*/
3892 (initproc)Pickler_init, /*tp_init*/
3893 PyType_GenericAlloc, /*tp_alloc*/
3894 PyType_GenericNew, /*tp_new*/
3895 PyObject_GC_Del, /*tp_free*/
3896 0, /*tp_is_gc*/
3897};
3898
Victor Stinner121aab42011-09-29 23:40:53 +02003899/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003900
3901 XXX: It would be nice to able to avoid Python function call overhead, by
3902 using directly the C version of find_class(), when find_class() is not
3903 overridden by a subclass. Although, this could become rather hackish. A
3904 simpler optimization would be to call the C function when self is not a
3905 subclass instance. */
3906static PyObject *
3907find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3908{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003909 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003910
3911 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3912 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003913}
3914
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003915static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003916marker(UnpicklerObject *self)
3917{
3918 if (self->num_marks < 1) {
3919 PyErr_SetString(UnpicklingError, "could not find MARK");
3920 return -1;
3921 }
3922
3923 return self->marks[--self->num_marks];
3924}
3925
3926static int
3927load_none(UnpicklerObject *self)
3928{
3929 PDATA_APPEND(self->stack, Py_None, -1);
3930 return 0;
3931}
3932
3933static int
3934bad_readline(void)
3935{
3936 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3937 return -1;
3938}
3939
3940static int
3941load_int(UnpicklerObject *self)
3942{
3943 PyObject *value;
3944 char *endptr, *s;
3945 Py_ssize_t len;
3946 long x;
3947
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003948 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003949 return -1;
3950 if (len < 2)
3951 return bad_readline();
3952
3953 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003954 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003955 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003956 x = strtol(s, &endptr, 0);
3957
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003958 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003959 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03003960 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003961 errno = 0;
3962 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003963 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003964 if (value == NULL) {
3965 PyErr_SetString(PyExc_ValueError,
3966 "could not convert string to int");
3967 return -1;
3968 }
3969 }
3970 else {
3971 if (len == 3 && (x == 0 || x == 1)) {
3972 if ((value = PyBool_FromLong(x)) == NULL)
3973 return -1;
3974 }
3975 else {
3976 if ((value = PyLong_FromLong(x)) == NULL)
3977 return -1;
3978 }
3979 }
3980
3981 PDATA_PUSH(self->stack, value, -1);
3982 return 0;
3983}
3984
3985static int
3986load_bool(UnpicklerObject *self, PyObject *boolean)
3987{
3988 assert(boolean == Py_True || boolean == Py_False);
3989 PDATA_APPEND(self->stack, boolean, -1);
3990 return 0;
3991}
3992
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003993/* s contains x bytes of an unsigned little-endian integer. Return its value
3994 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3995 */
3996static Py_ssize_t
3997calc_binsize(char *bytes, int size)
3998{
3999 unsigned char *s = (unsigned char *)bytes;
4000 size_t x = 0;
4001
4002 assert(size == 4);
4003
4004 x = (size_t) s[0];
4005 x |= (size_t) s[1] << 8;
4006 x |= (size_t) s[2] << 16;
4007 x |= (size_t) s[3] << 24;
4008
4009 if (x > PY_SSIZE_T_MAX)
4010 return -1;
4011 else
4012 return (Py_ssize_t) x;
4013}
4014
/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are assembled in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (undefined behaviour on
 * platforms where long is 32 bits wide).
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: when bit 31 is set the value is ux - 2**32.  It is
     * computed as -(2**32 - 1 - ux) - 1 so that every intermediate result
     * fits in a long, whatever its width.
     */
    if (size == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
4041
4042static int
4043load_binintx(UnpicklerObject *self, char *s, int size)
4044{
4045 PyObject *value;
4046 long x;
4047
4048 x = calc_binint(s, size);
4049
4050 if ((value = PyLong_FromLong(x)) == NULL)
4051 return -1;
4052
4053 PDATA_PUSH(self->stack, value, -1);
4054 return 0;
4055}
4056
4057static int
4058load_binint(UnpicklerObject *self)
4059{
4060 char *s;
4061
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004062 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004063 return -1;
4064
4065 return load_binintx(self, s, 4);
4066}
4067
4068static int
4069load_binint1(UnpicklerObject *self)
4070{
4071 char *s;
4072
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004073 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004074 return -1;
4075
4076 return load_binintx(self, s, 1);
4077}
4078
4079static int
4080load_binint2(UnpicklerObject *self)
4081{
4082 char *s;
4083
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004084 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004085 return -1;
4086
4087 return load_binintx(self, s, 2);
4088}
4089
4090static int
4091load_long(UnpicklerObject *self)
4092{
4093 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004094 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004095 Py_ssize_t len;
4096
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004097 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004098 return -1;
4099 if (len < 2)
4100 return bad_readline();
4101
Mark Dickinson8dd05142009-01-20 20:43:58 +00004102 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4103 the 'L' before calling PyLong_FromString. In order to maintain
4104 compatibility with Python 3.0.0, we don't actually *require*
4105 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004106 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004107 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004108 /* XXX: Should the base argument explicitly set to 10? */
4109 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004110 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004111 return -1;
4112
4113 PDATA_PUSH(self->stack, value, -1);
4114 return 0;
4115}
4116
4117/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4118 * data following.
4119 */
4120static int
4121load_counted_long(UnpicklerObject *self, int size)
4122{
4123 PyObject *value;
4124 char *nbytes;
4125 char *pdata;
4126
4127 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004128 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004129 return -1;
4130
4131 size = calc_binint(nbytes, size);
4132 if (size < 0) {
4133 /* Corrupt or hostile pickle -- we never write one like this */
4134 PyErr_SetString(UnpicklingError,
4135 "LONG pickle has negative byte count");
4136 return -1;
4137 }
4138
4139 if (size == 0)
4140 value = PyLong_FromLong(0L);
4141 else {
4142 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004143 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004144 return -1;
4145 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4146 1 /* little endian */ , 1 /* signed */ );
4147 }
4148 if (value == NULL)
4149 return -1;
4150 PDATA_PUSH(self->stack, value, -1);
4151 return 0;
4152}
4153
4154static int
4155load_float(UnpicklerObject *self)
4156{
4157 PyObject *value;
4158 char *endptr, *s;
4159 Py_ssize_t len;
4160 double d;
4161
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004162 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004163 return -1;
4164 if (len < 2)
4165 return bad_readline();
4166
4167 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004168 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4169 if (d == -1.0 && PyErr_Occurred())
4170 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004171 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004172 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4173 return -1;
4174 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004175 value = PyFloat_FromDouble(d);
4176 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004177 return -1;
4178
4179 PDATA_PUSH(self->stack, value, -1);
4180 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004181}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004182
4183static int
4184load_binfloat(UnpicklerObject *self)
4185{
4186 PyObject *value;
4187 double x;
4188 char *s;
4189
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004190 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004191 return -1;
4192
4193 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4194 if (x == -1.0 && PyErr_Occurred())
4195 return -1;
4196
4197 if ((value = PyFloat_FromDouble(x)) == NULL)
4198 return -1;
4199
4200 PDATA_PUSH(self->stack, value, -1);
4201 return 0;
4202}
4203
4204static int
4205load_string(UnpicklerObject *self)
4206{
4207 PyObject *bytes;
4208 PyObject *str = NULL;
4209 Py_ssize_t len;
4210 char *s, *p;
4211
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004212 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004213 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004214 /* Strip the newline */
4215 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004216 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004217 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004218 p = s + 1;
4219 len -= 2;
4220 }
4221 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004222 PyErr_SetString(UnpicklingError,
4223 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004224 return -1;
4225 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004226 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004227
4228 /* Use the PyBytes API to decode the string, since that is what is used
4229 to encode, and then coerce the result to Unicode. */
4230 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004231 if (bytes == NULL)
4232 return -1;
4233 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4234 Py_DECREF(bytes);
4235 if (str == NULL)
4236 return -1;
4237
4238 PDATA_PUSH(self->stack, str, -1);
4239 return 0;
4240}
4241
4242static int
4243load_binbytes(UnpicklerObject *self)
4244{
4245 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004246 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004247 char *s;
4248
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004249 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004250 return -1;
4251
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004252 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004253 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004254 PyErr_Format(PyExc_OverflowError,
4255 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004256 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004257 return -1;
4258 }
4259
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004260 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004261 return -1;
4262 bytes = PyBytes_FromStringAndSize(s, x);
4263 if (bytes == NULL)
4264 return -1;
4265
4266 PDATA_PUSH(self->stack, bytes, -1);
4267 return 0;
4268}
4269
4270static int
4271load_short_binbytes(UnpicklerObject *self)
4272{
4273 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004274 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004275 char *s;
4276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004277 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004278 return -1;
4279
4280 x = (unsigned char)s[0];
4281
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004282 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004283 return -1;
4284
4285 bytes = PyBytes_FromStringAndSize(s, x);
4286 if (bytes == NULL)
4287 return -1;
4288
4289 PDATA_PUSH(self->stack, bytes, -1);
4290 return 0;
4291}
4292
4293static int
4294load_binstring(UnpicklerObject *self)
4295{
4296 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004297 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004298 char *s;
4299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004300 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004301 return -1;
4302
4303 x = calc_binint(s, 4);
4304 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004305 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004306 "BINSTRING pickle has negative byte count");
4307 return -1;
4308 }
4309
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004310 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004311 return -1;
4312
4313 /* Convert Python 2.x strings to unicode. */
4314 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4315 if (str == NULL)
4316 return -1;
4317
4318 PDATA_PUSH(self->stack, str, -1);
4319 return 0;
4320}
4321
4322static int
4323load_short_binstring(UnpicklerObject *self)
4324{
4325 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004326 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004327 char *s;
4328
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004329 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004330 return -1;
4331
4332 x = (unsigned char)s[0];
4333
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004334 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004335 return -1;
4336
4337 /* Convert Python 2.x strings to unicode. */
4338 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4339 if (str == NULL)
4340 return -1;
4341
4342 PDATA_PUSH(self->stack, str, -1);
4343 return 0;
4344}
4345
4346static int
4347load_unicode(UnpicklerObject *self)
4348{
4349 PyObject *str;
4350 Py_ssize_t len;
4351 char *s;
4352
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004353 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004354 return -1;
4355 if (len < 1)
4356 return bad_readline();
4357
4358 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4359 if (str == NULL)
4360 return -1;
4361
4362 PDATA_PUSH(self->stack, str, -1);
4363 return 0;
4364}
4365
4366static int
4367load_binunicode(UnpicklerObject *self)
4368{
4369 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004370 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004371 char *s;
4372
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004373 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004374 return -1;
4375
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004376 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004377 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004378 PyErr_Format(PyExc_OverflowError,
4379 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004380 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004381 return -1;
4382 }
4383
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004384
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004385 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004386 return -1;
4387
Victor Stinner485fb562010-04-13 11:07:24 +00004388 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004389 if (str == NULL)
4390 return -1;
4391
4392 PDATA_PUSH(self->stack, str, -1);
4393 return 0;
4394}
4395
4396static int
4397load_tuple(UnpicklerObject *self)
4398{
4399 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004400 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004401
4402 if ((i = marker(self)) < 0)
4403 return -1;
4404
4405 tuple = Pdata_poptuple(self->stack, i);
4406 if (tuple == NULL)
4407 return -1;
4408 PDATA_PUSH(self->stack, tuple, -1);
4409 return 0;
4410}
4411
4412static int
4413load_counted_tuple(UnpicklerObject *self, int len)
4414{
4415 PyObject *tuple;
4416
4417 tuple = PyTuple_New(len);
4418 if (tuple == NULL)
4419 return -1;
4420
4421 while (--len >= 0) {
4422 PyObject *item;
4423
4424 PDATA_POP(self->stack, item);
4425 if (item == NULL)
4426 return -1;
4427 PyTuple_SET_ITEM(tuple, len, item);
4428 }
4429 PDATA_PUSH(self->stack, tuple, -1);
4430 return 0;
4431}
4432
4433static int
4434load_empty_list(UnpicklerObject *self)
4435{
4436 PyObject *list;
4437
4438 if ((list = PyList_New(0)) == NULL)
4439 return -1;
4440 PDATA_PUSH(self->stack, list, -1);
4441 return 0;
4442}
4443
4444static int
4445load_empty_dict(UnpicklerObject *self)
4446{
4447 PyObject *dict;
4448
4449 if ((dict = PyDict_New()) == NULL)
4450 return -1;
4451 PDATA_PUSH(self->stack, dict, -1);
4452 return 0;
4453}
4454
4455static int
4456load_list(UnpicklerObject *self)
4457{
4458 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004459 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004460
4461 if ((i = marker(self)) < 0)
4462 return -1;
4463
4464 list = Pdata_poplist(self->stack, i);
4465 if (list == NULL)
4466 return -1;
4467 PDATA_PUSH(self->stack, list, -1);
4468 return 0;
4469}
4470
4471static int
4472load_dict(UnpicklerObject *self)
4473{
4474 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004475 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004476
4477 if ((i = marker(self)) < 0)
4478 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004479 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004480
4481 if ((dict = PyDict_New()) == NULL)
4482 return -1;
4483
4484 for (k = i + 1; k < j; k += 2) {
4485 key = self->stack->data[k - 1];
4486 value = self->stack->data[k];
4487 if (PyDict_SetItem(dict, key, value) < 0) {
4488 Py_DECREF(dict);
4489 return -1;
4490 }
4491 }
4492 Pdata_clear(self->stack, i);
4493 PDATA_PUSH(self->stack, dict, -1);
4494 return 0;
4495}
4496
4497static PyObject *
4498instantiate(PyObject *cls, PyObject *args)
4499{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004500 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004501 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004502 /* Caller must assure args are a tuple. Normally, args come from
4503 Pdata_poptuple which packs objects from the top of the stack
4504 into a newly created tuple. */
4505 assert(PyTuple_Check(args));
4506 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004507 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004508 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004509 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004510 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004511 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004512
4513 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004514 }
4515 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004516}
4517
4518static int
4519load_obj(UnpicklerObject *self)
4520{
4521 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004522 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004523
4524 if ((i = marker(self)) < 0)
4525 return -1;
4526
4527 args = Pdata_poptuple(self->stack, i + 1);
4528 if (args == NULL)
4529 return -1;
4530
4531 PDATA_POP(self->stack, cls);
4532 if (cls) {
4533 obj = instantiate(cls, args);
4534 Py_DECREF(cls);
4535 }
4536 Py_DECREF(args);
4537 if (obj == NULL)
4538 return -1;
4539
4540 PDATA_PUSH(self->stack, obj, -1);
4541 return 0;
4542}
4543
4544static int
4545load_inst(UnpicklerObject *self)
4546{
4547 PyObject *cls = NULL;
4548 PyObject *args = NULL;
4549 PyObject *obj = NULL;
4550 PyObject *module_name;
4551 PyObject *class_name;
4552 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004553 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004554 char *s;
4555
4556 if ((i = marker(self)) < 0)
4557 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004558 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004559 return -1;
4560 if (len < 2)
4561 return bad_readline();
4562
4563 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4564 identifiers are permitted in Python 3.0, since the INST opcode is only
4565 supported by older protocols on Python 2.x. */
4566 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4567 if (module_name == NULL)
4568 return -1;
4569
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004570 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004571 if (len < 2)
4572 return bad_readline();
4573 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004574 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004575 cls = find_class(self, module_name, class_name);
4576 Py_DECREF(class_name);
4577 }
4578 }
4579 Py_DECREF(module_name);
4580
4581 if (cls == NULL)
4582 return -1;
4583
4584 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4585 obj = instantiate(cls, args);
4586 Py_DECREF(args);
4587 }
4588 Py_DECREF(cls);
4589
4590 if (obj == NULL)
4591 return -1;
4592
4593 PDATA_PUSH(self->stack, obj, -1);
4594 return 0;
4595}
4596
4597static int
4598load_newobj(UnpicklerObject *self)
4599{
4600 PyObject *args = NULL;
4601 PyObject *clsraw = NULL;
4602 PyTypeObject *cls; /* clsraw cast to its true type */
4603 PyObject *obj;
4604
4605 /* Stack is ... cls argtuple, and we want to call
4606 * cls.__new__(cls, *argtuple).
4607 */
4608 PDATA_POP(self->stack, args);
4609 if (args == NULL)
4610 goto error;
4611 if (!PyTuple_Check(args)) {
4612 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4613 goto error;
4614 }
4615
4616 PDATA_POP(self->stack, clsraw);
4617 cls = (PyTypeObject *)clsraw;
4618 if (cls == NULL)
4619 goto error;
4620 if (!PyType_Check(cls)) {
4621 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4622 "isn't a type object");
4623 goto error;
4624 }
4625 if (cls->tp_new == NULL) {
4626 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4627 "has NULL tp_new");
4628 goto error;
4629 }
4630
4631 /* Call __new__. */
4632 obj = cls->tp_new(cls, args, NULL);
4633 if (obj == NULL)
4634 goto error;
4635
4636 Py_DECREF(args);
4637 Py_DECREF(clsraw);
4638 PDATA_PUSH(self->stack, obj, -1);
4639 return 0;
4640
4641 error:
4642 Py_XDECREF(args);
4643 Py_XDECREF(clsraw);
4644 return -1;
4645}
4646
4647static int
4648load_global(UnpicklerObject *self)
4649{
4650 PyObject *global = NULL;
4651 PyObject *module_name;
4652 PyObject *global_name;
4653 Py_ssize_t len;
4654 char *s;
4655
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004656 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004657 return -1;
4658 if (len < 2)
4659 return bad_readline();
4660 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4661 if (!module_name)
4662 return -1;
4663
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004664 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004665 if (len < 2) {
4666 Py_DECREF(module_name);
4667 return bad_readline();
4668 }
4669 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4670 if (global_name) {
4671 global = find_class(self, module_name, global_name);
4672 Py_DECREF(global_name);
4673 }
4674 }
4675 Py_DECREF(module_name);
4676
4677 if (global == NULL)
4678 return -1;
4679 PDATA_PUSH(self->stack, global, -1);
4680 return 0;
4681}
4682
4683static int
4684load_persid(UnpicklerObject *self)
4685{
4686 PyObject *pid;
4687 Py_ssize_t len;
4688 char *s;
4689
4690 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004691 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004692 return -1;
4693 if (len < 2)
4694 return bad_readline();
4695
4696 pid = PyBytes_FromStringAndSize(s, len - 1);
4697 if (pid == NULL)
4698 return -1;
4699
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004700 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004701 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004702 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004703 if (pid == NULL)
4704 return -1;
4705
4706 PDATA_PUSH(self->stack, pid, -1);
4707 return 0;
4708 }
4709 else {
4710 PyErr_SetString(UnpicklingError,
4711 "A load persistent id instruction was encountered,\n"
4712 "but no persistent_load function was specified.");
4713 return -1;
4714 }
4715}
4716
4717static int
4718load_binpersid(UnpicklerObject *self)
4719{
4720 PyObject *pid;
4721
4722 if (self->pers_func) {
4723 PDATA_POP(self->stack, pid);
4724 if (pid == NULL)
4725 return -1;
4726
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004727 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004729 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004730 if (pid == NULL)
4731 return -1;
4732
4733 PDATA_PUSH(self->stack, pid, -1);
4734 return 0;
4735 }
4736 else {
4737 PyErr_SetString(UnpicklingError,
4738 "A load persistent id instruction was encountered,\n"
4739 "but no persistent_load function was specified.");
4740 return -1;
4741 }
4742}
4743
4744static int
4745load_pop(UnpicklerObject *self)
4746{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004747 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004748
4749 /* Note that we split the (pickle.py) stack into two stacks,
4750 * an object stack and a mark stack. We have to be clever and
4751 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004752 * mark stack first, and only signalling a stack underflow if
4753 * the object stack is empty and the mark stack doesn't match
4754 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004755 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004756 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004758 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 len--;
4760 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004761 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004762 } else {
4763 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765 return 0;
4766}
4767
4768static int
4769load_pop_mark(UnpicklerObject *self)
4770{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004771 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004772
4773 if ((i = marker(self)) < 0)
4774 return -1;
4775
4776 Pdata_clear(self->stack, i);
4777
4778 return 0;
4779}
4780
4781static int
4782load_dup(UnpicklerObject *self)
4783{
4784 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004785 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004786
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004787 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004788 return stack_underflow();
4789 last = self->stack->data[len - 1];
4790 PDATA_APPEND(self->stack, last, -1);
4791 return 0;
4792}
4793
4794static int
4795load_get(UnpicklerObject *self)
4796{
4797 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004798 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004799 Py_ssize_t len;
4800 char *s;
4801
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004802 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004803 return -1;
4804 if (len < 2)
4805 return bad_readline();
4806
4807 key = PyLong_FromString(s, NULL, 10);
4808 if (key == NULL)
4809 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004810 idx = PyLong_AsSsize_t(key);
4811 if (idx == -1 && PyErr_Occurred()) {
4812 Py_DECREF(key);
4813 return -1;
4814 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004815
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004816 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004817 if (value == NULL) {
4818 if (!PyErr_Occurred())
4819 PyErr_SetObject(PyExc_KeyError, key);
4820 Py_DECREF(key);
4821 return -1;
4822 }
4823 Py_DECREF(key);
4824
4825 PDATA_APPEND(self->stack, value, -1);
4826 return 0;
4827}
4828
4829static int
4830load_binget(UnpicklerObject *self)
4831{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004832 PyObject *value;
4833 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004834 char *s;
4835
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004836 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004837 return -1;
4838
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004839 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004840
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004841 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004842 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004843 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004844 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004845 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004846 Py_DECREF(key);
4847 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848 return -1;
4849 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004850
4851 PDATA_APPEND(self->stack, value, -1);
4852 return 0;
4853}
4854
4855static int
4856load_long_binget(UnpicklerObject *self)
4857{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004858 PyObject *value;
4859 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004860 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004863 return -1;
4864
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004865 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004867 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004868 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004869 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004870 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004871 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004872 Py_DECREF(key);
4873 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874 return -1;
4875 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004876
4877 PDATA_APPEND(self->stack, value, -1);
4878 return 0;
4879}
4880
4881/* Push an object from the extension registry (EXT[124]). nbytes is
4882 * the number of bytes following the opcode, holding the index (code) value.
4883 */
4884static int
4885load_extension(UnpicklerObject *self, int nbytes)
4886{
4887 char *codebytes; /* the nbytes bytes after the opcode */
4888 long code; /* calc_binint returns long */
4889 PyObject *py_code; /* code as a Python int */
4890 PyObject *obj; /* the object to push */
4891 PyObject *pair; /* (module_name, class_name) */
4892 PyObject *module_name, *class_name;
4893
4894 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004895 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004896 return -1;
4897 code = calc_binint(codebytes, nbytes);
4898 if (code <= 0) { /* note that 0 is forbidden */
4899 /* Corrupt or hostile pickle. */
4900 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4901 return -1;
4902 }
4903
4904 /* Look for the code in the cache. */
4905 py_code = PyLong_FromLong(code);
4906 if (py_code == NULL)
4907 return -1;
4908 obj = PyDict_GetItem(extension_cache, py_code);
4909 if (obj != NULL) {
4910 /* Bingo. */
4911 Py_DECREF(py_code);
4912 PDATA_APPEND(self->stack, obj, -1);
4913 return 0;
4914 }
4915
4916 /* Look up the (module_name, class_name) pair. */
4917 pair = PyDict_GetItem(inverted_registry, py_code);
4918 if (pair == NULL) {
4919 Py_DECREF(py_code);
4920 PyErr_Format(PyExc_ValueError, "unregistered extension "
4921 "code %ld", code);
4922 return -1;
4923 }
4924 /* Since the extension registry is manipulable via Python code,
4925 * confirm that pair is really a 2-tuple of strings.
4926 */
4927 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4928 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4929 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4930 Py_DECREF(py_code);
4931 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4932 "isn't a 2-tuple of strings", code);
4933 return -1;
4934 }
4935 /* Load the object. */
4936 obj = find_class(self, module_name, class_name);
4937 if (obj == NULL) {
4938 Py_DECREF(py_code);
4939 return -1;
4940 }
4941 /* Cache code -> obj. */
4942 code = PyDict_SetItem(extension_cache, py_code, obj);
4943 Py_DECREF(py_code);
4944 if (code < 0) {
4945 Py_DECREF(obj);
4946 return -1;
4947 }
4948 PDATA_PUSH(self->stack, obj, -1);
4949 return 0;
4950}
4951
4952static int
4953load_put(UnpicklerObject *self)
4954{
4955 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004956 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004957 Py_ssize_t len;
4958 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004959
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004960 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004961 return -1;
4962 if (len < 2)
4963 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004964 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004965 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004966 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004967
4968 key = PyLong_FromString(s, NULL, 10);
4969 if (key == NULL)
4970 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004971 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004972 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004973 if (idx < 0) {
4974 if (!PyErr_Occurred())
4975 PyErr_SetString(PyExc_ValueError,
4976 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004977 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004978 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004979
4980 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004981}
4982
4983static int
4984load_binput(UnpicklerObject *self)
4985{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004986 PyObject *value;
4987 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004988 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004990 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004991 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004992
4993 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004994 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004995 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004996
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004997 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004998
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004999 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005000}
5001
5002static int
5003load_long_binput(UnpicklerObject *self)
5004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005005 PyObject *value;
5006 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005007 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005009 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005011
5012 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005013 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005014 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005015
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005016 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005017 if (idx < 0) {
5018 PyErr_SetString(PyExc_ValueError,
5019 "negative LONG_BINPUT argument");
5020 return -1;
5021 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005022
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005023 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005024}
5025
5026static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005027do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005028{
5029 PyObject *value;
5030 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005031 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005032
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005033 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005034 if (x > len || x <= 0)
5035 return stack_underflow();
5036 if (len == x) /* nothing to do */
5037 return 0;
5038
5039 list = self->stack->data[x - 1];
5040
5041 if (PyList_Check(list)) {
5042 PyObject *slice;
5043 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005044 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005045
5046 slice = Pdata_poplist(self->stack, x);
5047 if (!slice)
5048 return -1;
5049 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005050 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005051 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005052 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005053 }
5054 else {
5055 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005056 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005057
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005058 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005059 if (append_func == NULL)
5060 return -1;
5061 for (i = x; i < len; i++) {
5062 PyObject *result;
5063
5064 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005065 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005066 if (result == NULL) {
5067 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005068 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005069 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005070 return -1;
5071 }
5072 Py_DECREF(result);
5073 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005074 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005075 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005076 }
5077
5078 return 0;
5079}
5080
5081static int
5082load_append(UnpicklerObject *self)
5083{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005084 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005085}
5086
5087static int
5088load_appends(UnpicklerObject *self)
5089{
5090 return do_append(self, marker(self));
5091}
5092
5093static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005094do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005095{
5096 PyObject *value, *key;
5097 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005098 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005099 int status = 0;
5100
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005101 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005102 if (x > len || x <= 0)
5103 return stack_underflow();
5104 if (len == x) /* nothing to do */
5105 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005106 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005107 /* Currupt or hostile pickle -- we never write one like this. */
5108 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5109 return -1;
5110 }
5111
5112 /* Here, dict does not actually need to be a PyDict; it could be anything
5113 that supports the __setitem__ attribute. */
5114 dict = self->stack->data[x - 1];
5115
5116 for (i = x + 1; i < len; i += 2) {
5117 key = self->stack->data[i - 1];
5118 value = self->stack->data[i];
5119 if (PyObject_SetItem(dict, key, value) < 0) {
5120 status = -1;
5121 break;
5122 }
5123 }
5124
5125 Pdata_clear(self->stack, x);
5126 return status;
5127}
5128
5129static int
5130load_setitem(UnpicklerObject *self)
5131{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005132 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005133}
5134
5135static int
5136load_setitems(UnpicklerObject *self)
5137{
5138 return do_setitems(self, marker(self));
5139}
5140
5141static int
5142load_build(UnpicklerObject *self)
5143{
5144 PyObject *state, *inst, *slotstate;
5145 PyObject *setstate;
5146 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005147 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005148
5149 /* Stack is ... instance, state. We want to leave instance at
5150 * the stack top, possibly mutated via instance.__setstate__(state).
5151 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005152 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005153 return stack_underflow();
5154
5155 PDATA_POP(self->stack, state);
5156 if (state == NULL)
5157 return -1;
5158
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005159 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005160
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005161 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005162 if (setstate == NULL) {
5163 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5164 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005165 else {
5166 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005167 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005168 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005169 }
5170 else {
5171 PyObject *result;
5172
5173 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005174 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005175 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005176 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005177 Py_DECREF(setstate);
5178 if (result == NULL)
5179 return -1;
5180 Py_DECREF(result);
5181 return 0;
5182 }
5183
5184 /* A default __setstate__. First see whether state embeds a
5185 * slot state dict too (a proto 2 addition).
5186 */
5187 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5188 PyObject *tmp = state;
5189
5190 state = PyTuple_GET_ITEM(tmp, 0);
5191 slotstate = PyTuple_GET_ITEM(tmp, 1);
5192 Py_INCREF(state);
5193 Py_INCREF(slotstate);
5194 Py_DECREF(tmp);
5195 }
5196 else
5197 slotstate = NULL;
5198
5199 /* Set inst.__dict__ from the state dict (if any). */
5200 if (state != Py_None) {
5201 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005202 PyObject *d_key, *d_value;
5203 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005204 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005205
5206 if (!PyDict_Check(state)) {
5207 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5208 goto error;
5209 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005210 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005211 if (dict == NULL)
5212 goto error;
5213
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005214 i = 0;
5215 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5216 /* normally the keys for instance attributes are
5217 interned. we should try to do that here. */
5218 Py_INCREF(d_key);
5219 if (PyUnicode_CheckExact(d_key))
5220 PyUnicode_InternInPlace(&d_key);
5221 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5222 Py_DECREF(d_key);
5223 goto error;
5224 }
5225 Py_DECREF(d_key);
5226 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005227 Py_DECREF(dict);
5228 }
5229
5230 /* Also set instance attributes from the slotstate dict (if any). */
5231 if (slotstate != NULL) {
5232 PyObject *d_key, *d_value;
5233 Py_ssize_t i;
5234
5235 if (!PyDict_Check(slotstate)) {
5236 PyErr_SetString(UnpicklingError,
5237 "slot state is not a dictionary");
5238 goto error;
5239 }
5240 i = 0;
5241 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5242 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5243 goto error;
5244 }
5245 }
5246
5247 if (0) {
5248 error:
5249 status = -1;
5250 }
5251
5252 Py_DECREF(state);
5253 Py_XDECREF(slotstate);
5254 return status;
5255}
5256
5257static int
5258load_mark(UnpicklerObject *self)
5259{
5260
5261 /* Note that we split the (pickle.py) stack into two stacks, an
5262 * object stack and a mark stack. Here we push a mark onto the
5263 * mark stack.
5264 */
5265
5266 if ((self->num_marks + 1) >= self->marks_size) {
5267 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005268 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005269
5270 /* Use the size_t type to check for overflow. */
5271 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005272 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005273 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005274 PyErr_NoMemory();
5275 return -1;
5276 }
5277
5278 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005279 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005280 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005281 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5282 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005283 if (marks == NULL) {
5284 PyErr_NoMemory();
5285 return -1;
5286 }
5287 self->marks = marks;
5288 self->marks_size = (Py_ssize_t)alloc;
5289 }
5290
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005291 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005292
5293 return 0;
5294}
5295
5296static int
5297load_reduce(UnpicklerObject *self)
5298{
5299 PyObject *callable = NULL;
5300 PyObject *argtup = NULL;
5301 PyObject *obj = NULL;
5302
5303 PDATA_POP(self->stack, argtup);
5304 if (argtup == NULL)
5305 return -1;
5306 PDATA_POP(self->stack, callable);
5307 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005308 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005309 Py_DECREF(callable);
5310 }
5311 Py_DECREF(argtup);
5312
5313 if (obj == NULL)
5314 return -1;
5315
5316 PDATA_PUSH(self->stack, obj, -1);
5317 return 0;
5318}
5319
5320/* Just raises an error if we don't know the protocol specified. PROTO
5321 * is the first opcode for protocols >= 2.
5322 */
5323static int
5324load_proto(UnpicklerObject *self)
5325{
5326 char *s;
5327 int i;
5328
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005329 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005330 return -1;
5331
5332 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005333 if (i <= HIGHEST_PROTOCOL) {
5334 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005335 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005336 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005337
5338 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5339 return -1;
5340}
5341
5342static PyObject *
5343load(UnpicklerObject *self)
5344{
5345 PyObject *err;
5346 PyObject *value = NULL;
5347 char *s;
5348
5349 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005350 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005351 Pdata_clear(self->stack, 0);
5352
5353 /* Convenient macros for the dispatch while-switch loop just below. */
5354#define OP(opcode, load_func) \
5355 case opcode: if (load_func(self) < 0) break; continue;
5356
5357#define OP_ARG(opcode, load_func, arg) \
5358 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5359
5360 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005361 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005362 break;
5363
5364 switch ((enum opcode)s[0]) {
5365 OP(NONE, load_none)
5366 OP(BININT, load_binint)
5367 OP(BININT1, load_binint1)
5368 OP(BININT2, load_binint2)
5369 OP(INT, load_int)
5370 OP(LONG, load_long)
5371 OP_ARG(LONG1, load_counted_long, 1)
5372 OP_ARG(LONG4, load_counted_long, 4)
5373 OP(FLOAT, load_float)
5374 OP(BINFLOAT, load_binfloat)
5375 OP(BINBYTES, load_binbytes)
5376 OP(SHORT_BINBYTES, load_short_binbytes)
5377 OP(BINSTRING, load_binstring)
5378 OP(SHORT_BINSTRING, load_short_binstring)
5379 OP(STRING, load_string)
5380 OP(UNICODE, load_unicode)
5381 OP(BINUNICODE, load_binunicode)
5382 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5383 OP_ARG(TUPLE1, load_counted_tuple, 1)
5384 OP_ARG(TUPLE2, load_counted_tuple, 2)
5385 OP_ARG(TUPLE3, load_counted_tuple, 3)
5386 OP(TUPLE, load_tuple)
5387 OP(EMPTY_LIST, load_empty_list)
5388 OP(LIST, load_list)
5389 OP(EMPTY_DICT, load_empty_dict)
5390 OP(DICT, load_dict)
5391 OP(OBJ, load_obj)
5392 OP(INST, load_inst)
5393 OP(NEWOBJ, load_newobj)
5394 OP(GLOBAL, load_global)
5395 OP(APPEND, load_append)
5396 OP(APPENDS, load_appends)
5397 OP(BUILD, load_build)
5398 OP(DUP, load_dup)
5399 OP(BINGET, load_binget)
5400 OP(LONG_BINGET, load_long_binget)
5401 OP(GET, load_get)
5402 OP(MARK, load_mark)
5403 OP(BINPUT, load_binput)
5404 OP(LONG_BINPUT, load_long_binput)
5405 OP(PUT, load_put)
5406 OP(POP, load_pop)
5407 OP(POP_MARK, load_pop_mark)
5408 OP(SETITEM, load_setitem)
5409 OP(SETITEMS, load_setitems)
5410 OP(PERSID, load_persid)
5411 OP(BINPERSID, load_binpersid)
5412 OP(REDUCE, load_reduce)
5413 OP(PROTO, load_proto)
5414 OP_ARG(EXT1, load_extension, 1)
5415 OP_ARG(EXT2, load_extension, 2)
5416 OP_ARG(EXT4, load_extension, 4)
5417 OP_ARG(NEWTRUE, load_bool, Py_True)
5418 OP_ARG(NEWFALSE, load_bool, Py_False)
5419
5420 case STOP:
5421 break;
5422
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005423 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005424 if (s[0] == '\0')
5425 PyErr_SetNone(PyExc_EOFError);
5426 else
5427 PyErr_Format(UnpicklingError,
5428 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005429 return NULL;
5430 }
5431
5432 break; /* and we are done! */
5433 }
5434
5435 /* XXX: It is not clear what this is actually for. */
5436 if ((err = PyErr_Occurred())) {
5437 if (err == PyExc_EOFError) {
5438 PyErr_SetNone(PyExc_EOFError);
5439 }
5440 return NULL;
5441 }
5442
Victor Stinner2ae57e32013-10-31 13:39:23 +01005443 if (_Unpickler_SkipConsumed(self) < 0)
5444 return NULL;
5445
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005446 PDATA_POP(self->stack, value);
5447 return value;
5448}
5449
5450PyDoc_STRVAR(Unpickler_load_doc,
5451"load() -> object. Load a pickle."
5452"\n"
5453"Read a pickled object representation from the open file object given in\n"
5454"the constructor, and return the reconstituted object hierarchy specified\n"
5455"therein.\n");
5456
5457static PyObject *
5458Unpickler_load(UnpicklerObject *self)
5459{
5460 /* Check whether the Unpickler was initialized correctly. This prevents
5461 segfaulting if a subclass overridden __init__ with a function that does
5462 not call Unpickler.__init__(). Here, we simply ensure that self->read
5463 is not NULL. */
5464 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005465 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005466 "Unpickler.__init__() was not called by %s.__init__()",
5467 Py_TYPE(self)->tp_name);
5468 return NULL;
5469 }
5470
5471 return load(self);
5472}
5473
5474/* The name of find_class() is misleading. In newer pickle protocols, this
5475 function is used for loading any global (i.e., functions), not just
5476 classes. The name is kept only for backward compatibility. */
5477
5478PyDoc_STRVAR(Unpickler_find_class_doc,
5479"find_class(module_name, global_name) -> object.\n"
5480"\n"
5481"Return an object from a specified module, importing the module if\n"
5482"necessary. Subclasses may override this method (e.g. to restrict\n"
5483"unpickling of arbitrary classes and functions).\n"
5484"\n"
5485"This method is called whenever a class or a function object is\n"
5486"needed. Both arguments passed are str objects.\n");
5487
5488static PyObject *
5489Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5490{
5491 PyObject *global;
5492 PyObject *modules_dict;
5493 PyObject *module;
5494 PyObject *module_name, *global_name;
5495
5496 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5497 &module_name, &global_name))
5498 return NULL;
5499
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005500 /* Try to map the old names used in Python 2.x to the new ones used in
5501 Python 3.x. We do this only with old pickle protocols and when the
5502 user has not disabled the feature. */
5503 if (self->proto < 3 && self->fix_imports) {
5504 PyObject *key;
5505 PyObject *item;
5506
5507 /* Check if the global (i.e., a function or a class) was renamed
5508 or moved to another module. */
5509 key = PyTuple_Pack(2, module_name, global_name);
5510 if (key == NULL)
5511 return NULL;
5512 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5513 Py_DECREF(key);
5514 if (item) {
5515 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5516 PyErr_Format(PyExc_RuntimeError,
5517 "_compat_pickle.NAME_MAPPING values should be "
5518 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5519 return NULL;
5520 }
5521 module_name = PyTuple_GET_ITEM(item, 0);
5522 global_name = PyTuple_GET_ITEM(item, 1);
5523 if (!PyUnicode_Check(module_name) ||
5524 !PyUnicode_Check(global_name)) {
5525 PyErr_Format(PyExc_RuntimeError,
5526 "_compat_pickle.NAME_MAPPING values should be "
5527 "pairs of str, not (%.200s, %.200s)",
5528 Py_TYPE(module_name)->tp_name,
5529 Py_TYPE(global_name)->tp_name);
5530 return NULL;
5531 }
5532 }
5533 else if (PyErr_Occurred()) {
5534 return NULL;
5535 }
5536
5537 /* Check if the module was renamed. */
5538 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5539 if (item) {
5540 if (!PyUnicode_Check(item)) {
5541 PyErr_Format(PyExc_RuntimeError,
5542 "_compat_pickle.IMPORT_MAPPING values should be "
5543 "strings, not %.200s", Py_TYPE(item)->tp_name);
5544 return NULL;
5545 }
5546 module_name = item;
5547 }
5548 else if (PyErr_Occurred()) {
5549 return NULL;
5550 }
5551 }
5552
Victor Stinnerbb520202013-11-06 22:40:41 +01005553 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02005554 if (modules_dict == NULL) {
5555 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005556 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02005557 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005558
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005559 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005560 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005561 if (PyErr_Occurred())
5562 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005563 module = PyImport_Import(module_name);
5564 if (module == NULL)
5565 return NULL;
5566 global = PyObject_GetAttr(module, global_name);
5567 Py_DECREF(module);
5568 }
Victor Stinner121aab42011-09-29 23:40:53 +02005569 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005570 global = PyObject_GetAttr(module, global_name);
5571 }
5572 return global;
5573}
5574
5575static struct PyMethodDef Unpickler_methods[] = {
5576 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5577 Unpickler_load_doc},
5578 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5579 Unpickler_find_class_doc},
5580 {NULL, NULL} /* sentinel */
5581};
5582
5583static void
5584Unpickler_dealloc(UnpicklerObject *self)
5585{
5586 PyObject_GC_UnTrack((PyObject *)self);
5587 Py_XDECREF(self->readline);
5588 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005589 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005590 Py_XDECREF(self->stack);
5591 Py_XDECREF(self->pers_func);
5592 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005593 if (self->buffer.buf != NULL) {
5594 PyBuffer_Release(&self->buffer);
5595 self->buffer.buf = NULL;
5596 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005597
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005598 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005599 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005600 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005601 PyMem_Free(self->encoding);
5602 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005603
5604 Py_TYPE(self)->tp_free((PyObject *)self);
5605}
5606
5607static int
5608Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5609{
5610 Py_VISIT(self->readline);
5611 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005612 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005613 Py_VISIT(self->stack);
5614 Py_VISIT(self->pers_func);
5615 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005616 return 0;
5617}
5618
5619static int
5620Unpickler_clear(UnpicklerObject *self)
5621{
5622 Py_CLEAR(self->readline);
5623 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005624 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005625 Py_CLEAR(self->stack);
5626 Py_CLEAR(self->pers_func);
5627 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005628 if (self->buffer.buf != NULL) {
5629 PyBuffer_Release(&self->buffer);
5630 self->buffer.buf = NULL;
5631 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005632
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005633 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005634 PyMem_Free(self->marks);
5635 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005636 PyMem_Free(self->input_line);
5637 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005638 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005639 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005640 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005641 self->errors = NULL;
5642
5643 return 0;
5644}
5645
5646PyDoc_STRVAR(Unpickler_doc,
5647"Unpickler(file, *, encoding='ASCII', errors='strict')"
5648"\n"
5649"This takes a binary file for reading a pickle data stream.\n"
5650"\n"
5651"The protocol version of the pickle is detected automatically, so no\n"
5652"proto argument is needed.\n"
5653"\n"
5654"The file-like object must have two methods, a read() method\n"
5655"that takes an integer argument, and a readline() method that\n"
5656"requires no arguments. Both methods should return bytes.\n"
5657"Thus file-like object can be a binary file object opened for\n"
5658"reading, a BytesIO object, or any other custom object that\n"
5659"meets this interface.\n"
5660"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005661"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5662"which are used to control compatiblity support for pickle stream\n"
5663"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5664"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5665"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5666"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5667"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005668
5669static int
5670Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5671{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005672 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005673 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005674 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005675 char *encoding = NULL;
5676 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005677 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005678
5679 /* XXX: That is an horrible error message. But, I don't know how to do
5680 better... */
5681 if (Py_SIZE(args) != 1) {
5682 PyErr_Format(PyExc_TypeError,
5683 "%s takes exactly one positional argument (%zd given)",
5684 Py_TYPE(self)->tp_name, Py_SIZE(args));
5685 return -1;
5686 }
5687
5688 /* Arguments parsing needs to be done in the __init__() method to allow
5689 subclasses to define their own __init__() method, which may (or may
5690 not) support Unpickler arguments. However, this means we need to be
5691 extra careful in the other Unpickler methods, since a subclass could
5692 forget to call Unpickler.__init__() thus breaking our internal
5693 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005694 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005695 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005696 return -1;
5697
5698 /* In case of multiple __init__() calls, clear previous content. */
5699 if (self->read != NULL)
5700 (void)Unpickler_clear(self);
5701
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005702 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005703 return -1;
5704
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005705 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005706 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005707
5708 self->fix_imports = PyObject_IsTrue(fix_imports);
5709 if (self->fix_imports == -1)
5710 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005711
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005712 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005713 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5714 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005715 if (self->pers_func == NULL)
5716 return -1;
5717 }
5718 else {
5719 self->pers_func = NULL;
5720 }
5721
5722 self->stack = (Pdata *)Pdata_New();
5723 if (self->stack == NULL)
5724 return -1;
5725
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005726 self->memo_size = 32;
5727 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005728 if (self->memo == NULL)
5729 return -1;
5730
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005731 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005732 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005733
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005734 return 0;
5735}
5736
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005737/* Define a proxy object for the Unpickler's internal memo object. This is to
5738 * avoid breaking code like:
5739 * unpickler.memo.clear()
5740 * and
5741 * unpickler.memo = saved_memo
5742 * Is this a good idea? Not really, but we don't want to break code that uses
5743 * it. Note that we don't implement the entire mapping API here. This is
5744 * intentional, as these should be treated as black-box implementation details.
5745 *
5746 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005747 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005748 */
5749
5750typedef struct {
5751 PyObject_HEAD
5752 UnpicklerObject *unpickler;
5753} UnpicklerMemoProxyObject;
5754
5755PyDoc_STRVAR(ump_clear_doc,
5756"memo.clear() -> None. Remove all items from memo.");
5757
5758static PyObject *
5759ump_clear(UnpicklerMemoProxyObject *self)
5760{
5761 _Unpickler_MemoCleanup(self->unpickler);
5762 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5763 if (self->unpickler->memo == NULL)
5764 return NULL;
5765 Py_RETURN_NONE;
5766}
5767
5768PyDoc_STRVAR(ump_copy_doc,
5769"memo.copy() -> new_memo. Copy the memo to a new object.");
5770
5771static PyObject *
5772ump_copy(UnpicklerMemoProxyObject *self)
5773{
5774 Py_ssize_t i;
5775 PyObject *new_memo = PyDict_New();
5776 if (new_memo == NULL)
5777 return NULL;
5778
5779 for (i = 0; i < self->unpickler->memo_size; i++) {
5780 int status;
5781 PyObject *key, *value;
5782
5783 value = self->unpickler->memo[i];
5784 if (value == NULL)
5785 continue;
5786
5787 key = PyLong_FromSsize_t(i);
5788 if (key == NULL)
5789 goto error;
5790 status = PyDict_SetItem(new_memo, key, value);
5791 Py_DECREF(key);
5792 if (status < 0)
5793 goto error;
5794 }
5795 return new_memo;
5796
5797error:
5798 Py_DECREF(new_memo);
5799 return NULL;
5800}
5801
5802PyDoc_STRVAR(ump_reduce_doc,
5803"memo.__reduce__(). Pickling support.");
5804
5805static PyObject *
5806ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5807{
5808 PyObject *reduce_value;
5809 PyObject *constructor_args;
5810 PyObject *contents = ump_copy(self);
5811 if (contents == NULL)
5812 return NULL;
5813
5814 reduce_value = PyTuple_New(2);
5815 if (reduce_value == NULL) {
5816 Py_DECREF(contents);
5817 return NULL;
5818 }
5819 constructor_args = PyTuple_New(1);
5820 if (constructor_args == NULL) {
5821 Py_DECREF(contents);
5822 Py_DECREF(reduce_value);
5823 return NULL;
5824 }
5825 PyTuple_SET_ITEM(constructor_args, 0, contents);
5826 Py_INCREF((PyObject *)&PyDict_Type);
5827 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5828 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5829 return reduce_value;
5830}
5831
5832static PyMethodDef unpicklerproxy_methods[] = {
5833 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5834 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5835 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5836 {NULL, NULL} /* sentinel */
5837};
5838
5839static void
5840UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5841{
5842 PyObject_GC_UnTrack(self);
5843 Py_XDECREF(self->unpickler);
5844 PyObject_GC_Del((PyObject *)self);
5845}
5846
5847static int
5848UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5849 visitproc visit, void *arg)
5850{
5851 Py_VISIT(self->unpickler);
5852 return 0;
5853}
5854
5855static int
5856UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5857{
5858 Py_CLEAR(self->unpickler);
5859 return 0;
5860}
5861
5862static PyTypeObject UnpicklerMemoProxyType = {
5863 PyVarObject_HEAD_INIT(NULL, 0)
5864 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5865 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5866 0,
5867 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5868 0, /* tp_print */
5869 0, /* tp_getattr */
5870 0, /* tp_setattr */
5871 0, /* tp_compare */
5872 0, /* tp_repr */
5873 0, /* tp_as_number */
5874 0, /* tp_as_sequence */
5875 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005876 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005877 0, /* tp_call */
5878 0, /* tp_str */
5879 PyObject_GenericGetAttr, /* tp_getattro */
5880 PyObject_GenericSetAttr, /* tp_setattro */
5881 0, /* tp_as_buffer */
5882 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5883 0, /* tp_doc */
5884 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5885 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5886 0, /* tp_richcompare */
5887 0, /* tp_weaklistoffset */
5888 0, /* tp_iter */
5889 0, /* tp_iternext */
5890 unpicklerproxy_methods, /* tp_methods */
5891};
5892
5893static PyObject *
5894UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5895{
5896 UnpicklerMemoProxyObject *self;
5897
5898 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5899 &UnpicklerMemoProxyType);
5900 if (self == NULL)
5901 return NULL;
5902 Py_INCREF(unpickler);
5903 self->unpickler = unpickler;
5904 PyObject_GC_Track(self);
5905 return (PyObject *)self;
5906}
5907
5908/*****************************************************************************/
5909
5910
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005911static PyObject *
5912Unpickler_get_memo(UnpicklerObject *self)
5913{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005914 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005915}
5916
5917static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005918Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005919{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005920 PyObject **new_memo;
5921 Py_ssize_t new_memo_size = 0;
5922 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005923
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005924 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005925 PyErr_SetString(PyExc_TypeError,
5926 "attribute deletion is not supported");
5927 return -1;
5928 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005929
5930 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5931 UnpicklerObject *unpickler =
5932 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5933
5934 new_memo_size = unpickler->memo_size;
5935 new_memo = _Unpickler_NewMemo(new_memo_size);
5936 if (new_memo == NULL)
5937 return -1;
5938
5939 for (i = 0; i < new_memo_size; i++) {
5940 Py_XINCREF(unpickler->memo[i]);
5941 new_memo[i] = unpickler->memo[i];
5942 }
5943 }
5944 else if (PyDict_Check(obj)) {
5945 Py_ssize_t i = 0;
5946 PyObject *key, *value;
5947
5948 new_memo_size = PyDict_Size(obj);
5949 new_memo = _Unpickler_NewMemo(new_memo_size);
5950 if (new_memo == NULL)
5951 return -1;
5952
5953 while (PyDict_Next(obj, &i, &key, &value)) {
5954 Py_ssize_t idx;
5955 if (!PyLong_Check(key)) {
5956 PyErr_SetString(PyExc_TypeError,
5957 "memo key must be integers");
5958 goto error;
5959 }
5960 idx = PyLong_AsSsize_t(key);
5961 if (idx == -1 && PyErr_Occurred())
5962 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02005963 if (idx < 0) {
5964 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02005965 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02005966 goto error;
5967 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005968 if (_Unpickler_MemoPut(self, idx, value) < 0)
5969 goto error;
5970 }
5971 }
5972 else {
5973 PyErr_Format(PyExc_TypeError,
5974 "'memo' attribute must be an UnpicklerMemoProxy object"
5975 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005976 return -1;
5977 }
5978
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005979 _Unpickler_MemoCleanup(self);
5980 self->memo_size = new_memo_size;
5981 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005982
5983 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005984
5985 error:
5986 if (new_memo_size) {
5987 i = new_memo_size;
5988 while (--i >= 0) {
5989 Py_XDECREF(new_memo[i]);
5990 }
5991 PyMem_FREE(new_memo);
5992 }
5993 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005994}
5995
5996static PyObject *
5997Unpickler_get_persload(UnpicklerObject *self)
5998{
5999 if (self->pers_func == NULL)
6000 PyErr_SetString(PyExc_AttributeError, "persistent_load");
6001 else
6002 Py_INCREF(self->pers_func);
6003 return self->pers_func;
6004}
6005
6006static int
6007Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6008{
6009 PyObject *tmp;
6010
6011 if (value == NULL) {
6012 PyErr_SetString(PyExc_TypeError,
6013 "attribute deletion is not supported");
6014 return -1;
6015 }
6016 if (!PyCallable_Check(value)) {
6017 PyErr_SetString(PyExc_TypeError,
6018 "persistent_load must be a callable taking "
6019 "one argument");
6020 return -1;
6021 }
6022
6023 tmp = self->pers_func;
6024 Py_INCREF(value);
6025 self->pers_func = value;
6026 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6027
6028 return 0;
6029}
6030
6031static PyGetSetDef Unpickler_getsets[] = {
6032 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6033 {"persistent_load", (getter)Unpickler_get_persload,
6034 (setter)Unpickler_set_persload},
6035 {NULL}
6036};
6037
6038static PyTypeObject Unpickler_Type = {
6039 PyVarObject_HEAD_INIT(NULL, 0)
6040 "_pickle.Unpickler", /*tp_name*/
6041 sizeof(UnpicklerObject), /*tp_basicsize*/
6042 0, /*tp_itemsize*/
6043 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6044 0, /*tp_print*/
6045 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006046 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006047 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006048 0, /*tp_repr*/
6049 0, /*tp_as_number*/
6050 0, /*tp_as_sequence*/
6051 0, /*tp_as_mapping*/
6052 0, /*tp_hash*/
6053 0, /*tp_call*/
6054 0, /*tp_str*/
6055 0, /*tp_getattro*/
6056 0, /*tp_setattro*/
6057 0, /*tp_as_buffer*/
6058 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6059 Unpickler_doc, /*tp_doc*/
6060 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6061 (inquiry)Unpickler_clear, /*tp_clear*/
6062 0, /*tp_richcompare*/
6063 0, /*tp_weaklistoffset*/
6064 0, /*tp_iter*/
6065 0, /*tp_iternext*/
6066 Unpickler_methods, /*tp_methods*/
6067 0, /*tp_members*/
6068 Unpickler_getsets, /*tp_getset*/
6069 0, /*tp_base*/
6070 0, /*tp_dict*/
6071 0, /*tp_descr_get*/
6072 0, /*tp_descr_set*/
6073 0, /*tp_dictoffset*/
6074 (initproc)Unpickler_init, /*tp_init*/
6075 PyType_GenericAlloc, /*tp_alloc*/
6076 PyType_GenericNew, /*tp_new*/
6077 PyObject_GC_Del, /*tp_free*/
6078 0, /*tp_is_gc*/
6079};
6080
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006081PyDoc_STRVAR(pickle_dump_doc,
6082"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6083"\n"
6084"Write a pickled representation of obj to the open file object file. This\n"
6085"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6086"efficient.\n"
6087"\n"
6088"The optional protocol argument tells the pickler to use the given protocol;\n"
6089"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6090"backward-incompatible protocol designed for Python 3.0.\n"
6091"\n"
6092"Specifying a negative protocol version selects the highest protocol version\n"
6093"supported. The higher the protocol used, the more recent the version of\n"
6094"Python needed to read the pickle produced.\n"
6095"\n"
6096"The file argument must have a write() method that accepts a single bytes\n"
6097"argument. It can thus be a file object opened for binary writing, a\n"
6098"io.BytesIO instance, or any other custom object that meets this interface.\n"
6099"\n"
6100"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6101"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6102"so that the pickle data stream is readable with Python 2.x.\n");
6103
6104static PyObject *
6105pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6106{
6107 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6108 PyObject *obj;
6109 PyObject *file;
6110 PyObject *proto = NULL;
6111 PyObject *fix_imports = Py_True;
6112 PicklerObject *pickler;
6113
6114 /* fix_imports is a keyword-only argument. */
6115 if (Py_SIZE(args) > 3) {
6116 PyErr_Format(PyExc_TypeError,
6117 "pickle.dump() takes at most 3 positional "
6118 "argument (%zd given)", Py_SIZE(args));
6119 return NULL;
6120 }
6121
6122 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6123 &obj, &file, &proto, &fix_imports))
6124 return NULL;
6125
6126 pickler = _Pickler_New();
6127 if (pickler == NULL)
6128 return NULL;
6129
6130 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6131 goto error;
6132
6133 if (_Pickler_SetOutputStream(pickler, file) < 0)
6134 goto error;
6135
6136 if (dump(pickler, obj) < 0)
6137 goto error;
6138
6139 if (_Pickler_FlushToFile(pickler) < 0)
6140 goto error;
6141
6142 Py_DECREF(pickler);
6143 Py_RETURN_NONE;
6144
6145 error:
6146 Py_XDECREF(pickler);
6147 return NULL;
6148}
6149
6150PyDoc_STRVAR(pickle_dumps_doc,
6151"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6152"\n"
6153"Return the pickled representation of the object as a bytes\n"
6154"object, instead of writing it to a file.\n"
6155"\n"
6156"The optional protocol argument tells the pickler to use the given protocol;\n"
6157"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6158"backward-incompatible protocol designed for Python 3.0.\n"
6159"\n"
6160"Specifying a negative protocol version selects the highest protocol version\n"
6161"supported. The higher the protocol used, the more recent the version of\n"
6162"Python needed to read the pickle produced.\n"
6163"\n"
6164"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6165"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6166"so that the pickle data stream is readable with Python 2.x.\n");
6167
6168static PyObject *
6169pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6170{
6171 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6172 PyObject *obj;
6173 PyObject *proto = NULL;
6174 PyObject *result;
6175 PyObject *fix_imports = Py_True;
6176 PicklerObject *pickler;
6177
6178 /* fix_imports is a keyword-only argument. */
6179 if (Py_SIZE(args) > 2) {
6180 PyErr_Format(PyExc_TypeError,
6181 "pickle.dumps() takes at most 2 positional "
6182 "argument (%zd given)", Py_SIZE(args));
6183 return NULL;
6184 }
6185
6186 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6187 &obj, &proto, &fix_imports))
6188 return NULL;
6189
6190 pickler = _Pickler_New();
6191 if (pickler == NULL)
6192 return NULL;
6193
6194 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6195 goto error;
6196
6197 if (dump(pickler, obj) < 0)
6198 goto error;
6199
6200 result = _Pickler_GetString(pickler);
6201 Py_DECREF(pickler);
6202 return result;
6203
6204 error:
6205 Py_XDECREF(pickler);
6206 return NULL;
6207}
6208
6209PyDoc_STRVAR(pickle_load_doc,
6210"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6211"\n"
6212"Read a pickled object representation from the open file object file and\n"
6213"return the reconstituted object hierarchy specified therein. This is\n"
6214"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6215"\n"
6216"The protocol version of the pickle is detected automatically, so no protocol\n"
6217"argument is needed. Bytes past the pickled object's representation are\n"
6218"ignored.\n"
6219"\n"
6220"The argument file must have two methods, a read() method that takes an\n"
6221"integer argument, and a readline() method that requires no arguments. Both\n"
6222"methods should return bytes. Thus *file* can be a binary file object opened\n"
6223"for reading, a BytesIO object, or any other custom object that meets this\n"
6224"interface.\n"
6225"\n"
6226"Optional keyword arguments are fix_imports, encoding and errors,\n"
6227"which are used to control compatiblity support for pickle stream generated\n"
6228"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6229"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6230"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6231"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6232
6233static PyObject *
6234pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6235{
6236 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6237 PyObject *file;
6238 PyObject *fix_imports = Py_True;
6239 PyObject *result;
6240 char *encoding = NULL;
6241 char *errors = NULL;
6242 UnpicklerObject *unpickler;
6243
6244 /* fix_imports, encoding and errors are a keyword-only argument. */
6245 if (Py_SIZE(args) != 1) {
6246 PyErr_Format(PyExc_TypeError,
6247 "pickle.load() takes exactly one positional "
6248 "argument (%zd given)", Py_SIZE(args));
6249 return NULL;
6250 }
6251
6252 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6253 &file, &fix_imports, &encoding, &errors))
6254 return NULL;
6255
6256 unpickler = _Unpickler_New();
6257 if (unpickler == NULL)
6258 return NULL;
6259
6260 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6261 goto error;
6262
6263 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6264 goto error;
6265
6266 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6267 if (unpickler->fix_imports == -1)
6268 goto error;
6269
6270 result = load(unpickler);
6271 Py_DECREF(unpickler);
6272 return result;
6273
6274 error:
6275 Py_XDECREF(unpickler);
6276 return NULL;
6277}
6278
6279PyDoc_STRVAR(pickle_loads_doc,
6280"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6281"\n"
6282"Read a pickled object hierarchy from a bytes object and return the\n"
6283"reconstituted object hierarchy specified therein\n"
6284"\n"
6285"The protocol version of the pickle is detected automatically, so no protocol\n"
6286"argument is needed. Bytes past the pickled object's representation are\n"
6287"ignored.\n"
6288"\n"
6289"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6290"are used to control compatiblity support for pickle stream generated\n"
6291"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6292"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6293"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6294"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6295
6296static PyObject *
6297pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6298{
6299 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6300 PyObject *input;
6301 PyObject *fix_imports = Py_True;
6302 PyObject *result;
6303 char *encoding = NULL;
6304 char *errors = NULL;
6305 UnpicklerObject *unpickler;
6306
6307 /* fix_imports, encoding and errors are a keyword-only argument. */
6308 if (Py_SIZE(args) != 1) {
6309 PyErr_Format(PyExc_TypeError,
6310 "pickle.loads() takes exactly one positional "
6311 "argument (%zd given)", Py_SIZE(args));
6312 return NULL;
6313 }
6314
6315 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6316 &input, &fix_imports, &encoding, &errors))
6317 return NULL;
6318
6319 unpickler = _Unpickler_New();
6320 if (unpickler == NULL)
6321 return NULL;
6322
6323 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6324 goto error;
6325
6326 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6327 goto error;
6328
6329 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6330 if (unpickler->fix_imports == -1)
6331 goto error;
6332
6333 result = load(unpickler);
6334 Py_DECREF(unpickler);
6335 return result;
6336
6337 error:
6338 Py_XDECREF(unpickler);
6339 return NULL;
6340}
6341
6342
/* Method table for the module-level functions; it is referenced from
   _picklemodule.  Every entry accepts both positional and keyword
   arguments (METH_VARARGS|METH_KEYWORDS). */
static struct PyMethodDef pickle_methods[] = {
    {"dump",  (PyCFunction)pickle_dump,  METH_VARARGS|METH_KEYWORDS,
     pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
     pickle_dumps_doc},
    {"load",  (PyCFunction)pickle_load,  METH_VARARGS|METH_KEYWORDS,
     pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
     pickle_loads_doc},
    {NULL, NULL}                /* sentinel */
};
6354
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006355static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006356initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006357{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006358 PyObject *copyreg = NULL;
6359 PyObject *compat_pickle = NULL;
6360
6361 /* XXX: We should ensure that the types of the dictionaries imported are
6362 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6363 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006364
6365 copyreg = PyImport_ImportModule("copyreg");
6366 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006367 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006368 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6369 if (!dispatch_table)
6370 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006371 extension_registry = \
6372 PyObject_GetAttrString(copyreg, "_extension_registry");
6373 if (!extension_registry)
6374 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006375 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6376 if (!inverted_registry)
6377 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006378 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6379 if (!extension_cache)
6380 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006381 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006382
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006383 /* Load the 2.x -> 3.x stdlib module mapping tables */
6384 compat_pickle = PyImport_ImportModule("_compat_pickle");
6385 if (!compat_pickle)
6386 goto error;
6387 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6388 if (!name_mapping_2to3)
6389 goto error;
6390 if (!PyDict_CheckExact(name_mapping_2to3)) {
6391 PyErr_Format(PyExc_RuntimeError,
6392 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6393 Py_TYPE(name_mapping_2to3)->tp_name);
6394 goto error;
6395 }
6396 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6397 "IMPORT_MAPPING");
6398 if (!import_mapping_2to3)
6399 goto error;
6400 if (!PyDict_CheckExact(import_mapping_2to3)) {
6401 PyErr_Format(PyExc_RuntimeError,
6402 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6403 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6404 goto error;
6405 }
6406 /* ... and the 3.x -> 2.x mapping tables */
6407 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6408 "REVERSE_NAME_MAPPING");
6409 if (!name_mapping_3to2)
6410 goto error;
6411 if (!PyDict_CheckExact(name_mapping_3to2)) {
6412 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006413 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006414 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6415 goto error;
6416 }
6417 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6418 "REVERSE_IMPORT_MAPPING");
6419 if (!import_mapping_3to2)
6420 goto error;
6421 if (!PyDict_CheckExact(import_mapping_3to2)) {
6422 PyErr_Format(PyExc_RuntimeError,
6423 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6424 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6425 goto error;
6426 }
6427 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006428
6429 empty_tuple = PyTuple_New(0);
6430 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006431 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006432 two_tuple = PyTuple_New(2);
6433 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006434 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006435 /* We use this temp container with no regard to refcounts, or to
6436 * keeping containees alive. Exempt from GC, because we don't
6437 * want anything looking at two_tuple() by magic.
6438 */
6439 PyObject_GC_UnTrack(two_tuple);
6440
6441 return 0;
6442
6443 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006444 Py_CLEAR(copyreg);
6445 Py_CLEAR(dispatch_table);
6446 Py_CLEAR(extension_registry);
6447 Py_CLEAR(inverted_registry);
6448 Py_CLEAR(extension_cache);
6449 Py_CLEAR(compat_pickle);
6450 Py_CLEAR(name_mapping_2to3);
6451 Py_CLEAR(import_mapping_2to3);
6452 Py_CLEAR(name_mapping_3to2);
6453 Py_CLEAR(import_mapping_3to2);
6454 Py_CLEAR(empty_tuple);
6455 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006456 return -1;
6457}
6458
/* Module definition passed to PyModule_Create() in PyInit__pickle().
   Field labels follow the PyModuleDef layout of the Python C API. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    -1,                   /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    NULL,                 /* m_traverse */
    NULL,                 /* m_clear */
    NULL                  /* m_free */
};
6470
6471PyMODINIT_FUNC
6472PyInit__pickle(void)
6473{
6474 PyObject *m;
6475
6476 if (PyType_Ready(&Unpickler_Type) < 0)
6477 return NULL;
6478 if (PyType_Ready(&Pickler_Type) < 0)
6479 return NULL;
6480 if (PyType_Ready(&Pdata_Type) < 0)
6481 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006482 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6483 return NULL;
6484 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6485 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006486
6487 /* Create the module and add the functions. */
6488 m = PyModule_Create(&_picklemodule);
6489 if (m == NULL)
6490 return NULL;
6491
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006492 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006493 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6494 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006495 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006496 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6497 return NULL;
6498
6499 /* Initialize the exceptions. */
6500 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6501 if (PickleError == NULL)
6502 return NULL;
6503 PicklingError = \
6504 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6505 if (PicklingError == NULL)
6506 return NULL;
6507 UnpicklingError = \
6508 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6509 if (UnpicklingError == NULL)
6510 return NULL;
6511
6512 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6513 return NULL;
6514 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6515 return NULL;
6516 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6517 return NULL;
6518
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006519 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006520 return NULL;
6521
6522 return m;
6523}