blob: 9852cd374b023c33acefb104773672b5ddbb3233 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this implementation.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* largest protocol number accepted */
    DEFAULT_PROTOCOL = 3    /* used when no protocol is requested */
};
12
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   These must be kept in sync with pickle.py; extensive documentation for
   every opcode lives in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the textual forms used to pickle bools before
 * protocol 2.  Unpicklers written before bools were introduced read them
 * as ints, while newer unpicklers can recognize that bools were intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE  "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants shared by the Pickler and the Unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE.
       Nothing will break if this drifts from pickle.py, but it is unclear
       that a different value would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the Pickler's write buffer when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
Victor Stinner804e05e2013-11-14 01:26:17 +0100139_Py_IDENTIFIER(__name__);
Victor Stinnerbb520202013-11-06 22:40:41 +0100140_Py_IDENTIFIER(modules);
141
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000142static int
143stack_underflow(void)
144{
145 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
146 return -1;
147}
148
149/* Internal data type used as the unpickling stack. */
150typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000151 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000152 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000153 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000154} Pdata;
155
156static void
157Pdata_dealloc(Pdata *self)
158{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200159 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 while (--i >= 0) {
161 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000162 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000163 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000164 PyObject_Del(self);
165}
166
167static PyTypeObject Pdata_Type = {
168 PyVarObject_HEAD_INIT(NULL, 0)
169 "_pickle.Pdata", /*tp_name*/
170 sizeof(Pdata), /*tp_basicsize*/
171 0, /*tp_itemsize*/
172 (destructor)Pdata_dealloc, /*tp_dealloc*/
173};
174
175static PyObject *
176Pdata_New(void)
177{
178 Pdata *self;
179
180 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
181 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000182 Py_SIZE(self) = 0;
183 self->allocated = 8;
184 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000185 if (self->data)
186 return (PyObject *)self;
187 Py_DECREF(self);
188 return PyErr_NoMemory();
189}
190
191
192/* Retain only the initial clearto items. If clearto >= the current
193 * number of items, this is a (non-erroneous) NOP.
194 */
195static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200196Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000197{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200198 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000199
200 if (clearto < 0)
201 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000203 return 0;
204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 while (--i >= clearto) {
206 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000207 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000208 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000209 return 0;
210}
211
212static int
213Pdata_grow(Pdata *self)
214{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000215 PyObject **data = self->data;
216 Py_ssize_t allocated = self->allocated;
217 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000218
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000219 new_allocated = (allocated >> 3) + 6;
220 /* check for integer overflow */
221 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 new_allocated += allocated;
224 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
227 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000228 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000229
230 self->data = data;
231 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000232 return 0;
233
234 nomemory:
235 PyErr_NoMemory();
236 return -1;
237}
238
239/* D is a Pdata*. Pop the topmost element and store it into V, which
240 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
241 * is raised and V is set to NULL.
242 */
243static PyObject *
244Pdata_pop(Pdata *self)
245{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000246 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000247 PyErr_SetString(UnpicklingError, "bad pickle data");
248 return NULL;
249 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000250 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000251}
252#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
253
254static int
255Pdata_push(Pdata *self, PyObject *obj)
256{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return -1;
259 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000260 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000261 return 0;
262}
263
/* Push an object on stack, transferring its ownership to the stack.
   Makes the enclosing function return ER when the push fails. */
#define PDATA_PUSH(D, O, ER)                                    \
    do {                                                        \
        if (Pdata_push((D), (O)) < 0)                           \
            return (ER);                                        \
    } while (0)

/* Push an object on stack, adding a new reference to the object.
   Makes the enclosing function return ER when the push fails. */
#define PDATA_APPEND(D, O, ER)                                  \
    do {                                                        \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0)                           \
            return (ER);                                        \
    } while (0)
272
273static PyObject *
274Pdata_poptuple(Pdata *self, Py_ssize_t start)
275{
276 PyObject *tuple;
277 Py_ssize_t len, i, j;
278
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000279 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000280 tuple = PyTuple_New(len);
281 if (tuple == NULL)
282 return NULL;
283 for (i = start, j = 0; j < len; i++, j++)
284 PyTuple_SET_ITEM(tuple, j, self->data[i]);
285
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000286 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000287 return tuple;
288}
289
290static PyObject *
291Pdata_poplist(Pdata *self, Py_ssize_t start)
292{
293 PyObject *list;
294 Py_ssize_t len, i, j;
295
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000296 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000297 list = PyList_New(len);
298 if (list == NULL)
299 return NULL;
300 for (i = start, j = 0; j < len; i++, j++)
301 PyList_SET_ITEM(list, j, self->data[i]);
302
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000303 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000304 return list;
305}
306
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307typedef struct {
308 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200309 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000310} PyMemoEntry;
311
312typedef struct {
313 Py_ssize_t mt_mask;
314 Py_ssize_t mt_used;
315 Py_ssize_t mt_allocated;
316 PyMemoEntry *mt_table;
317} PyMemoTable;
318
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319typedef struct PicklerObject {
320 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000321 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000322 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000324 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100325 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000326 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000327
328 PyObject *write; /* write() method of the output stream. */
329 PyObject *output_buffer; /* Write into a local bytearray buffer before
330 flushing to the stream. */
331 Py_ssize_t output_len; /* Length of output_buffer. */
332 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000333 int proto; /* Pickle protocol number, >= 0 */
334 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200335 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000336 int fast; /* Enable fast mode if set to a true value.
337 The fast mode disable the usage of memo,
338 therefore speeding the pickling process by
339 not generating superfluous PUT opcodes. It
340 should not be used if with self-referential
341 objects. */
342 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000343 int fix_imports; /* Indicate whether Pickler should fix
344 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000345 PyObject *fast_memo;
346} PicklerObject;
347
348typedef struct UnpicklerObject {
349 PyObject_HEAD
350 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000351
352 /* The unpickler memo is just an array of PyObject *s. Using a dict
353 is unnecessary, since the keys are contiguous ints. */
354 PyObject **memo;
355 Py_ssize_t memo_size;
356
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000357 PyObject *arg;
358 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000359
360 Py_buffer buffer;
361 char *input_buffer;
362 char *input_line;
363 Py_ssize_t input_len;
364 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000365 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000366 PyObject *read; /* read() method of the input stream. */
367 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000368 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000369
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000370 char *encoding; /* Name of the encoding to be used for
371 decoding strings pickled using Python
372 2.x. The default value is "ASCII" */
373 char *errors; /* Name of errors handling scheme to used when
374 decoding strings. The default value is
375 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500376 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000377 objects. */
378 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
379 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000380 int proto; /* Protocol of the pickle loaded. */
381 int fix_imports; /* Indicate whether Unpickler should fix
382 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000383} UnpicklerObject;
384
385/* Forward declarations */
386static int save(PicklerObject *, PyObject *, int);
387static int save_reduce(PicklerObject *, PyObject *, PyObject *);
388static PyTypeObject Pickler_Type;
389static PyTypeObject Unpickler_Type;
390
391
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values.  The
 pickler uses it for memoization.  Using a custom hashtable rather than
 PyDict skips a lot of unnecessary object creation, which makes a huge
 performance difference. */

/* Number of slots in a freshly created table; also the smallest size a
   resize will choose. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value after each probe. */
#define PERTURB_SHIFT 5
400
401
402static PyMemoTable *
403PyMemoTable_New(void)
404{
405 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
406 if (memo == NULL) {
407 PyErr_NoMemory();
408 return NULL;
409 }
410
411 memo->mt_used = 0;
412 memo->mt_allocated = MT_MINSIZE;
413 memo->mt_mask = MT_MINSIZE - 1;
414 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
415 if (memo->mt_table == NULL) {
416 PyMem_FREE(memo);
417 PyErr_NoMemory();
418 return NULL;
419 }
420 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
421
422 return memo;
423}
424
425static PyMemoTable *
426PyMemoTable_Copy(PyMemoTable *self)
427{
428 Py_ssize_t i;
429 PyMemoTable *new = PyMemoTable_New();
430 if (new == NULL)
431 return NULL;
432
433 new->mt_used = self->mt_used;
434 new->mt_allocated = self->mt_allocated;
435 new->mt_mask = self->mt_mask;
436 /* The table we get from _New() is probably smaller than we wanted.
437 Free it and allocate one that's the right size. */
438 PyMem_FREE(new->mt_table);
439 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
440 if (new->mt_table == NULL) {
441 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200442 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000443 return NULL;
444 }
445 for (i = 0; i < self->mt_allocated; i++) {
446 Py_XINCREF(self->mt_table[i].me_key);
447 }
448 memcpy(new->mt_table, self->mt_table,
449 sizeof(PyMemoEntry) * self->mt_allocated);
450
451 return new;
452}
453
454static Py_ssize_t
455PyMemoTable_Size(PyMemoTable *self)
456{
457 return self->mt_used;
458}
459
460static int
461PyMemoTable_Clear(PyMemoTable *self)
462{
463 Py_ssize_t i = self->mt_allocated;
464
465 while (--i >= 0) {
466 Py_XDECREF(self->mt_table[i].me_key);
467 }
468 self->mt_used = 0;
469 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
470 return 0;
471}
472
473static void
474PyMemoTable_Del(PyMemoTable *self)
475{
476 if (self == NULL)
477 return;
478 PyMemoTable_Clear(self);
479
480 PyMem_FREE(self->mt_table);
481 PyMem_FREE(self);
482}
483
484/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
485 can be considerably simpler than dictobject.c's lookdict(). */
486static PyMemoEntry *
487_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
488{
489 size_t i;
490 size_t perturb;
491 size_t mask = (size_t)self->mt_mask;
492 PyMemoEntry *table = self->mt_table;
493 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000494 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000495
496 i = hash & mask;
497 entry = &table[i];
498 if (entry->me_key == NULL || entry->me_key == key)
499 return entry;
500
501 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
502 i = (i << 2) + i + perturb + 1;
503 entry = &table[i & mask];
504 if (entry->me_key == NULL || entry->me_key == key)
505 return entry;
506 }
507 assert(0); /* Never reached */
508 return NULL;
509}
510
511/* Returns -1 on failure, 0 on success. */
512static int
513_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
514{
515 PyMemoEntry *oldtable = NULL;
516 PyMemoEntry *oldentry, *newentry;
517 Py_ssize_t new_size = MT_MINSIZE;
518 Py_ssize_t to_process;
519
520 assert(min_size > 0);
521
522 /* Find the smallest valid table size >= min_size. */
523 while (new_size < min_size && new_size > 0)
524 new_size <<= 1;
525 if (new_size <= 0) {
526 PyErr_NoMemory();
527 return -1;
528 }
529 /* new_size needs to be a power of two. */
530 assert((new_size & (new_size - 1)) == 0);
531
532 /* Allocate new table. */
533 oldtable = self->mt_table;
534 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
535 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200536 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000537 PyErr_NoMemory();
538 return -1;
539 }
540 self->mt_allocated = new_size;
541 self->mt_mask = new_size - 1;
542 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
543
544 /* Copy entries from the old table. */
545 to_process = self->mt_used;
546 for (oldentry = oldtable; to_process > 0; oldentry++) {
547 if (oldentry->me_key != NULL) {
548 to_process--;
549 /* newentry is a pointer to a chunk of the new
550 mt_table, so we're setting the key:value pair
551 in-place. */
552 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
553 newentry->me_key = oldentry->me_key;
554 newentry->me_value = oldentry->me_value;
555 }
556 }
557
558 /* Deallocate the old table. */
559 PyMem_FREE(oldtable);
560 return 0;
561}
562
563/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200564static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000565PyMemoTable_Get(PyMemoTable *self, PyObject *key)
566{
567 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
568 if (entry->me_key == NULL)
569 return NULL;
570 return &entry->me_value;
571}
572
573/* Returns -1 on failure, 0 on success. */
574static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200575PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000576{
577 PyMemoEntry *entry;
578
579 assert(key != NULL);
580
581 entry = _PyMemoTable_Lookup(self, key);
582 if (entry->me_key != NULL) {
583 entry->me_value = value;
584 return 0;
585 }
586 Py_INCREF(key);
587 entry->me_key = key;
588 entry->me_value = value;
589 self->mt_used++;
590
591 /* If we added a key, we can safely resize. Otherwise just return!
592 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
593 *
594 * Quadrupling the size improves average table sparseness
595 * (reducing collisions) at the cost of some memory. It also halves
596 * the number of expensive resize operations in a growing memo table.
597 *
598 * Very large memo tables (over 50K items) use doubling instead.
599 * This may help applications with severe memory constraints.
600 */
601 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
602 return 0;
603 return _PyMemoTable_ResizeTable(self,
604 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
605}
606
607#undef MT_MINSIZE
608#undef PERTURB_SHIFT
609
610/*************************************************************************/
611
/* Helpers that build the argument tuple passed to called functions.  The
   one-element tuple is cached in (self)->arg, so PyTuple_New() is normally
   called only once.

   ARG_TUP consumes the reference to `obj`: it is either stored in the
   tuple, replacing and releasing the previous item, or released when the
   tuple cannot be created (in which case (self)->arg stays NULL).

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
#define ARG_TUP(self, obj)                                          \
    do {                                                            \
        if ((self)->arg == NULL)                                    \
            (self)->arg = PyTuple_New(1);                           \
        if ((self)->arg != NULL) {                                  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));           \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));                \
        }                                                           \
        else {                                                      \
            Py_DECREF((obj));                                       \
        }                                                           \
    } while (0)

/* Drop the cached tuple when something else still holds a reference to
   it, so that it is not reused while still referenced elsewhere. */
#define FREE_ARG_TUP(self)                                          \
    do {                                                            \
        if ((self)->arg->ob_refcnt > 1)                             \
            Py_CLEAR((self)->arg);                                  \
    } while (0)
631
632/* A temporary cleaner API for fast single argument function call.
633
634 XXX: Does caching the argument tuple provides any real performance benefits?
635
636 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
637 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
638 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
639 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
640 (i.e, call PyTuple_New() and store the returned value in an array), to save
641 one second (wall clock time). Either ways, the loading time a pickle stream
642 large enough to generate this number of calls would be massively
643 overwhelmed by other factors, like I/O throughput, the GC traversal and
644 object allocation overhead. So, I really doubt these functions provide any
645 real benefits.
646
647 On the other hand, oprofile reports that pickle spends a lot of time in
648 these functions. But, that is probably more related to the function call
649 overhead, than the argument tuple allocation.
650
651 XXX: And, what is the reference behavior of these? Steal, borrow? At first
652 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000653 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000654static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000655_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000656{
657 PyObject *result = NULL;
658
659 ARG_TUP(self, arg);
660 if (self->arg) {
661 result = PyObject_Call(func, self->arg, NULL);
662 FREE_ARG_TUP(self);
663 }
664 return result;
665}
666
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000667static int
668_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000669{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 Py_CLEAR(self->output_buffer);
671 self->output_buffer =
672 PyBytes_FromStringAndSize(NULL, self->max_output_len);
673 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000674 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000675 self->output_len = 0;
676 return 0;
677}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000678
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000679static PyObject *
680_Pickler_GetString(PicklerObject *self)
681{
682 PyObject *output_buffer = self->output_buffer;
683
684 assert(self->output_buffer != NULL);
685 self->output_buffer = NULL;
686 /* Resize down to exact size */
687 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
688 return NULL;
689 return output_buffer;
690}
691
692static int
693_Pickler_FlushToFile(PicklerObject *self)
694{
695 PyObject *output, *result;
696
697 assert(self->write != NULL);
698
699 output = _Pickler_GetString(self);
700 if (output == NULL)
701 return -1;
702
703 result = _Pickler_FastCall(self, self->write, output);
704 Py_XDECREF(result);
705 return (result == NULL) ? -1 : 0;
706}
707
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200708static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000709_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
710{
711 Py_ssize_t i, required;
712 char *buffer;
713
714 assert(s != NULL);
715
716 required = self->output_len + n;
717 if (required > self->max_output_len) {
718 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
719 /* XXX This reallocates a new buffer every time, which is a bit
720 wasteful. */
721 if (_Pickler_FlushToFile(self) < 0)
722 return -1;
723 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000724 return -1;
725 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000726 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
727 /* we already flushed above, so the buffer is empty */
728 PyObject *result;
729 /* XXX we could spare an intermediate copy and pass
730 a memoryview instead */
731 PyObject *output = PyBytes_FromStringAndSize(s, n);
732 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000733 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 result = _Pickler_FastCall(self, self->write, output);
735 Py_XDECREF(result);
736 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000737 }
738 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
740 PyErr_NoMemory();
741 return -1;
742 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200743 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000744 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
745 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000746 }
747 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000748 buffer = PyBytes_AS_STRING(self->output_buffer);
749 if (n < 8) {
750 /* This is faster than memcpy when the string is short. */
751 for (i = 0; i < n; i++) {
752 buffer[self->output_len + i] = s[i];
753 }
754 }
755 else {
756 memcpy(buffer + self->output_len, s, n);
757 }
758 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759 return n;
760}
761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762static PicklerObject *
763_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000764{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000765 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000766
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000767 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
768 if (self == NULL)
769 return NULL;
770
771 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100772 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000773 self->arg = NULL;
774 self->write = NULL;
775 self->proto = 0;
776 self->bin = 0;
777 self->fast = 0;
778 self->fast_nesting = 0;
779 self->fix_imports = 0;
780 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200783
784 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000785 self->output_buffer = PyBytes_FromStringAndSize(NULL,
786 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200787
788 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200789 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000790 return NULL;
791 }
792 return self;
793}
794
795static int
796_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
797 PyObject *fix_imports_obj)
798{
799 long proto = 0;
800 int fix_imports;
801
802 if (proto_obj == NULL || proto_obj == Py_None)
803 proto = DEFAULT_PROTOCOL;
804 else {
805 proto = PyLong_AsLong(proto_obj);
806 if (proto == -1 && PyErr_Occurred())
807 return -1;
808 }
809 if (proto < 0)
810 proto = HIGHEST_PROTOCOL;
811 if (proto > HIGHEST_PROTOCOL) {
812 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
813 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000814 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000815 }
816 fix_imports = PyObject_IsTrue(fix_imports_obj);
817 if (fix_imports == -1)
818 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200819
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000820 self->proto = proto;
821 self->bin = proto > 0;
822 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000823
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000824 return 0;
825}
826
827/* Returns -1 (with an exception set) on failure, 0 on success. This may
828 be called once on a freshly created Pickler. */
829static int
830_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
831{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200832 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000833 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200834 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000835 if (self->write == NULL) {
836 if (PyErr_ExceptionMatches(PyExc_AttributeError))
837 PyErr_SetString(PyExc_TypeError,
838 "file must have a 'write' attribute");
839 return -1;
840 }
841
842 return 0;
843}
844
845/* See documentation for _Pickler_FastCall(). */
846static PyObject *
847_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
848{
849 PyObject *result = NULL;
850
851 ARG_TUP(self, arg);
852 if (self->arg) {
853 result = PyObject_Call(func, self->arg, NULL);
854 FREE_ARG_TUP(self);
855 }
856 return result;
857}
858
859/* Returns the size of the input on success, -1 on failure. This takes its
860 own reference to `input`. */
861static Py_ssize_t
862_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
863{
864 if (self->buffer.buf != NULL)
865 PyBuffer_Release(&self->buffer);
866 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
867 return -1;
868 self->input_buffer = self->buffer.buf;
869 self->input_len = self->buffer.len;
870 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000872 return self->input_len;
873}
874
Antoine Pitrou04248a82010-10-12 20:51:21 +0000875static int
876_Unpickler_SkipConsumed(UnpicklerObject *self)
877{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100878 Py_ssize_t consumed;
879 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000880
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100881 consumed = self->next_read_idx - self->prefetched_idx;
882 if (consumed <= 0)
883 return 0;
884
885 assert(self->peek); /* otherwise we did something wrong */
886 /* This makes an useless copy... */
887 r = PyObject_CallFunction(self->read, "n", consumed);
888 if (r == NULL)
889 return -1;
890 Py_DECREF(r);
891
892 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000893 return 0;
894}
895
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000896static const Py_ssize_t READ_WHOLE_LINE = -1;
897
898/* If reading from a file, we need to only pull the bytes we need, since there
899 may be multiple pickle objects arranged contiguously in the same input
900 buffer.
901
902 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
903 bytes from the input stream/buffer.
904
905 Update the unpickler's input buffer with the newly-read data. Returns -1 on
906 failure; on success, returns the number of bytes read from the file.
907
908 On success, self->input_len will be 0; this is intentional so that when
909 unpickling from a file, the "we've run out of data" code paths will trigger,
910 causing the Unpickler to go back to the file for more data. Use the returned
911 size to tell you how much data you can process. */
912static Py_ssize_t
913_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
914{
915 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000916 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000917
918 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200919
Antoine Pitrou04248a82010-10-12 20:51:21 +0000920 if (_Unpickler_SkipConsumed(self) < 0)
921 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000922
923 if (n == READ_WHOLE_LINE)
924 data = PyObject_Call(self->readline, empty_tuple, NULL);
925 else {
926 PyObject *len = PyLong_FromSsize_t(n);
927 if (len == NULL)
928 return -1;
929 data = _Unpickler_FastCall(self, self->read, len);
930 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000931 if (data == NULL)
932 return -1;
933
Antoine Pitrou04248a82010-10-12 20:51:21 +0000934 /* Prefetch some data without advancing the file pointer, if possible */
935 if (self->peek) {
936 PyObject *len, *prefetched;
937 len = PyLong_FromSsize_t(PREFETCH);
938 if (len == NULL) {
939 Py_DECREF(data);
940 return -1;
941 }
942 prefetched = _Unpickler_FastCall(self, self->peek, len);
943 if (prefetched == NULL) {
944 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
945 /* peek() is probably not supported by the given file object */
946 PyErr_Clear();
947 Py_CLEAR(self->peek);
948 }
949 else {
950 Py_DECREF(data);
951 return -1;
952 }
953 }
954 else {
955 assert(PyBytes_Check(prefetched));
956 prefetched_size = PyBytes_GET_SIZE(prefetched);
957 PyBytes_ConcatAndDel(&data, prefetched);
958 if (data == NULL)
959 return -1;
960 }
961 }
962
963 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000964 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000965 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000966 return read_size;
967}
968
969/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
970
971 This should be used for all data reads, rather than accessing the unpickler's
972 input buffer directly. This method deals correctly with reading from input
973 streams, which the input buffer doesn't deal with.
974
975 Note that when reading from a file-like object, self->next_read_idx won't
976 be updated (it should remain at 0 for the entire unpickling process). You
977 should use this function's return value to know how many bytes you can
978 consume.
979
980 Returns -1 (with an exception set) on failure. On success, return the
981 number of chars read. */
982static Py_ssize_t
983_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
984{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000985 Py_ssize_t num_read;
986
Antoine Pitrou04248a82010-10-12 20:51:21 +0000987 if (self->next_read_idx + n <= self->input_len) {
988 *s = self->input_buffer + self->next_read_idx;
989 self->next_read_idx += n;
990 return n;
991 }
992 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000993 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000994 return -1;
995 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000996 num_read = _Unpickler_ReadFromFile(self, n);
997 if (num_read < 0)
998 return -1;
999 if (num_read < n) {
1000 PyErr_Format(PyExc_EOFError, "Ran out of input");
1001 return -1;
1002 }
1003 *s = self->input_buffer;
1004 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001005 return n;
1006}
1007
1008static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1010 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001011{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001012 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001013 if (input_line == NULL) {
1014 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001015 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001016 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001017
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001018 memcpy(input_line, line, len);
1019 input_line[len] = '\0';
1020 self->input_line = input_line;
1021 *result = self->input_line;
1022 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001023}
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025/* Read a line from the input stream/buffer. If we run off the end of the input
1026 before hitting \n, return the data we found.
1027
1028 Returns the number of chars read, or -1 on failure. */
1029static Py_ssize_t
1030_Unpickler_Readline(UnpicklerObject *self, char **result)
1031{
1032 Py_ssize_t i, num_read;
1033
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001035 if (self->input_buffer[i] == '\n') {
1036 char *line_start = self->input_buffer + self->next_read_idx;
1037 num_read = i - self->next_read_idx + 1;
1038 self->next_read_idx = i + 1;
1039 return _Unpickler_CopyLine(self, line_start, num_read, result);
1040 }
1041 }
1042 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001043 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1044 if (num_read < 0)
1045 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001046 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001047 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001048 }
Victor Stinner121aab42011-09-29 23:40:53 +02001049
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001050 /* If we get here, we've run off the end of the input string. Return the
1051 remaining string and let the caller figure it out. */
1052 *result = self->input_buffer + self->next_read_idx;
1053 num_read = i - self->next_read_idx;
1054 self->next_read_idx = i;
1055 return num_read;
1056}
1057
1058/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1059 will be modified in place. */
1060static int
1061_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1062{
1063 Py_ssize_t i;
1064 PyObject **memo;
1065
1066 assert(new_size > self->memo_size);
1067
1068 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1069 if (memo == NULL) {
1070 PyErr_NoMemory();
1071 return -1;
1072 }
1073 self->memo = memo;
1074 for (i = self->memo_size; i < new_size; i++)
1075 self->memo[i] = NULL;
1076 self->memo_size = new_size;
1077 return 0;
1078}
1079
1080/* Returns NULL if idx is out of bounds. */
1081static PyObject *
1082_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1083{
1084 if (idx < 0 || idx >= self->memo_size)
1085 return NULL;
1086
1087 return self->memo[idx];
1088}
1089
1090/* Returns -1 (with an exception set) on failure, 0 on success.
1091 This takes its own reference to `value`. */
1092static int
1093_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1094{
1095 PyObject *old_item;
1096
1097 if (idx >= self->memo_size) {
1098 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1099 return -1;
1100 assert(idx < self->memo_size);
1101 }
1102 Py_INCREF(value);
1103 old_item = self->memo[idx];
1104 self->memo[idx] = value;
1105 Py_XDECREF(old_item);
1106 return 0;
1107}
1108
1109static PyObject **
1110_Unpickler_NewMemo(Py_ssize_t new_size)
1111{
1112 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001113 if (memo == NULL) {
1114 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001115 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001116 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001117 memset(memo, 0, new_size * sizeof(PyObject *));
1118 return memo;
1119}
1120
1121/* Free the unpickler's memo, taking care to decref any items left in it. */
1122static void
1123_Unpickler_MemoCleanup(UnpicklerObject *self)
1124{
1125 Py_ssize_t i;
1126 PyObject **memo = self->memo;
1127
1128 if (self->memo == NULL)
1129 return;
1130 self->memo = NULL;
1131 i = self->memo_size;
1132 while (--i >= 0) {
1133 Py_XDECREF(memo[i]);
1134 }
1135 PyMem_FREE(memo);
1136}
1137
1138static UnpicklerObject *
1139_Unpickler_New(void)
1140{
1141 UnpicklerObject *self;
1142
1143 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1144 if (self == NULL)
1145 return NULL;
1146
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001147 self->arg = NULL;
1148 self->pers_func = NULL;
1149 self->input_buffer = NULL;
1150 self->input_line = NULL;
1151 self->input_len = 0;
1152 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001153 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001154 self->read = NULL;
1155 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->encoding = NULL;
1158 self->errors = NULL;
1159 self->marks = NULL;
1160 self->num_marks = 0;
1161 self->marks_size = 0;
1162 self->proto = 0;
1163 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001164 memset(&self->buffer, 0, sizeof(Py_buffer));
1165 self->memo_size = 32;
1166 self->memo = _Unpickler_NewMemo(self->memo_size);
1167 self->stack = (Pdata *)Pdata_New();
1168
1169 if (self->memo == NULL || self->stack == NULL) {
1170 Py_DECREF(self);
1171 return NULL;
1172 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001173
1174 return self;
1175}
1176
1177/* Returns -1 (with an exception set) on failure, 0 on success. This may
1178 be called once on a freshly created Pickler. */
1179static int
1180_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1181{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001182 _Py_IDENTIFIER(peek);
1183 _Py_IDENTIFIER(read);
1184 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001185
1186 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001187 if (self->peek == NULL) {
1188 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1189 PyErr_Clear();
1190 else
1191 return -1;
1192 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001193 self->read = _PyObject_GetAttrId(file, &PyId_read);
1194 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001195 if (self->readline == NULL || self->read == NULL) {
1196 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1197 PyErr_SetString(PyExc_TypeError,
1198 "file must have 'read' and 'readline' attributes");
1199 Py_CLEAR(self->read);
1200 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001201 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001202 return -1;
1203 }
1204 return 0;
1205}
1206
1207/* Returns -1 (with an exception set) on failure, 0 on success. This may
1208 be called once on a freshly created Pickler. */
1209static int
1210_Unpickler_SetInputEncoding(UnpicklerObject *self,
1211 const char *encoding,
1212 const char *errors)
1213{
1214 if (encoding == NULL)
1215 encoding = "ASCII";
1216 if (errors == NULL)
1217 errors = "strict";
1218
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001219 self->encoding = _PyMem_Strdup(encoding);
1220 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001221 if (self->encoding == NULL || self->errors == NULL) {
1222 PyErr_NoMemory();
1223 return -1;
1224 }
1225 return 0;
1226}
1227
1228/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229static int
1230memo_get(PicklerObject *self, PyObject *key)
1231{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001232 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001235
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001236 value = PyMemoTable_Get(self->memo, key);
1237 if (value == NULL) {
1238 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001239 return -1;
1240 }
1241
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 if (!self->bin) {
1243 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001244 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1245 "%" PY_FORMAT_SIZE_T "d\n", *value);
1246 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001247 }
1248 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001249 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001252 len = 2;
1253 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001254 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001255 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001256 pdata[1] = (unsigned char)(*value & 0xff);
1257 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1258 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1259 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 len = 5;
1261 }
1262 else { /* unlikely */
1263 PyErr_SetString(PicklingError,
1264 "memo id too large for LONG_BINGET");
1265 return -1;
1266 }
1267 }
1268
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001269 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001270 return -1;
1271
1272 return 0;
1273}
1274
1275/* Store an object in the memo, assign it a new unique ID based on the number
1276 of objects currently stored in the memo and generate a PUT opcode. */
1277static int
1278memo_put(PicklerObject *self, PyObject *obj)
1279{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001280 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001281 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001282 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001283 int status = 0;
1284
1285 if (self->fast)
1286 return 0;
1287
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001288 x = PyMemoTable_Size(self->memo);
1289 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001290 goto error;
1291
1292 if (!self->bin) {
1293 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001294 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1295 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001296 len = strlen(pdata);
1297 }
1298 else {
1299 if (x < 256) {
1300 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001301 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001302 len = 2;
1303 }
1304 else if (x <= 0xffffffffL) {
1305 pdata[0] = LONG_BINPUT;
1306 pdata[1] = (unsigned char)(x & 0xff);
1307 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1308 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1309 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1310 len = 5;
1311 }
1312 else { /* unlikely */
1313 PyErr_SetString(PicklingError,
1314 "memo id too large for LONG_BINPUT");
1315 return -1;
1316 }
1317 }
1318
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001319 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001320 goto error;
1321
1322 if (0) {
1323 error:
1324 status = -1;
1325 }
1326
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001327 return status;
1328}
1329
1330static PyObject *
1331whichmodule(PyObject *global, PyObject *global_name)
1332{
1333 Py_ssize_t i, j;
1334 static PyObject *module_str = NULL;
1335 static PyObject *main_str = NULL;
1336 PyObject *module_name;
1337 PyObject *modules_dict;
1338 PyObject *module;
1339 PyObject *obj;
1340
1341 if (module_str == NULL) {
1342 module_str = PyUnicode_InternFromString("__module__");
1343 if (module_str == NULL)
1344 return NULL;
1345 main_str = PyUnicode_InternFromString("__main__");
1346 if (main_str == NULL)
1347 return NULL;
1348 }
1349
1350 module_name = PyObject_GetAttr(global, module_str);
1351
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001352 /* In some rare cases (e.g., bound methods of extension types),
1353 __module__ can be None. If it is so, then search sys.modules
1354 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001355 if (module_name == Py_None) {
1356 Py_DECREF(module_name);
1357 goto search;
1358 }
1359
1360 if (module_name) {
1361 return module_name;
1362 }
1363 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1364 PyErr_Clear();
1365 else
1366 return NULL;
1367
1368 search:
Victor Stinnerbb520202013-11-06 22:40:41 +01001369 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02001370 if (modules_dict == NULL) {
1371 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001372 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001373 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001374
1375 i = 0;
1376 module_name = NULL;
1377 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001378 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001379 continue;
1380
1381 obj = PyObject_GetAttr(module, global_name);
1382 if (obj == NULL) {
1383 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1384 PyErr_Clear();
1385 else
1386 return NULL;
1387 continue;
1388 }
1389
1390 if (obj != global) {
1391 Py_DECREF(obj);
1392 continue;
1393 }
1394
1395 Py_DECREF(obj);
1396 break;
1397 }
1398
1399 /* If no module is found, use __main__. */
1400 if (!j) {
1401 module_name = main_str;
1402 }
1403
1404 Py_INCREF(module_name);
1405 return module_name;
1406}
1407
1408/* fast_save_enter() and fast_save_leave() are guards against recursive
1409 objects when Pickler is used with the "fast mode" (i.e., with object
1410 memoization disabled). If the nesting of a list or dict object exceed
1411 FAST_NESTING_LIMIT, these guards will start keeping an internal
1412 reference to the seen list or dict objects and check whether these objects
1413 are recursive. These are not strictly necessary, since save() has a
1414 hard-coded recursion limit, but they give a nicer error message than the
1415 typical RuntimeError. */
1416static int
1417fast_save_enter(PicklerObject *self, PyObject *obj)
1418{
1419 /* if fast_nesting < 0, we're doing an error exit. */
1420 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1421 PyObject *key = NULL;
1422 if (self->fast_memo == NULL) {
1423 self->fast_memo = PyDict_New();
1424 if (self->fast_memo == NULL) {
1425 self->fast_nesting = -1;
1426 return 0;
1427 }
1428 }
1429 key = PyLong_FromVoidPtr(obj);
1430 if (key == NULL)
1431 return 0;
1432 if (PyDict_GetItem(self->fast_memo, key)) {
1433 Py_DECREF(key);
1434 PyErr_Format(PyExc_ValueError,
1435 "fast mode: can't pickle cyclic objects "
1436 "including object type %.200s at %p",
1437 obj->ob_type->tp_name, obj);
1438 self->fast_nesting = -1;
1439 return 0;
1440 }
1441 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1442 Py_DECREF(key);
1443 self->fast_nesting = -1;
1444 return 0;
1445 }
1446 Py_DECREF(key);
1447 }
1448 return 1;
1449}
1450
1451static int
1452fast_save_leave(PicklerObject *self, PyObject *obj)
1453{
1454 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1455 PyObject *key = PyLong_FromVoidPtr(obj);
1456 if (key == NULL)
1457 return 0;
1458 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1459 Py_DECREF(key);
1460 return 0;
1461 }
1462 Py_DECREF(key);
1463 }
1464 return 1;
1465}
1466
1467static int
1468save_none(PicklerObject *self, PyObject *obj)
1469{
1470 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001471 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001472 return -1;
1473
1474 return 0;
1475}
1476
1477static int
1478save_bool(PicklerObject *self, PyObject *obj)
1479{
1480 static const char *buf[2] = { FALSE, TRUE };
1481 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1482 int p = (obj == Py_True);
1483
1484 if (self->proto >= 2) {
1485 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001486 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001487 return -1;
1488 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001489 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001490 return -1;
1491
1492 return 0;
1493}
1494
1495static int
1496save_int(PicklerObject *self, long x)
1497{
1498 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001499 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500
1501 if (!self->bin
1502#if SIZEOF_LONG > 4
1503 || x > 0x7fffffffL || x < -0x80000000L
1504#endif
1505 ) {
1506 /* Text-mode pickle, or long too big to fit in the 4-byte
1507 * signed BININT format: store as a string.
1508 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001509 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1510 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001511 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001512 return -1;
1513 }
1514 else {
1515 /* Binary pickle and x fits in a signed 4-byte int. */
1516 pdata[1] = (unsigned char)(x & 0xff);
1517 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1518 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1519 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1520
1521 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1522 if (pdata[2] == 0) {
1523 pdata[0] = BININT1;
1524 len = 2;
1525 }
1526 else {
1527 pdata[0] = BININT2;
1528 len = 3;
1529 }
1530 }
1531 else {
1532 pdata[0] = BININT;
1533 len = 5;
1534 }
1535
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001536 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001537 return -1;
1538 }
1539
1540 return 0;
1541}
1542
1543static int
1544save_long(PicklerObject *self, PyObject *obj)
1545{
1546 PyObject *repr = NULL;
1547 Py_ssize_t size;
1548 long val = PyLong_AsLong(obj);
1549 int status = 0;
1550
1551 const char long_op = LONG;
1552
1553 if (val == -1 && PyErr_Occurred()) {
1554 /* out of range for int pickling */
1555 PyErr_Clear();
1556 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001557 else
1558#if SIZEOF_LONG > 4
1559 if (val <= 0x7fffffffL && val >= -0x80000000L)
1560#endif
1561 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001562
1563 if (self->proto >= 2) {
1564 /* Linear-time pickling. */
1565 size_t nbits;
1566 size_t nbytes;
1567 unsigned char *pdata;
1568 char header[5];
1569 int i;
1570 int sign = _PyLong_Sign(obj);
1571
1572 if (sign == 0) {
1573 header[0] = LONG1;
1574 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001575 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001576 goto error;
1577 return 0;
1578 }
1579 nbits = _PyLong_NumBits(obj);
1580 if (nbits == (size_t)-1 && PyErr_Occurred())
1581 goto error;
1582 /* How many bytes do we need? There are nbits >> 3 full
1583 * bytes of data, and nbits & 7 leftover bits. If there
1584 * are any leftover bits, then we clearly need another
1585 * byte. Wnat's not so obvious is that we *probably*
1586 * need another byte even if there aren't any leftovers:
1587 * the most-significant bit of the most-significant byte
1588 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001589 * opposite of the one we need. The exception is ints
1590 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001591 * its own 256's-complement, so has the right sign bit
1592 * even without the extra byte. That's a pain to check
1593 * for in advance, though, so we always grab an extra
1594 * byte at the start, and cut it back later if possible.
1595 */
1596 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001597 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001598 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001599 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001600 goto error;
1601 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001602 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001603 if (repr == NULL)
1604 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001605 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001606 i = _PyLong_AsByteArray((PyLongObject *)obj,
1607 pdata, nbytes,
1608 1 /* little endian */ , 1 /* signed */ );
1609 if (i < 0)
1610 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001611 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001612 * needed. This is so iff the MSB is all redundant sign
1613 * bits.
1614 */
1615 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001616 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001617 pdata[nbytes - 1] == 0xff &&
1618 (pdata[nbytes - 2] & 0x80) != 0) {
1619 nbytes--;
1620 }
1621
1622 if (nbytes < 256) {
1623 header[0] = LONG1;
1624 header[1] = (unsigned char)nbytes;
1625 size = 2;
1626 }
1627 else {
1628 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001629 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001630 for (i = 1; i < 5; i++) {
1631 header[i] = (unsigned char)(size & 0xff);
1632 size >>= 8;
1633 }
1634 size = 5;
1635 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001636 if (_Pickler_Write(self, header, size) < 0 ||
1637 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001638 goto error;
1639 }
1640 else {
1641 char *string;
1642
Mark Dickinson8dd05142009-01-20 20:43:58 +00001643 /* proto < 2: write the repr and newline. This is quadratic-time (in
1644 the number of digits), in both directions. We add a trailing 'L'
1645 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001646
1647 repr = PyObject_Repr(obj);
1648 if (repr == NULL)
1649 goto error;
1650
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001651 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001652 if (string == NULL)
1653 goto error;
1654
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001655 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1656 _Pickler_Write(self, string, size) < 0 ||
1657 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001658 goto error;
1659 }
1660
1661 if (0) {
1662 error:
1663 status = -1;
1664 }
1665 Py_XDECREF(repr);
1666
1667 return status;
1668}
1669
1670static int
1671save_float(PicklerObject *self, PyObject *obj)
1672{
1673 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1674
1675 if (self->bin) {
1676 char pdata[9];
1677 pdata[0] = BINFLOAT;
1678 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1679 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001680 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001681 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001682 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001683 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001684 int result = -1;
1685 char *buf = NULL;
1686 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001687
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001688 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 goto done;
1690
Mark Dickinson3e09f432009-04-17 08:41:23 +00001691 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001692 if (!buf) {
1693 PyErr_NoMemory();
1694 goto done;
1695 }
1696
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001697 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001698 goto done;
1699
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001700 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001701 goto done;
1702
1703 result = 0;
1704done:
1705 PyMem_Free(buf);
1706 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001707 }
1708
1709 return 0;
1710}
1711
1712static int
1713save_bytes(PicklerObject *self, PyObject *obj)
1714{
1715 if (self->proto < 3) {
1716 /* Older pickle protocols do not have an opcode for pickling bytes
1717 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001718 the __reduce__ method) to permit bytes object unpickling.
1719
1720 Here we use a hack to be compatible with Python 2. Since in Python
1721 2 'bytes' is just an alias for 'str' (which has different
1722 parameters than the actual bytes object), we use codecs.encode
1723 to create the appropriate 'str' object when unpickled using
1724 Python 2 *and* the appropriate 'bytes' object when unpickled
1725 using Python 3. Again this is a hack and we don't need to do this
1726 with newer protocols. */
1727 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001728 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001729 int status;
1730
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001731 if (codecs_encode == NULL) {
1732 PyObject *codecs_module = PyImport_ImportModule("codecs");
1733 if (codecs_module == NULL) {
1734 return -1;
1735 }
1736 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1737 Py_DECREF(codecs_module);
1738 if (codecs_encode == NULL) {
1739 return -1;
1740 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001741 }
1742
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001743 if (PyBytes_GET_SIZE(obj) == 0) {
1744 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1745 }
1746 else {
1747 static PyObject *latin1 = NULL;
1748 PyObject *unicode_str =
1749 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1750 PyBytes_GET_SIZE(obj),
1751 "strict");
1752 if (unicode_str == NULL)
1753 return -1;
1754 if (latin1 == NULL) {
1755 latin1 = PyUnicode_InternFromString("latin1");
Christian Heimes82e6b942013-06-29 21:37:34 +02001756 if (latin1 == NULL) {
1757 Py_DECREF(unicode_str);
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001758 return -1;
Christian Heimes82e6b942013-06-29 21:37:34 +02001759 }
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001760 }
1761 reduce_value = Py_BuildValue("(O(OO))",
1762 codecs_encode, unicode_str, latin1);
1763 Py_DECREF(unicode_str);
1764 }
1765
1766 if (reduce_value == NULL)
1767 return -1;
1768
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001769 /* save_reduce() will memoize the object automatically. */
1770 status = save_reduce(self, reduce_value, obj);
1771 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001772 return status;
1773 }
1774 else {
1775 Py_ssize_t size;
1776 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001777 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001779 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780 if (size < 0)
1781 return -1;
1782
1783 if (size < 256) {
1784 header[0] = SHORT_BINBYTES;
1785 header[1] = (unsigned char)size;
1786 len = 2;
1787 }
1788 else if (size <= 0xffffffffL) {
1789 header[0] = BINBYTES;
1790 header[1] = (unsigned char)(size & 0xff);
1791 header[2] = (unsigned char)((size >> 8) & 0xff);
1792 header[3] = (unsigned char)((size >> 16) & 0xff);
1793 header[4] = (unsigned char)((size >> 24) & 0xff);
1794 len = 5;
1795 }
1796 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001797 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001798 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001799 return -1; /* string too large */
1800 }
1801
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001802 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001803 return -1;
1804
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001805 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001806 return -1;
1807
1808 if (memo_put(self, obj) < 0)
1809 return -1;
1810
1811 return 0;
1812 }
1813}
1814
1815/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1816 backslash and newline characters to \uXXXX escapes. */
1817static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001818raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001819{
1820 PyObject *repr, *result;
1821 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001822 Py_ssize_t i, size, expandsize;
1823 void *data;
1824 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001825
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001826 if (PyUnicode_READY(obj))
1827 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001828
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001829 size = PyUnicode_GET_LENGTH(obj);
1830 data = PyUnicode_DATA(obj);
1831 kind = PyUnicode_KIND(obj);
1832 if (kind == PyUnicode_4BYTE_KIND)
1833 expandsize = 10;
1834 else
1835 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001836
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001837 if (size > PY_SSIZE_T_MAX / expandsize)
1838 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001839 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001840 if (repr == NULL)
1841 return NULL;
1842 if (size == 0)
1843 goto done;
1844
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001845 p = PyByteArray_AS_STRING(repr);
1846 for (i=0; i < size; i++) {
1847 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001848 /* Map 32-bit characters to '\Uxxxxxxxx' */
1849 if (ch >= 0x10000) {
1850 *p++ = '\\';
1851 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001852 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1853 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1854 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1855 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1856 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1857 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1858 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1859 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001860 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001861 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001862 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001863 *p++ = '\\';
1864 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001865 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1866 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1867 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1868 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001869 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001870 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001871 else
1872 *p++ = (char) ch;
1873 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001874 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001875
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001876done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001877 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001878 Py_DECREF(repr);
1879 return result;
1880}
1881
1882static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02001883write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
1884{
1885 char pdata[5];
1886
1887#if SIZEOF_SIZE_T > 4
1888 if (size > 0xffffffffUL) {
1889 /* string too large */
1890 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02001891 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02001892 return -1;
1893 }
1894#endif
1895
1896 pdata[0] = BINUNICODE;
1897 pdata[1] = (unsigned char)(size & 0xff);
1898 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1899 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1900 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1901
1902 if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
1903 return -1;
1904
1905 if (_Pickler_Write(self, data, size) < 0)
1906 return -1;
1907
1908 return 0;
1909}
1910
1911static int
1912write_unicode_binary(PicklerObject *self, PyObject *obj)
1913{
1914 PyObject *encoded = NULL;
1915 Py_ssize_t size;
1916 char *data;
1917 int r;
1918
1919 if (PyUnicode_READY(obj))
1920 return -1;
1921
1922 data = PyUnicode_AsUTF8AndSize(obj, &size);
1923 if (data != NULL)
1924 return write_utf8(self, data, size);
1925
1926 /* Issue #8383: for strings with lone surrogates, fallback on the
1927 "surrogatepass" error handler. */
1928 PyErr_Clear();
1929 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
1930 if (encoded == NULL)
1931 return -1;
1932
1933 r = write_utf8(self, PyBytes_AS_STRING(encoded),
1934 PyBytes_GET_SIZE(encoded));
1935 Py_DECREF(encoded);
1936 return r;
1937}
1938
1939static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001940save_unicode(PicklerObject *self, PyObject *obj)
1941{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001942 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001943 if (write_unicode_binary(self, obj) < 0)
1944 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001945 }
1946 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001947 PyObject *encoded;
1948 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001949 const char unicode_op = UNICODE;
1950
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001951 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001952 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001953 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001954
Antoine Pitrou299978d2013-04-07 17:38:11 +02001955 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
1956 Py_DECREF(encoded);
1957 return -1;
1958 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001959
1960 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02001961 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
1962 Py_DECREF(encoded);
1963 return -1;
1964 }
1965 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001966
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001967 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001968 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969 }
1970 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001971 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974}
1975
1976/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1977static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001978store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001979{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001980 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001981
1982 assert(PyTuple_Size(t) == len);
1983
1984 for (i = 0; i < len; i++) {
1985 PyObject *element = PyTuple_GET_ITEM(t, i);
1986
1987 if (element == NULL)
1988 return -1;
1989 if (save(self, element, 0) < 0)
1990 return -1;
1991 }
1992
1993 return 0;
1994}
1995
1996/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1997 * used across protocols to minimize the space needed to pickle them.
1998 * Tuples are also the only builtin immutable type that can be recursive
1999 * (a tuple can be reached from itself), and that requires some subtle
2000 * magic so that it works in all cases. IOW, this is a long routine.
2001 */
2002static int
2003save_tuple(PicklerObject *self, PyObject *obj)
2004{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002005 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006
2007 const char mark_op = MARK;
2008 const char tuple_op = TUPLE;
2009 const char pop_op = POP;
2010 const char pop_mark_op = POP_MARK;
2011 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2012
2013 if ((len = PyTuple_Size(obj)) < 0)
2014 return -1;
2015
2016 if (len == 0) {
2017 char pdata[2];
2018
2019 if (self->proto) {
2020 pdata[0] = EMPTY_TUPLE;
2021 len = 1;
2022 }
2023 else {
2024 pdata[0] = MARK;
2025 pdata[1] = TUPLE;
2026 len = 2;
2027 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002028 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002029 return -1;
2030 return 0;
2031 }
2032
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002033 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 * saving the tuple elements, the tuple must be recursive, in
2035 * which case we'll pop everything we put on the stack, and fetch
2036 * its value from the memo.
2037 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 if (len <= 3 && self->proto >= 2) {
2039 /* Use TUPLE{1,2,3} opcodes. */
2040 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002041 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002042
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002043 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002044 /* pop the len elements */
2045 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002046 if (_Pickler_Write(self, &pop_op, 1) < 0)
2047 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002049 if (memo_get(self, obj) < 0)
2050 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002051
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002052 return 0;
2053 }
2054 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002055 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2056 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002057 }
2058 goto memoize;
2059 }
2060
2061 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2062 * Generate MARK e1 e2 ... TUPLE
2063 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002064 if (_Pickler_Write(self, &mark_op, 1) < 0)
2065 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002066
2067 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002068 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002069
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002070 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002071 /* pop the stack stuff we pushed */
2072 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002073 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2074 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002075 }
2076 else {
2077 /* Note that we pop one more than len, to remove
2078 * the MARK too.
2079 */
2080 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002081 if (_Pickler_Write(self, &pop_op, 1) < 0)
2082 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002083 }
2084 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002085 if (memo_get(self, obj) < 0)
2086 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002087
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002088 return 0;
2089 }
2090 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002091 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2092 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002093 }
2094
2095 memoize:
2096 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002097 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002098
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002099 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002100}
2101
2102/* iter is an iterator giving items, and we batch up chunks of
2103 * MARK item item ... item APPENDS
2104 * opcode sequences. Calling code should have arranged to first create an
2105 * empty list, or list-like object, for the APPENDS to operate on.
2106 * Returns 0 on success, <0 on error.
2107 */
2108static int
2109batch_list(PicklerObject *self, PyObject *iter)
2110{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002111 PyObject *obj = NULL;
2112 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002113 int i, n;
2114
2115 const char mark_op = MARK;
2116 const char append_op = APPEND;
2117 const char appends_op = APPENDS;
2118
2119 assert(iter != NULL);
2120
2121 /* XXX: I think this function could be made faster by avoiding the
2122 iterator interface and fetching objects directly from list using
2123 PyList_GET_ITEM.
2124 */
2125
2126 if (self->proto == 0) {
2127 /* APPENDS isn't available; do one at a time. */
2128 for (;;) {
2129 obj = PyIter_Next(iter);
2130 if (obj == NULL) {
2131 if (PyErr_Occurred())
2132 return -1;
2133 break;
2134 }
2135 i = save(self, obj, 0);
2136 Py_DECREF(obj);
2137 if (i < 0)
2138 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002139 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002140 return -1;
2141 }
2142 return 0;
2143 }
2144
2145 /* proto > 0: write in batches of BATCHSIZE. */
2146 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002147 /* Get first item */
2148 firstitem = PyIter_Next(iter);
2149 if (firstitem == NULL) {
2150 if (PyErr_Occurred())
2151 goto error;
2152
2153 /* nothing more to add */
2154 break;
2155 }
2156
2157 /* Try to get a second item */
2158 obj = PyIter_Next(iter);
2159 if (obj == NULL) {
2160 if (PyErr_Occurred())
2161 goto error;
2162
2163 /* Only one item to write */
2164 if (save(self, firstitem, 0) < 0)
2165 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002166 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002167 goto error;
2168 Py_CLEAR(firstitem);
2169 break;
2170 }
2171
2172 /* More than one item to write */
2173
2174 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002175 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002176 goto error;
2177
2178 if (save(self, firstitem, 0) < 0)
2179 goto error;
2180 Py_CLEAR(firstitem);
2181 n = 1;
2182
2183 /* Fetch and save up to BATCHSIZE items */
2184 while (obj) {
2185 if (save(self, obj, 0) < 0)
2186 goto error;
2187 Py_CLEAR(obj);
2188 n += 1;
2189
2190 if (n == BATCHSIZE)
2191 break;
2192
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002193 obj = PyIter_Next(iter);
2194 if (obj == NULL) {
2195 if (PyErr_Occurred())
2196 goto error;
2197 break;
2198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199 }
2200
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002201 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002202 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002203
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002204 } while (n == BATCHSIZE);
2205 return 0;
2206
2207 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002208 Py_XDECREF(firstitem);
2209 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002210 return -1;
2211}
2212
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002213/* This is a variant of batch_list() above, specialized for lists (with no
2214 * support for list subclasses). Like batch_list(), we batch up chunks of
2215 * MARK item item ... item APPENDS
2216 * opcode sequences. Calling code should have arranged to first create an
2217 * empty list, or list-like object, for the APPENDS to operate on.
2218 * Returns 0 on success, -1 on error.
2219 *
2220 * This version is considerably faster than batch_list(), if less general.
2221 *
2222 * Note that this only works for protocols > 0.
2223 */
2224static int
2225batch_list_exact(PicklerObject *self, PyObject *obj)
2226{
2227 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002228 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002229
2230 const char append_op = APPEND;
2231 const char appends_op = APPENDS;
2232 const char mark_op = MARK;
2233
2234 assert(obj != NULL);
2235 assert(self->proto > 0);
2236 assert(PyList_CheckExact(obj));
2237
2238 if (PyList_GET_SIZE(obj) == 1) {
2239 item = PyList_GET_ITEM(obj, 0);
2240 if (save(self, item, 0) < 0)
2241 return -1;
2242 if (_Pickler_Write(self, &append_op, 1) < 0)
2243 return -1;
2244 return 0;
2245 }
2246
2247 /* Write in batches of BATCHSIZE. */
2248 total = 0;
2249 do {
2250 this_batch = 0;
2251 if (_Pickler_Write(self, &mark_op, 1) < 0)
2252 return -1;
2253 while (total < PyList_GET_SIZE(obj)) {
2254 item = PyList_GET_ITEM(obj, total);
2255 if (save(self, item, 0) < 0)
2256 return -1;
2257 total++;
2258 if (++this_batch == BATCHSIZE)
2259 break;
2260 }
2261 if (_Pickler_Write(self, &appends_op, 1) < 0)
2262 return -1;
2263
2264 } while (total < PyList_GET_SIZE(obj));
2265
2266 return 0;
2267}
2268
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002269static int
2270save_list(PicklerObject *self, PyObject *obj)
2271{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002272 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002273 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002274 int status = 0;
2275
2276 if (self->fast && !fast_save_enter(self, obj))
2277 goto error;
2278
2279 /* Create an empty list. */
2280 if (self->bin) {
2281 header[0] = EMPTY_LIST;
2282 len = 1;
2283 }
2284 else {
2285 header[0] = MARK;
2286 header[1] = LIST;
2287 len = 2;
2288 }
2289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002290 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002291 goto error;
2292
2293 /* Get list length, and bow out early if empty. */
2294 if ((len = PyList_Size(obj)) < 0)
2295 goto error;
2296
2297 if (memo_put(self, obj) < 0)
2298 goto error;
2299
2300 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002301 /* Materialize the list elements. */
2302 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002303 if (Py_EnterRecursiveCall(" while pickling an object"))
2304 goto error;
2305 status = batch_list_exact(self, obj);
2306 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002307 } else {
2308 PyObject *iter = PyObject_GetIter(obj);
2309 if (iter == NULL)
2310 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002311
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002312 if (Py_EnterRecursiveCall(" while pickling an object")) {
2313 Py_DECREF(iter);
2314 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002315 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002316 status = batch_list(self, iter);
2317 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002318 Py_DECREF(iter);
2319 }
2320 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002321 if (0) {
2322 error:
2323 status = -1;
2324 }
2325
2326 if (self->fast && !fast_save_leave(self, obj))
2327 status = -1;
2328
2329 return status;
2330}
2331
2332/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2333 * MARK key value ... key value SETITEMS
2334 * opcode sequences. Calling code should have arranged to first create an
2335 * empty dict, or dict-like object, for the SETITEMS to operate on.
2336 * Returns 0 on success, <0 on error.
2337 *
2338 * This is very much like batch_list(). The difference between saving
2339 * elements directly, and picking apart two-tuples, is so long-winded at
2340 * the C level, though, that attempts to combine these routines were too
2341 * ugly to bear.
2342 */
2343static int
2344batch_dict(PicklerObject *self, PyObject *iter)
2345{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002346 PyObject *obj = NULL;
2347 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002348 int i, n;
2349
2350 const char mark_op = MARK;
2351 const char setitem_op = SETITEM;
2352 const char setitems_op = SETITEMS;
2353
2354 assert(iter != NULL);
2355
2356 if (self->proto == 0) {
2357 /* SETITEMS isn't available; do one at a time. */
2358 for (;;) {
2359 obj = PyIter_Next(iter);
2360 if (obj == NULL) {
2361 if (PyErr_Occurred())
2362 return -1;
2363 break;
2364 }
2365 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2366 PyErr_SetString(PyExc_TypeError, "dict items "
2367 "iterator must return 2-tuples");
2368 return -1;
2369 }
2370 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2371 if (i >= 0)
2372 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2373 Py_DECREF(obj);
2374 if (i < 0)
2375 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002376 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002377 return -1;
2378 }
2379 return 0;
2380 }
2381
2382 /* proto > 0: write in batches of BATCHSIZE. */
2383 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002384 /* Get first item */
2385 firstitem = PyIter_Next(iter);
2386 if (firstitem == NULL) {
2387 if (PyErr_Occurred())
2388 goto error;
2389
2390 /* nothing more to add */
2391 break;
2392 }
2393 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2394 PyErr_SetString(PyExc_TypeError, "dict items "
2395 "iterator must return 2-tuples");
2396 goto error;
2397 }
2398
2399 /* Try to get a second item */
2400 obj = PyIter_Next(iter);
2401 if (obj == NULL) {
2402 if (PyErr_Occurred())
2403 goto error;
2404
2405 /* Only one item to write */
2406 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2407 goto error;
2408 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2409 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002410 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002411 goto error;
2412 Py_CLEAR(firstitem);
2413 break;
2414 }
2415
2416 /* More than one item to write */
2417
2418 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002419 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002420 goto error;
2421
2422 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2423 goto error;
2424 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2425 goto error;
2426 Py_CLEAR(firstitem);
2427 n = 1;
2428
2429 /* Fetch and save up to BATCHSIZE items */
2430 while (obj) {
2431 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2432 PyErr_SetString(PyExc_TypeError, "dict items "
2433 "iterator must return 2-tuples");
2434 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002435 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002436 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2437 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2438 goto error;
2439 Py_CLEAR(obj);
2440 n += 1;
2441
2442 if (n == BATCHSIZE)
2443 break;
2444
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002445 obj = PyIter_Next(iter);
2446 if (obj == NULL) {
2447 if (PyErr_Occurred())
2448 goto error;
2449 break;
2450 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002451 }
2452
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002453 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002454 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002455
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002456 } while (n == BATCHSIZE);
2457 return 0;
2458
2459 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002460 Py_XDECREF(firstitem);
2461 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002462 return -1;
2463}
2464
Collin Winter5c9b02d2009-05-25 05:43:30 +00002465/* This is a variant of batch_dict() above that specializes for dicts, with no
2466 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2467 * MARK key value ... key value SETITEMS
2468 * opcode sequences. Calling code should have arranged to first create an
2469 * empty dict, or dict-like object, for the SETITEMS to operate on.
2470 * Returns 0 on success, -1 on error.
2471 *
2472 * Note that this currently doesn't work for protocol 0.
2473 */
2474static int
2475batch_dict_exact(PicklerObject *self, PyObject *obj)
2476{
2477 PyObject *key = NULL, *value = NULL;
2478 int i;
2479 Py_ssize_t dict_size, ppos = 0;
2480
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002481 const char mark_op = MARK;
2482 const char setitem_op = SETITEM;
2483 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002484
2485 assert(obj != NULL);
2486 assert(self->proto > 0);
2487
2488 dict_size = PyDict_Size(obj);
2489
2490 /* Special-case len(d) == 1 to save space. */
2491 if (dict_size == 1) {
2492 PyDict_Next(obj, &ppos, &key, &value);
2493 if (save(self, key, 0) < 0)
2494 return -1;
2495 if (save(self, value, 0) < 0)
2496 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002497 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002498 return -1;
2499 return 0;
2500 }
2501
2502 /* Write in batches of BATCHSIZE. */
2503 do {
2504 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002505 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002506 return -1;
2507 while (PyDict_Next(obj, &ppos, &key, &value)) {
2508 if (save(self, key, 0) < 0)
2509 return -1;
2510 if (save(self, value, 0) < 0)
2511 return -1;
2512 if (++i == BATCHSIZE)
2513 break;
2514 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002515 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002516 return -1;
2517 if (PyDict_Size(obj) != dict_size) {
2518 PyErr_Format(
2519 PyExc_RuntimeError,
2520 "dictionary changed size during iteration");
2521 return -1;
2522 }
2523
2524 } while (i == BATCHSIZE);
2525 return 0;
2526}
2527
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002528static int
2529save_dict(PicklerObject *self, PyObject *obj)
2530{
2531 PyObject *items, *iter;
2532 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002533 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002534 int status = 0;
2535
2536 if (self->fast && !fast_save_enter(self, obj))
2537 goto error;
2538
2539 /* Create an empty dict. */
2540 if (self->bin) {
2541 header[0] = EMPTY_DICT;
2542 len = 1;
2543 }
2544 else {
2545 header[0] = MARK;
2546 header[1] = DICT;
2547 len = 2;
2548 }
2549
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002550 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002551 goto error;
2552
2553 /* Get dict size, and bow out early if empty. */
2554 if ((len = PyDict_Size(obj)) < 0)
2555 goto error;
2556
2557 if (memo_put(self, obj) < 0)
2558 goto error;
2559
2560 if (len != 0) {
2561 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002562 if (PyDict_CheckExact(obj) && self->proto > 0) {
2563 /* We can take certain shortcuts if we know this is a dict and
2564 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002565 if (Py_EnterRecursiveCall(" while pickling an object"))
2566 goto error;
2567 status = batch_dict_exact(self, obj);
2568 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002569 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002570 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002571
2572 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002573 if (items == NULL)
2574 goto error;
2575 iter = PyObject_GetIter(items);
2576 Py_DECREF(items);
2577 if (iter == NULL)
2578 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002579 if (Py_EnterRecursiveCall(" while pickling an object")) {
2580 Py_DECREF(iter);
2581 goto error;
2582 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002583 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002584 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002585 Py_DECREF(iter);
2586 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002587 }
2588
2589 if (0) {
2590 error:
2591 status = -1;
2592 }
2593
2594 if (self->fast && !fast_save_leave(self, obj))
2595 status = -1;
2596
2597 return status;
2598}
2599
2600static int
2601save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2602{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002603 PyObject *global_name = NULL;
2604 PyObject *module_name = NULL;
2605 PyObject *module = NULL;
2606 PyObject *cls;
2607 int status = 0;
2608
2609 const char global_op = GLOBAL;
2610
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002611 if (name) {
2612 global_name = name;
2613 Py_INCREF(global_name);
2614 }
2615 else {
Victor Stinner804e05e2013-11-14 01:26:17 +01002616 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002617 if (global_name == NULL)
2618 goto error;
2619 }
2620
2621 module_name = whichmodule(obj, global_name);
2622 if (module_name == NULL)
2623 goto error;
2624
2625 /* XXX: Change to use the import C API directly with level=0 to disallow
2626 relative imports.
2627
2628 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2629 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2630 custom import functions (IMHO, this would be a nice security
2631 feature). The import C API would need to be extended to support the
2632 extra parameters of __import__ to fix that. */
2633 module = PyImport_Import(module_name);
2634 if (module == NULL) {
2635 PyErr_Format(PicklingError,
2636 "Can't pickle %R: import of module %R failed",
2637 obj, module_name);
2638 goto error;
2639 }
2640 cls = PyObject_GetAttr(module, global_name);
2641 if (cls == NULL) {
2642 PyErr_Format(PicklingError,
2643 "Can't pickle %R: attribute lookup %S.%S failed",
2644 obj, module_name, global_name);
2645 goto error;
2646 }
2647 if (cls != obj) {
2648 Py_DECREF(cls);
2649 PyErr_Format(PicklingError,
2650 "Can't pickle %R: it's not the same object as %S.%S",
2651 obj, module_name, global_name);
2652 goto error;
2653 }
2654 Py_DECREF(cls);
2655
2656 if (self->proto >= 2) {
2657 /* See whether this is in the extension registry, and if
2658 * so generate an EXT opcode.
2659 */
2660 PyObject *code_obj; /* extension code as Python object */
2661 long code; /* extension code as C value */
2662 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002663 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002664
2665 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2666 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2667 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2668 /* The object is not registered in the extension registry.
2669 This is the most likely code path. */
2670 if (code_obj == NULL)
2671 goto gen_global;
2672
2673 /* XXX: pickle.py doesn't check neither the type, nor the range
2674 of the value returned by the extension_registry. It should for
2675 consistency. */
2676
2677 /* Verify code_obj has the right type and value. */
2678 if (!PyLong_Check(code_obj)) {
2679 PyErr_Format(PicklingError,
2680 "Can't pickle %R: extension code %R isn't an integer",
2681 obj, code_obj);
2682 goto error;
2683 }
2684 code = PyLong_AS_LONG(code_obj);
2685 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002686 if (!PyErr_Occurred())
2687 PyErr_Format(PicklingError,
2688 "Can't pickle %R: extension code %ld is out of range",
2689 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002690 goto error;
2691 }
2692
2693 /* Generate an EXT opcode. */
2694 if (code <= 0xff) {
2695 pdata[0] = EXT1;
2696 pdata[1] = (unsigned char)code;
2697 n = 2;
2698 }
2699 else if (code <= 0xffff) {
2700 pdata[0] = EXT2;
2701 pdata[1] = (unsigned char)(code & 0xff);
2702 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2703 n = 3;
2704 }
2705 else {
2706 pdata[0] = EXT4;
2707 pdata[1] = (unsigned char)(code & 0xff);
2708 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2709 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2710 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2711 n = 5;
2712 }
2713
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002714 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002715 goto error;
2716 }
2717 else {
2718 /* Generate a normal global opcode if we are using a pickle
2719 protocol <= 2, or if the object is not registered in the
2720 extension registry. */
2721 PyObject *encoded;
2722 PyObject *(*unicode_encoder)(PyObject *);
2723
2724 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002725 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002726 goto error;
2727
2728 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2729 the module name and the global name using UTF-8. We do so only when
2730 we are using the pickle protocol newer than version 3. This is to
2731 ensure compatibility with older Unpickler running on Python 2.x. */
2732 if (self->proto >= 3) {
2733 unicode_encoder = PyUnicode_AsUTF8String;
2734 }
2735 else {
2736 unicode_encoder = PyUnicode_AsASCIIString;
2737 }
2738
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002739 /* For protocol < 3 and if the user didn't request against doing so,
2740 we convert module names to the old 2.x module names. */
2741 if (self->fix_imports) {
2742 PyObject *key;
2743 PyObject *item;
2744
2745 key = PyTuple_Pack(2, module_name, global_name);
2746 if (key == NULL)
2747 goto error;
2748 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2749 Py_DECREF(key);
2750 if (item) {
2751 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2752 PyErr_Format(PyExc_RuntimeError,
2753 "_compat_pickle.REVERSE_NAME_MAPPING values "
2754 "should be 2-tuples, not %.200s",
2755 Py_TYPE(item)->tp_name);
2756 goto error;
2757 }
2758 Py_CLEAR(module_name);
2759 Py_CLEAR(global_name);
2760 module_name = PyTuple_GET_ITEM(item, 0);
2761 global_name = PyTuple_GET_ITEM(item, 1);
2762 if (!PyUnicode_Check(module_name) ||
2763 !PyUnicode_Check(global_name)) {
2764 PyErr_Format(PyExc_RuntimeError,
2765 "_compat_pickle.REVERSE_NAME_MAPPING values "
2766 "should be pairs of str, not (%.200s, %.200s)",
2767 Py_TYPE(module_name)->tp_name,
2768 Py_TYPE(global_name)->tp_name);
2769 goto error;
2770 }
2771 Py_INCREF(module_name);
2772 Py_INCREF(global_name);
2773 }
2774 else if (PyErr_Occurred()) {
2775 goto error;
2776 }
2777
2778 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2779 if (item) {
2780 if (!PyUnicode_Check(item)) {
2781 PyErr_Format(PyExc_RuntimeError,
2782 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2783 "should be strings, not %.200s",
2784 Py_TYPE(item)->tp_name);
2785 goto error;
2786 }
2787 Py_CLEAR(module_name);
2788 module_name = item;
2789 Py_INCREF(module_name);
2790 }
2791 else if (PyErr_Occurred()) {
2792 goto error;
2793 }
2794 }
2795
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002796 /* Save the name of the module. */
2797 encoded = unicode_encoder(module_name);
2798 if (encoded == NULL) {
2799 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2800 PyErr_Format(PicklingError,
2801 "can't pickle module identifier '%S' using "
2802 "pickle protocol %i", module_name, self->proto);
2803 goto error;
2804 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002805 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002806 PyBytes_GET_SIZE(encoded)) < 0) {
2807 Py_DECREF(encoded);
2808 goto error;
2809 }
2810 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002811 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002812 goto error;
2813
2814 /* Save the name of the module. */
2815 encoded = unicode_encoder(global_name);
2816 if (encoded == NULL) {
2817 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2818 PyErr_Format(PicklingError,
2819 "can't pickle global identifier '%S' using "
2820 "pickle protocol %i", global_name, self->proto);
2821 goto error;
2822 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002823 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002824 PyBytes_GET_SIZE(encoded)) < 0) {
2825 Py_DECREF(encoded);
2826 goto error;
2827 }
2828 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002829 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002830 goto error;
2831
2832 /* Memoize the object. */
2833 if (memo_put(self, obj) < 0)
2834 goto error;
2835 }
2836
2837 if (0) {
2838 error:
2839 status = -1;
2840 }
2841 Py_XDECREF(module_name);
2842 Py_XDECREF(global_name);
2843 Py_XDECREF(module);
2844
2845 return status;
2846}
2847
2848static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002849save_ellipsis(PicklerObject *self, PyObject *obj)
2850{
Łukasz Langadbd78252012-03-12 22:59:11 +01002851 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002852 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002853 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002854 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002855 res = save_global(self, Py_Ellipsis, str);
2856 Py_DECREF(str);
2857 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002858}
2859
2860static int
2861save_notimplemented(PicklerObject *self, PyObject *obj)
2862{
Łukasz Langadbd78252012-03-12 22:59:11 +01002863 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002864 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002865 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002866 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002867 res = save_global(self, Py_NotImplemented, str);
2868 Py_DECREF(str);
2869 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002870}
2871
2872static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002873save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2874{
2875 PyObject *pid = NULL;
2876 int status = 0;
2877
2878 const char persid_op = PERSID;
2879 const char binpersid_op = BINPERSID;
2880
2881 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002882 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002883 if (pid == NULL)
2884 return -1;
2885
2886 if (pid != Py_None) {
2887 if (self->bin) {
2888 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002889 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002890 goto error;
2891 }
2892 else {
2893 PyObject *pid_str = NULL;
2894 char *pid_ascii_bytes;
2895 Py_ssize_t size;
2896
2897 pid_str = PyObject_Str(pid);
2898 if (pid_str == NULL)
2899 goto error;
2900
2901 /* XXX: Should it check whether the persistent id only contains
2902 ASCII characters? And what if the pid contains embedded
2903 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002904 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002905 Py_DECREF(pid_str);
2906 if (pid_ascii_bytes == NULL)
2907 goto error;
2908
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002909 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2910 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2911 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002912 goto error;
2913 }
2914 status = 1;
2915 }
2916
2917 if (0) {
2918 error:
2919 status = -1;
2920 }
2921 Py_XDECREF(pid);
2922
2923 return status;
2924}
2925
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002926static PyObject *
2927get_class(PyObject *obj)
2928{
2929 PyObject *cls;
2930 static PyObject *str_class;
2931
2932 if (str_class == NULL) {
2933 str_class = PyUnicode_InternFromString("__class__");
2934 if (str_class == NULL)
2935 return NULL;
2936 }
2937 cls = PyObject_GetAttr(obj, str_class);
2938 if (cls == NULL) {
2939 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2940 PyErr_Clear();
2941 cls = (PyObject *) Py_TYPE(obj);
2942 Py_INCREF(cls);
2943 }
2944 }
2945 return cls;
2946}
2947
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002948/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2949 * appropriate __reduce__ method for obj.
2950 */
2951static int
2952save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2953{
2954 PyObject *callable;
2955 PyObject *argtup;
2956 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002957 PyObject *listitems = Py_None;
2958 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002959 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960
2961 int use_newobj = self->proto >= 2;
2962
2963 const char reduce_op = REDUCE;
2964 const char build_op = BUILD;
2965 const char newobj_op = NEWOBJ;
2966
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002967 size = PyTuple_Size(args);
2968 if (size < 2 || size > 5) {
2969 PyErr_SetString(PicklingError, "tuple returned by "
2970 "__reduce__ must contain 2 through 5 elements");
2971 return -1;
2972 }
2973
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002974 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2975 &callable, &argtup, &state, &listitems, &dictitems))
2976 return -1;
2977
2978 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002979 PyErr_SetString(PicklingError, "first item of the tuple "
2980 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002981 return -1;
2982 }
2983 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002984 PyErr_SetString(PicklingError, "second item of the tuple "
2985 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002986 return -1;
2987 }
2988
2989 if (state == Py_None)
2990 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002991
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002992 if (listitems == Py_None)
2993 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002994 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07002995 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002996 "returned by __reduce__ must be an iterator, not %s",
2997 Py_TYPE(listitems)->tp_name);
2998 return -1;
2999 }
3000
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003001 if (dictitems == Py_None)
3002 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003003 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003004 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003005 "returned by __reduce__ must be an iterator, not %s",
3006 Py_TYPE(dictitems)->tp_name);
3007 return -1;
3008 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003009
3010 /* Protocol 2 special case: if callable's name is __newobj__, use
3011 NEWOBJ. */
3012 if (use_newobj) {
Victor Stinner804e05e2013-11-14 01:26:17 +01003013 static PyObject *newobj_str = NULL;
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003014 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003015
3016 if (newobj_str == NULL) {
3017 newobj_str = PyUnicode_InternFromString("__newobj__");
Victor Stinner804e05e2013-11-14 01:26:17 +01003018 if (newobj_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003019 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003020 }
3021
Victor Stinner804e05e2013-11-14 01:26:17 +01003022 name = _PyObject_GetAttrId(callable, &PyId___name__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003023 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003024 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3025 PyErr_Clear();
3026 else
3027 return -1;
3028 use_newobj = 0;
3029 }
3030 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003031 use_newobj = PyUnicode_Check(name) &&
3032 PyUnicode_Compare(name, newobj_str) == 0;
3033 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003034 }
3035 }
3036 if (use_newobj) {
3037 PyObject *cls;
3038 PyObject *newargtup;
3039 PyObject *obj_class;
3040 int p;
3041
3042 /* Sanity checks. */
3043 if (Py_SIZE(argtup) < 1) {
3044 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3045 return -1;
3046 }
3047
3048 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003049 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003050 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003051 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003052 return -1;
3053 }
3054
3055 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003056 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003057 p = obj_class != cls; /* true iff a problem */
3058 Py_DECREF(obj_class);
3059 if (p) {
3060 PyErr_SetString(PicklingError, "args[0] from "
3061 "__newobj__ args has the wrong class");
3062 return -1;
3063 }
3064 }
3065 /* XXX: These calls save() are prone to infinite recursion. Imagine
3066 what happen if the value returned by the __reduce__() method of
3067 some extension type contains another object of the same type. Ouch!
3068
3069 Here is a quick example, that I ran into, to illustrate what I
3070 mean:
3071
3072 >>> import pickle, copyreg
3073 >>> copyreg.dispatch_table.pop(complex)
3074 >>> pickle.dumps(1+2j)
3075 Traceback (most recent call last):
3076 ...
3077 RuntimeError: maximum recursion depth exceeded
3078
3079 Removing the complex class from copyreg.dispatch_table made the
3080 __reduce_ex__() method emit another complex object:
3081
3082 >>> (1+1j).__reduce_ex__(2)
3083 (<function __newobj__ at 0xb7b71c3c>,
3084 (<class 'complex'>, (1+1j)), None, None, None)
3085
3086 Thus when save() was called on newargstup (the 2nd item) recursion
3087 ensued. Of course, the bug was in the complex class which had a
3088 broken __getnewargs__() that emitted another complex object. But,
3089 the point, here, is it is quite easy to end up with a broken reduce
3090 function. */
3091
3092 /* Save the class and its __new__ arguments. */
3093 if (save(self, cls, 0) < 0)
3094 return -1;
3095
3096 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3097 if (newargtup == NULL)
3098 return -1;
3099
3100 p = save(self, newargtup, 0);
3101 Py_DECREF(newargtup);
3102 if (p < 0)
3103 return -1;
3104
3105 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003106 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003107 return -1;
3108 }
3109 else { /* Not using NEWOBJ. */
3110 if (save(self, callable, 0) < 0 ||
3111 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003112 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003113 return -1;
3114 }
3115
3116 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3117 the caller do not want to memoize the object. Not particularly useful,
3118 but that is to mimic the behavior save_reduce() in pickle.py when
3119 obj is None. */
3120 if (obj && memo_put(self, obj) < 0)
3121 return -1;
3122
3123 if (listitems && batch_list(self, listitems) < 0)
3124 return -1;
3125
3126 if (dictitems && batch_dict(self, dictitems) < 0)
3127 return -1;
3128
3129 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003130 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003131 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003132 return -1;
3133 }
3134
3135 return 0;
3136}
3137
3138static int
3139save(PicklerObject *self, PyObject *obj, int pers_save)
3140{
3141 PyTypeObject *type;
3142 PyObject *reduce_func = NULL;
3143 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003144 int status = 0;
3145
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003146 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003147 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003148
3149 /* The extra pers_save argument is necessary to avoid calling save_pers()
3150 on its returned object. */
3151 if (!pers_save && self->pers_func) {
3152 /* save_pers() returns:
3153 -1 to signal an error;
3154 0 if it did nothing successfully;
3155 1 if a persistent id was saved.
3156 */
3157 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3158 goto done;
3159 }
3160
3161 type = Py_TYPE(obj);
3162
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003163 /* The old cPickle had an optimization that used switch-case statement
3164 dispatching on the first letter of the type name. This has was removed
3165 since benchmarks shown that this optimization was actually slowing
3166 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003167
3168 /* Atom types; these aren't memoized, so don't check the memo. */
3169
3170 if (obj == Py_None) {
3171 status = save_none(self, obj);
3172 goto done;
3173 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003174 else if (obj == Py_Ellipsis) {
3175 status = save_ellipsis(self, obj);
3176 goto done;
3177 }
3178 else if (obj == Py_NotImplemented) {
3179 status = save_notimplemented(self, obj);
3180 goto done;
3181 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003182 else if (obj == Py_False || obj == Py_True) {
3183 status = save_bool(self, obj);
3184 goto done;
3185 }
3186 else if (type == &PyLong_Type) {
3187 status = save_long(self, obj);
3188 goto done;
3189 }
3190 else if (type == &PyFloat_Type) {
3191 status = save_float(self, obj);
3192 goto done;
3193 }
3194
3195 /* Check the memo to see if it has the object. If so, generate
3196 a GET (or BINGET) opcode, instead of pickling the object
3197 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003198 if (PyMemoTable_Get(self->memo, obj)) {
3199 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003200 goto error;
3201 goto done;
3202 }
3203
3204 if (type == &PyBytes_Type) {
3205 status = save_bytes(self, obj);
3206 goto done;
3207 }
3208 else if (type == &PyUnicode_Type) {
3209 status = save_unicode(self, obj);
3210 goto done;
3211 }
3212 else if (type == &PyDict_Type) {
3213 status = save_dict(self, obj);
3214 goto done;
3215 }
3216 else if (type == &PyList_Type) {
3217 status = save_list(self, obj);
3218 goto done;
3219 }
3220 else if (type == &PyTuple_Type) {
3221 status = save_tuple(self, obj);
3222 goto done;
3223 }
3224 else if (type == &PyType_Type) {
3225 status = save_global(self, obj, NULL);
3226 goto done;
3227 }
3228 else if (type == &PyFunction_Type) {
3229 status = save_global(self, obj, NULL);
3230 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3231 /* fall back to reduce */
3232 PyErr_Clear();
3233 }
3234 else {
3235 goto done;
3236 }
3237 }
3238 else if (type == &PyCFunction_Type) {
3239 status = save_global(self, obj, NULL);
3240 goto done;
3241 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003242
3243 /* XXX: This part needs some unit tests. */
3244
3245 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003246 * self.dispatch_table, copyreg.dispatch_table, the object's
3247 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003248 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003249 if (self->dispatch_table == NULL) {
3250 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3251 /* PyDict_GetItem() unlike PyObject_GetItem() and
3252 PyObject_GetAttr() returns a borrowed ref */
3253 Py_XINCREF(reduce_func);
3254 } else {
3255 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3256 if (reduce_func == NULL) {
3257 if (PyErr_ExceptionMatches(PyExc_KeyError))
3258 PyErr_Clear();
3259 else
3260 goto error;
3261 }
3262 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003263 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003264 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003265 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003266 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003267 else if (PyType_IsSubtype(type, &PyType_Type)) {
3268 status = save_global(self, obj, NULL);
3269 goto done;
3270 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003271 else {
3272 static PyObject *reduce_str = NULL;
3273 static PyObject *reduce_ex_str = NULL;
3274
3275 /* Cache the name of the reduce methods. */
3276 if (reduce_str == NULL) {
3277 reduce_str = PyUnicode_InternFromString("__reduce__");
3278 if (reduce_str == NULL)
3279 goto error;
3280 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3281 if (reduce_ex_str == NULL)
3282 goto error;
3283 }
3284
3285 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3286 automatically defined as __reduce__. While this is convenient, this
3287 make it impossible to know which method was actually called. Of
3288 course, this is not a big deal. But still, it would be nice to let
3289 the user know which method was called when something go
3290 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3291 don't actually have to check for a __reduce__ method. */
3292
3293 /* Check for a __reduce_ex__ method. */
3294 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3295 if (reduce_func != NULL) {
3296 PyObject *proto;
3297 proto = PyLong_FromLong(self->proto);
3298 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003299 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003300 }
3301 }
3302 else {
3303 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3304 PyErr_Clear();
3305 else
3306 goto error;
3307 /* Check for a __reduce__ method. */
3308 reduce_func = PyObject_GetAttr(obj, reduce_str);
3309 if (reduce_func != NULL) {
3310 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3311 }
3312 else {
3313 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3314 type->tp_name, obj);
3315 goto error;
3316 }
3317 }
3318 }
3319
3320 if (reduce_value == NULL)
3321 goto error;
3322
3323 if (PyUnicode_Check(reduce_value)) {
3324 status = save_global(self, obj, reduce_value);
3325 goto done;
3326 }
3327
3328 if (!PyTuple_Check(reduce_value)) {
3329 PyErr_SetString(PicklingError,
3330 "__reduce__ must return a string or tuple");
3331 goto error;
3332 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003333
3334 status = save_reduce(self, reduce_value, obj);
3335
3336 if (0) {
3337 error:
3338 status = -1;
3339 }
3340 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003341 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003342 Py_XDECREF(reduce_func);
3343 Py_XDECREF(reduce_value);
3344
3345 return status;
3346}
3347
3348static int
3349dump(PicklerObject *self, PyObject *obj)
3350{
3351 const char stop_op = STOP;
3352
3353 if (self->proto >= 2) {
3354 char header[2];
3355
3356 header[0] = PROTO;
3357 assert(self->proto >= 0 && self->proto < 256);
3358 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003359 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003360 return -1;
3361 }
3362
3363 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003364 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003365 return -1;
3366
3367 return 0;
3368}
3369
3370PyDoc_STRVAR(Pickler_clear_memo_doc,
3371"clear_memo() -> None. Clears the pickler's \"memo\"."
3372"\n"
3373"The memo is the data structure that remembers which objects the\n"
3374"pickler has already seen, so that shared or recursive objects are\n"
3375"pickled by reference and not by value. This method is useful when\n"
3376"re-using picklers.");
3377
3378static PyObject *
3379Pickler_clear_memo(PicklerObject *self)
3380{
3381 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003382 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003383
3384 Py_RETURN_NONE;
3385}
3386
3387PyDoc_STRVAR(Pickler_dump_doc,
3388"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3389
3390static PyObject *
3391Pickler_dump(PicklerObject *self, PyObject *args)
3392{
3393 PyObject *obj;
3394
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003395 /* Check whether the Pickler was initialized correctly (issue3664).
3396 Developers often forget to call __init__() in their subclasses, which
3397 would trigger a segfault without this check. */
3398 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003399 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003400 "Pickler.__init__() was not called by %s.__init__()",
3401 Py_TYPE(self)->tp_name);
3402 return NULL;
3403 }
3404
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003405 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3406 return NULL;
3407
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003408 if (_Pickler_ClearBuffer(self) < 0)
3409 return NULL;
3410
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003411 if (dump(self, obj) < 0)
3412 return NULL;
3413
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003414 if (_Pickler_FlushToFile(self) < 0)
3415 return NULL;
3416
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003417 Py_RETURN_NONE;
3418}
3419
3420static struct PyMethodDef Pickler_methods[] = {
3421 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3422 Pickler_dump_doc},
3423 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3424 Pickler_clear_memo_doc},
3425 {NULL, NULL} /* sentinel */
3426};
3427
3428static void
3429Pickler_dealloc(PicklerObject *self)
3430{
3431 PyObject_GC_UnTrack(self);
3432
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003433 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003434 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003435 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003436 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003437 Py_XDECREF(self->arg);
3438 Py_XDECREF(self->fast_memo);
3439
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003440 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003441
3442 Py_TYPE(self)->tp_free((PyObject *)self);
3443}
3444
3445static int
3446Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3447{
3448 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003449 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003450 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003451 Py_VISIT(self->arg);
3452 Py_VISIT(self->fast_memo);
3453 return 0;
3454}
3455
3456static int
3457Pickler_clear(PicklerObject *self)
3458{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003459 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003460 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003461 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003462 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003463 Py_CLEAR(self->arg);
3464 Py_CLEAR(self->fast_memo);
3465
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003466 if (self->memo != NULL) {
3467 PyMemoTable *memo = self->memo;
3468 self->memo = NULL;
3469 PyMemoTable_Del(memo);
3470 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003471 return 0;
3472}
3473
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003474
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003475PyDoc_STRVAR(Pickler_doc,
3476"Pickler(file, protocol=None)"
3477"\n"
3478"This takes a binary file for writing a pickle data stream.\n"
3479"\n"
3480"The optional protocol argument tells the pickler to use the\n"
3481"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3482"protocol is 3; a backward-incompatible protocol designed for\n"
3483"Python 3.0.\n"
3484"\n"
3485"Specifying a negative protocol version selects the highest\n"
3486"protocol version supported. The higher the protocol used, the\n"
3487"more recent the version of Python needed to read the pickle\n"
3488"produced.\n"
3489"\n"
3490"The file argument must have a write() method that accepts a single\n"
3491"bytes argument. It can thus be a file object opened for binary\n"
3492"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003493"meets this interface.\n"
3494"\n"
3495"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3496"map the new Python 3.x names to the old module names used in Python\n"
3497"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003498
3499static int
3500Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3501{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003502 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003503 PyObject *file;
3504 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003505 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003506 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003507 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003508
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003509 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003510 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003511 return -1;
3512
3513 /* In case of multiple __init__() calls, clear previous content. */
3514 if (self->write != NULL)
3515 (void)Pickler_clear(self);
3516
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003517 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3518 return -1;
3519
3520 if (_Pickler_SetOutputStream(self, file) < 0)
3521 return -1;
3522
3523 /* memo and output_buffer may have already been created in _Pickler_New */
3524 if (self->memo == NULL) {
3525 self->memo = PyMemoTable_New();
3526 if (self->memo == NULL)
3527 return -1;
3528 }
3529 self->output_len = 0;
3530 if (self->output_buffer == NULL) {
3531 self->max_output_len = WRITE_BUF_SIZE;
3532 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3533 self->max_output_len);
3534 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003535 return -1;
3536 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003537
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003538 self->arg = NULL;
3539 self->fast = 0;
3540 self->fast_nesting = 0;
3541 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003542 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003543 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3544 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3545 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003546 if (self->pers_func == NULL)
3547 return -1;
3548 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003549 self->dispatch_table = NULL;
3550 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3551 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3552 &PyId_dispatch_table);
3553 if (self->dispatch_table == NULL)
3554 return -1;
3555 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003556 return 0;
3557}
3558
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003559/* Define a proxy object for the Pickler's internal memo object. This is to
3560 * avoid breaking code like:
3561 * pickler.memo.clear()
3562 * and
3563 * pickler.memo = saved_memo
3564 * Is this a good idea? Not really, but we don't want to break code that uses
3565 * it. Note that we don't implement the entire mapping API here. This is
3566 * intentional, as these should be treated as black-box implementation details.
3567 */
3568
3569typedef struct {
3570 PyObject_HEAD
3571 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3572} PicklerMemoProxyObject;
3573
3574PyDoc_STRVAR(pmp_clear_doc,
3575"memo.clear() -> None. Remove all items from memo.");
3576
3577static PyObject *
3578pmp_clear(PicklerMemoProxyObject *self)
3579{
3580 if (self->pickler->memo)
3581 PyMemoTable_Clear(self->pickler->memo);
3582 Py_RETURN_NONE;
3583}
3584
3585PyDoc_STRVAR(pmp_copy_doc,
3586"memo.copy() -> new_memo. Copy the memo to a new object.");
3587
3588static PyObject *
3589pmp_copy(PicklerMemoProxyObject *self)
3590{
3591 Py_ssize_t i;
3592 PyMemoTable *memo;
3593 PyObject *new_memo = PyDict_New();
3594 if (new_memo == NULL)
3595 return NULL;
3596
3597 memo = self->pickler->memo;
3598 for (i = 0; i < memo->mt_allocated; ++i) {
3599 PyMemoEntry entry = memo->mt_table[i];
3600 if (entry.me_key != NULL) {
3601 int status;
3602 PyObject *key, *value;
3603
3604 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003605 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003606
3607 if (key == NULL || value == NULL) {
3608 Py_XDECREF(key);
3609 Py_XDECREF(value);
3610 goto error;
3611 }
3612 status = PyDict_SetItem(new_memo, key, value);
3613 Py_DECREF(key);
3614 Py_DECREF(value);
3615 if (status < 0)
3616 goto error;
3617 }
3618 }
3619 return new_memo;
3620
3621 error:
3622 Py_XDECREF(new_memo);
3623 return NULL;
3624}
3625
3626PyDoc_STRVAR(pmp_reduce_doc,
3627"memo.__reduce__(). Pickling support.");
3628
3629static PyObject *
3630pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3631{
3632 PyObject *reduce_value, *dict_args;
3633 PyObject *contents = pmp_copy(self);
3634 if (contents == NULL)
3635 return NULL;
3636
3637 reduce_value = PyTuple_New(2);
3638 if (reduce_value == NULL) {
3639 Py_DECREF(contents);
3640 return NULL;
3641 }
3642 dict_args = PyTuple_New(1);
3643 if (dict_args == NULL) {
3644 Py_DECREF(contents);
3645 Py_DECREF(reduce_value);
3646 return NULL;
3647 }
3648 PyTuple_SET_ITEM(dict_args, 0, contents);
3649 Py_INCREF((PyObject *)&PyDict_Type);
3650 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3651 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3652 return reduce_value;
3653}
3654
3655static PyMethodDef picklerproxy_methods[] = {
3656 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3657 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3658 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3659 {NULL, NULL} /* sentinel */
3660};
3661
3662static void
3663PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3664{
3665 PyObject_GC_UnTrack(self);
3666 Py_XDECREF(self->pickler);
3667 PyObject_GC_Del((PyObject *)self);
3668}
3669
3670static int
3671PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3672 visitproc visit, void *arg)
3673{
3674 Py_VISIT(self->pickler);
3675 return 0;
3676}
3677
3678static int
3679PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3680{
3681 Py_CLEAR(self->pickler);
3682 return 0;
3683}
3684
3685static PyTypeObject PicklerMemoProxyType = {
3686 PyVarObject_HEAD_INIT(NULL, 0)
3687 "_pickle.PicklerMemoProxy", /*tp_name*/
3688 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3689 0,
3690 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3691 0, /* tp_print */
3692 0, /* tp_getattr */
3693 0, /* tp_setattr */
3694 0, /* tp_compare */
3695 0, /* tp_repr */
3696 0, /* tp_as_number */
3697 0, /* tp_as_sequence */
3698 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003699 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003700 0, /* tp_call */
3701 0, /* tp_str */
3702 PyObject_GenericGetAttr, /* tp_getattro */
3703 PyObject_GenericSetAttr, /* tp_setattro */
3704 0, /* tp_as_buffer */
3705 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3706 0, /* tp_doc */
3707 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3708 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3709 0, /* tp_richcompare */
3710 0, /* tp_weaklistoffset */
3711 0, /* tp_iter */
3712 0, /* tp_iternext */
3713 picklerproxy_methods, /* tp_methods */
3714};
3715
3716static PyObject *
3717PicklerMemoProxy_New(PicklerObject *pickler)
3718{
3719 PicklerMemoProxyObject *self;
3720
3721 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3722 if (self == NULL)
3723 return NULL;
3724 Py_INCREF(pickler);
3725 self->pickler = pickler;
3726 PyObject_GC_Track(self);
3727 return (PyObject *)self;
3728}
3729
3730/*****************************************************************************/
3731
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003732static PyObject *
3733Pickler_get_memo(PicklerObject *self)
3734{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003735 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003736}
3737
3738static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003739Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003740{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003741 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003742
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003743 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003744 PyErr_SetString(PyExc_TypeError,
3745 "attribute deletion is not supported");
3746 return -1;
3747 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003748
3749 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3750 PicklerObject *pickler =
3751 ((PicklerMemoProxyObject *)obj)->pickler;
3752
3753 new_memo = PyMemoTable_Copy(pickler->memo);
3754 if (new_memo == NULL)
3755 return -1;
3756 }
3757 else if (PyDict_Check(obj)) {
3758 Py_ssize_t i = 0;
3759 PyObject *key, *value;
3760
3761 new_memo = PyMemoTable_New();
3762 if (new_memo == NULL)
3763 return -1;
3764
3765 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003766 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003767 PyObject *memo_obj;
3768
3769 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3770 PyErr_SetString(PyExc_TypeError,
3771 "'memo' values must be 2-item tuples");
3772 goto error;
3773 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003774 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003775 if (memo_id == -1 && PyErr_Occurred())
3776 goto error;
3777 memo_obj = PyTuple_GET_ITEM(value, 1);
3778 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3779 goto error;
3780 }
3781 }
3782 else {
3783 PyErr_Format(PyExc_TypeError,
3784 "'memo' attribute must be an PicklerMemoProxy object"
3785 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003786 return -1;
3787 }
3788
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003789 PyMemoTable_Del(self->memo);
3790 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003791
3792 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003793
3794 error:
3795 if (new_memo)
3796 PyMemoTable_Del(new_memo);
3797 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003798}
3799
3800static PyObject *
3801Pickler_get_persid(PicklerObject *self)
3802{
3803 if (self->pers_func == NULL)
3804 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3805 else
3806 Py_INCREF(self->pers_func);
3807 return self->pers_func;
3808}
3809
3810static int
3811Pickler_set_persid(PicklerObject *self, PyObject *value)
3812{
3813 PyObject *tmp;
3814
3815 if (value == NULL) {
3816 PyErr_SetString(PyExc_TypeError,
3817 "attribute deletion is not supported");
3818 return -1;
3819 }
3820 if (!PyCallable_Check(value)) {
3821 PyErr_SetString(PyExc_TypeError,
3822 "persistent_id must be a callable taking one argument");
3823 return -1;
3824 }
3825
3826 tmp = self->pers_func;
3827 Py_INCREF(value);
3828 self->pers_func = value;
3829 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3830
3831 return 0;
3832}
3833
3834static PyMemberDef Pickler_members[] = {
3835 {"bin", T_INT, offsetof(PicklerObject, bin)},
3836 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003837 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003838 {NULL}
3839};
3840
3841static PyGetSetDef Pickler_getsets[] = {
3842 {"memo", (getter)Pickler_get_memo,
3843 (setter)Pickler_set_memo},
3844 {"persistent_id", (getter)Pickler_get_persid,
3845 (setter)Pickler_set_persid},
3846 {NULL}
3847};
3848
3849static PyTypeObject Pickler_Type = {
3850 PyVarObject_HEAD_INIT(NULL, 0)
3851 "_pickle.Pickler" , /*tp_name*/
3852 sizeof(PicklerObject), /*tp_basicsize*/
3853 0, /*tp_itemsize*/
3854 (destructor)Pickler_dealloc, /*tp_dealloc*/
3855 0, /*tp_print*/
3856 0, /*tp_getattr*/
3857 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003858 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003859 0, /*tp_repr*/
3860 0, /*tp_as_number*/
3861 0, /*tp_as_sequence*/
3862 0, /*tp_as_mapping*/
3863 0, /*tp_hash*/
3864 0, /*tp_call*/
3865 0, /*tp_str*/
3866 0, /*tp_getattro*/
3867 0, /*tp_setattro*/
3868 0, /*tp_as_buffer*/
3869 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3870 Pickler_doc, /*tp_doc*/
3871 (traverseproc)Pickler_traverse, /*tp_traverse*/
3872 (inquiry)Pickler_clear, /*tp_clear*/
3873 0, /*tp_richcompare*/
3874 0, /*tp_weaklistoffset*/
3875 0, /*tp_iter*/
3876 0, /*tp_iternext*/
3877 Pickler_methods, /*tp_methods*/
3878 Pickler_members, /*tp_members*/
3879 Pickler_getsets, /*tp_getset*/
3880 0, /*tp_base*/
3881 0, /*tp_dict*/
3882 0, /*tp_descr_get*/
3883 0, /*tp_descr_set*/
3884 0, /*tp_dictoffset*/
3885 (initproc)Pickler_init, /*tp_init*/
3886 PyType_GenericAlloc, /*tp_alloc*/
3887 PyType_GenericNew, /*tp_new*/
3888 PyObject_GC_Del, /*tp_free*/
3889 0, /*tp_is_gc*/
3890};
3891
Victor Stinner121aab42011-09-29 23:40:53 +02003892/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003893
3894 XXX: It would be nice to able to avoid Python function call overhead, by
3895 using directly the C version of find_class(), when find_class() is not
3896 overridden by a subclass. Although, this could become rather hackish. A
3897 simpler optimization would be to call the C function when self is not a
3898 subclass instance. */
3899static PyObject *
3900find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3901{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003902 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003903
3904 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3905 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003906}
3907
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003908static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003909marker(UnpicklerObject *self)
3910{
3911 if (self->num_marks < 1) {
3912 PyErr_SetString(UnpicklingError, "could not find MARK");
3913 return -1;
3914 }
3915
3916 return self->marks[--self->num_marks];
3917}
3918
3919static int
3920load_none(UnpicklerObject *self)
3921{
3922 PDATA_APPEND(self->stack, Py_None, -1);
3923 return 0;
3924}
3925
3926static int
3927bad_readline(void)
3928{
3929 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3930 return -1;
3931}
3932
3933static int
3934load_int(UnpicklerObject *self)
3935{
3936 PyObject *value;
3937 char *endptr, *s;
3938 Py_ssize_t len;
3939 long x;
3940
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003941 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003942 return -1;
3943 if (len < 2)
3944 return bad_readline();
3945
3946 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003947 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003948 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003949 x = strtol(s, &endptr, 0);
3950
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003951 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003952 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03003953 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003954 errno = 0;
3955 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003956 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003957 if (value == NULL) {
3958 PyErr_SetString(PyExc_ValueError,
3959 "could not convert string to int");
3960 return -1;
3961 }
3962 }
3963 else {
3964 if (len == 3 && (x == 0 || x == 1)) {
3965 if ((value = PyBool_FromLong(x)) == NULL)
3966 return -1;
3967 }
3968 else {
3969 if ((value = PyLong_FromLong(x)) == NULL)
3970 return -1;
3971 }
3972 }
3973
3974 PDATA_PUSH(self->stack, value, -1);
3975 return 0;
3976}
3977
3978static int
3979load_bool(UnpicklerObject *self, PyObject *boolean)
3980{
3981 assert(boolean == Py_True || boolean == Py_False);
3982 PDATA_APPEND(self->stack, boolean, -1);
3983 return 0;
3984}
3985
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003986/* s contains x bytes of an unsigned little-endian integer. Return its value
3987 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3988 */
3989static Py_ssize_t
3990calc_binsize(char *bytes, int size)
3991{
3992 unsigned char *s = (unsigned char *)bytes;
3993 size_t x = 0;
3994
3995 assert(size == 4);
3996
3997 x = (size_t) s[0];
3998 x |= (size_t) s[1] << 8;
3999 x |= (size_t) s[2] << 16;
4000 x |= (size_t) s[3] << 24;
4001
4002 if (x > PY_SSIZE_T_MAX)
4003 return -1;
4004 else
4005 return (Py_ssize_t) x;
4006}
4007
/* `bytes` points at `size` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no shift ever
 * reaches the sign bit of a signed type, which would be undefined where
 * long is only 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: when bit 31 is set the value is (low 31 bits) - 2**31.
     * Computing it by subtraction gives the same result for every width
     * of long and cannot overflow.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)x;
}
4034
4035static int
4036load_binintx(UnpicklerObject *self, char *s, int size)
4037{
4038 PyObject *value;
4039 long x;
4040
4041 x = calc_binint(s, size);
4042
4043 if ((value = PyLong_FromLong(x)) == NULL)
4044 return -1;
4045
4046 PDATA_PUSH(self->stack, value, -1);
4047 return 0;
4048}
4049
4050static int
4051load_binint(UnpicklerObject *self)
4052{
4053 char *s;
4054
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004055 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004056 return -1;
4057
4058 return load_binintx(self, s, 4);
4059}
4060
4061static int
4062load_binint1(UnpicklerObject *self)
4063{
4064 char *s;
4065
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004066 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004067 return -1;
4068
4069 return load_binintx(self, s, 1);
4070}
4071
4072static int
4073load_binint2(UnpicklerObject *self)
4074{
4075 char *s;
4076
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004077 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004078 return -1;
4079
4080 return load_binintx(self, s, 2);
4081}
4082
4083static int
4084load_long(UnpicklerObject *self)
4085{
4086 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004087 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004088 Py_ssize_t len;
4089
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004090 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004091 return -1;
4092 if (len < 2)
4093 return bad_readline();
4094
Mark Dickinson8dd05142009-01-20 20:43:58 +00004095 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4096 the 'L' before calling PyLong_FromString. In order to maintain
4097 compatibility with Python 3.0.0, we don't actually *require*
4098 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004099 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004100 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004101 /* XXX: Should the base argument explicitly set to 10? */
4102 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004103 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004104 return -1;
4105
4106 PDATA_PUSH(self->stack, value, -1);
4107 return 0;
4108}
4109
4110/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4111 * data following.
4112 */
4113static int
4114load_counted_long(UnpicklerObject *self, int size)
4115{
4116 PyObject *value;
4117 char *nbytes;
4118 char *pdata;
4119
4120 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004121 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004122 return -1;
4123
4124 size = calc_binint(nbytes, size);
4125 if (size < 0) {
4126 /* Corrupt or hostile pickle -- we never write one like this */
4127 PyErr_SetString(UnpicklingError,
4128 "LONG pickle has negative byte count");
4129 return -1;
4130 }
4131
4132 if (size == 0)
4133 value = PyLong_FromLong(0L);
4134 else {
4135 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004136 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004137 return -1;
4138 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4139 1 /* little endian */ , 1 /* signed */ );
4140 }
4141 if (value == NULL)
4142 return -1;
4143 PDATA_PUSH(self->stack, value, -1);
4144 return 0;
4145}
4146
4147static int
4148load_float(UnpicklerObject *self)
4149{
4150 PyObject *value;
4151 char *endptr, *s;
4152 Py_ssize_t len;
4153 double d;
4154
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004155 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004156 return -1;
4157 if (len < 2)
4158 return bad_readline();
4159
4160 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004161 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4162 if (d == -1.0 && PyErr_Occurred())
4163 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004164 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004165 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4166 return -1;
4167 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004168 value = PyFloat_FromDouble(d);
4169 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004170 return -1;
4171
4172 PDATA_PUSH(self->stack, value, -1);
4173 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004174}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004175
4176static int
4177load_binfloat(UnpicklerObject *self)
4178{
4179 PyObject *value;
4180 double x;
4181 char *s;
4182
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004183 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004184 return -1;
4185
4186 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4187 if (x == -1.0 && PyErr_Occurred())
4188 return -1;
4189
4190 if ((value = PyFloat_FromDouble(x)) == NULL)
4191 return -1;
4192
4193 PDATA_PUSH(self->stack, value, -1);
4194 return 0;
4195}
4196
4197static int
4198load_string(UnpicklerObject *self)
4199{
4200 PyObject *bytes;
4201 PyObject *str = NULL;
4202 Py_ssize_t len;
4203 char *s, *p;
4204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004205 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004206 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004207 /* Strip the newline */
4208 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004209 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004210 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004211 p = s + 1;
4212 len -= 2;
4213 }
4214 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004215 PyErr_SetString(UnpicklingError,
4216 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004217 return -1;
4218 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004219 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004220
4221 /* Use the PyBytes API to decode the string, since that is what is used
4222 to encode, and then coerce the result to Unicode. */
4223 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004224 if (bytes == NULL)
4225 return -1;
4226 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4227 Py_DECREF(bytes);
4228 if (str == NULL)
4229 return -1;
4230
4231 PDATA_PUSH(self->stack, str, -1);
4232 return 0;
4233}
4234
4235static int
4236load_binbytes(UnpicklerObject *self)
4237{
4238 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004239 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004240 char *s;
4241
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004242 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004243 return -1;
4244
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004245 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004246 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004247 PyErr_Format(PyExc_OverflowError,
4248 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004249 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004250 return -1;
4251 }
4252
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004253 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004254 return -1;
4255 bytes = PyBytes_FromStringAndSize(s, x);
4256 if (bytes == NULL)
4257 return -1;
4258
4259 PDATA_PUSH(self->stack, bytes, -1);
4260 return 0;
4261}
4262
4263static int
4264load_short_binbytes(UnpicklerObject *self)
4265{
4266 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004267 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004268 char *s;
4269
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004270 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004271 return -1;
4272
4273 x = (unsigned char)s[0];
4274
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004275 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004276 return -1;
4277
4278 bytes = PyBytes_FromStringAndSize(s, x);
4279 if (bytes == NULL)
4280 return -1;
4281
4282 PDATA_PUSH(self->stack, bytes, -1);
4283 return 0;
4284}
4285
4286static int
4287load_binstring(UnpicklerObject *self)
4288{
4289 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004290 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004291 char *s;
4292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004293 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004294 return -1;
4295
4296 x = calc_binint(s, 4);
4297 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004298 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004299 "BINSTRING pickle has negative byte count");
4300 return -1;
4301 }
4302
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004303 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004304 return -1;
4305
4306 /* Convert Python 2.x strings to unicode. */
4307 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4308 if (str == NULL)
4309 return -1;
4310
4311 PDATA_PUSH(self->stack, str, -1);
4312 return 0;
4313}
4314
4315static int
4316load_short_binstring(UnpicklerObject *self)
4317{
4318 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004319 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004320 char *s;
4321
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004322 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004323 return -1;
4324
4325 x = (unsigned char)s[0];
4326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004327 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004328 return -1;
4329
4330 /* Convert Python 2.x strings to unicode. */
4331 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4332 if (str == NULL)
4333 return -1;
4334
4335 PDATA_PUSH(self->stack, str, -1);
4336 return 0;
4337}
4338
4339static int
4340load_unicode(UnpicklerObject *self)
4341{
4342 PyObject *str;
4343 Py_ssize_t len;
4344 char *s;
4345
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004346 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004347 return -1;
4348 if (len < 1)
4349 return bad_readline();
4350
4351 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4352 if (str == NULL)
4353 return -1;
4354
4355 PDATA_PUSH(self->stack, str, -1);
4356 return 0;
4357}
4358
4359static int
4360load_binunicode(UnpicklerObject *self)
4361{
4362 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004363 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004364 char *s;
4365
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004366 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004367 return -1;
4368
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004369 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004370 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004371 PyErr_Format(PyExc_OverflowError,
4372 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004373 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004374 return -1;
4375 }
4376
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004377
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004378 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004379 return -1;
4380
Victor Stinner485fb562010-04-13 11:07:24 +00004381 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004382 if (str == NULL)
4383 return -1;
4384
4385 PDATA_PUSH(self->stack, str, -1);
4386 return 0;
4387}
4388
4389static int
4390load_tuple(UnpicklerObject *self)
4391{
4392 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004393 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004394
4395 if ((i = marker(self)) < 0)
4396 return -1;
4397
4398 tuple = Pdata_poptuple(self->stack, i);
4399 if (tuple == NULL)
4400 return -1;
4401 PDATA_PUSH(self->stack, tuple, -1);
4402 return 0;
4403}
4404
4405static int
4406load_counted_tuple(UnpicklerObject *self, int len)
4407{
4408 PyObject *tuple;
4409
4410 tuple = PyTuple_New(len);
4411 if (tuple == NULL)
4412 return -1;
4413
4414 while (--len >= 0) {
4415 PyObject *item;
4416
4417 PDATA_POP(self->stack, item);
4418 if (item == NULL)
4419 return -1;
4420 PyTuple_SET_ITEM(tuple, len, item);
4421 }
4422 PDATA_PUSH(self->stack, tuple, -1);
4423 return 0;
4424}
4425
4426static int
4427load_empty_list(UnpicklerObject *self)
4428{
4429 PyObject *list;
4430
4431 if ((list = PyList_New(0)) == NULL)
4432 return -1;
4433 PDATA_PUSH(self->stack, list, -1);
4434 return 0;
4435}
4436
4437static int
4438load_empty_dict(UnpicklerObject *self)
4439{
4440 PyObject *dict;
4441
4442 if ((dict = PyDict_New()) == NULL)
4443 return -1;
4444 PDATA_PUSH(self->stack, dict, -1);
4445 return 0;
4446}
4447
4448static int
4449load_list(UnpicklerObject *self)
4450{
4451 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004452 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004453
4454 if ((i = marker(self)) < 0)
4455 return -1;
4456
4457 list = Pdata_poplist(self->stack, i);
4458 if (list == NULL)
4459 return -1;
4460 PDATA_PUSH(self->stack, list, -1);
4461 return 0;
4462}
4463
4464static int
4465load_dict(UnpicklerObject *self)
4466{
4467 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004468 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004469
4470 if ((i = marker(self)) < 0)
4471 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004472 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004473
4474 if ((dict = PyDict_New()) == NULL)
4475 return -1;
4476
4477 for (k = i + 1; k < j; k += 2) {
4478 key = self->stack->data[k - 1];
4479 value = self->stack->data[k];
4480 if (PyDict_SetItem(dict, key, value) < 0) {
4481 Py_DECREF(dict);
4482 return -1;
4483 }
4484 }
4485 Pdata_clear(self->stack, i);
4486 PDATA_PUSH(self->stack, dict, -1);
4487 return 0;
4488}
4489
4490static PyObject *
4491instantiate(PyObject *cls, PyObject *args)
4492{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004493 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004494 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004495 /* Caller must assure args are a tuple. Normally, args come from
4496 Pdata_poptuple which packs objects from the top of the stack
4497 into a newly created tuple. */
4498 assert(PyTuple_Check(args));
4499 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004500 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004501 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004502 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004503 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004504 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004505
4506 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004507 }
4508 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004509}
4510
4511static int
4512load_obj(UnpicklerObject *self)
4513{
4514 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004515 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004516
4517 if ((i = marker(self)) < 0)
4518 return -1;
4519
4520 args = Pdata_poptuple(self->stack, i + 1);
4521 if (args == NULL)
4522 return -1;
4523
4524 PDATA_POP(self->stack, cls);
4525 if (cls) {
4526 obj = instantiate(cls, args);
4527 Py_DECREF(cls);
4528 }
4529 Py_DECREF(args);
4530 if (obj == NULL)
4531 return -1;
4532
4533 PDATA_PUSH(self->stack, obj, -1);
4534 return 0;
4535}
4536
4537static int
4538load_inst(UnpicklerObject *self)
4539{
4540 PyObject *cls = NULL;
4541 PyObject *args = NULL;
4542 PyObject *obj = NULL;
4543 PyObject *module_name;
4544 PyObject *class_name;
4545 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004546 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004547 char *s;
4548
4549 if ((i = marker(self)) < 0)
4550 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004551 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004552 return -1;
4553 if (len < 2)
4554 return bad_readline();
4555
4556 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4557 identifiers are permitted in Python 3.0, since the INST opcode is only
4558 supported by older protocols on Python 2.x. */
4559 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4560 if (module_name == NULL)
4561 return -1;
4562
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004563 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004564 if (len < 2)
4565 return bad_readline();
4566 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004567 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004568 cls = find_class(self, module_name, class_name);
4569 Py_DECREF(class_name);
4570 }
4571 }
4572 Py_DECREF(module_name);
4573
4574 if (cls == NULL)
4575 return -1;
4576
4577 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4578 obj = instantiate(cls, args);
4579 Py_DECREF(args);
4580 }
4581 Py_DECREF(cls);
4582
4583 if (obj == NULL)
4584 return -1;
4585
4586 PDATA_PUSH(self->stack, obj, -1);
4587 return 0;
4588}
4589
4590static int
4591load_newobj(UnpicklerObject *self)
4592{
4593 PyObject *args = NULL;
4594 PyObject *clsraw = NULL;
4595 PyTypeObject *cls; /* clsraw cast to its true type */
4596 PyObject *obj;
4597
4598 /* Stack is ... cls argtuple, and we want to call
4599 * cls.__new__(cls, *argtuple).
4600 */
4601 PDATA_POP(self->stack, args);
4602 if (args == NULL)
4603 goto error;
4604 if (!PyTuple_Check(args)) {
4605 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4606 goto error;
4607 }
4608
4609 PDATA_POP(self->stack, clsraw);
4610 cls = (PyTypeObject *)clsraw;
4611 if (cls == NULL)
4612 goto error;
4613 if (!PyType_Check(cls)) {
4614 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4615 "isn't a type object");
4616 goto error;
4617 }
4618 if (cls->tp_new == NULL) {
4619 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4620 "has NULL tp_new");
4621 goto error;
4622 }
4623
4624 /* Call __new__. */
4625 obj = cls->tp_new(cls, args, NULL);
4626 if (obj == NULL)
4627 goto error;
4628
4629 Py_DECREF(args);
4630 Py_DECREF(clsraw);
4631 PDATA_PUSH(self->stack, obj, -1);
4632 return 0;
4633
4634 error:
4635 Py_XDECREF(args);
4636 Py_XDECREF(clsraw);
4637 return -1;
4638}
4639
4640static int
4641load_global(UnpicklerObject *self)
4642{
4643 PyObject *global = NULL;
4644 PyObject *module_name;
4645 PyObject *global_name;
4646 Py_ssize_t len;
4647 char *s;
4648
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004649 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650 return -1;
4651 if (len < 2)
4652 return bad_readline();
4653 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4654 if (!module_name)
4655 return -1;
4656
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004657 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004658 if (len < 2) {
4659 Py_DECREF(module_name);
4660 return bad_readline();
4661 }
4662 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4663 if (global_name) {
4664 global = find_class(self, module_name, global_name);
4665 Py_DECREF(global_name);
4666 }
4667 }
4668 Py_DECREF(module_name);
4669
4670 if (global == NULL)
4671 return -1;
4672 PDATA_PUSH(self->stack, global, -1);
4673 return 0;
4674}
4675
4676static int
4677load_persid(UnpicklerObject *self)
4678{
4679 PyObject *pid;
4680 Py_ssize_t len;
4681 char *s;
4682
4683 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004684 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004685 return -1;
4686 if (len < 2)
4687 return bad_readline();
4688
4689 pid = PyBytes_FromStringAndSize(s, len - 1);
4690 if (pid == NULL)
4691 return -1;
4692
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004693 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004694 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004695 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004696 if (pid == NULL)
4697 return -1;
4698
4699 PDATA_PUSH(self->stack, pid, -1);
4700 return 0;
4701 }
4702 else {
4703 PyErr_SetString(UnpicklingError,
4704 "A load persistent id instruction was encountered,\n"
4705 "but no persistent_load function was specified.");
4706 return -1;
4707 }
4708}
4709
4710static int
4711load_binpersid(UnpicklerObject *self)
4712{
4713 PyObject *pid;
4714
4715 if (self->pers_func) {
4716 PDATA_POP(self->stack, pid);
4717 if (pid == NULL)
4718 return -1;
4719
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004720 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004721 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004722 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004723 if (pid == NULL)
4724 return -1;
4725
4726 PDATA_PUSH(self->stack, pid, -1);
4727 return 0;
4728 }
4729 else {
4730 PyErr_SetString(UnpicklingError,
4731 "A load persistent id instruction was encountered,\n"
4732 "but no persistent_load function was specified.");
4733 return -1;
4734 }
4735}
4736
4737static int
4738load_pop(UnpicklerObject *self)
4739{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004740 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004741
4742 /* Note that we split the (pickle.py) stack into two stacks,
4743 * an object stack and a mark stack. We have to be clever and
4744 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004745 * mark stack first, and only signalling a stack underflow if
4746 * the object stack is empty and the mark stack doesn't match
4747 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004748 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004749 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004750 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004751 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752 len--;
4753 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004754 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004755 } else {
4756 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004758 return 0;
4759}
4760
4761static int
4762load_pop_mark(UnpicklerObject *self)
4763{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004764 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765
4766 if ((i = marker(self)) < 0)
4767 return -1;
4768
4769 Pdata_clear(self->stack, i);
4770
4771 return 0;
4772}
4773
4774static int
4775load_dup(UnpicklerObject *self)
4776{
4777 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004778 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004780 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004781 return stack_underflow();
4782 last = self->stack->data[len - 1];
4783 PDATA_APPEND(self->stack, last, -1);
4784 return 0;
4785}
4786
4787static int
4788load_get(UnpicklerObject *self)
4789{
4790 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004791 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004792 Py_ssize_t len;
4793 char *s;
4794
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004795 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004796 return -1;
4797 if (len < 2)
4798 return bad_readline();
4799
4800 key = PyLong_FromString(s, NULL, 10);
4801 if (key == NULL)
4802 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004803 idx = PyLong_AsSsize_t(key);
4804 if (idx == -1 && PyErr_Occurred()) {
4805 Py_DECREF(key);
4806 return -1;
4807 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004808
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004809 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004810 if (value == NULL) {
4811 if (!PyErr_Occurred())
4812 PyErr_SetObject(PyExc_KeyError, key);
4813 Py_DECREF(key);
4814 return -1;
4815 }
4816 Py_DECREF(key);
4817
4818 PDATA_APPEND(self->stack, value, -1);
4819 return 0;
4820}
4821
4822static int
4823load_binget(UnpicklerObject *self)
4824{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004825 PyObject *value;
4826 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004827 char *s;
4828
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004829 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004830 return -1;
4831
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004832 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004833
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004834 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004836 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004837 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004838 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004839 Py_DECREF(key);
4840 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004841 return -1;
4842 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843
4844 PDATA_APPEND(self->stack, value, -1);
4845 return 0;
4846}
4847
4848static int
4849load_long_binget(UnpicklerObject *self)
4850{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004851 PyObject *value;
4852 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004853 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004854
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004855 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004856 return -1;
4857
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004858 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004859
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004860 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004863 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004865 Py_DECREF(key);
4866 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004867 return -1;
4868 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004869
4870 PDATA_APPEND(self->stack, value, -1);
4871 return 0;
4872}
4873
4874/* Push an object from the extension registry (EXT[124]). nbytes is
4875 * the number of bytes following the opcode, holding the index (code) value.
4876 */
4877static int
4878load_extension(UnpicklerObject *self, int nbytes)
4879{
4880 char *codebytes; /* the nbytes bytes after the opcode */
4881 long code; /* calc_binint returns long */
4882 PyObject *py_code; /* code as a Python int */
4883 PyObject *obj; /* the object to push */
4884 PyObject *pair; /* (module_name, class_name) */
4885 PyObject *module_name, *class_name;
4886
4887 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004888 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004889 return -1;
4890 code = calc_binint(codebytes, nbytes);
4891 if (code <= 0) { /* note that 0 is forbidden */
4892 /* Corrupt or hostile pickle. */
4893 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4894 return -1;
4895 }
4896
4897 /* Look for the code in the cache. */
4898 py_code = PyLong_FromLong(code);
4899 if (py_code == NULL)
4900 return -1;
4901 obj = PyDict_GetItem(extension_cache, py_code);
4902 if (obj != NULL) {
4903 /* Bingo. */
4904 Py_DECREF(py_code);
4905 PDATA_APPEND(self->stack, obj, -1);
4906 return 0;
4907 }
4908
4909 /* Look up the (module_name, class_name) pair. */
4910 pair = PyDict_GetItem(inverted_registry, py_code);
4911 if (pair == NULL) {
4912 Py_DECREF(py_code);
4913 PyErr_Format(PyExc_ValueError, "unregistered extension "
4914 "code %ld", code);
4915 return -1;
4916 }
4917 /* Since the extension registry is manipulable via Python code,
4918 * confirm that pair is really a 2-tuple of strings.
4919 */
4920 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4921 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4922 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4923 Py_DECREF(py_code);
4924 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4925 "isn't a 2-tuple of strings", code);
4926 return -1;
4927 }
4928 /* Load the object. */
4929 obj = find_class(self, module_name, class_name);
4930 if (obj == NULL) {
4931 Py_DECREF(py_code);
4932 return -1;
4933 }
4934 /* Cache code -> obj. */
4935 code = PyDict_SetItem(extension_cache, py_code, obj);
4936 Py_DECREF(py_code);
4937 if (code < 0) {
4938 Py_DECREF(obj);
4939 return -1;
4940 }
4941 PDATA_PUSH(self->stack, obj, -1);
4942 return 0;
4943}
4944
4945static int
4946load_put(UnpicklerObject *self)
4947{
4948 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004949 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004950 Py_ssize_t len;
4951 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004952
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004953 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004954 return -1;
4955 if (len < 2)
4956 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004957 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004958 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004959 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960
4961 key = PyLong_FromString(s, NULL, 10);
4962 if (key == NULL)
4963 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004964 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004965 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004966 if (idx < 0) {
4967 if (!PyErr_Occurred())
4968 PyErr_SetString(PyExc_ValueError,
4969 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004970 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004971 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004972
4973 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004974}
4975
4976static int
4977load_binput(UnpicklerObject *self)
4978{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004979 PyObject *value;
4980 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004981 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004982
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004983 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004984 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004985
4986 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004987 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004988 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004990 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004991
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004992 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004993}
4994
4995static int
4996load_long_binput(UnpicklerObject *self)
4997{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004998 PyObject *value;
4999 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005000 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005001
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005002 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005003 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005004
5005 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005006 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005007 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005008
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005009 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005010 if (idx < 0) {
5011 PyErr_SetString(PyExc_ValueError,
5012 "negative LONG_BINPUT argument");
5013 return -1;
5014 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005016 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005017}
5018
5019static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005020do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005021{
5022 PyObject *value;
5023 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005024 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005025
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005026 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005027 if (x > len || x <= 0)
5028 return stack_underflow();
5029 if (len == x) /* nothing to do */
5030 return 0;
5031
5032 list = self->stack->data[x - 1];
5033
5034 if (PyList_Check(list)) {
5035 PyObject *slice;
5036 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005037 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005038
5039 slice = Pdata_poplist(self->stack, x);
5040 if (!slice)
5041 return -1;
5042 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005043 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005044 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005045 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005046 }
5047 else {
5048 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005049 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005050
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005051 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005052 if (append_func == NULL)
5053 return -1;
5054 for (i = x; i < len; i++) {
5055 PyObject *result;
5056
5057 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005058 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005059 if (result == NULL) {
5060 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005061 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005062 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005063 return -1;
5064 }
5065 Py_DECREF(result);
5066 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005067 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005068 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005069 }
5070
5071 return 0;
5072}
5073
5074static int
5075load_append(UnpicklerObject *self)
5076{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005077 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005078}
5079
5080static int
5081load_appends(UnpicklerObject *self)
5082{
5083 return do_append(self, marker(self));
5084}
5085
5086static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005087do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005088{
5089 PyObject *value, *key;
5090 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005091 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005092 int status = 0;
5093
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005094 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005095 if (x > len || x <= 0)
5096 return stack_underflow();
5097 if (len == x) /* nothing to do */
5098 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005099 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005100 /* Currupt or hostile pickle -- we never write one like this. */
5101 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5102 return -1;
5103 }
5104
5105 /* Here, dict does not actually need to be a PyDict; it could be anything
5106 that supports the __setitem__ attribute. */
5107 dict = self->stack->data[x - 1];
5108
5109 for (i = x + 1; i < len; i += 2) {
5110 key = self->stack->data[i - 1];
5111 value = self->stack->data[i];
5112 if (PyObject_SetItem(dict, key, value) < 0) {
5113 status = -1;
5114 break;
5115 }
5116 }
5117
5118 Pdata_clear(self->stack, x);
5119 return status;
5120}
5121
5122static int
5123load_setitem(UnpicklerObject *self)
5124{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005125 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005126}
5127
5128static int
5129load_setitems(UnpicklerObject *self)
5130{
5131 return do_setitems(self, marker(self));
5132}
5133
5134static int
5135load_build(UnpicklerObject *self)
5136{
5137 PyObject *state, *inst, *slotstate;
5138 PyObject *setstate;
5139 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005140 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005141
5142 /* Stack is ... instance, state. We want to leave instance at
5143 * the stack top, possibly mutated via instance.__setstate__(state).
5144 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005145 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005146 return stack_underflow();
5147
5148 PDATA_POP(self->stack, state);
5149 if (state == NULL)
5150 return -1;
5151
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005152 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005153
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005154 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005155 if (setstate == NULL) {
5156 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5157 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005158 else {
5159 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005160 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005161 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005162 }
5163 else {
5164 PyObject *result;
5165
5166 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005167 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005168 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005169 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005170 Py_DECREF(setstate);
5171 if (result == NULL)
5172 return -1;
5173 Py_DECREF(result);
5174 return 0;
5175 }
5176
5177 /* A default __setstate__. First see whether state embeds a
5178 * slot state dict too (a proto 2 addition).
5179 */
5180 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5181 PyObject *tmp = state;
5182
5183 state = PyTuple_GET_ITEM(tmp, 0);
5184 slotstate = PyTuple_GET_ITEM(tmp, 1);
5185 Py_INCREF(state);
5186 Py_INCREF(slotstate);
5187 Py_DECREF(tmp);
5188 }
5189 else
5190 slotstate = NULL;
5191
5192 /* Set inst.__dict__ from the state dict (if any). */
5193 if (state != Py_None) {
5194 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005195 PyObject *d_key, *d_value;
5196 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005197 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005198
5199 if (!PyDict_Check(state)) {
5200 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5201 goto error;
5202 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005203 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005204 if (dict == NULL)
5205 goto error;
5206
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005207 i = 0;
5208 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5209 /* normally the keys for instance attributes are
5210 interned. we should try to do that here. */
5211 Py_INCREF(d_key);
5212 if (PyUnicode_CheckExact(d_key))
5213 PyUnicode_InternInPlace(&d_key);
5214 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5215 Py_DECREF(d_key);
5216 goto error;
5217 }
5218 Py_DECREF(d_key);
5219 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005220 Py_DECREF(dict);
5221 }
5222
5223 /* Also set instance attributes from the slotstate dict (if any). */
5224 if (slotstate != NULL) {
5225 PyObject *d_key, *d_value;
5226 Py_ssize_t i;
5227
5228 if (!PyDict_Check(slotstate)) {
5229 PyErr_SetString(UnpicklingError,
5230 "slot state is not a dictionary");
5231 goto error;
5232 }
5233 i = 0;
5234 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5235 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5236 goto error;
5237 }
5238 }
5239
5240 if (0) {
5241 error:
5242 status = -1;
5243 }
5244
5245 Py_DECREF(state);
5246 Py_XDECREF(slotstate);
5247 return status;
5248}
5249
5250static int
5251load_mark(UnpicklerObject *self)
5252{
5253
5254 /* Note that we split the (pickle.py) stack into two stacks, an
5255 * object stack and a mark stack. Here we push a mark onto the
5256 * mark stack.
5257 */
5258
5259 if ((self->num_marks + 1) >= self->marks_size) {
5260 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005261 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005262
5263 /* Use the size_t type to check for overflow. */
5264 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005265 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005266 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005267 PyErr_NoMemory();
5268 return -1;
5269 }
5270
5271 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005272 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005273 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005274 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5275 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005276 if (marks == NULL) {
5277 PyErr_NoMemory();
5278 return -1;
5279 }
5280 self->marks = marks;
5281 self->marks_size = (Py_ssize_t)alloc;
5282 }
5283
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005284 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005285
5286 return 0;
5287}
5288
5289static int
5290load_reduce(UnpicklerObject *self)
5291{
5292 PyObject *callable = NULL;
5293 PyObject *argtup = NULL;
5294 PyObject *obj = NULL;
5295
5296 PDATA_POP(self->stack, argtup);
5297 if (argtup == NULL)
5298 return -1;
5299 PDATA_POP(self->stack, callable);
5300 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005301 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005302 Py_DECREF(callable);
5303 }
5304 Py_DECREF(argtup);
5305
5306 if (obj == NULL)
5307 return -1;
5308
5309 PDATA_PUSH(self->stack, obj, -1);
5310 return 0;
5311}
5312
5313/* Just raises an error if we don't know the protocol specified. PROTO
5314 * is the first opcode for protocols >= 2.
5315 */
5316static int
5317load_proto(UnpicklerObject *self)
5318{
5319 char *s;
5320 int i;
5321
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005322 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005323 return -1;
5324
5325 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005326 if (i <= HIGHEST_PROTOCOL) {
5327 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005328 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005329 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005330
5331 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5332 return -1;
5333}
5334
5335static PyObject *
5336load(UnpicklerObject *self)
5337{
5338 PyObject *err;
5339 PyObject *value = NULL;
5340 char *s;
5341
5342 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005343 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005344 Pdata_clear(self->stack, 0);
5345
5346 /* Convenient macros for the dispatch while-switch loop just below. */
5347#define OP(opcode, load_func) \
5348 case opcode: if (load_func(self) < 0) break; continue;
5349
5350#define OP_ARG(opcode, load_func, arg) \
5351 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5352
5353 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005354 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005355 break;
5356
5357 switch ((enum opcode)s[0]) {
5358 OP(NONE, load_none)
5359 OP(BININT, load_binint)
5360 OP(BININT1, load_binint1)
5361 OP(BININT2, load_binint2)
5362 OP(INT, load_int)
5363 OP(LONG, load_long)
5364 OP_ARG(LONG1, load_counted_long, 1)
5365 OP_ARG(LONG4, load_counted_long, 4)
5366 OP(FLOAT, load_float)
5367 OP(BINFLOAT, load_binfloat)
5368 OP(BINBYTES, load_binbytes)
5369 OP(SHORT_BINBYTES, load_short_binbytes)
5370 OP(BINSTRING, load_binstring)
5371 OP(SHORT_BINSTRING, load_short_binstring)
5372 OP(STRING, load_string)
5373 OP(UNICODE, load_unicode)
5374 OP(BINUNICODE, load_binunicode)
5375 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5376 OP_ARG(TUPLE1, load_counted_tuple, 1)
5377 OP_ARG(TUPLE2, load_counted_tuple, 2)
5378 OP_ARG(TUPLE3, load_counted_tuple, 3)
5379 OP(TUPLE, load_tuple)
5380 OP(EMPTY_LIST, load_empty_list)
5381 OP(LIST, load_list)
5382 OP(EMPTY_DICT, load_empty_dict)
5383 OP(DICT, load_dict)
5384 OP(OBJ, load_obj)
5385 OP(INST, load_inst)
5386 OP(NEWOBJ, load_newobj)
5387 OP(GLOBAL, load_global)
5388 OP(APPEND, load_append)
5389 OP(APPENDS, load_appends)
5390 OP(BUILD, load_build)
5391 OP(DUP, load_dup)
5392 OP(BINGET, load_binget)
5393 OP(LONG_BINGET, load_long_binget)
5394 OP(GET, load_get)
5395 OP(MARK, load_mark)
5396 OP(BINPUT, load_binput)
5397 OP(LONG_BINPUT, load_long_binput)
5398 OP(PUT, load_put)
5399 OP(POP, load_pop)
5400 OP(POP_MARK, load_pop_mark)
5401 OP(SETITEM, load_setitem)
5402 OP(SETITEMS, load_setitems)
5403 OP(PERSID, load_persid)
5404 OP(BINPERSID, load_binpersid)
5405 OP(REDUCE, load_reduce)
5406 OP(PROTO, load_proto)
5407 OP_ARG(EXT1, load_extension, 1)
5408 OP_ARG(EXT2, load_extension, 2)
5409 OP_ARG(EXT4, load_extension, 4)
5410 OP_ARG(NEWTRUE, load_bool, Py_True)
5411 OP_ARG(NEWFALSE, load_bool, Py_False)
5412
5413 case STOP:
5414 break;
5415
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005416 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005417 if (s[0] == '\0')
5418 PyErr_SetNone(PyExc_EOFError);
5419 else
5420 PyErr_Format(UnpicklingError,
5421 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005422 return NULL;
5423 }
5424
5425 break; /* and we are done! */
5426 }
5427
5428 /* XXX: It is not clear what this is actually for. */
5429 if ((err = PyErr_Occurred())) {
5430 if (err == PyExc_EOFError) {
5431 PyErr_SetNone(PyExc_EOFError);
5432 }
5433 return NULL;
5434 }
5435
Victor Stinner2ae57e32013-10-31 13:39:23 +01005436 if (_Unpickler_SkipConsumed(self) < 0)
5437 return NULL;
5438
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005439 PDATA_POP(self->stack, value);
5440 return value;
5441}
5442
5443PyDoc_STRVAR(Unpickler_load_doc,
5444"load() -> object. Load a pickle."
5445"\n"
5446"Read a pickled object representation from the open file object given in\n"
5447"the constructor, and return the reconstituted object hierarchy specified\n"
5448"therein.\n");
5449
5450static PyObject *
5451Unpickler_load(UnpicklerObject *self)
5452{
5453 /* Check whether the Unpickler was initialized correctly. This prevents
5454 segfaulting if a subclass overridden __init__ with a function that does
5455 not call Unpickler.__init__(). Here, we simply ensure that self->read
5456 is not NULL. */
5457 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005458 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005459 "Unpickler.__init__() was not called by %s.__init__()",
5460 Py_TYPE(self)->tp_name);
5461 return NULL;
5462 }
5463
5464 return load(self);
5465}
5466
5467/* The name of find_class() is misleading. In newer pickle protocols, this
5468 function is used for loading any global (i.e., functions), not just
5469 classes. The name is kept only for backward compatibility. */
5470
5471PyDoc_STRVAR(Unpickler_find_class_doc,
5472"find_class(module_name, global_name) -> object.\n"
5473"\n"
5474"Return an object from a specified module, importing the module if\n"
5475"necessary. Subclasses may override this method (e.g. to restrict\n"
5476"unpickling of arbitrary classes and functions).\n"
5477"\n"
5478"This method is called whenever a class or a function object is\n"
5479"needed. Both arguments passed are str objects.\n");
5480
5481static PyObject *
5482Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5483{
5484 PyObject *global;
5485 PyObject *modules_dict;
5486 PyObject *module;
5487 PyObject *module_name, *global_name;
5488
5489 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5490 &module_name, &global_name))
5491 return NULL;
5492
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005493 /* Try to map the old names used in Python 2.x to the new ones used in
5494 Python 3.x. We do this only with old pickle protocols and when the
5495 user has not disabled the feature. */
5496 if (self->proto < 3 && self->fix_imports) {
5497 PyObject *key;
5498 PyObject *item;
5499
5500 /* Check if the global (i.e., a function or a class) was renamed
5501 or moved to another module. */
5502 key = PyTuple_Pack(2, module_name, global_name);
5503 if (key == NULL)
5504 return NULL;
5505 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5506 Py_DECREF(key);
5507 if (item) {
5508 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5509 PyErr_Format(PyExc_RuntimeError,
5510 "_compat_pickle.NAME_MAPPING values should be "
5511 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5512 return NULL;
5513 }
5514 module_name = PyTuple_GET_ITEM(item, 0);
5515 global_name = PyTuple_GET_ITEM(item, 1);
5516 if (!PyUnicode_Check(module_name) ||
5517 !PyUnicode_Check(global_name)) {
5518 PyErr_Format(PyExc_RuntimeError,
5519 "_compat_pickle.NAME_MAPPING values should be "
5520 "pairs of str, not (%.200s, %.200s)",
5521 Py_TYPE(module_name)->tp_name,
5522 Py_TYPE(global_name)->tp_name);
5523 return NULL;
5524 }
5525 }
5526 else if (PyErr_Occurred()) {
5527 return NULL;
5528 }
5529
5530 /* Check if the module was renamed. */
5531 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5532 if (item) {
5533 if (!PyUnicode_Check(item)) {
5534 PyErr_Format(PyExc_RuntimeError,
5535 "_compat_pickle.IMPORT_MAPPING values should be "
5536 "strings, not %.200s", Py_TYPE(item)->tp_name);
5537 return NULL;
5538 }
5539 module_name = item;
5540 }
5541 else if (PyErr_Occurred()) {
5542 return NULL;
5543 }
5544 }
5545
Victor Stinnerbb520202013-11-06 22:40:41 +01005546 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02005547 if (modules_dict == NULL) {
5548 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005549 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02005550 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005551
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005552 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005553 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005554 if (PyErr_Occurred())
5555 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005556 module = PyImport_Import(module_name);
5557 if (module == NULL)
5558 return NULL;
5559 global = PyObject_GetAttr(module, global_name);
5560 Py_DECREF(module);
5561 }
Victor Stinner121aab42011-09-29 23:40:53 +02005562 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005563 global = PyObject_GetAttr(module, global_name);
5564 }
5565 return global;
5566}
5567
5568static struct PyMethodDef Unpickler_methods[] = {
5569 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5570 Unpickler_load_doc},
5571 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5572 Unpickler_find_class_doc},
5573 {NULL, NULL} /* sentinel */
5574};
5575
5576static void
5577Unpickler_dealloc(UnpicklerObject *self)
5578{
5579 PyObject_GC_UnTrack((PyObject *)self);
5580 Py_XDECREF(self->readline);
5581 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005582 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005583 Py_XDECREF(self->stack);
5584 Py_XDECREF(self->pers_func);
5585 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005586 if (self->buffer.buf != NULL) {
5587 PyBuffer_Release(&self->buffer);
5588 self->buffer.buf = NULL;
5589 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005590
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005591 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005592 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005593 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005594 PyMem_Free(self->encoding);
5595 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005596
5597 Py_TYPE(self)->tp_free((PyObject *)self);
5598}
5599
5600static int
5601Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5602{
5603 Py_VISIT(self->readline);
5604 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005605 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005606 Py_VISIT(self->stack);
5607 Py_VISIT(self->pers_func);
5608 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005609 return 0;
5610}
5611
5612static int
5613Unpickler_clear(UnpicklerObject *self)
5614{
5615 Py_CLEAR(self->readline);
5616 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005617 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005618 Py_CLEAR(self->stack);
5619 Py_CLEAR(self->pers_func);
5620 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005621 if (self->buffer.buf != NULL) {
5622 PyBuffer_Release(&self->buffer);
5623 self->buffer.buf = NULL;
5624 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005625
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005626 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005627 PyMem_Free(self->marks);
5628 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005629 PyMem_Free(self->input_line);
5630 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005631 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005632 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005633 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005634 self->errors = NULL;
5635
5636 return 0;
5637}
5638
5639PyDoc_STRVAR(Unpickler_doc,
5640"Unpickler(file, *, encoding='ASCII', errors='strict')"
5641"\n"
5642"This takes a binary file for reading a pickle data stream.\n"
5643"\n"
5644"The protocol version of the pickle is detected automatically, so no\n"
5645"proto argument is needed.\n"
5646"\n"
5647"The file-like object must have two methods, a read() method\n"
5648"that takes an integer argument, and a readline() method that\n"
5649"requires no arguments. Both methods should return bytes.\n"
5650"Thus file-like object can be a binary file object opened for\n"
5651"reading, a BytesIO object, or any other custom object that\n"
5652"meets this interface.\n"
5653"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005654"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5655"which are used to control compatiblity support for pickle stream\n"
5656"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5657"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5658"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5659"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5660"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005661
5662static int
5663Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5664{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005665 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005666 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005667 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005668 char *encoding = NULL;
5669 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005670 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005671
5672 /* XXX: That is an horrible error message. But, I don't know how to do
5673 better... */
5674 if (Py_SIZE(args) != 1) {
5675 PyErr_Format(PyExc_TypeError,
5676 "%s takes exactly one positional argument (%zd given)",
5677 Py_TYPE(self)->tp_name, Py_SIZE(args));
5678 return -1;
5679 }
5680
5681 /* Arguments parsing needs to be done in the __init__() method to allow
5682 subclasses to define their own __init__() method, which may (or may
5683 not) support Unpickler arguments. However, this means we need to be
5684 extra careful in the other Unpickler methods, since a subclass could
5685 forget to call Unpickler.__init__() thus breaking our internal
5686 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005687 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005688 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005689 return -1;
5690
5691 /* In case of multiple __init__() calls, clear previous content. */
5692 if (self->read != NULL)
5693 (void)Unpickler_clear(self);
5694
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005695 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005696 return -1;
5697
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005698 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005699 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005700
5701 self->fix_imports = PyObject_IsTrue(fix_imports);
5702 if (self->fix_imports == -1)
5703 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005704
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005705 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005706 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5707 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005708 if (self->pers_func == NULL)
5709 return -1;
5710 }
5711 else {
5712 self->pers_func = NULL;
5713 }
5714
5715 self->stack = (Pdata *)Pdata_New();
5716 if (self->stack == NULL)
5717 return -1;
5718
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005719 self->memo_size = 32;
5720 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005721 if (self->memo == NULL)
5722 return -1;
5723
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005724 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005725 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005726
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005727 return 0;
5728}
5729
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005730/* Define a proxy object for the Unpickler's internal memo object. This is to
5731 * avoid breaking code like:
5732 * unpickler.memo.clear()
5733 * and
5734 * unpickler.memo = saved_memo
5735 * Is this a good idea? Not really, but we don't want to break code that uses
5736 * it. Note that we don't implement the entire mapping API here. This is
5737 * intentional, as these should be treated as black-box implementation details.
5738 *
5739 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005740 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005741 */
5742
5743typedef struct {
5744 PyObject_HEAD
5745 UnpicklerObject *unpickler;
5746} UnpicklerMemoProxyObject;
5747
5748PyDoc_STRVAR(ump_clear_doc,
5749"memo.clear() -> None. Remove all items from memo.");
5750
5751static PyObject *
5752ump_clear(UnpicklerMemoProxyObject *self)
5753{
5754 _Unpickler_MemoCleanup(self->unpickler);
5755 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5756 if (self->unpickler->memo == NULL)
5757 return NULL;
5758 Py_RETURN_NONE;
5759}
5760
5761PyDoc_STRVAR(ump_copy_doc,
5762"memo.copy() -> new_memo. Copy the memo to a new object.");
5763
5764static PyObject *
5765ump_copy(UnpicklerMemoProxyObject *self)
5766{
5767 Py_ssize_t i;
5768 PyObject *new_memo = PyDict_New();
5769 if (new_memo == NULL)
5770 return NULL;
5771
5772 for (i = 0; i < self->unpickler->memo_size; i++) {
5773 int status;
5774 PyObject *key, *value;
5775
5776 value = self->unpickler->memo[i];
5777 if (value == NULL)
5778 continue;
5779
5780 key = PyLong_FromSsize_t(i);
5781 if (key == NULL)
5782 goto error;
5783 status = PyDict_SetItem(new_memo, key, value);
5784 Py_DECREF(key);
5785 if (status < 0)
5786 goto error;
5787 }
5788 return new_memo;
5789
5790error:
5791 Py_DECREF(new_memo);
5792 return NULL;
5793}
5794
5795PyDoc_STRVAR(ump_reduce_doc,
5796"memo.__reduce__(). Pickling support.");
5797
5798static PyObject *
5799ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5800{
5801 PyObject *reduce_value;
5802 PyObject *constructor_args;
5803 PyObject *contents = ump_copy(self);
5804 if (contents == NULL)
5805 return NULL;
5806
5807 reduce_value = PyTuple_New(2);
5808 if (reduce_value == NULL) {
5809 Py_DECREF(contents);
5810 return NULL;
5811 }
5812 constructor_args = PyTuple_New(1);
5813 if (constructor_args == NULL) {
5814 Py_DECREF(contents);
5815 Py_DECREF(reduce_value);
5816 return NULL;
5817 }
5818 PyTuple_SET_ITEM(constructor_args, 0, contents);
5819 Py_INCREF((PyObject *)&PyDict_Type);
5820 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5821 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5822 return reduce_value;
5823}
5824
5825static PyMethodDef unpicklerproxy_methods[] = {
5826 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5827 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5828 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5829 {NULL, NULL} /* sentinel */
5830};
5831
5832static void
5833UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5834{
5835 PyObject_GC_UnTrack(self);
5836 Py_XDECREF(self->unpickler);
5837 PyObject_GC_Del((PyObject *)self);
5838}
5839
5840static int
5841UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5842 visitproc visit, void *arg)
5843{
5844 Py_VISIT(self->unpickler);
5845 return 0;
5846}
5847
5848static int
5849UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5850{
5851 Py_CLEAR(self->unpickler);
5852 return 0;
5853}
5854
5855static PyTypeObject UnpicklerMemoProxyType = {
5856 PyVarObject_HEAD_INIT(NULL, 0)
5857 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5858 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5859 0,
5860 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5861 0, /* tp_print */
5862 0, /* tp_getattr */
5863 0, /* tp_setattr */
5864 0, /* tp_compare */
5865 0, /* tp_repr */
5866 0, /* tp_as_number */
5867 0, /* tp_as_sequence */
5868 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005869 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005870 0, /* tp_call */
5871 0, /* tp_str */
5872 PyObject_GenericGetAttr, /* tp_getattro */
5873 PyObject_GenericSetAttr, /* tp_setattro */
5874 0, /* tp_as_buffer */
5875 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5876 0, /* tp_doc */
5877 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5878 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5879 0, /* tp_richcompare */
5880 0, /* tp_weaklistoffset */
5881 0, /* tp_iter */
5882 0, /* tp_iternext */
5883 unpicklerproxy_methods, /* tp_methods */
5884};
5885
5886static PyObject *
5887UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5888{
5889 UnpicklerMemoProxyObject *self;
5890
5891 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5892 &UnpicklerMemoProxyType);
5893 if (self == NULL)
5894 return NULL;
5895 Py_INCREF(unpickler);
5896 self->unpickler = unpickler;
5897 PyObject_GC_Track(self);
5898 return (PyObject *)self;
5899}
5900
5901/*****************************************************************************/
5902
5903
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005904static PyObject *
5905Unpickler_get_memo(UnpicklerObject *self)
5906{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005907 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005908}
5909
5910static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005911Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005912{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005913 PyObject **new_memo;
5914 Py_ssize_t new_memo_size = 0;
5915 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005916
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005917 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005918 PyErr_SetString(PyExc_TypeError,
5919 "attribute deletion is not supported");
5920 return -1;
5921 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005922
5923 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5924 UnpicklerObject *unpickler =
5925 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5926
5927 new_memo_size = unpickler->memo_size;
5928 new_memo = _Unpickler_NewMemo(new_memo_size);
5929 if (new_memo == NULL)
5930 return -1;
5931
5932 for (i = 0; i < new_memo_size; i++) {
5933 Py_XINCREF(unpickler->memo[i]);
5934 new_memo[i] = unpickler->memo[i];
5935 }
5936 }
5937 else if (PyDict_Check(obj)) {
5938 Py_ssize_t i = 0;
5939 PyObject *key, *value;
5940
5941 new_memo_size = PyDict_Size(obj);
5942 new_memo = _Unpickler_NewMemo(new_memo_size);
5943 if (new_memo == NULL)
5944 return -1;
5945
5946 while (PyDict_Next(obj, &i, &key, &value)) {
5947 Py_ssize_t idx;
5948 if (!PyLong_Check(key)) {
5949 PyErr_SetString(PyExc_TypeError,
5950 "memo key must be integers");
5951 goto error;
5952 }
5953 idx = PyLong_AsSsize_t(key);
5954 if (idx == -1 && PyErr_Occurred())
5955 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02005956 if (idx < 0) {
5957 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02005958 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02005959 goto error;
5960 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005961 if (_Unpickler_MemoPut(self, idx, value) < 0)
5962 goto error;
5963 }
5964 }
5965 else {
5966 PyErr_Format(PyExc_TypeError,
5967 "'memo' attribute must be an UnpicklerMemoProxy object"
5968 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005969 return -1;
5970 }
5971
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005972 _Unpickler_MemoCleanup(self);
5973 self->memo_size = new_memo_size;
5974 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005975
5976 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005977
5978 error:
5979 if (new_memo_size) {
5980 i = new_memo_size;
5981 while (--i >= 0) {
5982 Py_XDECREF(new_memo[i]);
5983 }
5984 PyMem_FREE(new_memo);
5985 }
5986 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005987}
5988
5989static PyObject *
5990Unpickler_get_persload(UnpicklerObject *self)
5991{
5992 if (self->pers_func == NULL)
5993 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5994 else
5995 Py_INCREF(self->pers_func);
5996 return self->pers_func;
5997}
5998
5999static int
6000Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6001{
6002 PyObject *tmp;
6003
6004 if (value == NULL) {
6005 PyErr_SetString(PyExc_TypeError,
6006 "attribute deletion is not supported");
6007 return -1;
6008 }
6009 if (!PyCallable_Check(value)) {
6010 PyErr_SetString(PyExc_TypeError,
6011 "persistent_load must be a callable taking "
6012 "one argument");
6013 return -1;
6014 }
6015
6016 tmp = self->pers_func;
6017 Py_INCREF(value);
6018 self->pers_func = value;
6019 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6020
6021 return 0;
6022}
6023
6024static PyGetSetDef Unpickler_getsets[] = {
6025 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6026 {"persistent_load", (getter)Unpickler_get_persload,
6027 (setter)Unpickler_set_persload},
6028 {NULL}
6029};
6030
6031static PyTypeObject Unpickler_Type = {
6032 PyVarObject_HEAD_INIT(NULL, 0)
6033 "_pickle.Unpickler", /*tp_name*/
6034 sizeof(UnpicklerObject), /*tp_basicsize*/
6035 0, /*tp_itemsize*/
6036 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6037 0, /*tp_print*/
6038 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006039 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006040 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006041 0, /*tp_repr*/
6042 0, /*tp_as_number*/
6043 0, /*tp_as_sequence*/
6044 0, /*tp_as_mapping*/
6045 0, /*tp_hash*/
6046 0, /*tp_call*/
6047 0, /*tp_str*/
6048 0, /*tp_getattro*/
6049 0, /*tp_setattro*/
6050 0, /*tp_as_buffer*/
6051 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6052 Unpickler_doc, /*tp_doc*/
6053 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6054 (inquiry)Unpickler_clear, /*tp_clear*/
6055 0, /*tp_richcompare*/
6056 0, /*tp_weaklistoffset*/
6057 0, /*tp_iter*/
6058 0, /*tp_iternext*/
6059 Unpickler_methods, /*tp_methods*/
6060 0, /*tp_members*/
6061 Unpickler_getsets, /*tp_getset*/
6062 0, /*tp_base*/
6063 0, /*tp_dict*/
6064 0, /*tp_descr_get*/
6065 0, /*tp_descr_set*/
6066 0, /*tp_dictoffset*/
6067 (initproc)Unpickler_init, /*tp_init*/
6068 PyType_GenericAlloc, /*tp_alloc*/
6069 PyType_GenericNew, /*tp_new*/
6070 PyObject_GC_Del, /*tp_free*/
6071 0, /*tp_is_gc*/
6072};
6073
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006074PyDoc_STRVAR(pickle_dump_doc,
6075"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6076"\n"
6077"Write a pickled representation of obj to the open file object file. This\n"
6078"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6079"efficient.\n"
6080"\n"
6081"The optional protocol argument tells the pickler to use the given protocol;\n"
6082"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6083"backward-incompatible protocol designed for Python 3.0.\n"
6084"\n"
6085"Specifying a negative protocol version selects the highest protocol version\n"
6086"supported. The higher the protocol used, the more recent the version of\n"
6087"Python needed to read the pickle produced.\n"
6088"\n"
6089"The file argument must have a write() method that accepts a single bytes\n"
6090"argument. It can thus be a file object opened for binary writing, a\n"
6091"io.BytesIO instance, or any other custom object that meets this interface.\n"
6092"\n"
6093"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6094"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6095"so that the pickle data stream is readable with Python 2.x.\n");
6096
6097static PyObject *
6098pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6099{
6100 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6101 PyObject *obj;
6102 PyObject *file;
6103 PyObject *proto = NULL;
6104 PyObject *fix_imports = Py_True;
6105 PicklerObject *pickler;
6106
6107 /* fix_imports is a keyword-only argument. */
6108 if (Py_SIZE(args) > 3) {
6109 PyErr_Format(PyExc_TypeError,
6110 "pickle.dump() takes at most 3 positional "
6111 "argument (%zd given)", Py_SIZE(args));
6112 return NULL;
6113 }
6114
6115 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6116 &obj, &file, &proto, &fix_imports))
6117 return NULL;
6118
6119 pickler = _Pickler_New();
6120 if (pickler == NULL)
6121 return NULL;
6122
6123 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6124 goto error;
6125
6126 if (_Pickler_SetOutputStream(pickler, file) < 0)
6127 goto error;
6128
6129 if (dump(pickler, obj) < 0)
6130 goto error;
6131
6132 if (_Pickler_FlushToFile(pickler) < 0)
6133 goto error;
6134
6135 Py_DECREF(pickler);
6136 Py_RETURN_NONE;
6137
6138 error:
6139 Py_XDECREF(pickler);
6140 return NULL;
6141}
6142
6143PyDoc_STRVAR(pickle_dumps_doc,
6144"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6145"\n"
6146"Return the pickled representation of the object as a bytes\n"
6147"object, instead of writing it to a file.\n"
6148"\n"
6149"The optional protocol argument tells the pickler to use the given protocol;\n"
6150"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6151"backward-incompatible protocol designed for Python 3.0.\n"
6152"\n"
6153"Specifying a negative protocol version selects the highest protocol version\n"
6154"supported. The higher the protocol used, the more recent the version of\n"
6155"Python needed to read the pickle produced.\n"
6156"\n"
6157"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6158"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6159"so that the pickle data stream is readable with Python 2.x.\n");
6160
6161static PyObject *
6162pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6163{
6164 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6165 PyObject *obj;
6166 PyObject *proto = NULL;
6167 PyObject *result;
6168 PyObject *fix_imports = Py_True;
6169 PicklerObject *pickler;
6170
6171 /* fix_imports is a keyword-only argument. */
6172 if (Py_SIZE(args) > 2) {
6173 PyErr_Format(PyExc_TypeError,
6174 "pickle.dumps() takes at most 2 positional "
6175 "argument (%zd given)", Py_SIZE(args));
6176 return NULL;
6177 }
6178
6179 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6180 &obj, &proto, &fix_imports))
6181 return NULL;
6182
6183 pickler = _Pickler_New();
6184 if (pickler == NULL)
6185 return NULL;
6186
6187 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6188 goto error;
6189
6190 if (dump(pickler, obj) < 0)
6191 goto error;
6192
6193 result = _Pickler_GetString(pickler);
6194 Py_DECREF(pickler);
6195 return result;
6196
6197 error:
6198 Py_XDECREF(pickler);
6199 return NULL;
6200}
6201
6202PyDoc_STRVAR(pickle_load_doc,
6203"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6204"\n"
6205"Read a pickled object representation from the open file object file and\n"
6206"return the reconstituted object hierarchy specified therein. This is\n"
6207"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6208"\n"
6209"The protocol version of the pickle is detected automatically, so no protocol\n"
6210"argument is needed. Bytes past the pickled object's representation are\n"
6211"ignored.\n"
6212"\n"
6213"The argument file must have two methods, a read() method that takes an\n"
6214"integer argument, and a readline() method that requires no arguments. Both\n"
6215"methods should return bytes. Thus *file* can be a binary file object opened\n"
6216"for reading, a BytesIO object, or any other custom object that meets this\n"
6217"interface.\n"
6218"\n"
6219"Optional keyword arguments are fix_imports, encoding and errors,\n"
6220"which are used to control compatiblity support for pickle stream generated\n"
6221"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6222"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6223"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6224"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6225
6226static PyObject *
6227pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6228{
6229 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6230 PyObject *file;
6231 PyObject *fix_imports = Py_True;
6232 PyObject *result;
6233 char *encoding = NULL;
6234 char *errors = NULL;
6235 UnpicklerObject *unpickler;
6236
6237 /* fix_imports, encoding and errors are a keyword-only argument. */
6238 if (Py_SIZE(args) != 1) {
6239 PyErr_Format(PyExc_TypeError,
6240 "pickle.load() takes exactly one positional "
6241 "argument (%zd given)", Py_SIZE(args));
6242 return NULL;
6243 }
6244
6245 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6246 &file, &fix_imports, &encoding, &errors))
6247 return NULL;
6248
6249 unpickler = _Unpickler_New();
6250 if (unpickler == NULL)
6251 return NULL;
6252
6253 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6254 goto error;
6255
6256 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6257 goto error;
6258
6259 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6260 if (unpickler->fix_imports == -1)
6261 goto error;
6262
6263 result = load(unpickler);
6264 Py_DECREF(unpickler);
6265 return result;
6266
6267 error:
6268 Py_XDECREF(unpickler);
6269 return NULL;
6270}
6271
6272PyDoc_STRVAR(pickle_loads_doc,
6273"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6274"\n"
6275"Read a pickled object hierarchy from a bytes object and return the\n"
6276"reconstituted object hierarchy specified therein\n"
6277"\n"
6278"The protocol version of the pickle is detected automatically, so no protocol\n"
6279"argument is needed. Bytes past the pickled object's representation are\n"
6280"ignored.\n"
6281"\n"
6282"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6283"are used to control compatiblity support for pickle stream generated\n"
6284"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6285"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6286"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6287"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6288
6289static PyObject *
6290pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6291{
6292 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6293 PyObject *input;
6294 PyObject *fix_imports = Py_True;
6295 PyObject *result;
6296 char *encoding = NULL;
6297 char *errors = NULL;
6298 UnpicklerObject *unpickler;
6299
6300 /* fix_imports, encoding and errors are a keyword-only argument. */
6301 if (Py_SIZE(args) != 1) {
6302 PyErr_Format(PyExc_TypeError,
6303 "pickle.loads() takes exactly one positional "
6304 "argument (%zd given)", Py_SIZE(args));
6305 return NULL;
6306 }
6307
6308 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6309 &input, &fix_imports, &encoding, &errors))
6310 return NULL;
6311
6312 unpickler = _Unpickler_New();
6313 if (unpickler == NULL)
6314 return NULL;
6315
6316 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6317 goto error;
6318
6319 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6320 goto error;
6321
6322 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6323 if (unpickler->fix_imports == -1)
6324 goto error;
6325
6326 result = load(unpickler);
6327 Py_DECREF(unpickler);
6328 return result;
6329
6330 error:
6331 Py_XDECREF(unpickler);
6332 return NULL;
6333}
6334
6335
6336static struct PyMethodDef pickle_methods[] = {
6337 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6338 pickle_dump_doc},
6339 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6340 pickle_dumps_doc},
6341 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6342 pickle_load_doc},
6343 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6344 pickle_loads_doc},
6345 {NULL, NULL} /* sentinel */
6346};
6347
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006348static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006349initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006350{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006351 PyObject *copyreg = NULL;
6352 PyObject *compat_pickle = NULL;
6353
6354 /* XXX: We should ensure that the types of the dictionaries imported are
6355 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6356 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006357
6358 copyreg = PyImport_ImportModule("copyreg");
6359 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006360 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006361 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6362 if (!dispatch_table)
6363 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006364 extension_registry = \
6365 PyObject_GetAttrString(copyreg, "_extension_registry");
6366 if (!extension_registry)
6367 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006368 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6369 if (!inverted_registry)
6370 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006371 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6372 if (!extension_cache)
6373 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006374 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006375
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006376 /* Load the 2.x -> 3.x stdlib module mapping tables */
6377 compat_pickle = PyImport_ImportModule("_compat_pickle");
6378 if (!compat_pickle)
6379 goto error;
6380 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6381 if (!name_mapping_2to3)
6382 goto error;
6383 if (!PyDict_CheckExact(name_mapping_2to3)) {
6384 PyErr_Format(PyExc_RuntimeError,
6385 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6386 Py_TYPE(name_mapping_2to3)->tp_name);
6387 goto error;
6388 }
6389 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6390 "IMPORT_MAPPING");
6391 if (!import_mapping_2to3)
6392 goto error;
6393 if (!PyDict_CheckExact(import_mapping_2to3)) {
6394 PyErr_Format(PyExc_RuntimeError,
6395 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6396 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6397 goto error;
6398 }
6399 /* ... and the 3.x -> 2.x mapping tables */
6400 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6401 "REVERSE_NAME_MAPPING");
6402 if (!name_mapping_3to2)
6403 goto error;
6404 if (!PyDict_CheckExact(name_mapping_3to2)) {
6405 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006406 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006407 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6408 goto error;
6409 }
6410 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6411 "REVERSE_IMPORT_MAPPING");
6412 if (!import_mapping_3to2)
6413 goto error;
6414 if (!PyDict_CheckExact(import_mapping_3to2)) {
6415 PyErr_Format(PyExc_RuntimeError,
6416 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6417 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6418 goto error;
6419 }
6420 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006421
6422 empty_tuple = PyTuple_New(0);
6423 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006424 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006425 two_tuple = PyTuple_New(2);
6426 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006427 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006428 /* We use this temp container with no regard to refcounts, or to
6429 * keeping containees alive. Exempt from GC, because we don't
6430 * want anything looking at two_tuple() by magic.
6431 */
6432 PyObject_GC_UnTrack(two_tuple);
6433
6434 return 0;
6435
6436 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006437 Py_CLEAR(copyreg);
6438 Py_CLEAR(dispatch_table);
6439 Py_CLEAR(extension_registry);
6440 Py_CLEAR(inverted_registry);
6441 Py_CLEAR(extension_cache);
6442 Py_CLEAR(compat_pickle);
6443 Py_CLEAR(name_mapping_2to3);
6444 Py_CLEAR(import_mapping_2to3);
6445 Py_CLEAR(name_mapping_3to2);
6446 Py_CLEAR(import_mapping_3to2);
6447 Py_CLEAR(empty_tuple);
6448 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006449 return -1;
6450}
6451
6452static struct PyModuleDef _picklemodule = {
6453 PyModuleDef_HEAD_INIT,
6454 "_pickle",
6455 pickle_module_doc,
6456 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006457 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006458 NULL,
6459 NULL,
6460 NULL,
6461 NULL
6462};
6463
6464PyMODINIT_FUNC
6465PyInit__pickle(void)
6466{
6467 PyObject *m;
6468
6469 if (PyType_Ready(&Unpickler_Type) < 0)
6470 return NULL;
6471 if (PyType_Ready(&Pickler_Type) < 0)
6472 return NULL;
6473 if (PyType_Ready(&Pdata_Type) < 0)
6474 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006475 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6476 return NULL;
6477 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6478 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006479
6480 /* Create the module and add the functions. */
6481 m = PyModule_Create(&_picklemodule);
6482 if (m == NULL)
6483 return NULL;
6484
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006485 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006486 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6487 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006488 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006489 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6490 return NULL;
6491
6492 /* Initialize the exceptions. */
6493 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6494 if (PickleError == NULL)
6495 return NULL;
6496 PicklingError = \
6497 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6498 if (PicklingError == NULL)
6499 return NULL;
6500 UnpicklingError = \
6501 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6502 if (UnpicklingError == NULL)
6503 return NULL;
6504
6505 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6506 return NULL;
6507 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6508 return NULL;
6509 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6510 return NULL;
6511
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006512 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006513 return NULL;
6514
6515 return m;
6516}