blob: a0c10292bdb760df0bcfa2368481ec425efe4d2a [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version limits.  Bump HIGHEST_PROTOCOL whenever new
   opcodes are added to the pickle protocol. */
enum {
    /* Largest protocol number accepted by _Pickler_SetProtocol(). */
    HIGHEST_PROTOCOL = 3,
    /* Protocol used when the caller passes no protocol (or None). */
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   This table must be kept in step with pickle.py; pickletools.py carries
   the extensive documentation for every opcode. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* TRUE and FALSE are not opcodes.  They are the byte sequences used to
 * pickle bools before protocol 2: an unpickler that predates bools reads
 * them as the ints 1 and 0, while a newer unpickler can recognize that a
 * bool was intended.  Protocol 2 added direct ways to pickle bools.
 *
 * Any earlier definition of these names is dropped first.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list()/batch_dict() pump out before
       emitting APPENDS/SETITEMS.  Keep in sync with
       pickle.Pickler._BATCHSIZE; nothing breaks if the two drift apart,
       but it is unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size, in bytes, of the Pickler write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler write buffer when pickling to a
       stream.  Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push O on stack D, transferring ownership of O to the stack.  If the
   push fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                   \
        if (Pdata_push((D), (O)) < 0)               \
            return (ER);                            \
    } while(0)
264
/* Push O on stack D after taking a new reference to it.  If the push
   fails, the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                 \
        Py_INCREF((O));                             \
        if (Pdata_push((D), (O)) < 0)               \
            return (ER);                            \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping void* to longs.  The pickler uses it for
 memoization.  Using a custom hashtable instead of a PyDict skips a bunch
 of unnecessary object creation, which makes a huge performance
 difference. */

/* Number of slots in a freshly created table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on every probe. */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
607/* Helpers for creating the argument tuple passed to functions. This has the
Victor Stinner121aab42011-09-29 23:40:53 +0200608 performance advantage of calling PyTuple_New() only once.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000609
610 XXX(avassalotti): Inline directly in _Pickler_FastCall() and
611 _Unpickler_FastCall(). */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000612#define ARG_TUP(self, obj) do { \
613 if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \
614 Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \
615 PyTuple_SET_ITEM((self)->arg, 0, (obj)); \
616 } \
617 else { \
618 Py_DECREF((obj)); \
619 } \
620 } while (0)
621
/* Drop the cached argument tuple if anything besides `self` still holds
   a reference to it (reference count above 1), so that it is not reused
   for the next call. */
#define FREE_ARG_TUP(self) do {                     \
        if (Py_REFCNT((self)->arg) > 1) {           \
            Py_CLEAR((self)->arg);                  \
        }                                           \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
   XXX: Does caching the argument tuple provide any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200829 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000830 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200831 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 if (self->write == NULL) {
833 if (PyErr_ExceptionMatches(PyExc_AttributeError))
834 PyErr_SetString(PyExc_TypeError,
835 "file must have a 'write' attribute");
836 return -1;
837 }
838
839 return 0;
840}
841
842/* See documentation for _Pickler_FastCall(). */
843static PyObject *
844_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
845{
846 PyObject *result = NULL;
847
848 ARG_TUP(self, arg);
849 if (self->arg) {
850 result = PyObject_Call(func, self->arg, NULL);
851 FREE_ARG_TUP(self);
852 }
853 return result;
854}
855
856/* Returns the size of the input on success, -1 on failure. This takes its
857 own reference to `input`. */
858static Py_ssize_t
859_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
860{
861 if (self->buffer.buf != NULL)
862 PyBuffer_Release(&self->buffer);
863 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
864 return -1;
865 self->input_buffer = self->buffer.buf;
866 self->input_len = self->buffer.len;
867 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000868 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869 return self->input_len;
870}
871
Antoine Pitrou04248a82010-10-12 20:51:21 +0000872static int
873_Unpickler_SkipConsumed(UnpicklerObject *self)
874{
875 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
876
877 if (consumed > 0) {
878 PyObject *r;
879 assert(self->peek); /* otherwise we did something wrong */
880 /* This makes an useless copy... */
881 r = PyObject_CallFunction(self->read, "n", consumed);
882 if (r == NULL)
883 return -1;
884 Py_DECREF(r);
885 self->prefetched_idx = self->next_read_idx;
886 }
887 return 0;
888}
889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890static const Py_ssize_t READ_WHOLE_LINE = -1;
891
892/* If reading from a file, we need to only pull the bytes we need, since there
893 may be multiple pickle objects arranged contiguously in the same input
894 buffer.
895
896 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
897 bytes from the input stream/buffer.
898
899 Update the unpickler's input buffer with the newly-read data. Returns -1 on
900 failure; on success, returns the number of bytes read from the file.
901
902 On success, self->input_len will be 0; this is intentional so that when
903 unpickling from a file, the "we've run out of data" code paths will trigger,
904 causing the Unpickler to go back to the file for more data. Use the returned
905 size to tell you how much data you can process. */
906static Py_ssize_t
907_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
908{
909 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000910 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000911
912 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200913
Antoine Pitrou04248a82010-10-12 20:51:21 +0000914 if (_Unpickler_SkipConsumed(self) < 0)
915 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000916
917 if (n == READ_WHOLE_LINE)
918 data = PyObject_Call(self->readline, empty_tuple, NULL);
919 else {
920 PyObject *len = PyLong_FromSsize_t(n);
921 if (len == NULL)
922 return -1;
923 data = _Unpickler_FastCall(self, self->read, len);
924 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000925 if (data == NULL)
926 return -1;
927
Antoine Pitrou04248a82010-10-12 20:51:21 +0000928 /* Prefetch some data without advancing the file pointer, if possible */
929 if (self->peek) {
930 PyObject *len, *prefetched;
931 len = PyLong_FromSsize_t(PREFETCH);
932 if (len == NULL) {
933 Py_DECREF(data);
934 return -1;
935 }
936 prefetched = _Unpickler_FastCall(self, self->peek, len);
937 if (prefetched == NULL) {
938 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
939 /* peek() is probably not supported by the given file object */
940 PyErr_Clear();
941 Py_CLEAR(self->peek);
942 }
943 else {
944 Py_DECREF(data);
945 return -1;
946 }
947 }
948 else {
949 assert(PyBytes_Check(prefetched));
950 prefetched_size = PyBytes_GET_SIZE(prefetched);
951 PyBytes_ConcatAndDel(&data, prefetched);
952 if (data == NULL)
953 return -1;
954 }
955 }
956
957 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000958 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000959 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000960 return read_size;
961}
962
963/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
964
965 This should be used for all data reads, rather than accessing the unpickler's
966 input buffer directly. This method deals correctly with reading from input
967 streams, which the input buffer doesn't deal with.
968
969 Note that when reading from a file-like object, self->next_read_idx won't
970 be updated (it should remain at 0 for the entire unpickling process). You
971 should use this function's return value to know how many bytes you can
972 consume.
973
974 Returns -1 (with an exception set) on failure. On success, return the
975 number of chars read. */
976static Py_ssize_t
977_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
978{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000979 Py_ssize_t num_read;
980
Antoine Pitrou04248a82010-10-12 20:51:21 +0000981 if (self->next_read_idx + n <= self->input_len) {
982 *s = self->input_buffer + self->next_read_idx;
983 self->next_read_idx += n;
984 return n;
985 }
986 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000987 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000988 return -1;
989 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000990 num_read = _Unpickler_ReadFromFile(self, n);
991 if (num_read < 0)
992 return -1;
993 if (num_read < n) {
994 PyErr_Format(PyExc_EOFError, "Ran out of input");
995 return -1;
996 }
997 *s = self->input_buffer;
998 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000999 return n;
1000}
1001
1002static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001003_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1004 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001005{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1007 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008 return -1;
1009
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001010 memcpy(input_line, line, len);
1011 input_line[len] = '\0';
1012 self->input_line = input_line;
1013 *result = self->input_line;
1014 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001015}
1016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001017/* Read a line from the input stream/buffer. If we run off the end of the input
1018 before hitting \n, return the data we found.
1019
1020 Returns the number of chars read, or -1 on failure. */
1021static Py_ssize_t
1022_Unpickler_Readline(UnpicklerObject *self, char **result)
1023{
1024 Py_ssize_t i, num_read;
1025
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001027 if (self->input_buffer[i] == '\n') {
1028 char *line_start = self->input_buffer + self->next_read_idx;
1029 num_read = i - self->next_read_idx + 1;
1030 self->next_read_idx = i + 1;
1031 return _Unpickler_CopyLine(self, line_start, num_read, result);
1032 }
1033 }
1034 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001035 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1036 if (num_read < 0)
1037 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001039 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001040 }
Victor Stinner121aab42011-09-29 23:40:53 +02001041
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001042 /* If we get here, we've run off the end of the input string. Return the
1043 remaining string and let the caller figure it out. */
1044 *result = self->input_buffer + self->next_read_idx;
1045 num_read = i - self->next_read_idx;
1046 self->next_read_idx = i;
1047 return num_read;
1048}
1049
1050/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1051 will be modified in place. */
1052static int
1053_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1054{
1055 Py_ssize_t i;
1056 PyObject **memo;
1057
1058 assert(new_size > self->memo_size);
1059
1060 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1061 if (memo == NULL) {
1062 PyErr_NoMemory();
1063 return -1;
1064 }
1065 self->memo = memo;
1066 for (i = self->memo_size; i < new_size; i++)
1067 self->memo[i] = NULL;
1068 self->memo_size = new_size;
1069 return 0;
1070}
1071
1072/* Returns NULL if idx is out of bounds. */
1073static PyObject *
1074_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1075{
1076 if (idx < 0 || idx >= self->memo_size)
1077 return NULL;
1078
1079 return self->memo[idx];
1080}
1081
1082/* Returns -1 (with an exception set) on failure, 0 on success.
1083 This takes its own reference to `value`. */
1084static int
1085_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1086{
1087 PyObject *old_item;
1088
1089 if (idx >= self->memo_size) {
1090 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1091 return -1;
1092 assert(idx < self->memo_size);
1093 }
1094 Py_INCREF(value);
1095 old_item = self->memo[idx];
1096 self->memo[idx] = value;
1097 Py_XDECREF(old_item);
1098 return 0;
1099}
1100
1101static PyObject **
1102_Unpickler_NewMemo(Py_ssize_t new_size)
1103{
1104 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1105 if (memo == NULL)
1106 return NULL;
1107 memset(memo, 0, new_size * sizeof(PyObject *));
1108 return memo;
1109}
1110
1111/* Free the unpickler's memo, taking care to decref any items left in it. */
1112static void
1113_Unpickler_MemoCleanup(UnpicklerObject *self)
1114{
1115 Py_ssize_t i;
1116 PyObject **memo = self->memo;
1117
1118 if (self->memo == NULL)
1119 return;
1120 self->memo = NULL;
1121 i = self->memo_size;
1122 while (--i >= 0) {
1123 Py_XDECREF(memo[i]);
1124 }
1125 PyMem_FREE(memo);
1126}
1127
1128static UnpicklerObject *
1129_Unpickler_New(void)
1130{
1131 UnpicklerObject *self;
1132
1133 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1134 if (self == NULL)
1135 return NULL;
1136
1137 self->stack = (Pdata *)Pdata_New();
1138 if (self->stack == NULL) {
1139 Py_DECREF(self);
1140 return NULL;
1141 }
1142 memset(&self->buffer, 0, sizeof(Py_buffer));
1143
1144 self->memo_size = 32;
1145 self->memo = _Unpickler_NewMemo(self->memo_size);
1146 if (self->memo == NULL) {
1147 Py_DECREF(self);
1148 return NULL;
1149 }
1150
1151 self->arg = NULL;
1152 self->pers_func = NULL;
1153 self->input_buffer = NULL;
1154 self->input_line = NULL;
1155 self->input_len = 0;
1156 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001157 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158 self->read = NULL;
1159 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001160 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001161 self->encoding = NULL;
1162 self->errors = NULL;
1163 self->marks = NULL;
1164 self->num_marks = 0;
1165 self->marks_size = 0;
1166 self->proto = 0;
1167 self->fix_imports = 0;
1168
1169 return self;
1170}
1171
1172/* Returns -1 (with an exception set) on failure, 0 on success. This may
1173 be called once on a freshly created Pickler. */
1174static int
1175_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1176{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(peek);
1178 _Py_IDENTIFIER(read);
1179 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001180
1181 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001182 if (self->peek == NULL) {
1183 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1184 PyErr_Clear();
1185 else
1186 return -1;
1187 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001188 self->read = _PyObject_GetAttrId(file, &PyId_read);
1189 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001190 if (self->readline == NULL || self->read == NULL) {
1191 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1192 PyErr_SetString(PyExc_TypeError,
1193 "file must have 'read' and 'readline' attributes");
1194 Py_CLEAR(self->read);
1195 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001196 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001197 return -1;
1198 }
1199 return 0;
1200}
1201
1202/* Returns -1 (with an exception set) on failure, 0 on success. This may
1203 be called once on a freshly created Pickler. */
1204static int
1205_Unpickler_SetInputEncoding(UnpicklerObject *self,
1206 const char *encoding,
1207 const char *errors)
1208{
1209 if (encoding == NULL)
1210 encoding = "ASCII";
1211 if (errors == NULL)
1212 errors = "strict";
1213
1214 self->encoding = strdup(encoding);
1215 self->errors = strdup(errors);
1216 if (self->encoding == NULL || self->errors == NULL) {
1217 PyErr_NoMemory();
1218 return -1;
1219 }
1220 return 0;
1221}
1222
1223/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001224static int
1225memo_get(PicklerObject *self, PyObject *key)
1226{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001227 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001228 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001229 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001231 value = PyMemoTable_Get(self->memo, key);
1232 if (value == NULL) {
1233 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001234 return -1;
1235 }
1236
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 if (!self->bin) {
1238 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001239 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1240 "%" PY_FORMAT_SIZE_T "d\n", *value);
1241 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 }
1243 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001247 len = 2;
1248 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001249 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251 pdata[1] = (unsigned char)(*value & 0xff);
1252 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1253 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1254 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001255 len = 5;
1256 }
1257 else { /* unlikely */
1258 PyErr_SetString(PicklingError,
1259 "memo id too large for LONG_BINGET");
1260 return -1;
1261 }
1262 }
1263
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001264 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001265 return -1;
1266
1267 return 0;
1268}
1269
1270/* Store an object in the memo, assign it a new unique ID based on the number
1271 of objects currently stored in the memo and generate a PUT opcode. */
1272static int
1273memo_put(PicklerObject *self, PyObject *obj)
1274{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001275 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001276 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001277 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001278 int status = 0;
1279
1280 if (self->fast)
1281 return 0;
1282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001283 x = PyMemoTable_Size(self->memo);
1284 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001285 goto error;
1286
1287 if (!self->bin) {
1288 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001289 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1290 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001291 len = strlen(pdata);
1292 }
1293 else {
1294 if (x < 256) {
1295 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001296 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001297 len = 2;
1298 }
1299 else if (x <= 0xffffffffL) {
1300 pdata[0] = LONG_BINPUT;
1301 pdata[1] = (unsigned char)(x & 0xff);
1302 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1303 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1304 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1305 len = 5;
1306 }
1307 else { /* unlikely */
1308 PyErr_SetString(PicklingError,
1309 "memo id too large for LONG_BINPUT");
1310 return -1;
1311 }
1312 }
1313
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001314 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001315 goto error;
1316
1317 if (0) {
1318 error:
1319 status = -1;
1320 }
1321
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001322 return status;
1323}
1324
1325static PyObject *
1326whichmodule(PyObject *global, PyObject *global_name)
1327{
1328 Py_ssize_t i, j;
1329 static PyObject *module_str = NULL;
1330 static PyObject *main_str = NULL;
1331 PyObject *module_name;
1332 PyObject *modules_dict;
1333 PyObject *module;
1334 PyObject *obj;
1335
1336 if (module_str == NULL) {
1337 module_str = PyUnicode_InternFromString("__module__");
1338 if (module_str == NULL)
1339 return NULL;
1340 main_str = PyUnicode_InternFromString("__main__");
1341 if (main_str == NULL)
1342 return NULL;
1343 }
1344
1345 module_name = PyObject_GetAttr(global, module_str);
1346
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001347 /* In some rare cases (e.g., bound methods of extension types),
1348 __module__ can be None. If it is so, then search sys.modules
1349 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001350 if (module_name == Py_None) {
1351 Py_DECREF(module_name);
1352 goto search;
1353 }
1354
1355 if (module_name) {
1356 return module_name;
1357 }
1358 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1359 PyErr_Clear();
1360 else
1361 return NULL;
1362
1363 search:
1364 modules_dict = PySys_GetObject("modules");
1365 if (modules_dict == NULL)
1366 return NULL;
1367
1368 i = 0;
1369 module_name = NULL;
1370 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001371 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001372 continue;
1373
1374 obj = PyObject_GetAttr(module, global_name);
1375 if (obj == NULL) {
1376 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1377 PyErr_Clear();
1378 else
1379 return NULL;
1380 continue;
1381 }
1382
1383 if (obj != global) {
1384 Py_DECREF(obj);
1385 continue;
1386 }
1387
1388 Py_DECREF(obj);
1389 break;
1390 }
1391
1392 /* If no module is found, use __main__. */
1393 if (!j) {
1394 module_name = main_str;
1395 }
1396
1397 Py_INCREF(module_name);
1398 return module_name;
1399}
1400
1401/* fast_save_enter() and fast_save_leave() are guards against recursive
1402 objects when Pickler is used with the "fast mode" (i.e., with object
1403 memoization disabled). If the nesting of a list or dict object exceed
1404 FAST_NESTING_LIMIT, these guards will start keeping an internal
1405 reference to the seen list or dict objects and check whether these objects
1406 are recursive. These are not strictly necessary, since save() has a
1407 hard-coded recursion limit, but they give a nicer error message than the
1408 typical RuntimeError. */
1409static int
1410fast_save_enter(PicklerObject *self, PyObject *obj)
1411{
1412 /* if fast_nesting < 0, we're doing an error exit. */
1413 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1414 PyObject *key = NULL;
1415 if (self->fast_memo == NULL) {
1416 self->fast_memo = PyDict_New();
1417 if (self->fast_memo == NULL) {
1418 self->fast_nesting = -1;
1419 return 0;
1420 }
1421 }
1422 key = PyLong_FromVoidPtr(obj);
1423 if (key == NULL)
1424 return 0;
1425 if (PyDict_GetItem(self->fast_memo, key)) {
1426 Py_DECREF(key);
1427 PyErr_Format(PyExc_ValueError,
1428 "fast mode: can't pickle cyclic objects "
1429 "including object type %.200s at %p",
1430 obj->ob_type->tp_name, obj);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1435 Py_DECREF(key);
1436 self->fast_nesting = -1;
1437 return 0;
1438 }
1439 Py_DECREF(key);
1440 }
1441 return 1;
1442}
1443
1444static int
1445fast_save_leave(PicklerObject *self, PyObject *obj)
1446{
1447 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1448 PyObject *key = PyLong_FromVoidPtr(obj);
1449 if (key == NULL)
1450 return 0;
1451 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1452 Py_DECREF(key);
1453 return 0;
1454 }
1455 Py_DECREF(key);
1456 }
1457 return 1;
1458}
1459
1460static int
1461save_none(PicklerObject *self, PyObject *obj)
1462{
1463 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001464 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001465 return -1;
1466
1467 return 0;
1468}
1469
1470static int
1471save_bool(PicklerObject *self, PyObject *obj)
1472{
1473 static const char *buf[2] = { FALSE, TRUE };
1474 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1475 int p = (obj == Py_True);
1476
1477 if (self->proto >= 2) {
1478 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001479 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001480 return -1;
1481 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001482 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001483 return -1;
1484
1485 return 0;
1486}
1487
1488static int
1489save_int(PicklerObject *self, long x)
1490{
1491 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001492 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493
1494 if (!self->bin
1495#if SIZEOF_LONG > 4
1496 || x > 0x7fffffffL || x < -0x80000000L
1497#endif
1498 ) {
1499 /* Text-mode pickle, or long too big to fit in the 4-byte
1500 * signed BININT format: store as a string.
1501 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001502 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1503 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001504 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001505 return -1;
1506 }
1507 else {
1508 /* Binary pickle and x fits in a signed 4-byte int. */
1509 pdata[1] = (unsigned char)(x & 0xff);
1510 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1511 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1512 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1513
1514 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1515 if (pdata[2] == 0) {
1516 pdata[0] = BININT1;
1517 len = 2;
1518 }
1519 else {
1520 pdata[0] = BININT2;
1521 len = 3;
1522 }
1523 }
1524 else {
1525 pdata[0] = BININT;
1526 len = 5;
1527 }
1528
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001529 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001530 return -1;
1531 }
1532
1533 return 0;
1534}
1535
1536static int
1537save_long(PicklerObject *self, PyObject *obj)
1538{
1539 PyObject *repr = NULL;
1540 Py_ssize_t size;
1541 long val = PyLong_AsLong(obj);
1542 int status = 0;
1543
1544 const char long_op = LONG;
1545
1546 if (val == -1 && PyErr_Occurred()) {
1547 /* out of range for int pickling */
1548 PyErr_Clear();
1549 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001550 else
1551#if SIZEOF_LONG > 4
1552 if (val <= 0x7fffffffL && val >= -0x80000000L)
1553#endif
1554 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001555
1556 if (self->proto >= 2) {
1557 /* Linear-time pickling. */
1558 size_t nbits;
1559 size_t nbytes;
1560 unsigned char *pdata;
1561 char header[5];
1562 int i;
1563 int sign = _PyLong_Sign(obj);
1564
1565 if (sign == 0) {
1566 header[0] = LONG1;
1567 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001568 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001569 goto error;
1570 return 0;
1571 }
1572 nbits = _PyLong_NumBits(obj);
1573 if (nbits == (size_t)-1 && PyErr_Occurred())
1574 goto error;
1575 /* How many bytes do we need? There are nbits >> 3 full
1576 * bytes of data, and nbits & 7 leftover bits. If there
1577 * are any leftover bits, then we clearly need another
1578 * byte. Wnat's not so obvious is that we *probably*
1579 * need another byte even if there aren't any leftovers:
1580 * the most-significant bit of the most-significant byte
1581 * acts like a sign bit, and it's usually got a sense
1582 * opposite of the one we need. The exception is longs
1583 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1584 * its own 256's-complement, so has the right sign bit
1585 * even without the extra byte. That's a pain to check
1586 * for in advance, though, so we always grab an extra
1587 * byte at the start, and cut it back later if possible.
1588 */
1589 nbytes = (nbits >> 3) + 1;
1590 if (nbytes > INT_MAX) {
1591 PyErr_SetString(PyExc_OverflowError,
1592 "long too large to pickle");
1593 goto error;
1594 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001595 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001596 if (repr == NULL)
1597 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001598 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001599 i = _PyLong_AsByteArray((PyLongObject *)obj,
1600 pdata, nbytes,
1601 1 /* little endian */ , 1 /* signed */ );
1602 if (i < 0)
1603 goto error;
1604 /* If the long is negative, this may be a byte more than
1605 * needed. This is so iff the MSB is all redundant sign
1606 * bits.
1607 */
1608 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001609 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001610 pdata[nbytes - 1] == 0xff &&
1611 (pdata[nbytes - 2] & 0x80) != 0) {
1612 nbytes--;
1613 }
1614
1615 if (nbytes < 256) {
1616 header[0] = LONG1;
1617 header[1] = (unsigned char)nbytes;
1618 size = 2;
1619 }
1620 else {
1621 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001622 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 for (i = 1; i < 5; i++) {
1624 header[i] = (unsigned char)(size & 0xff);
1625 size >>= 8;
1626 }
1627 size = 5;
1628 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001629 if (_Pickler_Write(self, header, size) < 0 ||
1630 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001631 goto error;
1632 }
1633 else {
1634 char *string;
1635
Mark Dickinson8dd05142009-01-20 20:43:58 +00001636 /* proto < 2: write the repr and newline. This is quadratic-time (in
1637 the number of digits), in both directions. We add a trailing 'L'
1638 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001639
1640 repr = PyObject_Repr(obj);
1641 if (repr == NULL)
1642 goto error;
1643
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001644 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645 if (string == NULL)
1646 goto error;
1647
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001648 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1649 _Pickler_Write(self, string, size) < 0 ||
1650 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001651 goto error;
1652 }
1653
1654 if (0) {
1655 error:
1656 status = -1;
1657 }
1658 Py_XDECREF(repr);
1659
1660 return status;
1661}
1662
1663static int
1664save_float(PicklerObject *self, PyObject *obj)
1665{
1666 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1667
1668 if (self->bin) {
1669 char pdata[9];
1670 pdata[0] = BINFLOAT;
1671 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1672 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001673 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001674 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001675 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001676 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 int result = -1;
1678 char *buf = NULL;
1679 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001680
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001681 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001682 goto done;
1683
Mark Dickinson3e09f432009-04-17 08:41:23 +00001684 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001685 if (!buf) {
1686 PyErr_NoMemory();
1687 goto done;
1688 }
1689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001690 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001691 goto done;
1692
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001693 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001694 goto done;
1695
1696 result = 0;
1697done:
1698 PyMem_Free(buf);
1699 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001700 }
1701
1702 return 0;
1703}
1704
1705static int
1706save_bytes(PicklerObject *self, PyObject *obj)
1707{
1708 if (self->proto < 3) {
1709 /* Older pickle protocols do not have an opcode for pickling bytes
1710 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001711 the __reduce__ method) to permit bytes object unpickling.
1712
1713 Here we use a hack to be compatible with Python 2. Since in Python
1714 2 'bytes' is just an alias for 'str' (which has different
1715 parameters than the actual bytes object), we use codecs.encode
1716 to create the appropriate 'str' object when unpickled using
1717 Python 2 *and* the appropriate 'bytes' object when unpickled
1718 using Python 3. Again this is a hack and we don't need to do this
1719 with newer protocols. */
1720 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001721 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001722 int status;
1723
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001724 if (codecs_encode == NULL) {
1725 PyObject *codecs_module = PyImport_ImportModule("codecs");
1726 if (codecs_module == NULL) {
1727 return -1;
1728 }
1729 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1730 Py_DECREF(codecs_module);
1731 if (codecs_encode == NULL) {
1732 return -1;
1733 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001734 }
1735
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001736 if (PyBytes_GET_SIZE(obj) == 0) {
1737 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1738 }
1739 else {
1740 static PyObject *latin1 = NULL;
1741 PyObject *unicode_str =
1742 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1743 PyBytes_GET_SIZE(obj),
1744 "strict");
1745 if (unicode_str == NULL)
1746 return -1;
1747 if (latin1 == NULL) {
1748 latin1 = PyUnicode_InternFromString("latin1");
1749 if (latin1 == NULL)
1750 return -1;
1751 }
1752 reduce_value = Py_BuildValue("(O(OO))",
1753 codecs_encode, unicode_str, latin1);
1754 Py_DECREF(unicode_str);
1755 }
1756
1757 if (reduce_value == NULL)
1758 return -1;
1759
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760 /* save_reduce() will memoize the object automatically. */
1761 status = save_reduce(self, reduce_value, obj);
1762 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001763 return status;
1764 }
1765 else {
1766 Py_ssize_t size;
1767 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001768 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001769
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001770 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001771 if (size < 0)
1772 return -1;
1773
1774 if (size < 256) {
1775 header[0] = SHORT_BINBYTES;
1776 header[1] = (unsigned char)size;
1777 len = 2;
1778 }
1779 else if (size <= 0xffffffffL) {
1780 header[0] = BINBYTES;
1781 header[1] = (unsigned char)(size & 0xff);
1782 header[2] = (unsigned char)((size >> 8) & 0xff);
1783 header[3] = (unsigned char)((size >> 16) & 0xff);
1784 header[4] = (unsigned char)((size >> 24) & 0xff);
1785 len = 5;
1786 }
1787 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001788 PyErr_SetString(PyExc_OverflowError,
1789 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001790 return -1; /* string too large */
1791 }
1792
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001793 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001794 return -1;
1795
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001796 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001797 return -1;
1798
1799 if (memo_put(self, obj) < 0)
1800 return -1;
1801
1802 return 0;
1803 }
1804}
1805
1806/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1807 backslash and newline characters to \uXXXX escapes. */
1808static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001809raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001810{
1811 PyObject *repr, *result;
1812 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001813 Py_ssize_t i, size, expandsize;
1814 void *data;
1815 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001816
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001817 if (PyUnicode_READY(obj))
1818 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001819
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001820 size = PyUnicode_GET_LENGTH(obj);
1821 data = PyUnicode_DATA(obj);
1822 kind = PyUnicode_KIND(obj);
1823 if (kind == PyUnicode_4BYTE_KIND)
1824 expandsize = 10;
1825 else
1826 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001827
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001828 if (size > PY_SSIZE_T_MAX / expandsize)
1829 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001830 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001831 if (repr == NULL)
1832 return NULL;
1833 if (size == 0)
1834 goto done;
1835
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001836 p = PyByteArray_AS_STRING(repr);
1837 for (i=0; i < size; i++) {
1838 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001839 /* Map 32-bit characters to '\Uxxxxxxxx' */
1840 if (ch >= 0x10000) {
1841 *p++ = '\\';
1842 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001843 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1844 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1845 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1846 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1847 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1848 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1849 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1850 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001851 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001852 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001853 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001854 *p++ = '\\';
1855 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001856 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1857 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1858 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1859 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001860 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001861 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001862 else
1863 *p++ = (char) ch;
1864 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001865 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001866
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001867done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001868 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001869 Py_DECREF(repr);
1870 return result;
1871}
1872
1873static int
1874save_unicode(PicklerObject *self, PyObject *obj)
1875{
1876 Py_ssize_t size;
1877 PyObject *encoded = NULL;
1878
1879 if (self->bin) {
1880 char pdata[5];
1881
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001882 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001883 if (encoded == NULL)
1884 goto error;
1885
1886 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001887 if (size > 0xffffffffL) {
1888 PyErr_SetString(PyExc_OverflowError,
1889 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001890 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001891 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001892
1893 pdata[0] = BINUNICODE;
1894 pdata[1] = (unsigned char)(size & 0xff);
1895 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1896 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1897 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1898
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001899 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001900 goto error;
1901
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001902 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001903 goto error;
1904 }
1905 else {
1906 const char unicode_op = UNICODE;
1907
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001908 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001909 if (encoded == NULL)
1910 goto error;
1911
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001912 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001913 goto error;
1914
1915 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001916 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001917 goto error;
1918
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001919 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001920 goto error;
1921 }
1922 if (memo_put(self, obj) < 0)
1923 goto error;
1924
1925 Py_DECREF(encoded);
1926 return 0;
1927
1928 error:
1929 Py_XDECREF(encoded);
1930 return -1;
1931}
1932
1933/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1934static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001935store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001936{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001937 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001938
1939 assert(PyTuple_Size(t) == len);
1940
1941 for (i = 0; i < len; i++) {
1942 PyObject *element = PyTuple_GET_ITEM(t, i);
1943
1944 if (element == NULL)
1945 return -1;
1946 if (save(self, element, 0) < 0)
1947 return -1;
1948 }
1949
1950 return 0;
1951}
1952
1953/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1954 * used across protocols to minimize the space needed to pickle them.
1955 * Tuples are also the only builtin immutable type that can be recursive
1956 * (a tuple can be reached from itself), and that requires some subtle
1957 * magic so that it works in all cases. IOW, this is a long routine.
1958 */
1959static int
1960save_tuple(PicklerObject *self, PyObject *obj)
1961{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001962 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001963
1964 const char mark_op = MARK;
1965 const char tuple_op = TUPLE;
1966 const char pop_op = POP;
1967 const char pop_mark_op = POP_MARK;
1968 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1969
1970 if ((len = PyTuple_Size(obj)) < 0)
1971 return -1;
1972
1973 if (len == 0) {
1974 char pdata[2];
1975
1976 if (self->proto) {
1977 pdata[0] = EMPTY_TUPLE;
1978 len = 1;
1979 }
1980 else {
1981 pdata[0] = MARK;
1982 pdata[1] = TUPLE;
1983 len = 2;
1984 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001985 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001986 return -1;
1987 return 0;
1988 }
1989
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001990 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001991 * saving the tuple elements, the tuple must be recursive, in
1992 * which case we'll pop everything we put on the stack, and fetch
1993 * its value from the memo.
1994 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001995 if (len <= 3 && self->proto >= 2) {
1996 /* Use TUPLE{1,2,3} opcodes. */
1997 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001998 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001999
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002000 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002001 /* pop the len elements */
2002 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002003 if (_Pickler_Write(self, &pop_op, 1) < 0)
2004 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002006 if (memo_get(self, obj) < 0)
2007 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002009 return 0;
2010 }
2011 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002012 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 }
2015 goto memoize;
2016 }
2017
2018 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2019 * Generate MARK e1 e2 ... TUPLE
2020 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002021 if (_Pickler_Write(self, &mark_op, 1) < 0)
2022 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002023
2024 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002025 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002027 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028 /* pop the stack stuff we pushed */
2029 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002030 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2031 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 }
2033 else {
2034 /* Note that we pop one more than len, to remove
2035 * the MARK too.
2036 */
2037 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002038 if (_Pickler_Write(self, &pop_op, 1) < 0)
2039 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002040 }
2041 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002042 if (memo_get(self, obj) < 0)
2043 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002044
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002045 return 0;
2046 }
2047 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002048 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2049 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002050 }
2051
2052 memoize:
2053 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002054 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002056 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002057}
2058
2059/* iter is an iterator giving items, and we batch up chunks of
2060 * MARK item item ... item APPENDS
2061 * opcode sequences. Calling code should have arranged to first create an
2062 * empty list, or list-like object, for the APPENDS to operate on.
2063 * Returns 0 on success, <0 on error.
2064 */
2065static int
2066batch_list(PicklerObject *self, PyObject *iter)
2067{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002068 PyObject *obj = NULL;
2069 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002070 int i, n;
2071
2072 const char mark_op = MARK;
2073 const char append_op = APPEND;
2074 const char appends_op = APPENDS;
2075
2076 assert(iter != NULL);
2077
2078 /* XXX: I think this function could be made faster by avoiding the
2079 iterator interface and fetching objects directly from list using
2080 PyList_GET_ITEM.
2081 */
2082
2083 if (self->proto == 0) {
2084 /* APPENDS isn't available; do one at a time. */
2085 for (;;) {
2086 obj = PyIter_Next(iter);
2087 if (obj == NULL) {
2088 if (PyErr_Occurred())
2089 return -1;
2090 break;
2091 }
2092 i = save(self, obj, 0);
2093 Py_DECREF(obj);
2094 if (i < 0)
2095 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002096 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002097 return -1;
2098 }
2099 return 0;
2100 }
2101
2102 /* proto > 0: write in batches of BATCHSIZE. */
2103 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002104 /* Get first item */
2105 firstitem = PyIter_Next(iter);
2106 if (firstitem == NULL) {
2107 if (PyErr_Occurred())
2108 goto error;
2109
2110 /* nothing more to add */
2111 break;
2112 }
2113
2114 /* Try to get a second item */
2115 obj = PyIter_Next(iter);
2116 if (obj == NULL) {
2117 if (PyErr_Occurred())
2118 goto error;
2119
2120 /* Only one item to write */
2121 if (save(self, firstitem, 0) < 0)
2122 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002123 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002124 goto error;
2125 Py_CLEAR(firstitem);
2126 break;
2127 }
2128
2129 /* More than one item to write */
2130
2131 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002132 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002133 goto error;
2134
2135 if (save(self, firstitem, 0) < 0)
2136 goto error;
2137 Py_CLEAR(firstitem);
2138 n = 1;
2139
2140 /* Fetch and save up to BATCHSIZE items */
2141 while (obj) {
2142 if (save(self, obj, 0) < 0)
2143 goto error;
2144 Py_CLEAR(obj);
2145 n += 1;
2146
2147 if (n == BATCHSIZE)
2148 break;
2149
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002150 obj = PyIter_Next(iter);
2151 if (obj == NULL) {
2152 if (PyErr_Occurred())
2153 goto error;
2154 break;
2155 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002156 }
2157
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002158 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002159 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002160
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002161 } while (n == BATCHSIZE);
2162 return 0;
2163
2164 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002165 Py_XDECREF(firstitem);
2166 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002167 return -1;
2168}
2169
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002170/* This is a variant of batch_list() above, specialized for lists (with no
2171 * support for list subclasses). Like batch_list(), we batch up chunks of
2172 * MARK item item ... item APPENDS
2173 * opcode sequences. Calling code should have arranged to first create an
2174 * empty list, or list-like object, for the APPENDS to operate on.
2175 * Returns 0 on success, -1 on error.
2176 *
2177 * This version is considerably faster than batch_list(), if less general.
2178 *
2179 * Note that this only works for protocols > 0.
2180 */
2181static int
2182batch_list_exact(PicklerObject *self, PyObject *obj)
2183{
2184 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002185 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002186
2187 const char append_op = APPEND;
2188 const char appends_op = APPENDS;
2189 const char mark_op = MARK;
2190
2191 assert(obj != NULL);
2192 assert(self->proto > 0);
2193 assert(PyList_CheckExact(obj));
2194
2195 if (PyList_GET_SIZE(obj) == 1) {
2196 item = PyList_GET_ITEM(obj, 0);
2197 if (save(self, item, 0) < 0)
2198 return -1;
2199 if (_Pickler_Write(self, &append_op, 1) < 0)
2200 return -1;
2201 return 0;
2202 }
2203
2204 /* Write in batches of BATCHSIZE. */
2205 total = 0;
2206 do {
2207 this_batch = 0;
2208 if (_Pickler_Write(self, &mark_op, 1) < 0)
2209 return -1;
2210 while (total < PyList_GET_SIZE(obj)) {
2211 item = PyList_GET_ITEM(obj, total);
2212 if (save(self, item, 0) < 0)
2213 return -1;
2214 total++;
2215 if (++this_batch == BATCHSIZE)
2216 break;
2217 }
2218 if (_Pickler_Write(self, &appends_op, 1) < 0)
2219 return -1;
2220
2221 } while (total < PyList_GET_SIZE(obj));
2222
2223 return 0;
2224}
2225
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002226static int
2227save_list(PicklerObject *self, PyObject *obj)
2228{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002229 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002230 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002231 int status = 0;
2232
2233 if (self->fast && !fast_save_enter(self, obj))
2234 goto error;
2235
2236 /* Create an empty list. */
2237 if (self->bin) {
2238 header[0] = EMPTY_LIST;
2239 len = 1;
2240 }
2241 else {
2242 header[0] = MARK;
2243 header[1] = LIST;
2244 len = 2;
2245 }
2246
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002247 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002248 goto error;
2249
2250 /* Get list length, and bow out early if empty. */
2251 if ((len = PyList_Size(obj)) < 0)
2252 goto error;
2253
2254 if (memo_put(self, obj) < 0)
2255 goto error;
2256
2257 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002258 /* Materialize the list elements. */
2259 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002260 if (Py_EnterRecursiveCall(" while pickling an object"))
2261 goto error;
2262 status = batch_list_exact(self, obj);
2263 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002264 } else {
2265 PyObject *iter = PyObject_GetIter(obj);
2266 if (iter == NULL)
2267 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002268
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002269 if (Py_EnterRecursiveCall(" while pickling an object")) {
2270 Py_DECREF(iter);
2271 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002272 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002273 status = batch_list(self, iter);
2274 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002275 Py_DECREF(iter);
2276 }
2277 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002278 if (0) {
2279 error:
2280 status = -1;
2281 }
2282
2283 if (self->fast && !fast_save_leave(self, obj))
2284 status = -1;
2285
2286 return status;
2287}
2288
2289/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2290 * MARK key value ... key value SETITEMS
2291 * opcode sequences. Calling code should have arranged to first create an
2292 * empty dict, or dict-like object, for the SETITEMS to operate on.
2293 * Returns 0 on success, <0 on error.
2294 *
2295 * This is very much like batch_list(). The difference between saving
2296 * elements directly, and picking apart two-tuples, is so long-winded at
2297 * the C level, though, that attempts to combine these routines were too
2298 * ugly to bear.
2299 */
2300static int
2301batch_dict(PicklerObject *self, PyObject *iter)
2302{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002303 PyObject *obj = NULL;
2304 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002305 int i, n;
2306
2307 const char mark_op = MARK;
2308 const char setitem_op = SETITEM;
2309 const char setitems_op = SETITEMS;
2310
2311 assert(iter != NULL);
2312
2313 if (self->proto == 0) {
2314 /* SETITEMS isn't available; do one at a time. */
2315 for (;;) {
2316 obj = PyIter_Next(iter);
2317 if (obj == NULL) {
2318 if (PyErr_Occurred())
2319 return -1;
2320 break;
2321 }
2322 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2323 PyErr_SetString(PyExc_TypeError, "dict items "
2324 "iterator must return 2-tuples");
2325 return -1;
2326 }
2327 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2328 if (i >= 0)
2329 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2330 Py_DECREF(obj);
2331 if (i < 0)
2332 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002333 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002334 return -1;
2335 }
2336 return 0;
2337 }
2338
2339 /* proto > 0: write in batches of BATCHSIZE. */
2340 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002341 /* Get first item */
2342 firstitem = PyIter_Next(iter);
2343 if (firstitem == NULL) {
2344 if (PyErr_Occurred())
2345 goto error;
2346
2347 /* nothing more to add */
2348 break;
2349 }
2350 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2351 PyErr_SetString(PyExc_TypeError, "dict items "
2352 "iterator must return 2-tuples");
2353 goto error;
2354 }
2355
2356 /* Try to get a second item */
2357 obj = PyIter_Next(iter);
2358 if (obj == NULL) {
2359 if (PyErr_Occurred())
2360 goto error;
2361
2362 /* Only one item to write */
2363 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2364 goto error;
2365 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2366 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002367 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002368 goto error;
2369 Py_CLEAR(firstitem);
2370 break;
2371 }
2372
2373 /* More than one item to write */
2374
2375 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002376 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002377 goto error;
2378
2379 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2380 goto error;
2381 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2382 goto error;
2383 Py_CLEAR(firstitem);
2384 n = 1;
2385
2386 /* Fetch and save up to BATCHSIZE items */
2387 while (obj) {
2388 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2389 PyErr_SetString(PyExc_TypeError, "dict items "
2390 "iterator must return 2-tuples");
2391 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002392 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002393 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2394 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2395 goto error;
2396 Py_CLEAR(obj);
2397 n += 1;
2398
2399 if (n == BATCHSIZE)
2400 break;
2401
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002402 obj = PyIter_Next(iter);
2403 if (obj == NULL) {
2404 if (PyErr_Occurred())
2405 goto error;
2406 break;
2407 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002408 }
2409
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002410 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002411 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002412
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002413 } while (n == BATCHSIZE);
2414 return 0;
2415
2416 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002417 Py_XDECREF(firstitem);
2418 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002419 return -1;
2420}
2421
Collin Winter5c9b02d2009-05-25 05:43:30 +00002422/* This is a variant of batch_dict() above that specializes for dicts, with no
2423 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2424 * MARK key value ... key value SETITEMS
2425 * opcode sequences. Calling code should have arranged to first create an
2426 * empty dict, or dict-like object, for the SETITEMS to operate on.
2427 * Returns 0 on success, -1 on error.
2428 *
2429 * Note that this currently doesn't work for protocol 0.
2430 */
2431static int
2432batch_dict_exact(PicklerObject *self, PyObject *obj)
2433{
2434 PyObject *key = NULL, *value = NULL;
2435 int i;
2436 Py_ssize_t dict_size, ppos = 0;
2437
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002438 const char mark_op = MARK;
2439 const char setitem_op = SETITEM;
2440 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002441
2442 assert(obj != NULL);
2443 assert(self->proto > 0);
2444
2445 dict_size = PyDict_Size(obj);
2446
2447 /* Special-case len(d) == 1 to save space. */
2448 if (dict_size == 1) {
2449 PyDict_Next(obj, &ppos, &key, &value);
2450 if (save(self, key, 0) < 0)
2451 return -1;
2452 if (save(self, value, 0) < 0)
2453 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002454 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002455 return -1;
2456 return 0;
2457 }
2458
2459 /* Write in batches of BATCHSIZE. */
2460 do {
2461 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002462 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002463 return -1;
2464 while (PyDict_Next(obj, &ppos, &key, &value)) {
2465 if (save(self, key, 0) < 0)
2466 return -1;
2467 if (save(self, value, 0) < 0)
2468 return -1;
2469 if (++i == BATCHSIZE)
2470 break;
2471 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002472 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002473 return -1;
2474 if (PyDict_Size(obj) != dict_size) {
2475 PyErr_Format(
2476 PyExc_RuntimeError,
2477 "dictionary changed size during iteration");
2478 return -1;
2479 }
2480
2481 } while (i == BATCHSIZE);
2482 return 0;
2483}
2484
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002485static int
2486save_dict(PicklerObject *self, PyObject *obj)
2487{
2488 PyObject *items, *iter;
2489 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002490 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002491 int status = 0;
2492
2493 if (self->fast && !fast_save_enter(self, obj))
2494 goto error;
2495
2496 /* Create an empty dict. */
2497 if (self->bin) {
2498 header[0] = EMPTY_DICT;
2499 len = 1;
2500 }
2501 else {
2502 header[0] = MARK;
2503 header[1] = DICT;
2504 len = 2;
2505 }
2506
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002507 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002508 goto error;
2509
2510 /* Get dict size, and bow out early if empty. */
2511 if ((len = PyDict_Size(obj)) < 0)
2512 goto error;
2513
2514 if (memo_put(self, obj) < 0)
2515 goto error;
2516
2517 if (len != 0) {
2518 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002519 if (PyDict_CheckExact(obj) && self->proto > 0) {
2520 /* We can take certain shortcuts if we know this is a dict and
2521 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002522 if (Py_EnterRecursiveCall(" while pickling an object"))
2523 goto error;
2524 status = batch_dict_exact(self, obj);
2525 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002526 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002527 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002528
2529 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002530 if (items == NULL)
2531 goto error;
2532 iter = PyObject_GetIter(items);
2533 Py_DECREF(items);
2534 if (iter == NULL)
2535 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002536 if (Py_EnterRecursiveCall(" while pickling an object")) {
2537 Py_DECREF(iter);
2538 goto error;
2539 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002540 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002541 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002542 Py_DECREF(iter);
2543 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002544 }
2545
2546 if (0) {
2547 error:
2548 status = -1;
2549 }
2550
2551 if (self->fast && !fast_save_leave(self, obj))
2552 status = -1;
2553
2554 return status;
2555}
2556
2557static int
2558save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2559{
2560 static PyObject *name_str = NULL;
2561 PyObject *global_name = NULL;
2562 PyObject *module_name = NULL;
2563 PyObject *module = NULL;
2564 PyObject *cls;
2565 int status = 0;
2566
2567 const char global_op = GLOBAL;
2568
2569 if (name_str == NULL) {
2570 name_str = PyUnicode_InternFromString("__name__");
2571 if (name_str == NULL)
2572 goto error;
2573 }
2574
2575 if (name) {
2576 global_name = name;
2577 Py_INCREF(global_name);
2578 }
2579 else {
2580 global_name = PyObject_GetAttr(obj, name_str);
2581 if (global_name == NULL)
2582 goto error;
2583 }
2584
2585 module_name = whichmodule(obj, global_name);
2586 if (module_name == NULL)
2587 goto error;
2588
2589 /* XXX: Change to use the import C API directly with level=0 to disallow
2590 relative imports.
2591
2592 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2593 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2594 custom import functions (IMHO, this would be a nice security
2595 feature). The import C API would need to be extended to support the
2596 extra parameters of __import__ to fix that. */
2597 module = PyImport_Import(module_name);
2598 if (module == NULL) {
2599 PyErr_Format(PicklingError,
2600 "Can't pickle %R: import of module %R failed",
2601 obj, module_name);
2602 goto error;
2603 }
2604 cls = PyObject_GetAttr(module, global_name);
2605 if (cls == NULL) {
2606 PyErr_Format(PicklingError,
2607 "Can't pickle %R: attribute lookup %S.%S failed",
2608 obj, module_name, global_name);
2609 goto error;
2610 }
2611 if (cls != obj) {
2612 Py_DECREF(cls);
2613 PyErr_Format(PicklingError,
2614 "Can't pickle %R: it's not the same object as %S.%S",
2615 obj, module_name, global_name);
2616 goto error;
2617 }
2618 Py_DECREF(cls);
2619
2620 if (self->proto >= 2) {
2621 /* See whether this is in the extension registry, and if
2622 * so generate an EXT opcode.
2623 */
2624 PyObject *code_obj; /* extension code as Python object */
2625 long code; /* extension code as C value */
2626 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002627 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002628
2629 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2630 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2631 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2632 /* The object is not registered in the extension registry.
2633 This is the most likely code path. */
2634 if (code_obj == NULL)
2635 goto gen_global;
2636
2637 /* XXX: pickle.py doesn't check neither the type, nor the range
2638 of the value returned by the extension_registry. It should for
2639 consistency. */
2640
2641 /* Verify code_obj has the right type and value. */
2642 if (!PyLong_Check(code_obj)) {
2643 PyErr_Format(PicklingError,
2644 "Can't pickle %R: extension code %R isn't an integer",
2645 obj, code_obj);
2646 goto error;
2647 }
2648 code = PyLong_AS_LONG(code_obj);
2649 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002650 if (!PyErr_Occurred())
2651 PyErr_Format(PicklingError,
2652 "Can't pickle %R: extension code %ld is out of range",
2653 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002654 goto error;
2655 }
2656
2657 /* Generate an EXT opcode. */
2658 if (code <= 0xff) {
2659 pdata[0] = EXT1;
2660 pdata[1] = (unsigned char)code;
2661 n = 2;
2662 }
2663 else if (code <= 0xffff) {
2664 pdata[0] = EXT2;
2665 pdata[1] = (unsigned char)(code & 0xff);
2666 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2667 n = 3;
2668 }
2669 else {
2670 pdata[0] = EXT4;
2671 pdata[1] = (unsigned char)(code & 0xff);
2672 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2673 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2674 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2675 n = 5;
2676 }
2677
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002678 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002679 goto error;
2680 }
2681 else {
2682 /* Generate a normal global opcode if we are using a pickle
2683 protocol <= 2, or if the object is not registered in the
2684 extension registry. */
2685 PyObject *encoded;
2686 PyObject *(*unicode_encoder)(PyObject *);
2687
2688 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002689 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002690 goto error;
2691
2692 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2693 the module name and the global name using UTF-8. We do so only when
2694 we are using the pickle protocol newer than version 3. This is to
2695 ensure compatibility with older Unpickler running on Python 2.x. */
2696 if (self->proto >= 3) {
2697 unicode_encoder = PyUnicode_AsUTF8String;
2698 }
2699 else {
2700 unicode_encoder = PyUnicode_AsASCIIString;
2701 }
2702
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002703 /* For protocol < 3 and if the user didn't request against doing so,
2704 we convert module names to the old 2.x module names. */
2705 if (self->fix_imports) {
2706 PyObject *key;
2707 PyObject *item;
2708
2709 key = PyTuple_Pack(2, module_name, global_name);
2710 if (key == NULL)
2711 goto error;
2712 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2713 Py_DECREF(key);
2714 if (item) {
2715 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2716 PyErr_Format(PyExc_RuntimeError,
2717 "_compat_pickle.REVERSE_NAME_MAPPING values "
2718 "should be 2-tuples, not %.200s",
2719 Py_TYPE(item)->tp_name);
2720 goto error;
2721 }
2722 Py_CLEAR(module_name);
2723 Py_CLEAR(global_name);
2724 module_name = PyTuple_GET_ITEM(item, 0);
2725 global_name = PyTuple_GET_ITEM(item, 1);
2726 if (!PyUnicode_Check(module_name) ||
2727 !PyUnicode_Check(global_name)) {
2728 PyErr_Format(PyExc_RuntimeError,
2729 "_compat_pickle.REVERSE_NAME_MAPPING values "
2730 "should be pairs of str, not (%.200s, %.200s)",
2731 Py_TYPE(module_name)->tp_name,
2732 Py_TYPE(global_name)->tp_name);
2733 goto error;
2734 }
2735 Py_INCREF(module_name);
2736 Py_INCREF(global_name);
2737 }
2738 else if (PyErr_Occurred()) {
2739 goto error;
2740 }
2741
2742 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2743 if (item) {
2744 if (!PyUnicode_Check(item)) {
2745 PyErr_Format(PyExc_RuntimeError,
2746 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2747 "should be strings, not %.200s",
2748 Py_TYPE(item)->tp_name);
2749 goto error;
2750 }
2751 Py_CLEAR(module_name);
2752 module_name = item;
2753 Py_INCREF(module_name);
2754 }
2755 else if (PyErr_Occurred()) {
2756 goto error;
2757 }
2758 }
2759
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002760 /* Save the name of the module. */
2761 encoded = unicode_encoder(module_name);
2762 if (encoded == NULL) {
2763 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2764 PyErr_Format(PicklingError,
2765 "can't pickle module identifier '%S' using "
2766 "pickle protocol %i", module_name, self->proto);
2767 goto error;
2768 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002769 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002770 PyBytes_GET_SIZE(encoded)) < 0) {
2771 Py_DECREF(encoded);
2772 goto error;
2773 }
2774 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002775 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002776 goto error;
2777
2778 /* Save the name of the module. */
2779 encoded = unicode_encoder(global_name);
2780 if (encoded == NULL) {
2781 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2782 PyErr_Format(PicklingError,
2783 "can't pickle global identifier '%S' using "
2784 "pickle protocol %i", global_name, self->proto);
2785 goto error;
2786 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002787 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002788 PyBytes_GET_SIZE(encoded)) < 0) {
2789 Py_DECREF(encoded);
2790 goto error;
2791 }
2792 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002793 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002794 goto error;
2795
2796 /* Memoize the object. */
2797 if (memo_put(self, obj) < 0)
2798 goto error;
2799 }
2800
2801 if (0) {
2802 error:
2803 status = -1;
2804 }
2805 Py_XDECREF(module_name);
2806 Py_XDECREF(global_name);
2807 Py_XDECREF(module);
2808
2809 return status;
2810}
2811
2812static int
2813save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2814{
2815 PyObject *pid = NULL;
2816 int status = 0;
2817
2818 const char persid_op = PERSID;
2819 const char binpersid_op = BINPERSID;
2820
2821 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002822 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002823 if (pid == NULL)
2824 return -1;
2825
2826 if (pid != Py_None) {
2827 if (self->bin) {
2828 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002829 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002830 goto error;
2831 }
2832 else {
2833 PyObject *pid_str = NULL;
2834 char *pid_ascii_bytes;
2835 Py_ssize_t size;
2836
2837 pid_str = PyObject_Str(pid);
2838 if (pid_str == NULL)
2839 goto error;
2840
2841 /* XXX: Should it check whether the persistent id only contains
2842 ASCII characters? And what if the pid contains embedded
2843 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002844 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002845 Py_DECREF(pid_str);
2846 if (pid_ascii_bytes == NULL)
2847 goto error;
2848
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002849 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2850 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2851 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002852 goto error;
2853 }
2854 status = 1;
2855 }
2856
2857 if (0) {
2858 error:
2859 status = -1;
2860 }
2861 Py_XDECREF(pid);
2862
2863 return status;
2864}
2865
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002866static PyObject *
2867get_class(PyObject *obj)
2868{
2869 PyObject *cls;
2870 static PyObject *str_class;
2871
2872 if (str_class == NULL) {
2873 str_class = PyUnicode_InternFromString("__class__");
2874 if (str_class == NULL)
2875 return NULL;
2876 }
2877 cls = PyObject_GetAttr(obj, str_class);
2878 if (cls == NULL) {
2879 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2880 PyErr_Clear();
2881 cls = (PyObject *) Py_TYPE(obj);
2882 Py_INCREF(cls);
2883 }
2884 }
2885 return cls;
2886}
2887
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002888/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2889 * appropriate __reduce__ method for obj.
2890 */
2891static int
2892save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2893{
2894 PyObject *callable;
2895 PyObject *argtup;
2896 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002897 PyObject *listitems = Py_None;
2898 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002899 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002900
2901 int use_newobj = self->proto >= 2;
2902
2903 const char reduce_op = REDUCE;
2904 const char build_op = BUILD;
2905 const char newobj_op = NEWOBJ;
2906
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002907 size = PyTuple_Size(args);
2908 if (size < 2 || size > 5) {
2909 PyErr_SetString(PicklingError, "tuple returned by "
2910 "__reduce__ must contain 2 through 5 elements");
2911 return -1;
2912 }
2913
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002914 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2915 &callable, &argtup, &state, &listitems, &dictitems))
2916 return -1;
2917
2918 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002919 PyErr_SetString(PicklingError, "first item of the tuple "
2920 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002921 return -1;
2922 }
2923 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002924 PyErr_SetString(PicklingError, "second item of the tuple "
2925 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002926 return -1;
2927 }
2928
2929 if (state == Py_None)
2930 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002931
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002932 if (listitems == Py_None)
2933 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002934 else if (!PyIter_Check(listitems)) {
2935 PyErr_Format(PicklingError, "Fourth element of tuple"
2936 "returned by __reduce__ must be an iterator, not %s",
2937 Py_TYPE(listitems)->tp_name);
2938 return -1;
2939 }
2940
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002941 if (dictitems == Py_None)
2942 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002943 else if (!PyIter_Check(dictitems)) {
2944 PyErr_Format(PicklingError, "Fifth element of tuple"
2945 "returned by __reduce__ must be an iterator, not %s",
2946 Py_TYPE(dictitems)->tp_name);
2947 return -1;
2948 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002949
2950 /* Protocol 2 special case: if callable's name is __newobj__, use
2951 NEWOBJ. */
2952 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002953 static PyObject *newobj_str = NULL, *name_str = NULL;
2954 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002955
2956 if (newobj_str == NULL) {
2957 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002958 name_str = PyUnicode_InternFromString("__name__");
2959 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002960 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002961 }
2962
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002963 name = PyObject_GetAttr(callable, name_str);
2964 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002965 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2966 PyErr_Clear();
2967 else
2968 return -1;
2969 use_newobj = 0;
2970 }
2971 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002972 use_newobj = PyUnicode_Check(name) &&
2973 PyUnicode_Compare(name, newobj_str) == 0;
2974 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002975 }
2976 }
2977 if (use_newobj) {
2978 PyObject *cls;
2979 PyObject *newargtup;
2980 PyObject *obj_class;
2981 int p;
2982
2983 /* Sanity checks. */
2984 if (Py_SIZE(argtup) < 1) {
2985 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2986 return -1;
2987 }
2988
2989 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002990 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002991 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002992 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002993 return -1;
2994 }
2995
2996 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002997 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002998 p = obj_class != cls; /* true iff a problem */
2999 Py_DECREF(obj_class);
3000 if (p) {
3001 PyErr_SetString(PicklingError, "args[0] from "
3002 "__newobj__ args has the wrong class");
3003 return -1;
3004 }
3005 }
3006 /* XXX: These calls save() are prone to infinite recursion. Imagine
3007 what happen if the value returned by the __reduce__() method of
3008 some extension type contains another object of the same type. Ouch!
3009
3010 Here is a quick example, that I ran into, to illustrate what I
3011 mean:
3012
3013 >>> import pickle, copyreg
3014 >>> copyreg.dispatch_table.pop(complex)
3015 >>> pickle.dumps(1+2j)
3016 Traceback (most recent call last):
3017 ...
3018 RuntimeError: maximum recursion depth exceeded
3019
3020 Removing the complex class from copyreg.dispatch_table made the
3021 __reduce_ex__() method emit another complex object:
3022
3023 >>> (1+1j).__reduce_ex__(2)
3024 (<function __newobj__ at 0xb7b71c3c>,
3025 (<class 'complex'>, (1+1j)), None, None, None)
3026
3027 Thus when save() was called on newargstup (the 2nd item) recursion
3028 ensued. Of course, the bug was in the complex class which had a
3029 broken __getnewargs__() that emitted another complex object. But,
3030 the point, here, is it is quite easy to end up with a broken reduce
3031 function. */
3032
3033 /* Save the class and its __new__ arguments. */
3034 if (save(self, cls, 0) < 0)
3035 return -1;
3036
3037 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3038 if (newargtup == NULL)
3039 return -1;
3040
3041 p = save(self, newargtup, 0);
3042 Py_DECREF(newargtup);
3043 if (p < 0)
3044 return -1;
3045
3046 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003047 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003048 return -1;
3049 }
3050 else { /* Not using NEWOBJ. */
3051 if (save(self, callable, 0) < 0 ||
3052 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003053 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003054 return -1;
3055 }
3056
3057 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3058 the caller do not want to memoize the object. Not particularly useful,
3059 but that is to mimic the behavior save_reduce() in pickle.py when
3060 obj is None. */
3061 if (obj && memo_put(self, obj) < 0)
3062 return -1;
3063
3064 if (listitems && batch_list(self, listitems) < 0)
3065 return -1;
3066
3067 if (dictitems && batch_dict(self, dictitems) < 0)
3068 return -1;
3069
3070 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003071 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003072 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003073 return -1;
3074 }
3075
3076 return 0;
3077}
3078
3079static int
3080save(PicklerObject *self, PyObject *obj, int pers_save)
3081{
3082 PyTypeObject *type;
3083 PyObject *reduce_func = NULL;
3084 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003085 int status = 0;
3086
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003087 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003088 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003089
3090 /* The extra pers_save argument is necessary to avoid calling save_pers()
3091 on its returned object. */
3092 if (!pers_save && self->pers_func) {
3093 /* save_pers() returns:
3094 -1 to signal an error;
3095 0 if it did nothing successfully;
3096 1 if a persistent id was saved.
3097 */
3098 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3099 goto done;
3100 }
3101
3102 type = Py_TYPE(obj);
3103
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003104 /* The old cPickle had an optimization that used switch-case statement
3105 dispatching on the first letter of the type name. This has was removed
3106 since benchmarks shown that this optimization was actually slowing
3107 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003108
3109 /* Atom types; these aren't memoized, so don't check the memo. */
3110
3111 if (obj == Py_None) {
3112 status = save_none(self, obj);
3113 goto done;
3114 }
3115 else if (obj == Py_False || obj == Py_True) {
3116 status = save_bool(self, obj);
3117 goto done;
3118 }
3119 else if (type == &PyLong_Type) {
3120 status = save_long(self, obj);
3121 goto done;
3122 }
3123 else if (type == &PyFloat_Type) {
3124 status = save_float(self, obj);
3125 goto done;
3126 }
3127
3128 /* Check the memo to see if it has the object. If so, generate
3129 a GET (or BINGET) opcode, instead of pickling the object
3130 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003131 if (PyMemoTable_Get(self->memo, obj)) {
3132 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003133 goto error;
3134 goto done;
3135 }
3136
3137 if (type == &PyBytes_Type) {
3138 status = save_bytes(self, obj);
3139 goto done;
3140 }
3141 else if (type == &PyUnicode_Type) {
3142 status = save_unicode(self, obj);
3143 goto done;
3144 }
3145 else if (type == &PyDict_Type) {
3146 status = save_dict(self, obj);
3147 goto done;
3148 }
3149 else if (type == &PyList_Type) {
3150 status = save_list(self, obj);
3151 goto done;
3152 }
3153 else if (type == &PyTuple_Type) {
3154 status = save_tuple(self, obj);
3155 goto done;
3156 }
3157 else if (type == &PyType_Type) {
3158 status = save_global(self, obj, NULL);
3159 goto done;
3160 }
3161 else if (type == &PyFunction_Type) {
3162 status = save_global(self, obj, NULL);
3163 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3164 /* fall back to reduce */
3165 PyErr_Clear();
3166 }
3167 else {
3168 goto done;
3169 }
3170 }
3171 else if (type == &PyCFunction_Type) {
3172 status = save_global(self, obj, NULL);
3173 goto done;
3174 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003175
3176 /* XXX: This part needs some unit tests. */
3177
3178 /* Get a reduction callable, and call it. This may come from
3179 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3180 * or the object's __reduce__ method.
3181 */
3182 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3183 if (reduce_func != NULL) {
3184 /* Here, the reference count of the reduce_func object returned by
3185 PyDict_GetItem needs to be increased to be consistent with the one
3186 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3187 reduce_func at the end of the save() routine.
3188 */
3189 Py_INCREF(reduce_func);
3190 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003191 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003192 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003193 else if (PyType_IsSubtype(type, &PyType_Type)) {
3194 status = save_global(self, obj, NULL);
3195 goto done;
3196 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003197 else {
3198 static PyObject *reduce_str = NULL;
3199 static PyObject *reduce_ex_str = NULL;
3200
3201 /* Cache the name of the reduce methods. */
3202 if (reduce_str == NULL) {
3203 reduce_str = PyUnicode_InternFromString("__reduce__");
3204 if (reduce_str == NULL)
3205 goto error;
3206 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3207 if (reduce_ex_str == NULL)
3208 goto error;
3209 }
3210
3211 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3212 automatically defined as __reduce__. While this is convenient, this
3213 make it impossible to know which method was actually called. Of
3214 course, this is not a big deal. But still, it would be nice to let
3215 the user know which method was called when something go
3216 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3217 don't actually have to check for a __reduce__ method. */
3218
3219 /* Check for a __reduce_ex__ method. */
3220 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3221 if (reduce_func != NULL) {
3222 PyObject *proto;
3223 proto = PyLong_FromLong(self->proto);
3224 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003225 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003226 }
3227 }
3228 else {
3229 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3230 PyErr_Clear();
3231 else
3232 goto error;
3233 /* Check for a __reduce__ method. */
3234 reduce_func = PyObject_GetAttr(obj, reduce_str);
3235 if (reduce_func != NULL) {
3236 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3237 }
3238 else {
3239 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3240 type->tp_name, obj);
3241 goto error;
3242 }
3243 }
3244 }
3245
3246 if (reduce_value == NULL)
3247 goto error;
3248
3249 if (PyUnicode_Check(reduce_value)) {
3250 status = save_global(self, obj, reduce_value);
3251 goto done;
3252 }
3253
3254 if (!PyTuple_Check(reduce_value)) {
3255 PyErr_SetString(PicklingError,
3256 "__reduce__ must return a string or tuple");
3257 goto error;
3258 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003259
3260 status = save_reduce(self, reduce_value, obj);
3261
3262 if (0) {
3263 error:
3264 status = -1;
3265 }
3266 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003267 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003268 Py_XDECREF(reduce_func);
3269 Py_XDECREF(reduce_value);
3270
3271 return status;
3272}
3273
3274static int
3275dump(PicklerObject *self, PyObject *obj)
3276{
3277 const char stop_op = STOP;
3278
3279 if (self->proto >= 2) {
3280 char header[2];
3281
3282 header[0] = PROTO;
3283 assert(self->proto >= 0 && self->proto < 256);
3284 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003285 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003286 return -1;
3287 }
3288
3289 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003290 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003291 return -1;
3292
3293 return 0;
3294}
3295
3296PyDoc_STRVAR(Pickler_clear_memo_doc,
3297"clear_memo() -> None. Clears the pickler's \"memo\"."
3298"\n"
3299"The memo is the data structure that remembers which objects the\n"
3300"pickler has already seen, so that shared or recursive objects are\n"
3301"pickled by reference and not by value. This method is useful when\n"
3302"re-using picklers.");
3303
3304static PyObject *
3305Pickler_clear_memo(PicklerObject *self)
3306{
3307 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003308 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003309
3310 Py_RETURN_NONE;
3311}
3312
3313PyDoc_STRVAR(Pickler_dump_doc,
3314"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3315
3316static PyObject *
3317Pickler_dump(PicklerObject *self, PyObject *args)
3318{
3319 PyObject *obj;
3320
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003321 /* Check whether the Pickler was initialized correctly (issue3664).
3322 Developers often forget to call __init__() in their subclasses, which
3323 would trigger a segfault without this check. */
3324 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003325 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003326 "Pickler.__init__() was not called by %s.__init__()",
3327 Py_TYPE(self)->tp_name);
3328 return NULL;
3329 }
3330
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003331 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3332 return NULL;
3333
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003334 if (_Pickler_ClearBuffer(self) < 0)
3335 return NULL;
3336
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003337 if (dump(self, obj) < 0)
3338 return NULL;
3339
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003340 if (_Pickler_FlushToFile(self) < 0)
3341 return NULL;
3342
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003343 Py_RETURN_NONE;
3344}
3345
3346static struct PyMethodDef Pickler_methods[] = {
3347 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3348 Pickler_dump_doc},
3349 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3350 Pickler_clear_memo_doc},
3351 {NULL, NULL} /* sentinel */
3352};
3353
3354static void
3355Pickler_dealloc(PicklerObject *self)
3356{
3357 PyObject_GC_UnTrack(self);
3358
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003359 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003360 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003361 Py_XDECREF(self->pers_func);
3362 Py_XDECREF(self->arg);
3363 Py_XDECREF(self->fast_memo);
3364
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003365 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003366
3367 Py_TYPE(self)->tp_free((PyObject *)self);
3368}
3369
3370static int
3371Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3372{
3373 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003374 Py_VISIT(self->pers_func);
3375 Py_VISIT(self->arg);
3376 Py_VISIT(self->fast_memo);
3377 return 0;
3378}
3379
3380static int
3381Pickler_clear(PicklerObject *self)
3382{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003383 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003384 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003385 Py_CLEAR(self->pers_func);
3386 Py_CLEAR(self->arg);
3387 Py_CLEAR(self->fast_memo);
3388
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003389 if (self->memo != NULL) {
3390 PyMemoTable *memo = self->memo;
3391 self->memo = NULL;
3392 PyMemoTable_Del(memo);
3393 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003394 return 0;
3395}
3396
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003397
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003398PyDoc_STRVAR(Pickler_doc,
3399"Pickler(file, protocol=None)"
3400"\n"
3401"This takes a binary file for writing a pickle data stream.\n"
3402"\n"
3403"The optional protocol argument tells the pickler to use the\n"
3404"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3405"protocol is 3; a backward-incompatible protocol designed for\n"
3406"Python 3.0.\n"
3407"\n"
3408"Specifying a negative protocol version selects the highest\n"
3409"protocol version supported. The higher the protocol used, the\n"
3410"more recent the version of Python needed to read the pickle\n"
3411"produced.\n"
3412"\n"
3413"The file argument must have a write() method that accepts a single\n"
3414"bytes argument. It can thus be a file object opened for binary\n"
3415"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003416"meets this interface.\n"
3417"\n"
3418"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3419"map the new Python 3.x names to the old module names used in Python\n"
3420"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003421
3422static int
3423Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3424{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003425 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003426 PyObject *file;
3427 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003428 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003429 _Py_IDENTIFIER(persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003430
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003431 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003432 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003433 return -1;
3434
3435 /* In case of multiple __init__() calls, clear previous content. */
3436 if (self->write != NULL)
3437 (void)Pickler_clear(self);
3438
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003439 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3440 return -1;
3441
3442 if (_Pickler_SetOutputStream(self, file) < 0)
3443 return -1;
3444
3445 /* memo and output_buffer may have already been created in _Pickler_New */
3446 if (self->memo == NULL) {
3447 self->memo = PyMemoTable_New();
3448 if (self->memo == NULL)
3449 return -1;
3450 }
3451 self->output_len = 0;
3452 if (self->output_buffer == NULL) {
3453 self->max_output_len = WRITE_BUF_SIZE;
3454 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3455 self->max_output_len);
3456 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003457 return -1;
3458 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003459
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003460 self->arg = NULL;
3461 self->fast = 0;
3462 self->fast_nesting = 0;
3463 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003464 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003465 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3466 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3467 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003468 if (self->pers_func == NULL)
3469 return -1;
3470 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003471 return 0;
3472}
3473
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003474/* Define a proxy object for the Pickler's internal memo object. This is to
3475 * avoid breaking code like:
3476 * pickler.memo.clear()
3477 * and
3478 * pickler.memo = saved_memo
3479 * Is this a good idea? Not really, but we don't want to break code that uses
3480 * it. Note that we don't implement the entire mapping API here. This is
3481 * intentional, as these should be treated as black-box implementation details.
3482 */
3483
3484typedef struct {
3485 PyObject_HEAD
3486 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3487} PicklerMemoProxyObject;
3488
3489PyDoc_STRVAR(pmp_clear_doc,
3490"memo.clear() -> None. Remove all items from memo.");
3491
3492static PyObject *
3493pmp_clear(PicklerMemoProxyObject *self)
3494{
3495 if (self->pickler->memo)
3496 PyMemoTable_Clear(self->pickler->memo);
3497 Py_RETURN_NONE;
3498}
3499
3500PyDoc_STRVAR(pmp_copy_doc,
3501"memo.copy() -> new_memo. Copy the memo to a new object.");
3502
3503static PyObject *
3504pmp_copy(PicklerMemoProxyObject *self)
3505{
3506 Py_ssize_t i;
3507 PyMemoTable *memo;
3508 PyObject *new_memo = PyDict_New();
3509 if (new_memo == NULL)
3510 return NULL;
3511
3512 memo = self->pickler->memo;
3513 for (i = 0; i < memo->mt_allocated; ++i) {
3514 PyMemoEntry entry = memo->mt_table[i];
3515 if (entry.me_key != NULL) {
3516 int status;
3517 PyObject *key, *value;
3518
3519 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003520 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003521
3522 if (key == NULL || value == NULL) {
3523 Py_XDECREF(key);
3524 Py_XDECREF(value);
3525 goto error;
3526 }
3527 status = PyDict_SetItem(new_memo, key, value);
3528 Py_DECREF(key);
3529 Py_DECREF(value);
3530 if (status < 0)
3531 goto error;
3532 }
3533 }
3534 return new_memo;
3535
3536 error:
3537 Py_XDECREF(new_memo);
3538 return NULL;
3539}
3540
3541PyDoc_STRVAR(pmp_reduce_doc,
3542"memo.__reduce__(). Pickling support.");
3543
3544static PyObject *
3545pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3546{
3547 PyObject *reduce_value, *dict_args;
3548 PyObject *contents = pmp_copy(self);
3549 if (contents == NULL)
3550 return NULL;
3551
3552 reduce_value = PyTuple_New(2);
3553 if (reduce_value == NULL) {
3554 Py_DECREF(contents);
3555 return NULL;
3556 }
3557 dict_args = PyTuple_New(1);
3558 if (dict_args == NULL) {
3559 Py_DECREF(contents);
3560 Py_DECREF(reduce_value);
3561 return NULL;
3562 }
3563 PyTuple_SET_ITEM(dict_args, 0, contents);
3564 Py_INCREF((PyObject *)&PyDict_Type);
3565 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3566 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3567 return reduce_value;
3568}
3569
3570static PyMethodDef picklerproxy_methods[] = {
3571 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3572 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3573 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3574 {NULL, NULL} /* sentinel */
3575};
3576
3577static void
3578PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3579{
3580 PyObject_GC_UnTrack(self);
3581 Py_XDECREF(self->pickler);
3582 PyObject_GC_Del((PyObject *)self);
3583}
3584
3585static int
3586PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3587 visitproc visit, void *arg)
3588{
3589 Py_VISIT(self->pickler);
3590 return 0;
3591}
3592
3593static int
3594PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3595{
3596 Py_CLEAR(self->pickler);
3597 return 0;
3598}
3599
/* Type object for the memo proxy handed out by the Pickler "memo" getter.
   Instances take part in cyclic GC and are unhashable (tp_hash is
   PyObject_HashNotImplemented).  Slots after tp_methods are left
   zero-initialized. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (formerly tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
3630
3631static PyObject *
3632PicklerMemoProxy_New(PicklerObject *pickler)
3633{
3634 PicklerMemoProxyObject *self;
3635
3636 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3637 if (self == NULL)
3638 return NULL;
3639 Py_INCREF(pickler);
3640 self->pickler = pickler;
3641 PyObject_GC_Track(self);
3642 return (PyObject *)self;
3643}
3644
3645/*****************************************************************************/
3646
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003647static PyObject *
3648Pickler_get_memo(PicklerObject *self)
3649{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003650 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003651}
3652
3653static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003654Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003655{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003656 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003657
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003658 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003659 PyErr_SetString(PyExc_TypeError,
3660 "attribute deletion is not supported");
3661 return -1;
3662 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003663
3664 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3665 PicklerObject *pickler =
3666 ((PicklerMemoProxyObject *)obj)->pickler;
3667
3668 new_memo = PyMemoTable_Copy(pickler->memo);
3669 if (new_memo == NULL)
3670 return -1;
3671 }
3672 else if (PyDict_Check(obj)) {
3673 Py_ssize_t i = 0;
3674 PyObject *key, *value;
3675
3676 new_memo = PyMemoTable_New();
3677 if (new_memo == NULL)
3678 return -1;
3679
3680 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003681 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003682 PyObject *memo_obj;
3683
3684 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3685 PyErr_SetString(PyExc_TypeError,
3686 "'memo' values must be 2-item tuples");
3687 goto error;
3688 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003689 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003690 if (memo_id == -1 && PyErr_Occurred())
3691 goto error;
3692 memo_obj = PyTuple_GET_ITEM(value, 1);
3693 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3694 goto error;
3695 }
3696 }
3697 else {
3698 PyErr_Format(PyExc_TypeError,
3699 "'memo' attribute must be an PicklerMemoProxy object"
3700 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003701 return -1;
3702 }
3703
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003704 PyMemoTable_Del(self->memo);
3705 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003706
3707 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003708
3709 error:
3710 if (new_memo)
3711 PyMemoTable_Del(new_memo);
3712 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003713}
3714
3715static PyObject *
3716Pickler_get_persid(PicklerObject *self)
3717{
3718 if (self->pers_func == NULL)
3719 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3720 else
3721 Py_INCREF(self->pers_func);
3722 return self->pers_func;
3723}
3724
3725static int
3726Pickler_set_persid(PicklerObject *self, PyObject *value)
3727{
3728 PyObject *tmp;
3729
3730 if (value == NULL) {
3731 PyErr_SetString(PyExc_TypeError,
3732 "attribute deletion is not supported");
3733 return -1;
3734 }
3735 if (!PyCallable_Check(value)) {
3736 PyErr_SetString(PyExc_TypeError,
3737 "persistent_id must be a callable taking one argument");
3738 return -1;
3739 }
3740
3741 tmp = self->pers_func;
3742 Py_INCREF(value);
3743 self->pers_func = value;
3744 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3745
3746 return 0;
3747}
3748
3749static PyMemberDef Pickler_members[] = {
3750 {"bin", T_INT, offsetof(PicklerObject, bin)},
3751 {"fast", T_INT, offsetof(PicklerObject, fast)},
3752 {NULL}
3753};
3754
3755static PyGetSetDef Pickler_getsets[] = {
3756 {"memo", (getter)Pickler_get_memo,
3757 (setter)Pickler_set_memo},
3758 {"persistent_id", (getter)Pickler_get_persid,
3759 (setter)Pickler_set_persid},
3760 {NULL}
3761};
3762
/* Type object for _pickle.Pickler.  The type is subclassable and takes part
   in cyclic GC; instances are allocated with the generic alloc/new slots
   and set up by Pickler_init.  Attributes come from Pickler_methods,
   Pickler_members and Pickler_getsets. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3805
Victor Stinner121aab42011-09-29 23:40:53 +02003806/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003807
3808 XXX: It would be nice to able to avoid Python function call overhead, by
3809 using directly the C version of find_class(), when find_class() is not
3810 overridden by a subclass. Although, this could become rather hackish. A
3811 simpler optimization would be to call the C function when self is not a
3812 subclass instance. */
3813static PyObject *
3814find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3815{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003816 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003817
3818 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3819 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003820}
3821
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003822static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003823marker(UnpicklerObject *self)
3824{
3825 if (self->num_marks < 1) {
3826 PyErr_SetString(UnpicklingError, "could not find MARK");
3827 return -1;
3828 }
3829
3830 return self->marks[--self->num_marks];
3831}
3832
3833static int
3834load_none(UnpicklerObject *self)
3835{
3836 PDATA_APPEND(self->stack, Py_None, -1);
3837 return 0;
3838}
3839
3840static int
3841bad_readline(void)
3842{
3843 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3844 return -1;
3845}
3846
3847static int
3848load_int(UnpicklerObject *self)
3849{
3850 PyObject *value;
3851 char *endptr, *s;
3852 Py_ssize_t len;
3853 long x;
3854
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003855 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003856 return -1;
3857 if (len < 2)
3858 return bad_readline();
3859
3860 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003861 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003862 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003863 x = strtol(s, &endptr, 0);
3864
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003865 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003866 /* Hm, maybe we've got something long. Let's try reading
3867 * it as a Python long object. */
3868 errno = 0;
3869 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003870 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003871 if (value == NULL) {
3872 PyErr_SetString(PyExc_ValueError,
3873 "could not convert string to int");
3874 return -1;
3875 }
3876 }
3877 else {
3878 if (len == 3 && (x == 0 || x == 1)) {
3879 if ((value = PyBool_FromLong(x)) == NULL)
3880 return -1;
3881 }
3882 else {
3883 if ((value = PyLong_FromLong(x)) == NULL)
3884 return -1;
3885 }
3886 }
3887
3888 PDATA_PUSH(self->stack, value, -1);
3889 return 0;
3890}
3891
3892static int
3893load_bool(UnpicklerObject *self, PyObject *boolean)
3894{
3895 assert(boolean == Py_True || boolean == Py_False);
3896 PDATA_APPEND(self->stack, boolean, -1);
3897 return 0;
3898}
3899
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003900/* s contains x bytes of an unsigned little-endian integer. Return its value
3901 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3902 */
3903static Py_ssize_t
3904calc_binsize(char *bytes, int size)
3905{
3906 unsigned char *s = (unsigned char *)bytes;
3907 size_t x = 0;
3908
3909 assert(size == 4);
3910
3911 x = (size_t) s[0];
3912 x |= (size_t) s[1] << 8;
3913 x |= (size_t) s[2] << 16;
3914 x |= (size_t) s[3] << 24;
3915
3916 if (x > PY_SSIZE_T_MAX)
3917 return -1;
3918 else
3919 return (Py_ssize_t) x;
3920}
3921
/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2 the integer is unsigned, but
 * when size is 4 it is a signed (two's-complement) 32-bit value.  This is
 * an historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no signed left
 * shift can overflow: with a 32-bit long, `(long)s[3] << 24` is undefined
 * behaviour whenever the top bit of s[3] is set.  The sign of a 4-byte
 * value is then recovered arithmetically, which gives the same result
 * whether long is 32 or 64 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed.  With bit 31 set the value is x - 2**32; it is computed as
     * -(0xFFFFFFFF - x) - 1 so every intermediate fits in a long.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
3948
3949static int
3950load_binintx(UnpicklerObject *self, char *s, int size)
3951{
3952 PyObject *value;
3953 long x;
3954
3955 x = calc_binint(s, size);
3956
3957 if ((value = PyLong_FromLong(x)) == NULL)
3958 return -1;
3959
3960 PDATA_PUSH(self->stack, value, -1);
3961 return 0;
3962}
3963
3964static int
3965load_binint(UnpicklerObject *self)
3966{
3967 char *s;
3968
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003969 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003970 return -1;
3971
3972 return load_binintx(self, s, 4);
3973}
3974
3975static int
3976load_binint1(UnpicklerObject *self)
3977{
3978 char *s;
3979
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003980 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003981 return -1;
3982
3983 return load_binintx(self, s, 1);
3984}
3985
3986static int
3987load_binint2(UnpicklerObject *self)
3988{
3989 char *s;
3990
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003991 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003992 return -1;
3993
3994 return load_binintx(self, s, 2);
3995}
3996
3997static int
3998load_long(UnpicklerObject *self)
3999{
4000 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004001 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004002 Py_ssize_t len;
4003
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004004 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004005 return -1;
4006 if (len < 2)
4007 return bad_readline();
4008
Mark Dickinson8dd05142009-01-20 20:43:58 +00004009 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4010 the 'L' before calling PyLong_FromString. In order to maintain
4011 compatibility with Python 3.0.0, we don't actually *require*
4012 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004013 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004014 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004015 /* XXX: Should the base argument explicitly set to 10? */
4016 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004017 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004018 return -1;
4019
4020 PDATA_PUSH(self->stack, value, -1);
4021 return 0;
4022}
4023
4024/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4025 * data following.
4026 */
4027static int
4028load_counted_long(UnpicklerObject *self, int size)
4029{
4030 PyObject *value;
4031 char *nbytes;
4032 char *pdata;
4033
4034 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004035 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004036 return -1;
4037
4038 size = calc_binint(nbytes, size);
4039 if (size < 0) {
4040 /* Corrupt or hostile pickle -- we never write one like this */
4041 PyErr_SetString(UnpicklingError,
4042 "LONG pickle has negative byte count");
4043 return -1;
4044 }
4045
4046 if (size == 0)
4047 value = PyLong_FromLong(0L);
4048 else {
4049 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004050 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004051 return -1;
4052 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4053 1 /* little endian */ , 1 /* signed */ );
4054 }
4055 if (value == NULL)
4056 return -1;
4057 PDATA_PUSH(self->stack, value, -1);
4058 return 0;
4059}
4060
4061static int
4062load_float(UnpicklerObject *self)
4063{
4064 PyObject *value;
4065 char *endptr, *s;
4066 Py_ssize_t len;
4067 double d;
4068
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004069 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004070 return -1;
4071 if (len < 2)
4072 return bad_readline();
4073
4074 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004075 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4076 if (d == -1.0 && PyErr_Occurred())
4077 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004078 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004079 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4080 return -1;
4081 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004082 value = PyFloat_FromDouble(d);
4083 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004084 return -1;
4085
4086 PDATA_PUSH(self->stack, value, -1);
4087 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004088}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004089
4090static int
4091load_binfloat(UnpicklerObject *self)
4092{
4093 PyObject *value;
4094 double x;
4095 char *s;
4096
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004097 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004098 return -1;
4099
4100 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4101 if (x == -1.0 && PyErr_Occurred())
4102 return -1;
4103
4104 if ((value = PyFloat_FromDouble(x)) == NULL)
4105 return -1;
4106
4107 PDATA_PUSH(self->stack, value, -1);
4108 return 0;
4109}
4110
4111static int
4112load_string(UnpicklerObject *self)
4113{
4114 PyObject *bytes;
4115 PyObject *str = NULL;
4116 Py_ssize_t len;
4117 char *s, *p;
4118
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004119 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004120 return -1;
4121 if (len < 3)
4122 return bad_readline();
4123 if ((s = strdup(s)) == NULL) {
4124 PyErr_NoMemory();
4125 return -1;
4126 }
4127
4128 /* Strip outermost quotes */
4129 while (s[len - 1] <= ' ')
4130 len--;
4131 if (s[0] == '"' && s[len - 1] == '"') {
4132 s[len - 1] = '\0';
4133 p = s + 1;
4134 len -= 2;
4135 }
4136 else if (s[0] == '\'' && s[len - 1] == '\'') {
4137 s[len - 1] = '\0';
4138 p = s + 1;
4139 len -= 2;
4140 }
4141 else {
4142 free(s);
4143 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4144 return -1;
4145 }
4146
4147 /* Use the PyBytes API to decode the string, since that is what is used
4148 to encode, and then coerce the result to Unicode. */
4149 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4150 free(s);
4151 if (bytes == NULL)
4152 return -1;
4153 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4154 Py_DECREF(bytes);
4155 if (str == NULL)
4156 return -1;
4157
4158 PDATA_PUSH(self->stack, str, -1);
4159 return 0;
4160}
4161
4162static int
4163load_binbytes(UnpicklerObject *self)
4164{
4165 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004166 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004167 char *s;
4168
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004169 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004170 return -1;
4171
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004172 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004173 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004174 PyErr_Format(PyExc_OverflowError,
4175 "BINBYTES exceeds system's maximum size of %zd bytes",
4176 PY_SSIZE_T_MAX
4177 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004178 return -1;
4179 }
4180
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004181 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004182 return -1;
4183 bytes = PyBytes_FromStringAndSize(s, x);
4184 if (bytes == NULL)
4185 return -1;
4186
4187 PDATA_PUSH(self->stack, bytes, -1);
4188 return 0;
4189}
4190
4191static int
4192load_short_binbytes(UnpicklerObject *self)
4193{
4194 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004195 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004196 char *s;
4197
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004198 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004199 return -1;
4200
4201 x = (unsigned char)s[0];
4202
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004203 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004204 return -1;
4205
4206 bytes = PyBytes_FromStringAndSize(s, x);
4207 if (bytes == NULL)
4208 return -1;
4209
4210 PDATA_PUSH(self->stack, bytes, -1);
4211 return 0;
4212}
4213
4214static int
4215load_binstring(UnpicklerObject *self)
4216{
4217 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004218 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004219 char *s;
4220
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004221 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004222 return -1;
4223
4224 x = calc_binint(s, 4);
4225 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004226 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004227 "BINSTRING pickle has negative byte count");
4228 return -1;
4229 }
4230
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004231 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004232 return -1;
4233
4234 /* Convert Python 2.x strings to unicode. */
4235 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4236 if (str == NULL)
4237 return -1;
4238
4239 PDATA_PUSH(self->stack, str, -1);
4240 return 0;
4241}
4242
4243static int
4244load_short_binstring(UnpicklerObject *self)
4245{
4246 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004247 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004248 char *s;
4249
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004250 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004251 return -1;
4252
4253 x = (unsigned char)s[0];
4254
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004255 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004256 return -1;
4257
4258 /* Convert Python 2.x strings to unicode. */
4259 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4260 if (str == NULL)
4261 return -1;
4262
4263 PDATA_PUSH(self->stack, str, -1);
4264 return 0;
4265}
4266
4267static int
4268load_unicode(UnpicklerObject *self)
4269{
4270 PyObject *str;
4271 Py_ssize_t len;
4272 char *s;
4273
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004274 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004275 return -1;
4276 if (len < 1)
4277 return bad_readline();
4278
4279 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4280 if (str == NULL)
4281 return -1;
4282
4283 PDATA_PUSH(self->stack, str, -1);
4284 return 0;
4285}
4286
4287static int
4288load_binunicode(UnpicklerObject *self)
4289{
4290 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004291 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004292 char *s;
4293
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004294 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004295 return -1;
4296
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004297 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004298 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004299 PyErr_Format(PyExc_OverflowError,
4300 "BINUNICODE exceeds system's maximum size of %zd bytes",
4301 PY_SSIZE_T_MAX
4302 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004303 return -1;
4304 }
4305
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004306
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004307 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004308 return -1;
4309
Victor Stinner485fb562010-04-13 11:07:24 +00004310 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004311 if (str == NULL)
4312 return -1;
4313
4314 PDATA_PUSH(self->stack, str, -1);
4315 return 0;
4316}
4317
4318static int
4319load_tuple(UnpicklerObject *self)
4320{
4321 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004322 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004323
4324 if ((i = marker(self)) < 0)
4325 return -1;
4326
4327 tuple = Pdata_poptuple(self->stack, i);
4328 if (tuple == NULL)
4329 return -1;
4330 PDATA_PUSH(self->stack, tuple, -1);
4331 return 0;
4332}
4333
4334static int
4335load_counted_tuple(UnpicklerObject *self, int len)
4336{
4337 PyObject *tuple;
4338
4339 tuple = PyTuple_New(len);
4340 if (tuple == NULL)
4341 return -1;
4342
4343 while (--len >= 0) {
4344 PyObject *item;
4345
4346 PDATA_POP(self->stack, item);
4347 if (item == NULL)
4348 return -1;
4349 PyTuple_SET_ITEM(tuple, len, item);
4350 }
4351 PDATA_PUSH(self->stack, tuple, -1);
4352 return 0;
4353}
4354
4355static int
4356load_empty_list(UnpicklerObject *self)
4357{
4358 PyObject *list;
4359
4360 if ((list = PyList_New(0)) == NULL)
4361 return -1;
4362 PDATA_PUSH(self->stack, list, -1);
4363 return 0;
4364}
4365
4366static int
4367load_empty_dict(UnpicklerObject *self)
4368{
4369 PyObject *dict;
4370
4371 if ((dict = PyDict_New()) == NULL)
4372 return -1;
4373 PDATA_PUSH(self->stack, dict, -1);
4374 return 0;
4375}
4376
4377static int
4378load_list(UnpicklerObject *self)
4379{
4380 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004381 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004382
4383 if ((i = marker(self)) < 0)
4384 return -1;
4385
4386 list = Pdata_poplist(self->stack, i);
4387 if (list == NULL)
4388 return -1;
4389 PDATA_PUSH(self->stack, list, -1);
4390 return 0;
4391}
4392
4393static int
4394load_dict(UnpicklerObject *self)
4395{
4396 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004397 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004398
4399 if ((i = marker(self)) < 0)
4400 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004401 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004402
4403 if ((dict = PyDict_New()) == NULL)
4404 return -1;
4405
4406 for (k = i + 1; k < j; k += 2) {
4407 key = self->stack->data[k - 1];
4408 value = self->stack->data[k];
4409 if (PyDict_SetItem(dict, key, value) < 0) {
4410 Py_DECREF(dict);
4411 return -1;
4412 }
4413 }
4414 Pdata_clear(self->stack, i);
4415 PDATA_PUSH(self->stack, dict, -1);
4416 return 0;
4417}
4418
4419static PyObject *
4420instantiate(PyObject *cls, PyObject *args)
4421{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004422 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004423 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004424 /* Caller must assure args are a tuple. Normally, args come from
4425 Pdata_poptuple which packs objects from the top of the stack
4426 into a newly created tuple. */
4427 assert(PyTuple_Check(args));
4428 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004429 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004430 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004431 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004432 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004433 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004434
4435 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004436 }
4437 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004438}
4439
4440static int
4441load_obj(UnpicklerObject *self)
4442{
4443 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004444 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004445
4446 if ((i = marker(self)) < 0)
4447 return -1;
4448
4449 args = Pdata_poptuple(self->stack, i + 1);
4450 if (args == NULL)
4451 return -1;
4452
4453 PDATA_POP(self->stack, cls);
4454 if (cls) {
4455 obj = instantiate(cls, args);
4456 Py_DECREF(cls);
4457 }
4458 Py_DECREF(args);
4459 if (obj == NULL)
4460 return -1;
4461
4462 PDATA_PUSH(self->stack, obj, -1);
4463 return 0;
4464}
4465
4466static int
4467load_inst(UnpicklerObject *self)
4468{
4469 PyObject *cls = NULL;
4470 PyObject *args = NULL;
4471 PyObject *obj = NULL;
4472 PyObject *module_name;
4473 PyObject *class_name;
4474 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004475 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004476 char *s;
4477
4478 if ((i = marker(self)) < 0)
4479 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004480 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004481 return -1;
4482 if (len < 2)
4483 return bad_readline();
4484
4485 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4486 identifiers are permitted in Python 3.0, since the INST opcode is only
4487 supported by older protocols on Python 2.x. */
4488 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4489 if (module_name == NULL)
4490 return -1;
4491
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004492 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004493 if (len < 2)
4494 return bad_readline();
4495 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004496 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004497 cls = find_class(self, module_name, class_name);
4498 Py_DECREF(class_name);
4499 }
4500 }
4501 Py_DECREF(module_name);
4502
4503 if (cls == NULL)
4504 return -1;
4505
4506 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4507 obj = instantiate(cls, args);
4508 Py_DECREF(args);
4509 }
4510 Py_DECREF(cls);
4511
4512 if (obj == NULL)
4513 return -1;
4514
4515 PDATA_PUSH(self->stack, obj, -1);
4516 return 0;
4517}
4518
4519static int
4520load_newobj(UnpicklerObject *self)
4521{
4522 PyObject *args = NULL;
4523 PyObject *clsraw = NULL;
4524 PyTypeObject *cls; /* clsraw cast to its true type */
4525 PyObject *obj;
4526
4527 /* Stack is ... cls argtuple, and we want to call
4528 * cls.__new__(cls, *argtuple).
4529 */
4530 PDATA_POP(self->stack, args);
4531 if (args == NULL)
4532 goto error;
4533 if (!PyTuple_Check(args)) {
4534 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4535 goto error;
4536 }
4537
4538 PDATA_POP(self->stack, clsraw);
4539 cls = (PyTypeObject *)clsraw;
4540 if (cls == NULL)
4541 goto error;
4542 if (!PyType_Check(cls)) {
4543 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4544 "isn't a type object");
4545 goto error;
4546 }
4547 if (cls->tp_new == NULL) {
4548 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4549 "has NULL tp_new");
4550 goto error;
4551 }
4552
4553 /* Call __new__. */
4554 obj = cls->tp_new(cls, args, NULL);
4555 if (obj == NULL)
4556 goto error;
4557
4558 Py_DECREF(args);
4559 Py_DECREF(clsraw);
4560 PDATA_PUSH(self->stack, obj, -1);
4561 return 0;
4562
4563 error:
4564 Py_XDECREF(args);
4565 Py_XDECREF(clsraw);
4566 return -1;
4567}
4568
4569static int
4570load_global(UnpicklerObject *self)
4571{
4572 PyObject *global = NULL;
4573 PyObject *module_name;
4574 PyObject *global_name;
4575 Py_ssize_t len;
4576 char *s;
4577
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004578 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004579 return -1;
4580 if (len < 2)
4581 return bad_readline();
4582 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4583 if (!module_name)
4584 return -1;
4585
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004586 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004587 if (len < 2) {
4588 Py_DECREF(module_name);
4589 return bad_readline();
4590 }
4591 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4592 if (global_name) {
4593 global = find_class(self, module_name, global_name);
4594 Py_DECREF(global_name);
4595 }
4596 }
4597 Py_DECREF(module_name);
4598
4599 if (global == NULL)
4600 return -1;
4601 PDATA_PUSH(self->stack, global, -1);
4602 return 0;
4603}
4604
4605static int
4606load_persid(UnpicklerObject *self)
4607{
4608 PyObject *pid;
4609 Py_ssize_t len;
4610 char *s;
4611
4612 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004613 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004614 return -1;
4615 if (len < 2)
4616 return bad_readline();
4617
4618 pid = PyBytes_FromStringAndSize(s, len - 1);
4619 if (pid == NULL)
4620 return -1;
4621
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004622 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004623 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004624 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625 if (pid == NULL)
4626 return -1;
4627
4628 PDATA_PUSH(self->stack, pid, -1);
4629 return 0;
4630 }
4631 else {
4632 PyErr_SetString(UnpicklingError,
4633 "A load persistent id instruction was encountered,\n"
4634 "but no persistent_load function was specified.");
4635 return -1;
4636 }
4637}
4638
4639static int
4640load_binpersid(UnpicklerObject *self)
4641{
4642 PyObject *pid;
4643
4644 if (self->pers_func) {
4645 PDATA_POP(self->stack, pid);
4646 if (pid == NULL)
4647 return -1;
4648
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004649 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004651 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004652 if (pid == NULL)
4653 return -1;
4654
4655 PDATA_PUSH(self->stack, pid, -1);
4656 return 0;
4657 }
4658 else {
4659 PyErr_SetString(UnpicklingError,
4660 "A load persistent id instruction was encountered,\n"
4661 "but no persistent_load function was specified.");
4662 return -1;
4663 }
4664}
4665
4666static int
4667load_pop(UnpicklerObject *self)
4668{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004669 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004670
4671 /* Note that we split the (pickle.py) stack into two stacks,
4672 * an object stack and a mark stack. We have to be clever and
4673 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004674 * mark stack first, and only signalling a stack underflow if
4675 * the object stack is empty and the mark stack doesn't match
4676 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004677 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004678 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004679 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004680 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004681 len--;
4682 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004683 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004684 } else {
4685 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004686 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004687 return 0;
4688}
4689
4690static int
4691load_pop_mark(UnpicklerObject *self)
4692{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004693 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004694
4695 if ((i = marker(self)) < 0)
4696 return -1;
4697
4698 Pdata_clear(self->stack, i);
4699
4700 return 0;
4701}
4702
4703static int
4704load_dup(UnpicklerObject *self)
4705{
4706 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004707 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004708
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004709 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004710 return stack_underflow();
4711 last = self->stack->data[len - 1];
4712 PDATA_APPEND(self->stack, last, -1);
4713 return 0;
4714}
4715
4716static int
4717load_get(UnpicklerObject *self)
4718{
4719 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004720 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004721 Py_ssize_t len;
4722 char *s;
4723
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004724 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004725 return -1;
4726 if (len < 2)
4727 return bad_readline();
4728
4729 key = PyLong_FromString(s, NULL, 10);
4730 if (key == NULL)
4731 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004732 idx = PyLong_AsSsize_t(key);
4733 if (idx == -1 && PyErr_Occurred()) {
4734 Py_DECREF(key);
4735 return -1;
4736 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004737
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004738 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004739 if (value == NULL) {
4740 if (!PyErr_Occurred())
4741 PyErr_SetObject(PyExc_KeyError, key);
4742 Py_DECREF(key);
4743 return -1;
4744 }
4745 Py_DECREF(key);
4746
4747 PDATA_APPEND(self->stack, value, -1);
4748 return 0;
4749}
4750
4751static int
4752load_binget(UnpicklerObject *self)
4753{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004754 PyObject *value;
4755 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004756 char *s;
4757
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004758 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 return -1;
4760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004761 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004762
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004763 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004765 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004766 if (!PyErr_Occurred())
4767 PyErr_SetObject(PyExc_KeyError, key);
4768 Py_DECREF(key);
4769 return -1;
4770 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004771
4772 PDATA_APPEND(self->stack, value, -1);
4773 return 0;
4774}
4775
4776static int
4777load_long_binget(UnpicklerObject *self)
4778{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004779 PyObject *value;
4780 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004781 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004782
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004783 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004784 return -1;
4785
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004786 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004787
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004788 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004789 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004790 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004791 if (!PyErr_Occurred())
4792 PyErr_SetObject(PyExc_KeyError, key);
4793 Py_DECREF(key);
4794 return -1;
4795 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004796
4797 PDATA_APPEND(self->stack, value, -1);
4798 return 0;
4799}
4800
4801/* Push an object from the extension registry (EXT[124]). nbytes is
4802 * the number of bytes following the opcode, holding the index (code) value.
4803 */
4804static int
4805load_extension(UnpicklerObject *self, int nbytes)
4806{
4807 char *codebytes; /* the nbytes bytes after the opcode */
4808 long code; /* calc_binint returns long */
4809 PyObject *py_code; /* code as a Python int */
4810 PyObject *obj; /* the object to push */
4811 PyObject *pair; /* (module_name, class_name) */
4812 PyObject *module_name, *class_name;
4813
4814 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004815 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004816 return -1;
4817 code = calc_binint(codebytes, nbytes);
4818 if (code <= 0) { /* note that 0 is forbidden */
4819 /* Corrupt or hostile pickle. */
4820 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4821 return -1;
4822 }
4823
4824 /* Look for the code in the cache. */
4825 py_code = PyLong_FromLong(code);
4826 if (py_code == NULL)
4827 return -1;
4828 obj = PyDict_GetItem(extension_cache, py_code);
4829 if (obj != NULL) {
4830 /* Bingo. */
4831 Py_DECREF(py_code);
4832 PDATA_APPEND(self->stack, obj, -1);
4833 return 0;
4834 }
4835
4836 /* Look up the (module_name, class_name) pair. */
4837 pair = PyDict_GetItem(inverted_registry, py_code);
4838 if (pair == NULL) {
4839 Py_DECREF(py_code);
4840 PyErr_Format(PyExc_ValueError, "unregistered extension "
4841 "code %ld", code);
4842 return -1;
4843 }
4844 /* Since the extension registry is manipulable via Python code,
4845 * confirm that pair is really a 2-tuple of strings.
4846 */
4847 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4848 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4849 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4850 Py_DECREF(py_code);
4851 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4852 "isn't a 2-tuple of strings", code);
4853 return -1;
4854 }
4855 /* Load the object. */
4856 obj = find_class(self, module_name, class_name);
4857 if (obj == NULL) {
4858 Py_DECREF(py_code);
4859 return -1;
4860 }
4861 /* Cache code -> obj. */
4862 code = PyDict_SetItem(extension_cache, py_code, obj);
4863 Py_DECREF(py_code);
4864 if (code < 0) {
4865 Py_DECREF(obj);
4866 return -1;
4867 }
4868 PDATA_PUSH(self->stack, obj, -1);
4869 return 0;
4870}
4871
4872static int
4873load_put(UnpicklerObject *self)
4874{
4875 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004876 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004877 Py_ssize_t len;
4878 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004879
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004880 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004881 return -1;
4882 if (len < 2)
4883 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004884 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004885 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004886 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004887
4888 key = PyLong_FromString(s, NULL, 10);
4889 if (key == NULL)
4890 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004891 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004892 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004893 if (idx < 0) {
4894 if (!PyErr_Occurred())
4895 PyErr_SetString(PyExc_ValueError,
4896 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004897 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004898 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004899
4900 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004901}
4902
4903static int
4904load_binput(UnpicklerObject *self)
4905{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004906 PyObject *value;
4907 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004908 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004909
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004910 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004911 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004912
4913 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004914 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004915 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004916
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004917 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004918
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004919 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004920}
4921
4922static int
4923load_long_binput(UnpicklerObject *self)
4924{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004925 PyObject *value;
4926 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004928
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004929 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004930 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004931
4932 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004933 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004934 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004935
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004936 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004937 if (idx < 0) {
4938 PyErr_SetString(PyExc_ValueError,
4939 "negative LONG_BINPUT argument");
4940 return -1;
4941 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004942
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004943 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944}
4945
4946static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004947do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004948{
4949 PyObject *value;
4950 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004951 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004952
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004953 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004954 if (x > len || x <= 0)
4955 return stack_underflow();
4956 if (len == x) /* nothing to do */
4957 return 0;
4958
4959 list = self->stack->data[x - 1];
4960
4961 if (PyList_Check(list)) {
4962 PyObject *slice;
4963 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004964 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004965
4966 slice = Pdata_poplist(self->stack, x);
4967 if (!slice)
4968 return -1;
4969 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004970 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004971 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004972 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004973 }
4974 else {
4975 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004976 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004977
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02004978 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004979 if (append_func == NULL)
4980 return -1;
4981 for (i = x; i < len; i++) {
4982 PyObject *result;
4983
4984 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004985 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004986 if (result == NULL) {
4987 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004988 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989 return -1;
4990 }
4991 Py_DECREF(result);
4992 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004994 }
4995
4996 return 0;
4997}
4998
4999static int
5000load_append(UnpicklerObject *self)
5001{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005002 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005003}
5004
5005static int
5006load_appends(UnpicklerObject *self)
5007{
5008 return do_append(self, marker(self));
5009}
5010
5011static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005012do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005013{
5014 PyObject *value, *key;
5015 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005016 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005017 int status = 0;
5018
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005019 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005020 if (x > len || x <= 0)
5021 return stack_underflow();
5022 if (len == x) /* nothing to do */
5023 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005024 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005025 /* Currupt or hostile pickle -- we never write one like this. */
5026 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5027 return -1;
5028 }
5029
5030 /* Here, dict does not actually need to be a PyDict; it could be anything
5031 that supports the __setitem__ attribute. */
5032 dict = self->stack->data[x - 1];
5033
5034 for (i = x + 1; i < len; i += 2) {
5035 key = self->stack->data[i - 1];
5036 value = self->stack->data[i];
5037 if (PyObject_SetItem(dict, key, value) < 0) {
5038 status = -1;
5039 break;
5040 }
5041 }
5042
5043 Pdata_clear(self->stack, x);
5044 return status;
5045}
5046
5047static int
5048load_setitem(UnpicklerObject *self)
5049{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005050 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005051}
5052
5053static int
5054load_setitems(UnpicklerObject *self)
5055{
5056 return do_setitems(self, marker(self));
5057}
5058
5059static int
5060load_build(UnpicklerObject *self)
5061{
5062 PyObject *state, *inst, *slotstate;
5063 PyObject *setstate;
5064 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005065 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005066
5067 /* Stack is ... instance, state. We want to leave instance at
5068 * the stack top, possibly mutated via instance.__setstate__(state).
5069 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005070 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005071 return stack_underflow();
5072
5073 PDATA_POP(self->stack, state);
5074 if (state == NULL)
5075 return -1;
5076
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005077 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005078
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005079 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005080 if (setstate == NULL) {
5081 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5082 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005083 else {
5084 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005085 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005086 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005087 }
5088 else {
5089 PyObject *result;
5090
5091 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005092 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005093 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005094 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005095 Py_DECREF(setstate);
5096 if (result == NULL)
5097 return -1;
5098 Py_DECREF(result);
5099 return 0;
5100 }
5101
5102 /* A default __setstate__. First see whether state embeds a
5103 * slot state dict too (a proto 2 addition).
5104 */
5105 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5106 PyObject *tmp = state;
5107
5108 state = PyTuple_GET_ITEM(tmp, 0);
5109 slotstate = PyTuple_GET_ITEM(tmp, 1);
5110 Py_INCREF(state);
5111 Py_INCREF(slotstate);
5112 Py_DECREF(tmp);
5113 }
5114 else
5115 slotstate = NULL;
5116
5117 /* Set inst.__dict__ from the state dict (if any). */
5118 if (state != Py_None) {
5119 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005120 PyObject *d_key, *d_value;
5121 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005122 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005123
5124 if (!PyDict_Check(state)) {
5125 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5126 goto error;
5127 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005128 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005129 if (dict == NULL)
5130 goto error;
5131
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005132 i = 0;
5133 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5134 /* normally the keys for instance attributes are
5135 interned. we should try to do that here. */
5136 Py_INCREF(d_key);
5137 if (PyUnicode_CheckExact(d_key))
5138 PyUnicode_InternInPlace(&d_key);
5139 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5140 Py_DECREF(d_key);
5141 goto error;
5142 }
5143 Py_DECREF(d_key);
5144 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005145 Py_DECREF(dict);
5146 }
5147
5148 /* Also set instance attributes from the slotstate dict (if any). */
5149 if (slotstate != NULL) {
5150 PyObject *d_key, *d_value;
5151 Py_ssize_t i;
5152
5153 if (!PyDict_Check(slotstate)) {
5154 PyErr_SetString(UnpicklingError,
5155 "slot state is not a dictionary");
5156 goto error;
5157 }
5158 i = 0;
5159 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5160 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5161 goto error;
5162 }
5163 }
5164
5165 if (0) {
5166 error:
5167 status = -1;
5168 }
5169
5170 Py_DECREF(state);
5171 Py_XDECREF(slotstate);
5172 return status;
5173}
5174
5175static int
5176load_mark(UnpicklerObject *self)
5177{
5178
5179 /* Note that we split the (pickle.py) stack into two stacks, an
5180 * object stack and a mark stack. Here we push a mark onto the
5181 * mark stack.
5182 */
5183
5184 if ((self->num_marks + 1) >= self->marks_size) {
5185 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005186 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005187
5188 /* Use the size_t type to check for overflow. */
5189 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005190 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005191 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005192 PyErr_NoMemory();
5193 return -1;
5194 }
5195
5196 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005197 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005198 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005199 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5200 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005201 if (marks == NULL) {
5202 PyErr_NoMemory();
5203 return -1;
5204 }
5205 self->marks = marks;
5206 self->marks_size = (Py_ssize_t)alloc;
5207 }
5208
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005209 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005210
5211 return 0;
5212}
5213
5214static int
5215load_reduce(UnpicklerObject *self)
5216{
5217 PyObject *callable = NULL;
5218 PyObject *argtup = NULL;
5219 PyObject *obj = NULL;
5220
5221 PDATA_POP(self->stack, argtup);
5222 if (argtup == NULL)
5223 return -1;
5224 PDATA_POP(self->stack, callable);
5225 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005226 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005227 Py_DECREF(callable);
5228 }
5229 Py_DECREF(argtup);
5230
5231 if (obj == NULL)
5232 return -1;
5233
5234 PDATA_PUSH(self->stack, obj, -1);
5235 return 0;
5236}
5237
5238/* Just raises an error if we don't know the protocol specified. PROTO
5239 * is the first opcode for protocols >= 2.
5240 */
5241static int
5242load_proto(UnpicklerObject *self)
5243{
5244 char *s;
5245 int i;
5246
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005247 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005248 return -1;
5249
5250 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005251 if (i <= HIGHEST_PROTOCOL) {
5252 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005253 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005254 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005255
5256 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5257 return -1;
5258}
5259
5260static PyObject *
5261load(UnpicklerObject *self)
5262{
5263 PyObject *err;
5264 PyObject *value = NULL;
5265 char *s;
5266
5267 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005268 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005269 Pdata_clear(self->stack, 0);
5270
5271 /* Convenient macros for the dispatch while-switch loop just below. */
5272#define OP(opcode, load_func) \
5273 case opcode: if (load_func(self) < 0) break; continue;
5274
5275#define OP_ARG(opcode, load_func, arg) \
5276 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5277
5278 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005279 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005280 break;
5281
5282 switch ((enum opcode)s[0]) {
5283 OP(NONE, load_none)
5284 OP(BININT, load_binint)
5285 OP(BININT1, load_binint1)
5286 OP(BININT2, load_binint2)
5287 OP(INT, load_int)
5288 OP(LONG, load_long)
5289 OP_ARG(LONG1, load_counted_long, 1)
5290 OP_ARG(LONG4, load_counted_long, 4)
5291 OP(FLOAT, load_float)
5292 OP(BINFLOAT, load_binfloat)
5293 OP(BINBYTES, load_binbytes)
5294 OP(SHORT_BINBYTES, load_short_binbytes)
5295 OP(BINSTRING, load_binstring)
5296 OP(SHORT_BINSTRING, load_short_binstring)
5297 OP(STRING, load_string)
5298 OP(UNICODE, load_unicode)
5299 OP(BINUNICODE, load_binunicode)
5300 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5301 OP_ARG(TUPLE1, load_counted_tuple, 1)
5302 OP_ARG(TUPLE2, load_counted_tuple, 2)
5303 OP_ARG(TUPLE3, load_counted_tuple, 3)
5304 OP(TUPLE, load_tuple)
5305 OP(EMPTY_LIST, load_empty_list)
5306 OP(LIST, load_list)
5307 OP(EMPTY_DICT, load_empty_dict)
5308 OP(DICT, load_dict)
5309 OP(OBJ, load_obj)
5310 OP(INST, load_inst)
5311 OP(NEWOBJ, load_newobj)
5312 OP(GLOBAL, load_global)
5313 OP(APPEND, load_append)
5314 OP(APPENDS, load_appends)
5315 OP(BUILD, load_build)
5316 OP(DUP, load_dup)
5317 OP(BINGET, load_binget)
5318 OP(LONG_BINGET, load_long_binget)
5319 OP(GET, load_get)
5320 OP(MARK, load_mark)
5321 OP(BINPUT, load_binput)
5322 OP(LONG_BINPUT, load_long_binput)
5323 OP(PUT, load_put)
5324 OP(POP, load_pop)
5325 OP(POP_MARK, load_pop_mark)
5326 OP(SETITEM, load_setitem)
5327 OP(SETITEMS, load_setitems)
5328 OP(PERSID, load_persid)
5329 OP(BINPERSID, load_binpersid)
5330 OP(REDUCE, load_reduce)
5331 OP(PROTO, load_proto)
5332 OP_ARG(EXT1, load_extension, 1)
5333 OP_ARG(EXT2, load_extension, 2)
5334 OP_ARG(EXT4, load_extension, 4)
5335 OP_ARG(NEWTRUE, load_bool, Py_True)
5336 OP_ARG(NEWFALSE, load_bool, Py_False)
5337
5338 case STOP:
5339 break;
5340
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005341 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005342 if (s[0] == '\0')
5343 PyErr_SetNone(PyExc_EOFError);
5344 else
5345 PyErr_Format(UnpicklingError,
5346 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005347 return NULL;
5348 }
5349
5350 break; /* and we are done! */
5351 }
5352
Antoine Pitrou04248a82010-10-12 20:51:21 +00005353 if (_Unpickler_SkipConsumed(self) < 0)
5354 return NULL;
5355
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005356 /* XXX: It is not clear what this is actually for. */
5357 if ((err = PyErr_Occurred())) {
5358 if (err == PyExc_EOFError) {
5359 PyErr_SetNone(PyExc_EOFError);
5360 }
5361 return NULL;
5362 }
5363
5364 PDATA_POP(self->stack, value);
5365 return value;
5366}
5367
5368PyDoc_STRVAR(Unpickler_load_doc,
5369"load() -> object. Load a pickle."
5370"\n"
5371"Read a pickled object representation from the open file object given in\n"
5372"the constructor, and return the reconstituted object hierarchy specified\n"
5373"therein.\n");
5374
5375static PyObject *
5376Unpickler_load(UnpicklerObject *self)
5377{
5378 /* Check whether the Unpickler was initialized correctly. This prevents
5379 segfaulting if a subclass overridden __init__ with a function that does
5380 not call Unpickler.__init__(). Here, we simply ensure that self->read
5381 is not NULL. */
5382 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005383 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005384 "Unpickler.__init__() was not called by %s.__init__()",
5385 Py_TYPE(self)->tp_name);
5386 return NULL;
5387 }
5388
5389 return load(self);
5390}
5391
5392/* The name of find_class() is misleading. In newer pickle protocols, this
5393 function is used for loading any global (i.e., functions), not just
5394 classes. The name is kept only for backward compatibility. */
5395
5396PyDoc_STRVAR(Unpickler_find_class_doc,
5397"find_class(module_name, global_name) -> object.\n"
5398"\n"
5399"Return an object from a specified module, importing the module if\n"
5400"necessary. Subclasses may override this method (e.g. to restrict\n"
5401"unpickling of arbitrary classes and functions).\n"
5402"\n"
5403"This method is called whenever a class or a function object is\n"
5404"needed. Both arguments passed are str objects.\n");
5405
5406static PyObject *
5407Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5408{
5409 PyObject *global;
5410 PyObject *modules_dict;
5411 PyObject *module;
5412 PyObject *module_name, *global_name;
5413
5414 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5415 &module_name, &global_name))
5416 return NULL;
5417
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005418 /* Try to map the old names used in Python 2.x to the new ones used in
5419 Python 3.x. We do this only with old pickle protocols and when the
5420 user has not disabled the feature. */
5421 if (self->proto < 3 && self->fix_imports) {
5422 PyObject *key;
5423 PyObject *item;
5424
5425 /* Check if the global (i.e., a function or a class) was renamed
5426 or moved to another module. */
5427 key = PyTuple_Pack(2, module_name, global_name);
5428 if (key == NULL)
5429 return NULL;
5430 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5431 Py_DECREF(key);
5432 if (item) {
5433 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5434 PyErr_Format(PyExc_RuntimeError,
5435 "_compat_pickle.NAME_MAPPING values should be "
5436 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5437 return NULL;
5438 }
5439 module_name = PyTuple_GET_ITEM(item, 0);
5440 global_name = PyTuple_GET_ITEM(item, 1);
5441 if (!PyUnicode_Check(module_name) ||
5442 !PyUnicode_Check(global_name)) {
5443 PyErr_Format(PyExc_RuntimeError,
5444 "_compat_pickle.NAME_MAPPING values should be "
5445 "pairs of str, not (%.200s, %.200s)",
5446 Py_TYPE(module_name)->tp_name,
5447 Py_TYPE(global_name)->tp_name);
5448 return NULL;
5449 }
5450 }
5451 else if (PyErr_Occurred()) {
5452 return NULL;
5453 }
5454
5455 /* Check if the module was renamed. */
5456 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5457 if (item) {
5458 if (!PyUnicode_Check(item)) {
5459 PyErr_Format(PyExc_RuntimeError,
5460 "_compat_pickle.IMPORT_MAPPING values should be "
5461 "strings, not %.200s", Py_TYPE(item)->tp_name);
5462 return NULL;
5463 }
5464 module_name = item;
5465 }
5466 else if (PyErr_Occurred()) {
5467 return NULL;
5468 }
5469 }
5470
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005471 modules_dict = PySys_GetObject("modules");
5472 if (modules_dict == NULL)
5473 return NULL;
5474
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005475 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005476 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005477 if (PyErr_Occurred())
5478 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005479 module = PyImport_Import(module_name);
5480 if (module == NULL)
5481 return NULL;
5482 global = PyObject_GetAttr(module, global_name);
5483 Py_DECREF(module);
5484 }
Victor Stinner121aab42011-09-29 23:40:53 +02005485 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005486 global = PyObject_GetAttr(module, global_name);
5487 }
5488 return global;
5489}
5490
5491static struct PyMethodDef Unpickler_methods[] = {
5492 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5493 Unpickler_load_doc},
5494 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5495 Unpickler_find_class_doc},
5496 {NULL, NULL} /* sentinel */
5497};
5498
5499static void
5500Unpickler_dealloc(UnpicklerObject *self)
5501{
5502 PyObject_GC_UnTrack((PyObject *)self);
5503 Py_XDECREF(self->readline);
5504 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005505 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005506 Py_XDECREF(self->stack);
5507 Py_XDECREF(self->pers_func);
5508 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005509 if (self->buffer.buf != NULL) {
5510 PyBuffer_Release(&self->buffer);
5511 self->buffer.buf = NULL;
5512 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005513
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005514 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005515 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005516 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005517 free(self->encoding);
5518 free(self->errors);
5519
5520 Py_TYPE(self)->tp_free((PyObject *)self);
5521}
5522
5523static int
5524Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5525{
5526 Py_VISIT(self->readline);
5527 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005528 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005529 Py_VISIT(self->stack);
5530 Py_VISIT(self->pers_func);
5531 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005532 return 0;
5533}
5534
5535static int
5536Unpickler_clear(UnpicklerObject *self)
5537{
5538 Py_CLEAR(self->readline);
5539 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005540 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005541 Py_CLEAR(self->stack);
5542 Py_CLEAR(self->pers_func);
5543 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005544 if (self->buffer.buf != NULL) {
5545 PyBuffer_Release(&self->buffer);
5546 self->buffer.buf = NULL;
5547 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005548
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005549 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005550 PyMem_Free(self->marks);
5551 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005552 PyMem_Free(self->input_line);
5553 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005554 free(self->encoding);
5555 self->encoding = NULL;
5556 free(self->errors);
5557 self->errors = NULL;
5558
5559 return 0;
5560}
5561
5562PyDoc_STRVAR(Unpickler_doc,
5563"Unpickler(file, *, encoding='ASCII', errors='strict')"
5564"\n"
5565"This takes a binary file for reading a pickle data stream.\n"
5566"\n"
5567"The protocol version of the pickle is detected automatically, so no\n"
5568"proto argument is needed.\n"
5569"\n"
5570"The file-like object must have two methods, a read() method\n"
5571"that takes an integer argument, and a readline() method that\n"
5572"requires no arguments. Both methods should return bytes.\n"
5573"Thus file-like object can be a binary file object opened for\n"
5574"reading, a BytesIO object, or any other custom object that\n"
5575"meets this interface.\n"
5576"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005577"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5578"which are used to control compatiblity support for pickle stream\n"
5579"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5580"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5581"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5582"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5583"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005584
5585static int
5586Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5587{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005588 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005589 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005590 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005591 char *encoding = NULL;
5592 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005593 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005594
5595 /* XXX: That is an horrible error message. But, I don't know how to do
5596 better... */
5597 if (Py_SIZE(args) != 1) {
5598 PyErr_Format(PyExc_TypeError,
5599 "%s takes exactly one positional argument (%zd given)",
5600 Py_TYPE(self)->tp_name, Py_SIZE(args));
5601 return -1;
5602 }
5603
5604 /* Arguments parsing needs to be done in the __init__() method to allow
5605 subclasses to define their own __init__() method, which may (or may
5606 not) support Unpickler arguments. However, this means we need to be
5607 extra careful in the other Unpickler methods, since a subclass could
5608 forget to call Unpickler.__init__() thus breaking our internal
5609 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005610 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005611 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005612 return -1;
5613
5614 /* In case of multiple __init__() calls, clear previous content. */
5615 if (self->read != NULL)
5616 (void)Unpickler_clear(self);
5617
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005618 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005619 return -1;
5620
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005621 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005622 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005623
5624 self->fix_imports = PyObject_IsTrue(fix_imports);
5625 if (self->fix_imports == -1)
5626 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005627
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005628 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005629 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5630 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005631 if (self->pers_func == NULL)
5632 return -1;
5633 }
5634 else {
5635 self->pers_func = NULL;
5636 }
5637
5638 self->stack = (Pdata *)Pdata_New();
5639 if (self->stack == NULL)
5640 return -1;
5641
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005642 self->memo_size = 32;
5643 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005644 if (self->memo == NULL)
5645 return -1;
5646
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005647 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005648 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005649
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005650 return 0;
5651}
5652
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005653/* Define a proxy object for the Unpickler's internal memo object. This is to
5654 * avoid breaking code like:
5655 * unpickler.memo.clear()
5656 * and
5657 * unpickler.memo = saved_memo
5658 * Is this a good idea? Not really, but we don't want to break code that uses
5659 * it. Note that we don't implement the entire mapping API here. This is
5660 * intentional, as these should be treated as black-box implementation details.
5661 *
5662 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005663 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005664 */
5665
5666typedef struct {
5667 PyObject_HEAD
5668 UnpicklerObject *unpickler;
5669} UnpicklerMemoProxyObject;
5670
5671PyDoc_STRVAR(ump_clear_doc,
5672"memo.clear() -> None. Remove all items from memo.");
5673
5674static PyObject *
5675ump_clear(UnpicklerMemoProxyObject *self)
5676{
5677 _Unpickler_MemoCleanup(self->unpickler);
5678 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5679 if (self->unpickler->memo == NULL)
5680 return NULL;
5681 Py_RETURN_NONE;
5682}
5683
5684PyDoc_STRVAR(ump_copy_doc,
5685"memo.copy() -> new_memo. Copy the memo to a new object.");
5686
5687static PyObject *
5688ump_copy(UnpicklerMemoProxyObject *self)
5689{
5690 Py_ssize_t i;
5691 PyObject *new_memo = PyDict_New();
5692 if (new_memo == NULL)
5693 return NULL;
5694
5695 for (i = 0; i < self->unpickler->memo_size; i++) {
5696 int status;
5697 PyObject *key, *value;
5698
5699 value = self->unpickler->memo[i];
5700 if (value == NULL)
5701 continue;
5702
5703 key = PyLong_FromSsize_t(i);
5704 if (key == NULL)
5705 goto error;
5706 status = PyDict_SetItem(new_memo, key, value);
5707 Py_DECREF(key);
5708 if (status < 0)
5709 goto error;
5710 }
5711 return new_memo;
5712
5713error:
5714 Py_DECREF(new_memo);
5715 return NULL;
5716}
5717
5718PyDoc_STRVAR(ump_reduce_doc,
5719"memo.__reduce__(). Pickling support.");
5720
5721static PyObject *
5722ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5723{
5724 PyObject *reduce_value;
5725 PyObject *constructor_args;
5726 PyObject *contents = ump_copy(self);
5727 if (contents == NULL)
5728 return NULL;
5729
5730 reduce_value = PyTuple_New(2);
5731 if (reduce_value == NULL) {
5732 Py_DECREF(contents);
5733 return NULL;
5734 }
5735 constructor_args = PyTuple_New(1);
5736 if (constructor_args == NULL) {
5737 Py_DECREF(contents);
5738 Py_DECREF(reduce_value);
5739 return NULL;
5740 }
5741 PyTuple_SET_ITEM(constructor_args, 0, contents);
5742 Py_INCREF((PyObject *)&PyDict_Type);
5743 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5744 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5745 return reduce_value;
5746}
5747
5748static PyMethodDef unpicklerproxy_methods[] = {
5749 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5750 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5751 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5752 {NULL, NULL} /* sentinel */
5753};
5754
5755static void
5756UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5757{
5758 PyObject_GC_UnTrack(self);
5759 Py_XDECREF(self->unpickler);
5760 PyObject_GC_Del((PyObject *)self);
5761}
5762
5763static int
5764UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5765 visitproc visit, void *arg)
5766{
5767 Py_VISIT(self->unpickler);
5768 return 0;
5769}
5770
5771static int
5772UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5773{
5774 Py_CLEAR(self->unpickler);
5775 return 0;
5776}
5777
5778static PyTypeObject UnpicklerMemoProxyType = {
5779 PyVarObject_HEAD_INIT(NULL, 0)
5780 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5781 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5782 0,
5783 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5784 0, /* tp_print */
5785 0, /* tp_getattr */
5786 0, /* tp_setattr */
5787 0, /* tp_compare */
5788 0, /* tp_repr */
5789 0, /* tp_as_number */
5790 0, /* tp_as_sequence */
5791 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005792 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005793 0, /* tp_call */
5794 0, /* tp_str */
5795 PyObject_GenericGetAttr, /* tp_getattro */
5796 PyObject_GenericSetAttr, /* tp_setattro */
5797 0, /* tp_as_buffer */
5798 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5799 0, /* tp_doc */
5800 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5801 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5802 0, /* tp_richcompare */
5803 0, /* tp_weaklistoffset */
5804 0, /* tp_iter */
5805 0, /* tp_iternext */
5806 unpicklerproxy_methods, /* tp_methods */
5807};
5808
5809static PyObject *
5810UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5811{
5812 UnpicklerMemoProxyObject *self;
5813
5814 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5815 &UnpicklerMemoProxyType);
5816 if (self == NULL)
5817 return NULL;
5818 Py_INCREF(unpickler);
5819 self->unpickler = unpickler;
5820 PyObject_GC_Track(self);
5821 return (PyObject *)self;
5822}
5823
5824/*****************************************************************************/
5825
5826
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005827static PyObject *
5828Unpickler_get_memo(UnpicklerObject *self)
5829{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005830 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005831}
5832
5833static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005834Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005835{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005836 PyObject **new_memo;
5837 Py_ssize_t new_memo_size = 0;
5838 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005839
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005840 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005841 PyErr_SetString(PyExc_TypeError,
5842 "attribute deletion is not supported");
5843 return -1;
5844 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005845
5846 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5847 UnpicklerObject *unpickler =
5848 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5849
5850 new_memo_size = unpickler->memo_size;
5851 new_memo = _Unpickler_NewMemo(new_memo_size);
5852 if (new_memo == NULL)
5853 return -1;
5854
5855 for (i = 0; i < new_memo_size; i++) {
5856 Py_XINCREF(unpickler->memo[i]);
5857 new_memo[i] = unpickler->memo[i];
5858 }
5859 }
5860 else if (PyDict_Check(obj)) {
5861 Py_ssize_t i = 0;
5862 PyObject *key, *value;
5863
5864 new_memo_size = PyDict_Size(obj);
5865 new_memo = _Unpickler_NewMemo(new_memo_size);
5866 if (new_memo == NULL)
5867 return -1;
5868
5869 while (PyDict_Next(obj, &i, &key, &value)) {
5870 Py_ssize_t idx;
5871 if (!PyLong_Check(key)) {
5872 PyErr_SetString(PyExc_TypeError,
5873 "memo key must be integers");
5874 goto error;
5875 }
5876 idx = PyLong_AsSsize_t(key);
5877 if (idx == -1 && PyErr_Occurred())
5878 goto error;
5879 if (_Unpickler_MemoPut(self, idx, value) < 0)
5880 goto error;
5881 }
5882 }
5883 else {
5884 PyErr_Format(PyExc_TypeError,
5885 "'memo' attribute must be an UnpicklerMemoProxy object"
5886 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005887 return -1;
5888 }
5889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005890 _Unpickler_MemoCleanup(self);
5891 self->memo_size = new_memo_size;
5892 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005893
5894 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005895
5896 error:
5897 if (new_memo_size) {
5898 i = new_memo_size;
5899 while (--i >= 0) {
5900 Py_XDECREF(new_memo[i]);
5901 }
5902 PyMem_FREE(new_memo);
5903 }
5904 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005905}
5906
5907static PyObject *
5908Unpickler_get_persload(UnpicklerObject *self)
5909{
5910 if (self->pers_func == NULL)
5911 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5912 else
5913 Py_INCREF(self->pers_func);
5914 return self->pers_func;
5915}
5916
5917static int
5918Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5919{
5920 PyObject *tmp;
5921
5922 if (value == NULL) {
5923 PyErr_SetString(PyExc_TypeError,
5924 "attribute deletion is not supported");
5925 return -1;
5926 }
5927 if (!PyCallable_Check(value)) {
5928 PyErr_SetString(PyExc_TypeError,
5929 "persistent_load must be a callable taking "
5930 "one argument");
5931 return -1;
5932 }
5933
5934 tmp = self->pers_func;
5935 Py_INCREF(value);
5936 self->pers_func = value;
5937 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5938
5939 return 0;
5940}
5941
5942static PyGetSetDef Unpickler_getsets[] = {
5943 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5944 {"persistent_load", (getter)Unpickler_get_persload,
5945 (setter)Unpickler_set_persload},
5946 {NULL}
5947};
5948
5949static PyTypeObject Unpickler_Type = {
5950 PyVarObject_HEAD_INIT(NULL, 0)
5951 "_pickle.Unpickler", /*tp_name*/
5952 sizeof(UnpicklerObject), /*tp_basicsize*/
5953 0, /*tp_itemsize*/
5954 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5955 0, /*tp_print*/
5956 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005957 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005958 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005959 0, /*tp_repr*/
5960 0, /*tp_as_number*/
5961 0, /*tp_as_sequence*/
5962 0, /*tp_as_mapping*/
5963 0, /*tp_hash*/
5964 0, /*tp_call*/
5965 0, /*tp_str*/
5966 0, /*tp_getattro*/
5967 0, /*tp_setattro*/
5968 0, /*tp_as_buffer*/
5969 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5970 Unpickler_doc, /*tp_doc*/
5971 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5972 (inquiry)Unpickler_clear, /*tp_clear*/
5973 0, /*tp_richcompare*/
5974 0, /*tp_weaklistoffset*/
5975 0, /*tp_iter*/
5976 0, /*tp_iternext*/
5977 Unpickler_methods, /*tp_methods*/
5978 0, /*tp_members*/
5979 Unpickler_getsets, /*tp_getset*/
5980 0, /*tp_base*/
5981 0, /*tp_dict*/
5982 0, /*tp_descr_get*/
5983 0, /*tp_descr_set*/
5984 0, /*tp_dictoffset*/
5985 (initproc)Unpickler_init, /*tp_init*/
5986 PyType_GenericAlloc, /*tp_alloc*/
5987 PyType_GenericNew, /*tp_new*/
5988 PyObject_GC_Del, /*tp_free*/
5989 0, /*tp_is_gc*/
5990};
5991
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005992PyDoc_STRVAR(pickle_dump_doc,
5993"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5994"\n"
5995"Write a pickled representation of obj to the open file object file. This\n"
5996"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5997"efficient.\n"
5998"\n"
5999"The optional protocol argument tells the pickler to use the given protocol;\n"
6000"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6001"backward-incompatible protocol designed for Python 3.0.\n"
6002"\n"
6003"Specifying a negative protocol version selects the highest protocol version\n"
6004"supported. The higher the protocol used, the more recent the version of\n"
6005"Python needed to read the pickle produced.\n"
6006"\n"
6007"The file argument must have a write() method that accepts a single bytes\n"
6008"argument. It can thus be a file object opened for binary writing, a\n"
6009"io.BytesIO instance, or any other custom object that meets this interface.\n"
6010"\n"
6011"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6012"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6013"so that the pickle data stream is readable with Python 2.x.\n");
6014
6015static PyObject *
6016pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6017{
6018 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6019 PyObject *obj;
6020 PyObject *file;
6021 PyObject *proto = NULL;
6022 PyObject *fix_imports = Py_True;
6023 PicklerObject *pickler;
6024
6025 /* fix_imports is a keyword-only argument. */
6026 if (Py_SIZE(args) > 3) {
6027 PyErr_Format(PyExc_TypeError,
6028 "pickle.dump() takes at most 3 positional "
6029 "argument (%zd given)", Py_SIZE(args));
6030 return NULL;
6031 }
6032
6033 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6034 &obj, &file, &proto, &fix_imports))
6035 return NULL;
6036
6037 pickler = _Pickler_New();
6038 if (pickler == NULL)
6039 return NULL;
6040
6041 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6042 goto error;
6043
6044 if (_Pickler_SetOutputStream(pickler, file) < 0)
6045 goto error;
6046
6047 if (dump(pickler, obj) < 0)
6048 goto error;
6049
6050 if (_Pickler_FlushToFile(pickler) < 0)
6051 goto error;
6052
6053 Py_DECREF(pickler);
6054 Py_RETURN_NONE;
6055
6056 error:
6057 Py_XDECREF(pickler);
6058 return NULL;
6059}
6060
6061PyDoc_STRVAR(pickle_dumps_doc,
6062"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6063"\n"
6064"Return the pickled representation of the object as a bytes\n"
6065"object, instead of writing it to a file.\n"
6066"\n"
6067"The optional protocol argument tells the pickler to use the given protocol;\n"
6068"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6069"backward-incompatible protocol designed for Python 3.0.\n"
6070"\n"
6071"Specifying a negative protocol version selects the highest protocol version\n"
6072"supported. The higher the protocol used, the more recent the version of\n"
6073"Python needed to read the pickle produced.\n"
6074"\n"
6075"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6076"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6077"so that the pickle data stream is readable with Python 2.x.\n");
6078
6079static PyObject *
6080pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6081{
6082 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6083 PyObject *obj;
6084 PyObject *proto = NULL;
6085 PyObject *result;
6086 PyObject *fix_imports = Py_True;
6087 PicklerObject *pickler;
6088
6089 /* fix_imports is a keyword-only argument. */
6090 if (Py_SIZE(args) > 2) {
6091 PyErr_Format(PyExc_TypeError,
6092 "pickle.dumps() takes at most 2 positional "
6093 "argument (%zd given)", Py_SIZE(args));
6094 return NULL;
6095 }
6096
6097 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6098 &obj, &proto, &fix_imports))
6099 return NULL;
6100
6101 pickler = _Pickler_New();
6102 if (pickler == NULL)
6103 return NULL;
6104
6105 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6106 goto error;
6107
6108 if (dump(pickler, obj) < 0)
6109 goto error;
6110
6111 result = _Pickler_GetString(pickler);
6112 Py_DECREF(pickler);
6113 return result;
6114
6115 error:
6116 Py_XDECREF(pickler);
6117 return NULL;
6118}
6119
6120PyDoc_STRVAR(pickle_load_doc,
6121"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6122"\n"
6123"Read a pickled object representation from the open file object file and\n"
6124"return the reconstituted object hierarchy specified therein. This is\n"
6125"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6126"\n"
6127"The protocol version of the pickle is detected automatically, so no protocol\n"
6128"argument is needed. Bytes past the pickled object's representation are\n"
6129"ignored.\n"
6130"\n"
6131"The argument file must have two methods, a read() method that takes an\n"
6132"integer argument, and a readline() method that requires no arguments. Both\n"
6133"methods should return bytes. Thus *file* can be a binary file object opened\n"
6134"for reading, a BytesIO object, or any other custom object that meets this\n"
6135"interface.\n"
6136"\n"
6137"Optional keyword arguments are fix_imports, encoding and errors,\n"
6138"which are used to control compatiblity support for pickle stream generated\n"
6139"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6140"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6141"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6142"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6143
6144static PyObject *
6145pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6146{
6147 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6148 PyObject *file;
6149 PyObject *fix_imports = Py_True;
6150 PyObject *result;
6151 char *encoding = NULL;
6152 char *errors = NULL;
6153 UnpicklerObject *unpickler;
6154
6155 /* fix_imports, encoding and errors are a keyword-only argument. */
6156 if (Py_SIZE(args) != 1) {
6157 PyErr_Format(PyExc_TypeError,
6158 "pickle.load() takes exactly one positional "
6159 "argument (%zd given)", Py_SIZE(args));
6160 return NULL;
6161 }
6162
6163 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6164 &file, &fix_imports, &encoding, &errors))
6165 return NULL;
6166
6167 unpickler = _Unpickler_New();
6168 if (unpickler == NULL)
6169 return NULL;
6170
6171 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6172 goto error;
6173
6174 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6175 goto error;
6176
6177 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6178 if (unpickler->fix_imports == -1)
6179 goto error;
6180
6181 result = load(unpickler);
6182 Py_DECREF(unpickler);
6183 return result;
6184
6185 error:
6186 Py_XDECREF(unpickler);
6187 return NULL;
6188}
6189
6190PyDoc_STRVAR(pickle_loads_doc,
6191"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6192"\n"
6193"Read a pickled object hierarchy from a bytes object and return the\n"
6194"reconstituted object hierarchy specified therein\n"
6195"\n"
6196"The protocol version of the pickle is detected automatically, so no protocol\n"
6197"argument is needed. Bytes past the pickled object's representation are\n"
6198"ignored.\n"
6199"\n"
6200"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6201"are used to control compatiblity support for pickle stream generated\n"
6202"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6203"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6204"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6205"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6206
6207static PyObject *
6208pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6209{
6210 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6211 PyObject *input;
6212 PyObject *fix_imports = Py_True;
6213 PyObject *result;
6214 char *encoding = NULL;
6215 char *errors = NULL;
6216 UnpicklerObject *unpickler;
6217
6218 /* fix_imports, encoding and errors are a keyword-only argument. */
6219 if (Py_SIZE(args) != 1) {
6220 PyErr_Format(PyExc_TypeError,
6221 "pickle.loads() takes exactly one positional "
6222 "argument (%zd given)", Py_SIZE(args));
6223 return NULL;
6224 }
6225
6226 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6227 &input, &fix_imports, &encoding, &errors))
6228 return NULL;
6229
6230 unpickler = _Unpickler_New();
6231 if (unpickler == NULL)
6232 return NULL;
6233
6234 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6235 goto error;
6236
6237 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6238 goto error;
6239
6240 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6241 if (unpickler->fix_imports == -1)
6242 goto error;
6243
6244 result = load(unpickler);
6245 Py_DECREF(unpickler);
6246 return result;
6247
6248 error:
6249 Py_XDECREF(unpickler);
6250 return NULL;
6251}
6252
6253
6254static struct PyMethodDef pickle_methods[] = {
6255 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6256 pickle_dump_doc},
6257 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6258 pickle_dumps_doc},
6259 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6260 pickle_load_doc},
6261 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6262 pickle_loads_doc},
6263 {NULL, NULL} /* sentinel */
6264};
6265
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006266static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006267initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006268{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006269 PyObject *copyreg = NULL;
6270 PyObject *compat_pickle = NULL;
6271
6272 /* XXX: We should ensure that the types of the dictionaries imported are
6273 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6274 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006275
6276 copyreg = PyImport_ImportModule("copyreg");
6277 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006278 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006279 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6280 if (!dispatch_table)
6281 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006282 extension_registry = \
6283 PyObject_GetAttrString(copyreg, "_extension_registry");
6284 if (!extension_registry)
6285 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006286 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6287 if (!inverted_registry)
6288 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006289 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6290 if (!extension_cache)
6291 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006292 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006293
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006294 /* Load the 2.x -> 3.x stdlib module mapping tables */
6295 compat_pickle = PyImport_ImportModule("_compat_pickle");
6296 if (!compat_pickle)
6297 goto error;
6298 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6299 if (!name_mapping_2to3)
6300 goto error;
6301 if (!PyDict_CheckExact(name_mapping_2to3)) {
6302 PyErr_Format(PyExc_RuntimeError,
6303 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6304 Py_TYPE(name_mapping_2to3)->tp_name);
6305 goto error;
6306 }
6307 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6308 "IMPORT_MAPPING");
6309 if (!import_mapping_2to3)
6310 goto error;
6311 if (!PyDict_CheckExact(import_mapping_2to3)) {
6312 PyErr_Format(PyExc_RuntimeError,
6313 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6314 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6315 goto error;
6316 }
6317 /* ... and the 3.x -> 2.x mapping tables */
6318 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6319 "REVERSE_NAME_MAPPING");
6320 if (!name_mapping_3to2)
6321 goto error;
6322 if (!PyDict_CheckExact(name_mapping_3to2)) {
6323 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006324 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006325 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6326 goto error;
6327 }
6328 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6329 "REVERSE_IMPORT_MAPPING");
6330 if (!import_mapping_3to2)
6331 goto error;
6332 if (!PyDict_CheckExact(import_mapping_3to2)) {
6333 PyErr_Format(PyExc_RuntimeError,
6334 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6335 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6336 goto error;
6337 }
6338 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006339
6340 empty_tuple = PyTuple_New(0);
6341 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006342 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006343 two_tuple = PyTuple_New(2);
6344 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006345 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006346 /* We use this temp container with no regard to refcounts, or to
6347 * keeping containees alive. Exempt from GC, because we don't
6348 * want anything looking at two_tuple() by magic.
6349 */
6350 PyObject_GC_UnTrack(two_tuple);
6351
6352 return 0;
6353
6354 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006355 Py_CLEAR(copyreg);
6356 Py_CLEAR(dispatch_table);
6357 Py_CLEAR(extension_registry);
6358 Py_CLEAR(inverted_registry);
6359 Py_CLEAR(extension_cache);
6360 Py_CLEAR(compat_pickle);
6361 Py_CLEAR(name_mapping_2to3);
6362 Py_CLEAR(import_mapping_2to3);
6363 Py_CLEAR(name_mapping_3to2);
6364 Py_CLEAR(import_mapping_3to2);
6365 Py_CLEAR(empty_tuple);
6366 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006367 return -1;
6368}
6369
6370static struct PyModuleDef _picklemodule = {
6371 PyModuleDef_HEAD_INIT,
6372 "_pickle",
6373 pickle_module_doc,
6374 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006375 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006376 NULL,
6377 NULL,
6378 NULL,
6379 NULL
6380};
6381
6382PyMODINIT_FUNC
6383PyInit__pickle(void)
6384{
6385 PyObject *m;
6386
6387 if (PyType_Ready(&Unpickler_Type) < 0)
6388 return NULL;
6389 if (PyType_Ready(&Pickler_Type) < 0)
6390 return NULL;
6391 if (PyType_Ready(&Pdata_Type) < 0)
6392 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006393 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6394 return NULL;
6395 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6396 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006397
6398 /* Create the module and add the functions. */
6399 m = PyModule_Create(&_picklemodule);
6400 if (m == NULL)
6401 return NULL;
6402
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006403 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006404 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6405 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006406 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006407 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6408 return NULL;
6409
6410 /* Initialize the exceptions. */
6411 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6412 if (PickleError == NULL)
6413 return NULL;
6414 PicklingError = \
6415 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6416 if (PicklingError == NULL)
6417 return NULL;
6418 UnpicklingError = \
6419 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6420 if (UnpicklingError == NULL)
6421 return NULL;
6422
6423 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6424 return NULL;
6425 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6426 return NULL;
6427 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6428 return NULL;
6429
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006430 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006431 return NULL;
6432
6433 return m;
6434}