blob: 6a44b8288e13ab632989511634bbb7320468b1c2 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
7/* Bump this when new opcodes are added to the pickle protocol. */
8enum {
9 HIGHEST_PROTOCOL = 3,
10 DEFAULT_PROTOCOL = 3
11};
12
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000013/* Pickle opcodes. These must be kept updated with pickle.py.
14 Extensive docs are in pickletools.py. */
15enum opcode {
16 MARK = '(',
17 STOP = '.',
18 POP = '0',
19 POP_MARK = '1',
20 DUP = '2',
21 FLOAT = 'F',
22 INT = 'I',
23 BININT = 'J',
24 BININT1 = 'K',
25 LONG = 'L',
26 BININT2 = 'M',
27 NONE = 'N',
28 PERSID = 'P',
29 BINPERSID = 'Q',
30 REDUCE = 'R',
31 STRING = 'S',
32 BINSTRING = 'T',
33 SHORT_BINSTRING = 'U',
34 UNICODE = 'V',
35 BINUNICODE = 'X',
36 APPEND = 'a',
37 BUILD = 'b',
38 GLOBAL = 'c',
39 DICT = 'd',
40 EMPTY_DICT = '}',
41 APPENDS = 'e',
42 GET = 'g',
43 BINGET = 'h',
44 INST = 'i',
45 LONG_BINGET = 'j',
46 LIST = 'l',
47 EMPTY_LIST = ']',
48 OBJ = 'o',
49 PUT = 'p',
50 BINPUT = 'q',
51 LONG_BINPUT = 'r',
52 SETITEM = 's',
53 TUPLE = 't',
54 EMPTY_TUPLE = ')',
55 SETITEMS = 'u',
56 BINFLOAT = 'G',
57
58 /* Protocol 2. */
59 PROTO = '\x80',
60 NEWOBJ = '\x81',
61 EXT1 = '\x82',
62 EXT2 = '\x83',
63 EXT4 = '\x84',
64 TUPLE1 = '\x85',
65 TUPLE2 = '\x86',
66 TUPLE3 = '\x87',
67 NEWTRUE = '\x88',
68 NEWFALSE = '\x89',
69 LONG1 = '\x8a',
70 LONG4 = '\x8b',
71
72 /* Protocol 3 (Python 3.x) */
73 BINBYTES = 'B',
Victor Stinner132ef6c2010-11-09 09:39:41 +000074 SHORT_BINBYTES = 'C'
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000075};
76
77/* These aren't opcodes -- they're ways to pickle bools before protocol 2
78 * so that unpicklers written before bools were introduced unpickle them
79 * as ints, but unpicklers after can recognize that bools were intended.
80 * Note that protocol 2 added direct ways to pickle bools.
81 */
82#undef TRUE
83#define TRUE "I01\n"
84#undef FALSE
85#define FALSE "I00\n"
86
87enum {
88 /* Keep in synch with pickle.Pickler._BATCHSIZE. This is how many elements
89 batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will
90 break if this gets out of synch with pickle.py, but it's unclear that would
91 help anything either. */
92 BATCHSIZE = 1000,
93
94 /* Nesting limit until Pickler, when running in "fast mode", starts
95 checking for self-referential data-structures. */
96 FAST_NESTING_LIMIT = 50,
97
Antoine Pitrouea99c5c2010-09-09 18:33:21 +000098 /* Initial size of the write buffer of Pickler. */
99 WRITE_BUF_SIZE = 4096,
100
101 /* Maximum size of the write buffer of Pickler when pickling to a
102 stream. This is ignored for in-memory pickling. */
103 MAX_WRITE_BUF_SIZE = 64 * 1024,
Antoine Pitrou04248a82010-10-12 20:51:21 +0000104
105 /* Prefetch size when unpickling (disabled on unpeekable streams) */
Victor Stinner132ef6c2010-11-09 09:39:41 +0000106 PREFETCH = 8192 * 16
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000107};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
/* XXX: Are these really necessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
261/* Push an object on stack, transferring its ownership to the stack. */
262#define PDATA_PUSH(D, O, ER) do { \
263 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
264
265/* Push an object on stack, adding a new reference to the object. */
266#define PDATA_APPEND(D, O, ER) do { \
267 Py_INCREF((O)); \
268 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000388/*************************************************************************
389 A custom hashtable mapping void* to longs. This is used by the pickler for
390 memoization. Using a custom hashtable rather than PyDict allows us to skip
391 a bunch of unnecessary object creation. This makes a huge performance
392 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000394#define MT_MINSIZE 8
395#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
607/* Helpers for creating the argument tuple passed to functions. This has the
Victor Stinner121aab42011-09-29 23:40:53 +0200608 performance advantage of calling PyTuple_New() only once.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000609
610 XXX(avassalotti): Inline directly in _Pickler_FastCall() and
611 _Unpickler_FastCall(). */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000612#define ARG_TUP(self, obj) do { \
613 if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \
614 Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \
615 PyTuple_SET_ITEM((self)->arg, 0, (obj)); \
616 } \
617 else { \
618 Py_DECREF((obj)); \
619 } \
620 } while (0)
621
622#define FREE_ARG_TUP(self) do { \
623 if ((self)->arg->ob_refcnt > 1) \
624 Py_CLEAR((self)->arg); \
625 } while (0)
626
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefit?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e., call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e., call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But, that is probably more related to the function call
   overhead, than the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200829 _Py_identifier(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000830 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200831 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 if (self->write == NULL) {
833 if (PyErr_ExceptionMatches(PyExc_AttributeError))
834 PyErr_SetString(PyExc_TypeError,
835 "file must have a 'write' attribute");
836 return -1;
837 }
838
839 return 0;
840}
841
842/* See documentation for _Pickler_FastCall(). */
843static PyObject *
844_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
845{
846 PyObject *result = NULL;
847
848 ARG_TUP(self, arg);
849 if (self->arg) {
850 result = PyObject_Call(func, self->arg, NULL);
851 FREE_ARG_TUP(self);
852 }
853 return result;
854}
855
856/* Returns the size of the input on success, -1 on failure. This takes its
857 own reference to `input`. */
858static Py_ssize_t
859_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
860{
861 if (self->buffer.buf != NULL)
862 PyBuffer_Release(&self->buffer);
863 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
864 return -1;
865 self->input_buffer = self->buffer.buf;
866 self->input_len = self->buffer.len;
867 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000868 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869 return self->input_len;
870}
871
Antoine Pitrou04248a82010-10-12 20:51:21 +0000872static int
873_Unpickler_SkipConsumed(UnpicklerObject *self)
874{
875 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
876
877 if (consumed > 0) {
878 PyObject *r;
879 assert(self->peek); /* otherwise we did something wrong */
880 /* This makes an useless copy... */
881 r = PyObject_CallFunction(self->read, "n", consumed);
882 if (r == NULL)
883 return -1;
884 Py_DECREF(r);
885 self->prefetched_idx = self->next_read_idx;
886 }
887 return 0;
888}
889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890static const Py_ssize_t READ_WHOLE_LINE = -1;
891
892/* If reading from a file, we need to only pull the bytes we need, since there
893 may be multiple pickle objects arranged contiguously in the same input
894 buffer.
895
896 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
897 bytes from the input stream/buffer.
898
899 Update the unpickler's input buffer with the newly-read data. Returns -1 on
900 failure; on success, returns the number of bytes read from the file.
901
902 On success, self->input_len will be 0; this is intentional so that when
903 unpickling from a file, the "we've run out of data" code paths will trigger,
904 causing the Unpickler to go back to the file for more data. Use the returned
905 size to tell you how much data you can process. */
906static Py_ssize_t
907_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
908{
909 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000910 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000911
912 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200913
Antoine Pitrou04248a82010-10-12 20:51:21 +0000914 if (_Unpickler_SkipConsumed(self) < 0)
915 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000916
917 if (n == READ_WHOLE_LINE)
918 data = PyObject_Call(self->readline, empty_tuple, NULL);
919 else {
920 PyObject *len = PyLong_FromSsize_t(n);
921 if (len == NULL)
922 return -1;
923 data = _Unpickler_FastCall(self, self->read, len);
924 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000925 if (data == NULL)
926 return -1;
927
Antoine Pitrou04248a82010-10-12 20:51:21 +0000928 /* Prefetch some data without advancing the file pointer, if possible */
929 if (self->peek) {
930 PyObject *len, *prefetched;
931 len = PyLong_FromSsize_t(PREFETCH);
932 if (len == NULL) {
933 Py_DECREF(data);
934 return -1;
935 }
936 prefetched = _Unpickler_FastCall(self, self->peek, len);
937 if (prefetched == NULL) {
938 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
939 /* peek() is probably not supported by the given file object */
940 PyErr_Clear();
941 Py_CLEAR(self->peek);
942 }
943 else {
944 Py_DECREF(data);
945 return -1;
946 }
947 }
948 else {
949 assert(PyBytes_Check(prefetched));
950 prefetched_size = PyBytes_GET_SIZE(prefetched);
951 PyBytes_ConcatAndDel(&data, prefetched);
952 if (data == NULL)
953 return -1;
954 }
955 }
956
957 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000958 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000959 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000960 return read_size;
961}
962
963/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
964
965 This should be used for all data reads, rather than accessing the unpickler's
966 input buffer directly. This method deals correctly with reading from input
967 streams, which the input buffer doesn't deal with.
968
969 Note that when reading from a file-like object, self->next_read_idx won't
970 be updated (it should remain at 0 for the entire unpickling process). You
971 should use this function's return value to know how many bytes you can
972 consume.
973
974 Returns -1 (with an exception set) on failure. On success, return the
975 number of chars read. */
976static Py_ssize_t
977_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
978{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000979 Py_ssize_t num_read;
980
Antoine Pitrou04248a82010-10-12 20:51:21 +0000981 if (self->next_read_idx + n <= self->input_len) {
982 *s = self->input_buffer + self->next_read_idx;
983 self->next_read_idx += n;
984 return n;
985 }
986 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000987 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000988 return -1;
989 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000990 num_read = _Unpickler_ReadFromFile(self, n);
991 if (num_read < 0)
992 return -1;
993 if (num_read < n) {
994 PyErr_Format(PyExc_EOFError, "Ran out of input");
995 return -1;
996 }
997 *s = self->input_buffer;
998 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000999 return n;
1000}
1001
1002static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001003_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1004 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001005{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1007 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008 return -1;
1009
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001010 memcpy(input_line, line, len);
1011 input_line[len] = '\0';
1012 self->input_line = input_line;
1013 *result = self->input_line;
1014 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001015}
1016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001017/* Read a line from the input stream/buffer. If we run off the end of the input
1018 before hitting \n, return the data we found.
1019
1020 Returns the number of chars read, or -1 on failure. */
1021static Py_ssize_t
1022_Unpickler_Readline(UnpicklerObject *self, char **result)
1023{
1024 Py_ssize_t i, num_read;
1025
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001027 if (self->input_buffer[i] == '\n') {
1028 char *line_start = self->input_buffer + self->next_read_idx;
1029 num_read = i - self->next_read_idx + 1;
1030 self->next_read_idx = i + 1;
1031 return _Unpickler_CopyLine(self, line_start, num_read, result);
1032 }
1033 }
1034 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001035 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1036 if (num_read < 0)
1037 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001039 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001040 }
Victor Stinner121aab42011-09-29 23:40:53 +02001041
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001042 /* If we get here, we've run off the end of the input string. Return the
1043 remaining string and let the caller figure it out. */
1044 *result = self->input_buffer + self->next_read_idx;
1045 num_read = i - self->next_read_idx;
1046 self->next_read_idx = i;
1047 return num_read;
1048}
1049
1050/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1051 will be modified in place. */
1052static int
1053_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1054{
1055 Py_ssize_t i;
1056 PyObject **memo;
1057
1058 assert(new_size > self->memo_size);
1059
1060 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1061 if (memo == NULL) {
1062 PyErr_NoMemory();
1063 return -1;
1064 }
1065 self->memo = memo;
1066 for (i = self->memo_size; i < new_size; i++)
1067 self->memo[i] = NULL;
1068 self->memo_size = new_size;
1069 return 0;
1070}
1071
1072/* Returns NULL if idx is out of bounds. */
1073static PyObject *
1074_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1075{
1076 if (idx < 0 || idx >= self->memo_size)
1077 return NULL;
1078
1079 return self->memo[idx];
1080}
1081
1082/* Returns -1 (with an exception set) on failure, 0 on success.
1083 This takes its own reference to `value`. */
1084static int
1085_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1086{
1087 PyObject *old_item;
1088
1089 if (idx >= self->memo_size) {
1090 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1091 return -1;
1092 assert(idx < self->memo_size);
1093 }
1094 Py_INCREF(value);
1095 old_item = self->memo[idx];
1096 self->memo[idx] = value;
1097 Py_XDECREF(old_item);
1098 return 0;
1099}
1100
1101static PyObject **
1102_Unpickler_NewMemo(Py_ssize_t new_size)
1103{
1104 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1105 if (memo == NULL)
1106 return NULL;
1107 memset(memo, 0, new_size * sizeof(PyObject *));
1108 return memo;
1109}
1110
1111/* Free the unpickler's memo, taking care to decref any items left in it. */
1112static void
1113_Unpickler_MemoCleanup(UnpicklerObject *self)
1114{
1115 Py_ssize_t i;
1116 PyObject **memo = self->memo;
1117
1118 if (self->memo == NULL)
1119 return;
1120 self->memo = NULL;
1121 i = self->memo_size;
1122 while (--i >= 0) {
1123 Py_XDECREF(memo[i]);
1124 }
1125 PyMem_FREE(memo);
1126}
1127
1128static UnpicklerObject *
1129_Unpickler_New(void)
1130{
1131 UnpicklerObject *self;
1132
1133 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1134 if (self == NULL)
1135 return NULL;
1136
1137 self->stack = (Pdata *)Pdata_New();
1138 if (self->stack == NULL) {
1139 Py_DECREF(self);
1140 return NULL;
1141 }
1142 memset(&self->buffer, 0, sizeof(Py_buffer));
1143
1144 self->memo_size = 32;
1145 self->memo = _Unpickler_NewMemo(self->memo_size);
1146 if (self->memo == NULL) {
1147 Py_DECREF(self);
1148 return NULL;
1149 }
1150
1151 self->arg = NULL;
1152 self->pers_func = NULL;
1153 self->input_buffer = NULL;
1154 self->input_line = NULL;
1155 self->input_len = 0;
1156 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001157 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158 self->read = NULL;
1159 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001160 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001161 self->encoding = NULL;
1162 self->errors = NULL;
1163 self->marks = NULL;
1164 self->num_marks = 0;
1165 self->marks_size = 0;
1166 self->proto = 0;
1167 self->fix_imports = 0;
1168
1169 return self;
1170}
1171
1172/* Returns -1 (with an exception set) on failure, 0 on success. This may
1173 be called once on a freshly created Pickler. */
1174static int
1175_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1176{
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001177 _Py_identifier(peek);
1178 _Py_identifier(read);
1179 _Py_identifier(readline);
1180
1181 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001182 if (self->peek == NULL) {
1183 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1184 PyErr_Clear();
1185 else
1186 return -1;
1187 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001188 self->read = _PyObject_GetAttrId(file, &PyId_read);
1189 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001190 if (self->readline == NULL || self->read == NULL) {
1191 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1192 PyErr_SetString(PyExc_TypeError,
1193 "file must have 'read' and 'readline' attributes");
1194 Py_CLEAR(self->read);
1195 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001196 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001197 return -1;
1198 }
1199 return 0;
1200}
1201
1202/* Returns -1 (with an exception set) on failure, 0 on success. This may
1203 be called once on a freshly created Pickler. */
1204static int
1205_Unpickler_SetInputEncoding(UnpicklerObject *self,
1206 const char *encoding,
1207 const char *errors)
1208{
1209 if (encoding == NULL)
1210 encoding = "ASCII";
1211 if (errors == NULL)
1212 errors = "strict";
1213
1214 self->encoding = strdup(encoding);
1215 self->errors = strdup(errors);
1216 if (self->encoding == NULL || self->errors == NULL) {
1217 PyErr_NoMemory();
1218 return -1;
1219 }
1220 return 0;
1221}
1222
1223/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001224static int
1225memo_get(PicklerObject *self, PyObject *key)
1226{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001227 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001228 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001229 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001231 value = PyMemoTable_Get(self->memo, key);
1232 if (value == NULL) {
1233 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001234 return -1;
1235 }
1236
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 if (!self->bin) {
1238 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001239 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1240 "%" PY_FORMAT_SIZE_T "d\n", *value);
1241 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 }
1243 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001247 len = 2;
1248 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001249 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251 pdata[1] = (unsigned char)(*value & 0xff);
1252 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1253 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1254 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001255 len = 5;
1256 }
1257 else { /* unlikely */
1258 PyErr_SetString(PicklingError,
1259 "memo id too large for LONG_BINGET");
1260 return -1;
1261 }
1262 }
1263
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001264 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001265 return -1;
1266
1267 return 0;
1268}
1269
1270/* Store an object in the memo, assign it a new unique ID based on the number
1271 of objects currently stored in the memo and generate a PUT opcode. */
1272static int
1273memo_put(PicklerObject *self, PyObject *obj)
1274{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001275 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001276 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001277 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001278 int status = 0;
1279
1280 if (self->fast)
1281 return 0;
1282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001283 x = PyMemoTable_Size(self->memo);
1284 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001285 goto error;
1286
1287 if (!self->bin) {
1288 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001289 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1290 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001291 len = strlen(pdata);
1292 }
1293 else {
1294 if (x < 256) {
1295 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001296 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001297 len = 2;
1298 }
1299 else if (x <= 0xffffffffL) {
1300 pdata[0] = LONG_BINPUT;
1301 pdata[1] = (unsigned char)(x & 0xff);
1302 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1303 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1304 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1305 len = 5;
1306 }
1307 else { /* unlikely */
1308 PyErr_SetString(PicklingError,
1309 "memo id too large for LONG_BINPUT");
1310 return -1;
1311 }
1312 }
1313
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001314 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001315 goto error;
1316
1317 if (0) {
1318 error:
1319 status = -1;
1320 }
1321
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001322 return status;
1323}
1324
1325static PyObject *
1326whichmodule(PyObject *global, PyObject *global_name)
1327{
1328 Py_ssize_t i, j;
1329 static PyObject *module_str = NULL;
1330 static PyObject *main_str = NULL;
1331 PyObject *module_name;
1332 PyObject *modules_dict;
1333 PyObject *module;
1334 PyObject *obj;
1335
1336 if (module_str == NULL) {
1337 module_str = PyUnicode_InternFromString("__module__");
1338 if (module_str == NULL)
1339 return NULL;
1340 main_str = PyUnicode_InternFromString("__main__");
1341 if (main_str == NULL)
1342 return NULL;
1343 }
1344
1345 module_name = PyObject_GetAttr(global, module_str);
1346
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001347 /* In some rare cases (e.g., bound methods of extension types),
1348 __module__ can be None. If it is so, then search sys.modules
1349 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001350 if (module_name == Py_None) {
1351 Py_DECREF(module_name);
1352 goto search;
1353 }
1354
1355 if (module_name) {
1356 return module_name;
1357 }
1358 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1359 PyErr_Clear();
1360 else
1361 return NULL;
1362
1363 search:
1364 modules_dict = PySys_GetObject("modules");
1365 if (modules_dict == NULL)
1366 return NULL;
1367
1368 i = 0;
1369 module_name = NULL;
1370 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001371 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001372 continue;
1373
1374 obj = PyObject_GetAttr(module, global_name);
1375 if (obj == NULL) {
1376 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1377 PyErr_Clear();
1378 else
1379 return NULL;
1380 continue;
1381 }
1382
1383 if (obj != global) {
1384 Py_DECREF(obj);
1385 continue;
1386 }
1387
1388 Py_DECREF(obj);
1389 break;
1390 }
1391
1392 /* If no module is found, use __main__. */
1393 if (!j) {
1394 module_name = main_str;
1395 }
1396
1397 Py_INCREF(module_name);
1398 return module_name;
1399}
1400
1401/* fast_save_enter() and fast_save_leave() are guards against recursive
1402 objects when Pickler is used with the "fast mode" (i.e., with object
1403 memoization disabled). If the nesting of a list or dict object exceed
1404 FAST_NESTING_LIMIT, these guards will start keeping an internal
1405 reference to the seen list or dict objects and check whether these objects
1406 are recursive. These are not strictly necessary, since save() has a
1407 hard-coded recursion limit, but they give a nicer error message than the
1408 typical RuntimeError. */
1409static int
1410fast_save_enter(PicklerObject *self, PyObject *obj)
1411{
1412 /* if fast_nesting < 0, we're doing an error exit. */
1413 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1414 PyObject *key = NULL;
1415 if (self->fast_memo == NULL) {
1416 self->fast_memo = PyDict_New();
1417 if (self->fast_memo == NULL) {
1418 self->fast_nesting = -1;
1419 return 0;
1420 }
1421 }
1422 key = PyLong_FromVoidPtr(obj);
1423 if (key == NULL)
1424 return 0;
1425 if (PyDict_GetItem(self->fast_memo, key)) {
1426 Py_DECREF(key);
1427 PyErr_Format(PyExc_ValueError,
1428 "fast mode: can't pickle cyclic objects "
1429 "including object type %.200s at %p",
1430 obj->ob_type->tp_name, obj);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1435 Py_DECREF(key);
1436 self->fast_nesting = -1;
1437 return 0;
1438 }
1439 Py_DECREF(key);
1440 }
1441 return 1;
1442}
1443
1444static int
1445fast_save_leave(PicklerObject *self, PyObject *obj)
1446{
1447 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1448 PyObject *key = PyLong_FromVoidPtr(obj);
1449 if (key == NULL)
1450 return 0;
1451 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1452 Py_DECREF(key);
1453 return 0;
1454 }
1455 Py_DECREF(key);
1456 }
1457 return 1;
1458}
1459
1460static int
1461save_none(PicklerObject *self, PyObject *obj)
1462{
1463 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001464 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001465 return -1;
1466
1467 return 0;
1468}
1469
1470static int
1471save_bool(PicklerObject *self, PyObject *obj)
1472{
1473 static const char *buf[2] = { FALSE, TRUE };
1474 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1475 int p = (obj == Py_True);
1476
1477 if (self->proto >= 2) {
1478 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001479 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001480 return -1;
1481 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001482 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001483 return -1;
1484
1485 return 0;
1486}
1487
1488static int
1489save_int(PicklerObject *self, long x)
1490{
1491 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001492 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493
1494 if (!self->bin
1495#if SIZEOF_LONG > 4
1496 || x > 0x7fffffffL || x < -0x80000000L
1497#endif
1498 ) {
1499 /* Text-mode pickle, or long too big to fit in the 4-byte
1500 * signed BININT format: store as a string.
1501 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001502 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1503 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001504 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001505 return -1;
1506 }
1507 else {
1508 /* Binary pickle and x fits in a signed 4-byte int. */
1509 pdata[1] = (unsigned char)(x & 0xff);
1510 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1511 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1512 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1513
1514 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1515 if (pdata[2] == 0) {
1516 pdata[0] = BININT1;
1517 len = 2;
1518 }
1519 else {
1520 pdata[0] = BININT2;
1521 len = 3;
1522 }
1523 }
1524 else {
1525 pdata[0] = BININT;
1526 len = 5;
1527 }
1528
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001529 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001530 return -1;
1531 }
1532
1533 return 0;
1534}
1535
1536static int
1537save_long(PicklerObject *self, PyObject *obj)
1538{
1539 PyObject *repr = NULL;
1540 Py_ssize_t size;
1541 long val = PyLong_AsLong(obj);
1542 int status = 0;
1543
1544 const char long_op = LONG;
1545
1546 if (val == -1 && PyErr_Occurred()) {
1547 /* out of range for int pickling */
1548 PyErr_Clear();
1549 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001550 else
1551#if SIZEOF_LONG > 4
1552 if (val <= 0x7fffffffL && val >= -0x80000000L)
1553#endif
1554 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001555
1556 if (self->proto >= 2) {
1557 /* Linear-time pickling. */
1558 size_t nbits;
1559 size_t nbytes;
1560 unsigned char *pdata;
1561 char header[5];
1562 int i;
1563 int sign = _PyLong_Sign(obj);
1564
1565 if (sign == 0) {
1566 header[0] = LONG1;
1567 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001568 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001569 goto error;
1570 return 0;
1571 }
1572 nbits = _PyLong_NumBits(obj);
1573 if (nbits == (size_t)-1 && PyErr_Occurred())
1574 goto error;
1575 /* How many bytes do we need? There are nbits >> 3 full
1576 * bytes of data, and nbits & 7 leftover bits. If there
1577 * are any leftover bits, then we clearly need another
1578 * byte. Wnat's not so obvious is that we *probably*
1579 * need another byte even if there aren't any leftovers:
1580 * the most-significant bit of the most-significant byte
1581 * acts like a sign bit, and it's usually got a sense
1582 * opposite of the one we need. The exception is longs
1583 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1584 * its own 256's-complement, so has the right sign bit
1585 * even without the extra byte. That's a pain to check
1586 * for in advance, though, so we always grab an extra
1587 * byte at the start, and cut it back later if possible.
1588 */
1589 nbytes = (nbits >> 3) + 1;
1590 if (nbytes > INT_MAX) {
1591 PyErr_SetString(PyExc_OverflowError,
1592 "long too large to pickle");
1593 goto error;
1594 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001595 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001596 if (repr == NULL)
1597 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001598 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001599 i = _PyLong_AsByteArray((PyLongObject *)obj,
1600 pdata, nbytes,
1601 1 /* little endian */ , 1 /* signed */ );
1602 if (i < 0)
1603 goto error;
1604 /* If the long is negative, this may be a byte more than
1605 * needed. This is so iff the MSB is all redundant sign
1606 * bits.
1607 */
1608 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001609 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001610 pdata[nbytes - 1] == 0xff &&
1611 (pdata[nbytes - 2] & 0x80) != 0) {
1612 nbytes--;
1613 }
1614
1615 if (nbytes < 256) {
1616 header[0] = LONG1;
1617 header[1] = (unsigned char)nbytes;
1618 size = 2;
1619 }
1620 else {
1621 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001622 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 for (i = 1; i < 5; i++) {
1624 header[i] = (unsigned char)(size & 0xff);
1625 size >>= 8;
1626 }
1627 size = 5;
1628 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001629 if (_Pickler_Write(self, header, size) < 0 ||
1630 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001631 goto error;
1632 }
1633 else {
1634 char *string;
1635
Mark Dickinson8dd05142009-01-20 20:43:58 +00001636 /* proto < 2: write the repr and newline. This is quadratic-time (in
1637 the number of digits), in both directions. We add a trailing 'L'
1638 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001639
1640 repr = PyObject_Repr(obj);
1641 if (repr == NULL)
1642 goto error;
1643
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001644 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645 if (string == NULL)
1646 goto error;
1647
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001648 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1649 _Pickler_Write(self, string, size) < 0 ||
1650 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001651 goto error;
1652 }
1653
1654 if (0) {
1655 error:
1656 status = -1;
1657 }
1658 Py_XDECREF(repr);
1659
1660 return status;
1661}
1662
1663static int
1664save_float(PicklerObject *self, PyObject *obj)
1665{
1666 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1667
1668 if (self->bin) {
1669 char pdata[9];
1670 pdata[0] = BINFLOAT;
1671 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1672 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001673 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001674 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001675 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001676 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 int result = -1;
1678 char *buf = NULL;
1679 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001680
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001681 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001682 goto done;
1683
Mark Dickinson3e09f432009-04-17 08:41:23 +00001684 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001685 if (!buf) {
1686 PyErr_NoMemory();
1687 goto done;
1688 }
1689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001690 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001691 goto done;
1692
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001693 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001694 goto done;
1695
1696 result = 0;
1697done:
1698 PyMem_Free(buf);
1699 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001700 }
1701
1702 return 0;
1703}
1704
1705static int
1706save_bytes(PicklerObject *self, PyObject *obj)
1707{
1708 if (self->proto < 3) {
1709 /* Older pickle protocols do not have an opcode for pickling bytes
1710 objects. Therefore, we need to fake the copy protocol (i.e.,
1711 the __reduce__ method) to permit bytes object unpickling. */
1712 PyObject *reduce_value = NULL;
1713 PyObject *bytelist = NULL;
1714 int status;
1715
1716 bytelist = PySequence_List(obj);
1717 if (bytelist == NULL)
1718 return -1;
1719
1720 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1721 bytelist);
1722 if (reduce_value == NULL) {
1723 Py_DECREF(bytelist);
1724 return -1;
1725 }
1726
1727 /* save_reduce() will memoize the object automatically. */
1728 status = save_reduce(self, reduce_value, obj);
1729 Py_DECREF(reduce_value);
1730 Py_DECREF(bytelist);
1731 return status;
1732 }
1733 else {
1734 Py_ssize_t size;
1735 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001736 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001737
1738 size = PyBytes_Size(obj);
1739 if (size < 0)
1740 return -1;
1741
1742 if (size < 256) {
1743 header[0] = SHORT_BINBYTES;
1744 header[1] = (unsigned char)size;
1745 len = 2;
1746 }
1747 else if (size <= 0xffffffffL) {
1748 header[0] = BINBYTES;
1749 header[1] = (unsigned char)(size & 0xff);
1750 header[2] = (unsigned char)((size >> 8) & 0xff);
1751 header[3] = (unsigned char)((size >> 16) & 0xff);
1752 header[4] = (unsigned char)((size >> 24) & 0xff);
1753 len = 5;
1754 }
1755 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001756 PyErr_SetString(PyExc_OverflowError,
1757 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001758 return -1; /* string too large */
1759 }
1760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001761 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001762 return -1;
1763
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001764 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001765 return -1;
1766
1767 if (memo_put(self, obj) < 0)
1768 return -1;
1769
1770 return 0;
1771 }
1772}
1773
1774/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1775 backslash and newline characters to \uXXXX escapes. */
1776static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001777raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778{
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001779 static const char *hexdigits = "0123456789abcdef";
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780 PyObject *repr, *result;
1781 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001782 Py_ssize_t i, size, expandsize;
1783 void *data;
1784 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001785
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001786 if (PyUnicode_READY(obj))
1787 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001788
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001789 size = PyUnicode_GET_LENGTH(obj);
1790 data = PyUnicode_DATA(obj);
1791 kind = PyUnicode_KIND(obj);
1792 if (kind == PyUnicode_4BYTE_KIND)
1793 expandsize = 10;
1794 else
1795 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001796
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001797 if (size > PY_SSIZE_T_MAX / expandsize)
1798 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001799 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001800 if (repr == NULL)
1801 return NULL;
1802 if (size == 0)
1803 goto done;
1804
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001805 p = PyByteArray_AS_STRING(repr);
1806 for (i=0; i < size; i++) {
1807 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001808 /* Map 32-bit characters to '\Uxxxxxxxx' */
1809 if (ch >= 0x10000) {
1810 *p++ = '\\';
1811 *p++ = 'U';
1812 *p++ = hexdigits[(ch >> 28) & 0xf];
1813 *p++ = hexdigits[(ch >> 24) & 0xf];
1814 *p++ = hexdigits[(ch >> 20) & 0xf];
1815 *p++ = hexdigits[(ch >> 16) & 0xf];
1816 *p++ = hexdigits[(ch >> 12) & 0xf];
1817 *p++ = hexdigits[(ch >> 8) & 0xf];
1818 *p++ = hexdigits[(ch >> 4) & 0xf];
1819 *p++ = hexdigits[ch & 15];
1820 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001821 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001822 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001823 *p++ = '\\';
1824 *p++ = 'u';
1825 *p++ = hexdigits[(ch >> 12) & 0xf];
1826 *p++ = hexdigits[(ch >> 8) & 0xf];
1827 *p++ = hexdigits[(ch >> 4) & 0xf];
1828 *p++ = hexdigits[ch & 15];
1829 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001830 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001831 else
1832 *p++ = (char) ch;
1833 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001834 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001835
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001836done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001837 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001838 Py_DECREF(repr);
1839 return result;
1840}
1841
1842static int
1843save_unicode(PicklerObject *self, PyObject *obj)
1844{
1845 Py_ssize_t size;
1846 PyObject *encoded = NULL;
1847
1848 if (self->bin) {
1849 char pdata[5];
1850
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001851 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001852 if (encoded == NULL)
1853 goto error;
1854
1855 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001856 if (size > 0xffffffffL) {
1857 PyErr_SetString(PyExc_OverflowError,
1858 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001859 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001860 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001861
1862 pdata[0] = BINUNICODE;
1863 pdata[1] = (unsigned char)(size & 0xff);
1864 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1865 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1866 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1867
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001868 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001869 goto error;
1870
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001871 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001872 goto error;
1873 }
1874 else {
1875 const char unicode_op = UNICODE;
1876
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001877 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001878 if (encoded == NULL)
1879 goto error;
1880
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001881 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001882 goto error;
1883
1884 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001885 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001886 goto error;
1887
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001888 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001889 goto error;
1890 }
1891 if (memo_put(self, obj) < 0)
1892 goto error;
1893
1894 Py_DECREF(encoded);
1895 return 0;
1896
1897 error:
1898 Py_XDECREF(encoded);
1899 return -1;
1900}
1901
1902/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1903static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001904store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001905{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001906 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001907
1908 assert(PyTuple_Size(t) == len);
1909
1910 for (i = 0; i < len; i++) {
1911 PyObject *element = PyTuple_GET_ITEM(t, i);
1912
1913 if (element == NULL)
1914 return -1;
1915 if (save(self, element, 0) < 0)
1916 return -1;
1917 }
1918
1919 return 0;
1920}
1921
1922/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1923 * used across protocols to minimize the space needed to pickle them.
1924 * Tuples are also the only builtin immutable type that can be recursive
1925 * (a tuple can be reached from itself), and that requires some subtle
1926 * magic so that it works in all cases. IOW, this is a long routine.
1927 */
1928static int
1929save_tuple(PicklerObject *self, PyObject *obj)
1930{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001931 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001932
1933 const char mark_op = MARK;
1934 const char tuple_op = TUPLE;
1935 const char pop_op = POP;
1936 const char pop_mark_op = POP_MARK;
1937 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1938
1939 if ((len = PyTuple_Size(obj)) < 0)
1940 return -1;
1941
1942 if (len == 0) {
1943 char pdata[2];
1944
1945 if (self->proto) {
1946 pdata[0] = EMPTY_TUPLE;
1947 len = 1;
1948 }
1949 else {
1950 pdata[0] = MARK;
1951 pdata[1] = TUPLE;
1952 len = 2;
1953 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001954 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001955 return -1;
1956 return 0;
1957 }
1958
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001959 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001960 * saving the tuple elements, the tuple must be recursive, in
1961 * which case we'll pop everything we put on the stack, and fetch
1962 * its value from the memo.
1963 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001964 if (len <= 3 && self->proto >= 2) {
1965 /* Use TUPLE{1,2,3} opcodes. */
1966 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001967 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001968
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001969 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001970 /* pop the len elements */
1971 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001972 if (_Pickler_Write(self, &pop_op, 1) < 0)
1973 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001975 if (memo_get(self, obj) < 0)
1976 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001977
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978 return 0;
1979 }
1980 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001981 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1982 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001983 }
1984 goto memoize;
1985 }
1986
1987 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1988 * Generate MARK e1 e2 ... TUPLE
1989 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001990 if (_Pickler_Write(self, &mark_op, 1) < 0)
1991 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992
1993 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001995
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001996 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001997 /* pop the stack stuff we pushed */
1998 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001999 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2000 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002001 }
2002 else {
2003 /* Note that we pop one more than len, to remove
2004 * the MARK too.
2005 */
2006 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002007 if (_Pickler_Write(self, &pop_op, 1) < 0)
2008 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002009 }
2010 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002011 if (memo_get(self, obj) < 0)
2012 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002013
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 return 0;
2015 }
2016 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002017 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2018 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019 }
2020
2021 memoize:
2022 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002023 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002025 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026}
2027
2028/* iter is an iterator giving items, and we batch up chunks of
2029 * MARK item item ... item APPENDS
2030 * opcode sequences. Calling code should have arranged to first create an
2031 * empty list, or list-like object, for the APPENDS to operate on.
2032 * Returns 0 on success, <0 on error.
2033 */
2034static int
2035batch_list(PicklerObject *self, PyObject *iter)
2036{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002037 PyObject *obj = NULL;
2038 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002039 int i, n;
2040
2041 const char mark_op = MARK;
2042 const char append_op = APPEND;
2043 const char appends_op = APPENDS;
2044
2045 assert(iter != NULL);
2046
2047 /* XXX: I think this function could be made faster by avoiding the
2048 iterator interface and fetching objects directly from list using
2049 PyList_GET_ITEM.
2050 */
2051
2052 if (self->proto == 0) {
2053 /* APPENDS isn't available; do one at a time. */
2054 for (;;) {
2055 obj = PyIter_Next(iter);
2056 if (obj == NULL) {
2057 if (PyErr_Occurred())
2058 return -1;
2059 break;
2060 }
2061 i = save(self, obj, 0);
2062 Py_DECREF(obj);
2063 if (i < 0)
2064 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002065 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002066 return -1;
2067 }
2068 return 0;
2069 }
2070
2071 /* proto > 0: write in batches of BATCHSIZE. */
2072 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002073 /* Get first item */
2074 firstitem = PyIter_Next(iter);
2075 if (firstitem == NULL) {
2076 if (PyErr_Occurred())
2077 goto error;
2078
2079 /* nothing more to add */
2080 break;
2081 }
2082
2083 /* Try to get a second item */
2084 obj = PyIter_Next(iter);
2085 if (obj == NULL) {
2086 if (PyErr_Occurred())
2087 goto error;
2088
2089 /* Only one item to write */
2090 if (save(self, firstitem, 0) < 0)
2091 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002092 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002093 goto error;
2094 Py_CLEAR(firstitem);
2095 break;
2096 }
2097
2098 /* More than one item to write */
2099
2100 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002101 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002102 goto error;
2103
2104 if (save(self, firstitem, 0) < 0)
2105 goto error;
2106 Py_CLEAR(firstitem);
2107 n = 1;
2108
2109 /* Fetch and save up to BATCHSIZE items */
2110 while (obj) {
2111 if (save(self, obj, 0) < 0)
2112 goto error;
2113 Py_CLEAR(obj);
2114 n += 1;
2115
2116 if (n == BATCHSIZE)
2117 break;
2118
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002119 obj = PyIter_Next(iter);
2120 if (obj == NULL) {
2121 if (PyErr_Occurred())
2122 goto error;
2123 break;
2124 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002125 }
2126
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002127 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002128 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002129
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002130 } while (n == BATCHSIZE);
2131 return 0;
2132
2133 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002134 Py_XDECREF(firstitem);
2135 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002136 return -1;
2137}
2138
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002139/* This is a variant of batch_list() above, specialized for lists (with no
2140 * support for list subclasses). Like batch_list(), we batch up chunks of
2141 * MARK item item ... item APPENDS
2142 * opcode sequences. Calling code should have arranged to first create an
2143 * empty list, or list-like object, for the APPENDS to operate on.
2144 * Returns 0 on success, -1 on error.
2145 *
2146 * This version is considerably faster than batch_list(), if less general.
2147 *
2148 * Note that this only works for protocols > 0.
2149 */
2150static int
2151batch_list_exact(PicklerObject *self, PyObject *obj)
2152{
2153 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002154 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002155
2156 const char append_op = APPEND;
2157 const char appends_op = APPENDS;
2158 const char mark_op = MARK;
2159
2160 assert(obj != NULL);
2161 assert(self->proto > 0);
2162 assert(PyList_CheckExact(obj));
2163
2164 if (PyList_GET_SIZE(obj) == 1) {
2165 item = PyList_GET_ITEM(obj, 0);
2166 if (save(self, item, 0) < 0)
2167 return -1;
2168 if (_Pickler_Write(self, &append_op, 1) < 0)
2169 return -1;
2170 return 0;
2171 }
2172
2173 /* Write in batches of BATCHSIZE. */
2174 total = 0;
2175 do {
2176 this_batch = 0;
2177 if (_Pickler_Write(self, &mark_op, 1) < 0)
2178 return -1;
2179 while (total < PyList_GET_SIZE(obj)) {
2180 item = PyList_GET_ITEM(obj, total);
2181 if (save(self, item, 0) < 0)
2182 return -1;
2183 total++;
2184 if (++this_batch == BATCHSIZE)
2185 break;
2186 }
2187 if (_Pickler_Write(self, &appends_op, 1) < 0)
2188 return -1;
2189
2190 } while (total < PyList_GET_SIZE(obj));
2191
2192 return 0;
2193}
2194
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002195static int
2196save_list(PicklerObject *self, PyObject *obj)
2197{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002198 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002199 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002200 int status = 0;
2201
2202 if (self->fast && !fast_save_enter(self, obj))
2203 goto error;
2204
2205 /* Create an empty list. */
2206 if (self->bin) {
2207 header[0] = EMPTY_LIST;
2208 len = 1;
2209 }
2210 else {
2211 header[0] = MARK;
2212 header[1] = LIST;
2213 len = 2;
2214 }
2215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002216 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002217 goto error;
2218
2219 /* Get list length, and bow out early if empty. */
2220 if ((len = PyList_Size(obj)) < 0)
2221 goto error;
2222
2223 if (memo_put(self, obj) < 0)
2224 goto error;
2225
2226 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002227 /* Materialize the list elements. */
2228 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002229 if (Py_EnterRecursiveCall(" while pickling an object"))
2230 goto error;
2231 status = batch_list_exact(self, obj);
2232 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002233 } else {
2234 PyObject *iter = PyObject_GetIter(obj);
2235 if (iter == NULL)
2236 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002237
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002238 if (Py_EnterRecursiveCall(" while pickling an object")) {
2239 Py_DECREF(iter);
2240 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002241 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002242 status = batch_list(self, iter);
2243 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002244 Py_DECREF(iter);
2245 }
2246 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002247 if (0) {
2248 error:
2249 status = -1;
2250 }
2251
2252 if (self->fast && !fast_save_leave(self, obj))
2253 status = -1;
2254
2255 return status;
2256}
2257
2258/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2259 * MARK key value ... key value SETITEMS
2260 * opcode sequences. Calling code should have arranged to first create an
2261 * empty dict, or dict-like object, for the SETITEMS to operate on.
2262 * Returns 0 on success, <0 on error.
2263 *
2264 * This is very much like batch_list(). The difference between saving
2265 * elements directly, and picking apart two-tuples, is so long-winded at
2266 * the C level, though, that attempts to combine these routines were too
2267 * ugly to bear.
2268 */
2269static int
2270batch_dict(PicklerObject *self, PyObject *iter)
2271{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002272 PyObject *obj = NULL;
2273 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002274 int i, n;
2275
2276 const char mark_op = MARK;
2277 const char setitem_op = SETITEM;
2278 const char setitems_op = SETITEMS;
2279
2280 assert(iter != NULL);
2281
2282 if (self->proto == 0) {
2283 /* SETITEMS isn't available; do one at a time. */
2284 for (;;) {
2285 obj = PyIter_Next(iter);
2286 if (obj == NULL) {
2287 if (PyErr_Occurred())
2288 return -1;
2289 break;
2290 }
2291 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2292 PyErr_SetString(PyExc_TypeError, "dict items "
2293 "iterator must return 2-tuples");
2294 return -1;
2295 }
2296 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2297 if (i >= 0)
2298 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2299 Py_DECREF(obj);
2300 if (i < 0)
2301 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002302 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002303 return -1;
2304 }
2305 return 0;
2306 }
2307
2308 /* proto > 0: write in batches of BATCHSIZE. */
2309 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002310 /* Get first item */
2311 firstitem = PyIter_Next(iter);
2312 if (firstitem == NULL) {
2313 if (PyErr_Occurred())
2314 goto error;
2315
2316 /* nothing more to add */
2317 break;
2318 }
2319 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2320 PyErr_SetString(PyExc_TypeError, "dict items "
2321 "iterator must return 2-tuples");
2322 goto error;
2323 }
2324
2325 /* Try to get a second item */
2326 obj = PyIter_Next(iter);
2327 if (obj == NULL) {
2328 if (PyErr_Occurred())
2329 goto error;
2330
2331 /* Only one item to write */
2332 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2333 goto error;
2334 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2335 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002336 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002337 goto error;
2338 Py_CLEAR(firstitem);
2339 break;
2340 }
2341
2342 /* More than one item to write */
2343
2344 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002345 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002346 goto error;
2347
2348 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2349 goto error;
2350 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2351 goto error;
2352 Py_CLEAR(firstitem);
2353 n = 1;
2354
2355 /* Fetch and save up to BATCHSIZE items */
2356 while (obj) {
2357 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2358 PyErr_SetString(PyExc_TypeError, "dict items "
2359 "iterator must return 2-tuples");
2360 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002361 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002362 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2363 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2364 goto error;
2365 Py_CLEAR(obj);
2366 n += 1;
2367
2368 if (n == BATCHSIZE)
2369 break;
2370
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002371 obj = PyIter_Next(iter);
2372 if (obj == NULL) {
2373 if (PyErr_Occurred())
2374 goto error;
2375 break;
2376 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002377 }
2378
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002379 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002380 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002381
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002382 } while (n == BATCHSIZE);
2383 return 0;
2384
2385 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002386 Py_XDECREF(firstitem);
2387 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002388 return -1;
2389}
2390
Collin Winter5c9b02d2009-05-25 05:43:30 +00002391/* This is a variant of batch_dict() above that specializes for dicts, with no
2392 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2393 * MARK key value ... key value SETITEMS
2394 * opcode sequences. Calling code should have arranged to first create an
2395 * empty dict, or dict-like object, for the SETITEMS to operate on.
2396 * Returns 0 on success, -1 on error.
2397 *
2398 * Note that this currently doesn't work for protocol 0.
2399 */
2400static int
2401batch_dict_exact(PicklerObject *self, PyObject *obj)
2402{
2403 PyObject *key = NULL, *value = NULL;
2404 int i;
2405 Py_ssize_t dict_size, ppos = 0;
2406
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002407 const char mark_op = MARK;
2408 const char setitem_op = SETITEM;
2409 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002410
2411 assert(obj != NULL);
2412 assert(self->proto > 0);
2413
2414 dict_size = PyDict_Size(obj);
2415
2416 /* Special-case len(d) == 1 to save space. */
2417 if (dict_size == 1) {
2418 PyDict_Next(obj, &ppos, &key, &value);
2419 if (save(self, key, 0) < 0)
2420 return -1;
2421 if (save(self, value, 0) < 0)
2422 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002423 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002424 return -1;
2425 return 0;
2426 }
2427
2428 /* Write in batches of BATCHSIZE. */
2429 do {
2430 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002431 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002432 return -1;
2433 while (PyDict_Next(obj, &ppos, &key, &value)) {
2434 if (save(self, key, 0) < 0)
2435 return -1;
2436 if (save(self, value, 0) < 0)
2437 return -1;
2438 if (++i == BATCHSIZE)
2439 break;
2440 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002441 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002442 return -1;
2443 if (PyDict_Size(obj) != dict_size) {
2444 PyErr_Format(
2445 PyExc_RuntimeError,
2446 "dictionary changed size during iteration");
2447 return -1;
2448 }
2449
2450 } while (i == BATCHSIZE);
2451 return 0;
2452}
2453
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002454static int
2455save_dict(PicklerObject *self, PyObject *obj)
2456{
2457 PyObject *items, *iter;
2458 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002459 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002460 int status = 0;
2461
2462 if (self->fast && !fast_save_enter(self, obj))
2463 goto error;
2464
2465 /* Create an empty dict. */
2466 if (self->bin) {
2467 header[0] = EMPTY_DICT;
2468 len = 1;
2469 }
2470 else {
2471 header[0] = MARK;
2472 header[1] = DICT;
2473 len = 2;
2474 }
2475
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002476 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002477 goto error;
2478
2479 /* Get dict size, and bow out early if empty. */
2480 if ((len = PyDict_Size(obj)) < 0)
2481 goto error;
2482
2483 if (memo_put(self, obj) < 0)
2484 goto error;
2485
2486 if (len != 0) {
2487 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002488 if (PyDict_CheckExact(obj) && self->proto > 0) {
2489 /* We can take certain shortcuts if we know this is a dict and
2490 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002491 if (Py_EnterRecursiveCall(" while pickling an object"))
2492 goto error;
2493 status = batch_dict_exact(self, obj);
2494 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002495 } else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002496 _Py_identifier(items);
2497
2498 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002499 if (items == NULL)
2500 goto error;
2501 iter = PyObject_GetIter(items);
2502 Py_DECREF(items);
2503 if (iter == NULL)
2504 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002505 if (Py_EnterRecursiveCall(" while pickling an object")) {
2506 Py_DECREF(iter);
2507 goto error;
2508 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002509 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002510 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002511 Py_DECREF(iter);
2512 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002513 }
2514
2515 if (0) {
2516 error:
2517 status = -1;
2518 }
2519
2520 if (self->fast && !fast_save_leave(self, obj))
2521 status = -1;
2522
2523 return status;
2524}
2525
2526static int
2527save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2528{
2529 static PyObject *name_str = NULL;
2530 PyObject *global_name = NULL;
2531 PyObject *module_name = NULL;
2532 PyObject *module = NULL;
2533 PyObject *cls;
2534 int status = 0;
2535
2536 const char global_op = GLOBAL;
2537
2538 if (name_str == NULL) {
2539 name_str = PyUnicode_InternFromString("__name__");
2540 if (name_str == NULL)
2541 goto error;
2542 }
2543
2544 if (name) {
2545 global_name = name;
2546 Py_INCREF(global_name);
2547 }
2548 else {
2549 global_name = PyObject_GetAttr(obj, name_str);
2550 if (global_name == NULL)
2551 goto error;
2552 }
2553
2554 module_name = whichmodule(obj, global_name);
2555 if (module_name == NULL)
2556 goto error;
2557
2558 /* XXX: Change to use the import C API directly with level=0 to disallow
2559 relative imports.
2560
2561 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2562 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2563 custom import functions (IMHO, this would be a nice security
2564 feature). The import C API would need to be extended to support the
2565 extra parameters of __import__ to fix that. */
2566 module = PyImport_Import(module_name);
2567 if (module == NULL) {
2568 PyErr_Format(PicklingError,
2569 "Can't pickle %R: import of module %R failed",
2570 obj, module_name);
2571 goto error;
2572 }
2573 cls = PyObject_GetAttr(module, global_name);
2574 if (cls == NULL) {
2575 PyErr_Format(PicklingError,
2576 "Can't pickle %R: attribute lookup %S.%S failed",
2577 obj, module_name, global_name);
2578 goto error;
2579 }
2580 if (cls != obj) {
2581 Py_DECREF(cls);
2582 PyErr_Format(PicklingError,
2583 "Can't pickle %R: it's not the same object as %S.%S",
2584 obj, module_name, global_name);
2585 goto error;
2586 }
2587 Py_DECREF(cls);
2588
2589 if (self->proto >= 2) {
2590 /* See whether this is in the extension registry, and if
2591 * so generate an EXT opcode.
2592 */
2593 PyObject *code_obj; /* extension code as Python object */
2594 long code; /* extension code as C value */
2595 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002596 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002597
2598 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2599 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2600 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2601 /* The object is not registered in the extension registry.
2602 This is the most likely code path. */
2603 if (code_obj == NULL)
2604 goto gen_global;
2605
2606 /* XXX: pickle.py doesn't check neither the type, nor the range
2607 of the value returned by the extension_registry. It should for
2608 consistency. */
2609
2610 /* Verify code_obj has the right type and value. */
2611 if (!PyLong_Check(code_obj)) {
2612 PyErr_Format(PicklingError,
2613 "Can't pickle %R: extension code %R isn't an integer",
2614 obj, code_obj);
2615 goto error;
2616 }
2617 code = PyLong_AS_LONG(code_obj);
2618 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002619 if (!PyErr_Occurred())
2620 PyErr_Format(PicklingError,
2621 "Can't pickle %R: extension code %ld is out of range",
2622 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002623 goto error;
2624 }
2625
2626 /* Generate an EXT opcode. */
2627 if (code <= 0xff) {
2628 pdata[0] = EXT1;
2629 pdata[1] = (unsigned char)code;
2630 n = 2;
2631 }
2632 else if (code <= 0xffff) {
2633 pdata[0] = EXT2;
2634 pdata[1] = (unsigned char)(code & 0xff);
2635 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2636 n = 3;
2637 }
2638 else {
2639 pdata[0] = EXT4;
2640 pdata[1] = (unsigned char)(code & 0xff);
2641 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2642 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2643 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2644 n = 5;
2645 }
2646
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002647 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002648 goto error;
2649 }
2650 else {
2651 /* Generate a normal global opcode if we are using a pickle
2652 protocol <= 2, or if the object is not registered in the
2653 extension registry. */
2654 PyObject *encoded;
2655 PyObject *(*unicode_encoder)(PyObject *);
2656
2657 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002658 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002659 goto error;
2660
2661 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2662 the module name and the global name using UTF-8. We do so only when
2663 we are using the pickle protocol newer than version 3. This is to
2664 ensure compatibility with older Unpickler running on Python 2.x. */
2665 if (self->proto >= 3) {
2666 unicode_encoder = PyUnicode_AsUTF8String;
2667 }
2668 else {
2669 unicode_encoder = PyUnicode_AsASCIIString;
2670 }
2671
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002672 /* For protocol < 3 and if the user didn't request against doing so,
2673 we convert module names to the old 2.x module names. */
2674 if (self->fix_imports) {
2675 PyObject *key;
2676 PyObject *item;
2677
2678 key = PyTuple_Pack(2, module_name, global_name);
2679 if (key == NULL)
2680 goto error;
2681 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2682 Py_DECREF(key);
2683 if (item) {
2684 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2685 PyErr_Format(PyExc_RuntimeError,
2686 "_compat_pickle.REVERSE_NAME_MAPPING values "
2687 "should be 2-tuples, not %.200s",
2688 Py_TYPE(item)->tp_name);
2689 goto error;
2690 }
2691 Py_CLEAR(module_name);
2692 Py_CLEAR(global_name);
2693 module_name = PyTuple_GET_ITEM(item, 0);
2694 global_name = PyTuple_GET_ITEM(item, 1);
2695 if (!PyUnicode_Check(module_name) ||
2696 !PyUnicode_Check(global_name)) {
2697 PyErr_Format(PyExc_RuntimeError,
2698 "_compat_pickle.REVERSE_NAME_MAPPING values "
2699 "should be pairs of str, not (%.200s, %.200s)",
2700 Py_TYPE(module_name)->tp_name,
2701 Py_TYPE(global_name)->tp_name);
2702 goto error;
2703 }
2704 Py_INCREF(module_name);
2705 Py_INCREF(global_name);
2706 }
2707 else if (PyErr_Occurred()) {
2708 goto error;
2709 }
2710
2711 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2712 if (item) {
2713 if (!PyUnicode_Check(item)) {
2714 PyErr_Format(PyExc_RuntimeError,
2715 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2716 "should be strings, not %.200s",
2717 Py_TYPE(item)->tp_name);
2718 goto error;
2719 }
2720 Py_CLEAR(module_name);
2721 module_name = item;
2722 Py_INCREF(module_name);
2723 }
2724 else if (PyErr_Occurred()) {
2725 goto error;
2726 }
2727 }
2728
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002729 /* Save the name of the module. */
2730 encoded = unicode_encoder(module_name);
2731 if (encoded == NULL) {
2732 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2733 PyErr_Format(PicklingError,
2734 "can't pickle module identifier '%S' using "
2735 "pickle protocol %i", module_name, self->proto);
2736 goto error;
2737 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002738 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002739 PyBytes_GET_SIZE(encoded)) < 0) {
2740 Py_DECREF(encoded);
2741 goto error;
2742 }
2743 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002744 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002745 goto error;
2746
2747 /* Save the name of the module. */
2748 encoded = unicode_encoder(global_name);
2749 if (encoded == NULL) {
2750 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2751 PyErr_Format(PicklingError,
2752 "can't pickle global identifier '%S' using "
2753 "pickle protocol %i", global_name, self->proto);
2754 goto error;
2755 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002756 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002757 PyBytes_GET_SIZE(encoded)) < 0) {
2758 Py_DECREF(encoded);
2759 goto error;
2760 }
2761 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002762 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002763 goto error;
2764
2765 /* Memoize the object. */
2766 if (memo_put(self, obj) < 0)
2767 goto error;
2768 }
2769
2770 if (0) {
2771 error:
2772 status = -1;
2773 }
2774 Py_XDECREF(module_name);
2775 Py_XDECREF(global_name);
2776 Py_XDECREF(module);
2777
2778 return status;
2779}
2780
2781static int
2782save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2783{
2784 PyObject *pid = NULL;
2785 int status = 0;
2786
2787 const char persid_op = PERSID;
2788 const char binpersid_op = BINPERSID;
2789
2790 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002791 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002792 if (pid == NULL)
2793 return -1;
2794
2795 if (pid != Py_None) {
2796 if (self->bin) {
2797 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002798 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002799 goto error;
2800 }
2801 else {
2802 PyObject *pid_str = NULL;
2803 char *pid_ascii_bytes;
2804 Py_ssize_t size;
2805
2806 pid_str = PyObject_Str(pid);
2807 if (pid_str == NULL)
2808 goto error;
2809
2810 /* XXX: Should it check whether the persistent id only contains
2811 ASCII characters? And what if the pid contains embedded
2812 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002813 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002814 Py_DECREF(pid_str);
2815 if (pid_ascii_bytes == NULL)
2816 goto error;
2817
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002818 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2819 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2820 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002821 goto error;
2822 }
2823 status = 1;
2824 }
2825
2826 if (0) {
2827 error:
2828 status = -1;
2829 }
2830 Py_XDECREF(pid);
2831
2832 return status;
2833}
2834
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002835static PyObject *
2836get_class(PyObject *obj)
2837{
2838 PyObject *cls;
2839 static PyObject *str_class;
2840
2841 if (str_class == NULL) {
2842 str_class = PyUnicode_InternFromString("__class__");
2843 if (str_class == NULL)
2844 return NULL;
2845 }
2846 cls = PyObject_GetAttr(obj, str_class);
2847 if (cls == NULL) {
2848 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2849 PyErr_Clear();
2850 cls = (PyObject *) Py_TYPE(obj);
2851 Py_INCREF(cls);
2852 }
2853 }
2854 return cls;
2855}
2856
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002857/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2858 * appropriate __reduce__ method for obj.
2859 */
2860static int
2861save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2862{
2863 PyObject *callable;
2864 PyObject *argtup;
2865 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002866 PyObject *listitems = Py_None;
2867 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002868 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002869
2870 int use_newobj = self->proto >= 2;
2871
2872 const char reduce_op = REDUCE;
2873 const char build_op = BUILD;
2874 const char newobj_op = NEWOBJ;
2875
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002876 size = PyTuple_Size(args);
2877 if (size < 2 || size > 5) {
2878 PyErr_SetString(PicklingError, "tuple returned by "
2879 "__reduce__ must contain 2 through 5 elements");
2880 return -1;
2881 }
2882
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002883 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2884 &callable, &argtup, &state, &listitems, &dictitems))
2885 return -1;
2886
2887 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002888 PyErr_SetString(PicklingError, "first item of the tuple "
2889 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002890 return -1;
2891 }
2892 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002893 PyErr_SetString(PicklingError, "second item of the tuple "
2894 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002895 return -1;
2896 }
2897
2898 if (state == Py_None)
2899 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002900
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002901 if (listitems == Py_None)
2902 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002903 else if (!PyIter_Check(listitems)) {
2904 PyErr_Format(PicklingError, "Fourth element of tuple"
2905 "returned by __reduce__ must be an iterator, not %s",
2906 Py_TYPE(listitems)->tp_name);
2907 return -1;
2908 }
2909
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002910 if (dictitems == Py_None)
2911 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002912 else if (!PyIter_Check(dictitems)) {
2913 PyErr_Format(PicklingError, "Fifth element of tuple"
2914 "returned by __reduce__ must be an iterator, not %s",
2915 Py_TYPE(dictitems)->tp_name);
2916 return -1;
2917 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002918
2919 /* Protocol 2 special case: if callable's name is __newobj__, use
2920 NEWOBJ. */
2921 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002922 static PyObject *newobj_str = NULL, *name_str = NULL;
2923 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002924
2925 if (newobj_str == NULL) {
2926 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002927 name_str = PyUnicode_InternFromString("__name__");
2928 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002929 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002930 }
2931
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002932 name = PyObject_GetAttr(callable, name_str);
2933 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002934 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2935 PyErr_Clear();
2936 else
2937 return -1;
2938 use_newobj = 0;
2939 }
2940 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002941 use_newobj = PyUnicode_Check(name) &&
2942 PyUnicode_Compare(name, newobj_str) == 0;
2943 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002944 }
2945 }
2946 if (use_newobj) {
2947 PyObject *cls;
2948 PyObject *newargtup;
2949 PyObject *obj_class;
2950 int p;
2951
2952 /* Sanity checks. */
2953 if (Py_SIZE(argtup) < 1) {
2954 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2955 return -1;
2956 }
2957
2958 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002959 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002961 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002962 return -1;
2963 }
2964
2965 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002966 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002967 p = obj_class != cls; /* true iff a problem */
2968 Py_DECREF(obj_class);
2969 if (p) {
2970 PyErr_SetString(PicklingError, "args[0] from "
2971 "__newobj__ args has the wrong class");
2972 return -1;
2973 }
2974 }
2975 /* XXX: These calls save() are prone to infinite recursion. Imagine
2976 what happen if the value returned by the __reduce__() method of
2977 some extension type contains another object of the same type. Ouch!
2978
2979 Here is a quick example, that I ran into, to illustrate what I
2980 mean:
2981
2982 >>> import pickle, copyreg
2983 >>> copyreg.dispatch_table.pop(complex)
2984 >>> pickle.dumps(1+2j)
2985 Traceback (most recent call last):
2986 ...
2987 RuntimeError: maximum recursion depth exceeded
2988
2989 Removing the complex class from copyreg.dispatch_table made the
2990 __reduce_ex__() method emit another complex object:
2991
2992 >>> (1+1j).__reduce_ex__(2)
2993 (<function __newobj__ at 0xb7b71c3c>,
2994 (<class 'complex'>, (1+1j)), None, None, None)
2995
2996 Thus when save() was called on newargstup (the 2nd item) recursion
2997 ensued. Of course, the bug was in the complex class which had a
2998 broken __getnewargs__() that emitted another complex object. But,
2999 the point, here, is it is quite easy to end up with a broken reduce
3000 function. */
3001
3002 /* Save the class and its __new__ arguments. */
3003 if (save(self, cls, 0) < 0)
3004 return -1;
3005
3006 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3007 if (newargtup == NULL)
3008 return -1;
3009
3010 p = save(self, newargtup, 0);
3011 Py_DECREF(newargtup);
3012 if (p < 0)
3013 return -1;
3014
3015 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003016 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003017 return -1;
3018 }
3019 else { /* Not using NEWOBJ. */
3020 if (save(self, callable, 0) < 0 ||
3021 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003022 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003023 return -1;
3024 }
3025
3026 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3027 the caller do not want to memoize the object. Not particularly useful,
3028 but that is to mimic the behavior save_reduce() in pickle.py when
3029 obj is None. */
3030 if (obj && memo_put(self, obj) < 0)
3031 return -1;
3032
3033 if (listitems && batch_list(self, listitems) < 0)
3034 return -1;
3035
3036 if (dictitems && batch_dict(self, dictitems) < 0)
3037 return -1;
3038
3039 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003040 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003041 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003042 return -1;
3043 }
3044
3045 return 0;
3046}
3047
3048static int
3049save(PicklerObject *self, PyObject *obj, int pers_save)
3050{
3051 PyTypeObject *type;
3052 PyObject *reduce_func = NULL;
3053 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003054 int status = 0;
3055
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003056 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003057 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003058
3059 /* The extra pers_save argument is necessary to avoid calling save_pers()
3060 on its returned object. */
3061 if (!pers_save && self->pers_func) {
3062 /* save_pers() returns:
3063 -1 to signal an error;
3064 0 if it did nothing successfully;
3065 1 if a persistent id was saved.
3066 */
3067 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3068 goto done;
3069 }
3070
3071 type = Py_TYPE(obj);
3072
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003073 /* The old cPickle had an optimization that used switch-case statement
3074 dispatching on the first letter of the type name. This has was removed
3075 since benchmarks shown that this optimization was actually slowing
3076 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003077
3078 /* Atom types; these aren't memoized, so don't check the memo. */
3079
3080 if (obj == Py_None) {
3081 status = save_none(self, obj);
3082 goto done;
3083 }
3084 else if (obj == Py_False || obj == Py_True) {
3085 status = save_bool(self, obj);
3086 goto done;
3087 }
3088 else if (type == &PyLong_Type) {
3089 status = save_long(self, obj);
3090 goto done;
3091 }
3092 else if (type == &PyFloat_Type) {
3093 status = save_float(self, obj);
3094 goto done;
3095 }
3096
3097 /* Check the memo to see if it has the object. If so, generate
3098 a GET (or BINGET) opcode, instead of pickling the object
3099 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003100 if (PyMemoTable_Get(self->memo, obj)) {
3101 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003102 goto error;
3103 goto done;
3104 }
3105
3106 if (type == &PyBytes_Type) {
3107 status = save_bytes(self, obj);
3108 goto done;
3109 }
3110 else if (type == &PyUnicode_Type) {
3111 status = save_unicode(self, obj);
3112 goto done;
3113 }
3114 else if (type == &PyDict_Type) {
3115 status = save_dict(self, obj);
3116 goto done;
3117 }
3118 else if (type == &PyList_Type) {
3119 status = save_list(self, obj);
3120 goto done;
3121 }
3122 else if (type == &PyTuple_Type) {
3123 status = save_tuple(self, obj);
3124 goto done;
3125 }
3126 else if (type == &PyType_Type) {
3127 status = save_global(self, obj, NULL);
3128 goto done;
3129 }
3130 else if (type == &PyFunction_Type) {
3131 status = save_global(self, obj, NULL);
3132 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3133 /* fall back to reduce */
3134 PyErr_Clear();
3135 }
3136 else {
3137 goto done;
3138 }
3139 }
3140 else if (type == &PyCFunction_Type) {
3141 status = save_global(self, obj, NULL);
3142 goto done;
3143 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003144
3145 /* XXX: This part needs some unit tests. */
3146
3147 /* Get a reduction callable, and call it. This may come from
3148 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3149 * or the object's __reduce__ method.
3150 */
3151 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3152 if (reduce_func != NULL) {
3153 /* Here, the reference count of the reduce_func object returned by
3154 PyDict_GetItem needs to be increased to be consistent with the one
3155 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3156 reduce_func at the end of the save() routine.
3157 */
3158 Py_INCREF(reduce_func);
3159 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003160 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003161 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003162 else if (PyType_IsSubtype(type, &PyType_Type)) {
3163 status = save_global(self, obj, NULL);
3164 goto done;
3165 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003166 else {
3167 static PyObject *reduce_str = NULL;
3168 static PyObject *reduce_ex_str = NULL;
3169
3170 /* Cache the name of the reduce methods. */
3171 if (reduce_str == NULL) {
3172 reduce_str = PyUnicode_InternFromString("__reduce__");
3173 if (reduce_str == NULL)
3174 goto error;
3175 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3176 if (reduce_ex_str == NULL)
3177 goto error;
3178 }
3179
3180 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3181 automatically defined as __reduce__. While this is convenient, this
3182 make it impossible to know which method was actually called. Of
3183 course, this is not a big deal. But still, it would be nice to let
3184 the user know which method was called when something go
3185 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3186 don't actually have to check for a __reduce__ method. */
3187
3188 /* Check for a __reduce_ex__ method. */
3189 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3190 if (reduce_func != NULL) {
3191 PyObject *proto;
3192 proto = PyLong_FromLong(self->proto);
3193 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003194 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003195 }
3196 }
3197 else {
3198 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3199 PyErr_Clear();
3200 else
3201 goto error;
3202 /* Check for a __reduce__ method. */
3203 reduce_func = PyObject_GetAttr(obj, reduce_str);
3204 if (reduce_func != NULL) {
3205 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3206 }
3207 else {
3208 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3209 type->tp_name, obj);
3210 goto error;
3211 }
3212 }
3213 }
3214
3215 if (reduce_value == NULL)
3216 goto error;
3217
3218 if (PyUnicode_Check(reduce_value)) {
3219 status = save_global(self, obj, reduce_value);
3220 goto done;
3221 }
3222
3223 if (!PyTuple_Check(reduce_value)) {
3224 PyErr_SetString(PicklingError,
3225 "__reduce__ must return a string or tuple");
3226 goto error;
3227 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003228
3229 status = save_reduce(self, reduce_value, obj);
3230
3231 if (0) {
3232 error:
3233 status = -1;
3234 }
3235 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003236 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003237 Py_XDECREF(reduce_func);
3238 Py_XDECREF(reduce_value);
3239
3240 return status;
3241}
3242
3243static int
3244dump(PicklerObject *self, PyObject *obj)
3245{
3246 const char stop_op = STOP;
3247
3248 if (self->proto >= 2) {
3249 char header[2];
3250
3251 header[0] = PROTO;
3252 assert(self->proto >= 0 && self->proto < 256);
3253 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003254 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003255 return -1;
3256 }
3257
3258 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003259 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003260 return -1;
3261
3262 return 0;
3263}
3264
3265PyDoc_STRVAR(Pickler_clear_memo_doc,
3266"clear_memo() -> None. Clears the pickler's \"memo\"."
3267"\n"
3268"The memo is the data structure that remembers which objects the\n"
3269"pickler has already seen, so that shared or recursive objects are\n"
3270"pickled by reference and not by value. This method is useful when\n"
3271"re-using picklers.");
3272
3273static PyObject *
3274Pickler_clear_memo(PicklerObject *self)
3275{
3276 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003277 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003278
3279 Py_RETURN_NONE;
3280}
3281
3282PyDoc_STRVAR(Pickler_dump_doc,
3283"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3284
3285static PyObject *
3286Pickler_dump(PicklerObject *self, PyObject *args)
3287{
3288 PyObject *obj;
3289
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003290 /* Check whether the Pickler was initialized correctly (issue3664).
3291 Developers often forget to call __init__() in their subclasses, which
3292 would trigger a segfault without this check. */
3293 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003294 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003295 "Pickler.__init__() was not called by %s.__init__()",
3296 Py_TYPE(self)->tp_name);
3297 return NULL;
3298 }
3299
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003300 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3301 return NULL;
3302
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003303 if (_Pickler_ClearBuffer(self) < 0)
3304 return NULL;
3305
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003306 if (dump(self, obj) < 0)
3307 return NULL;
3308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003309 if (_Pickler_FlushToFile(self) < 0)
3310 return NULL;
3311
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003312 Py_RETURN_NONE;
3313}
3314
3315static struct PyMethodDef Pickler_methods[] = {
3316 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3317 Pickler_dump_doc},
3318 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3319 Pickler_clear_memo_doc},
3320 {NULL, NULL} /* sentinel */
3321};
3322
3323static void
3324Pickler_dealloc(PicklerObject *self)
3325{
3326 PyObject_GC_UnTrack(self);
3327
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003328 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003329 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003330 Py_XDECREF(self->pers_func);
3331 Py_XDECREF(self->arg);
3332 Py_XDECREF(self->fast_memo);
3333
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003334 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003335
3336 Py_TYPE(self)->tp_free((PyObject *)self);
3337}
3338
3339static int
3340Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3341{
3342 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003343 Py_VISIT(self->pers_func);
3344 Py_VISIT(self->arg);
3345 Py_VISIT(self->fast_memo);
3346 return 0;
3347}
3348
3349static int
3350Pickler_clear(PicklerObject *self)
3351{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003352 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003353 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003354 Py_CLEAR(self->pers_func);
3355 Py_CLEAR(self->arg);
3356 Py_CLEAR(self->fast_memo);
3357
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003358 if (self->memo != NULL) {
3359 PyMemoTable *memo = self->memo;
3360 self->memo = NULL;
3361 PyMemoTable_Del(memo);
3362 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003363 return 0;
3364}
3365
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003366
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367PyDoc_STRVAR(Pickler_doc,
3368"Pickler(file, protocol=None)"
3369"\n"
3370"This takes a binary file for writing a pickle data stream.\n"
3371"\n"
3372"The optional protocol argument tells the pickler to use the\n"
3373"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3374"protocol is 3; a backward-incompatible protocol designed for\n"
3375"Python 3.0.\n"
3376"\n"
3377"Specifying a negative protocol version selects the highest\n"
3378"protocol version supported. The higher the protocol used, the\n"
3379"more recent the version of Python needed to read the pickle\n"
3380"produced.\n"
3381"\n"
3382"The file argument must have a write() method that accepts a single\n"
3383"bytes argument. It can thus be a file object opened for binary\n"
3384"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003385"meets this interface.\n"
3386"\n"
3387"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3388"map the new Python 3.x names to the old module names used in Python\n"
3389"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003390
3391static int
3392Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3393{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003394 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003395 PyObject *file;
3396 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003397 PyObject *fix_imports = Py_True;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003398 _Py_identifier(persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003399
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003400 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003401 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003402 return -1;
3403
3404 /* In case of multiple __init__() calls, clear previous content. */
3405 if (self->write != NULL)
3406 (void)Pickler_clear(self);
3407
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003408 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3409 return -1;
3410
3411 if (_Pickler_SetOutputStream(self, file) < 0)
3412 return -1;
3413
3414 /* memo and output_buffer may have already been created in _Pickler_New */
3415 if (self->memo == NULL) {
3416 self->memo = PyMemoTable_New();
3417 if (self->memo == NULL)
3418 return -1;
3419 }
3420 self->output_len = 0;
3421 if (self->output_buffer == NULL) {
3422 self->max_output_len = WRITE_BUF_SIZE;
3423 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3424 self->max_output_len);
3425 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003426 return -1;
3427 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003428
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003429 self->arg = NULL;
3430 self->fast = 0;
3431 self->fast_nesting = 0;
3432 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003433 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003434 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3435 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3436 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003437 if (self->pers_func == NULL)
3438 return -1;
3439 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003440 return 0;
3441}
3442
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003443/* Define a proxy object for the Pickler's internal memo object. This is to
3444 * avoid breaking code like:
3445 * pickler.memo.clear()
3446 * and
3447 * pickler.memo = saved_memo
3448 * Is this a good idea? Not really, but we don't want to break code that uses
3449 * it. Note that we don't implement the entire mapping API here. This is
3450 * intentional, as these should be treated as black-box implementation details.
3451 */
3452
3453typedef struct {
3454 PyObject_HEAD
3455 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3456} PicklerMemoProxyObject;
3457
3458PyDoc_STRVAR(pmp_clear_doc,
3459"memo.clear() -> None. Remove all items from memo.");
3460
3461static PyObject *
3462pmp_clear(PicklerMemoProxyObject *self)
3463{
3464 if (self->pickler->memo)
3465 PyMemoTable_Clear(self->pickler->memo);
3466 Py_RETURN_NONE;
3467}
3468
3469PyDoc_STRVAR(pmp_copy_doc,
3470"memo.copy() -> new_memo. Copy the memo to a new object.");
3471
3472static PyObject *
3473pmp_copy(PicklerMemoProxyObject *self)
3474{
3475 Py_ssize_t i;
3476 PyMemoTable *memo;
3477 PyObject *new_memo = PyDict_New();
3478 if (new_memo == NULL)
3479 return NULL;
3480
3481 memo = self->pickler->memo;
3482 for (i = 0; i < memo->mt_allocated; ++i) {
3483 PyMemoEntry entry = memo->mt_table[i];
3484 if (entry.me_key != NULL) {
3485 int status;
3486 PyObject *key, *value;
3487
3488 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003489 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003490
3491 if (key == NULL || value == NULL) {
3492 Py_XDECREF(key);
3493 Py_XDECREF(value);
3494 goto error;
3495 }
3496 status = PyDict_SetItem(new_memo, key, value);
3497 Py_DECREF(key);
3498 Py_DECREF(value);
3499 if (status < 0)
3500 goto error;
3501 }
3502 }
3503 return new_memo;
3504
3505 error:
3506 Py_XDECREF(new_memo);
3507 return NULL;
3508}
3509
3510PyDoc_STRVAR(pmp_reduce_doc,
3511"memo.__reduce__(). Pickling support.");
3512
3513static PyObject *
3514pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3515{
3516 PyObject *reduce_value, *dict_args;
3517 PyObject *contents = pmp_copy(self);
3518 if (contents == NULL)
3519 return NULL;
3520
3521 reduce_value = PyTuple_New(2);
3522 if (reduce_value == NULL) {
3523 Py_DECREF(contents);
3524 return NULL;
3525 }
3526 dict_args = PyTuple_New(1);
3527 if (dict_args == NULL) {
3528 Py_DECREF(contents);
3529 Py_DECREF(reduce_value);
3530 return NULL;
3531 }
3532 PyTuple_SET_ITEM(dict_args, 0, contents);
3533 Py_INCREF((PyObject *)&PyDict_Type);
3534 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3535 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3536 return reduce_value;
3537}
3538
3539static PyMethodDef picklerproxy_methods[] = {
3540 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3541 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3542 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3543 {NULL, NULL} /* sentinel */
3544};
3545
3546static void
3547PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3548{
3549 PyObject_GC_UnTrack(self);
3550 Py_XDECREF(self->pickler);
3551 PyObject_GC_Del((PyObject *)self);
3552}
3553
3554static int
3555PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3556 visitproc visit, void *arg)
3557{
3558 Py_VISIT(self->pickler);
3559 return 0;
3560}
3561
3562static int
3563PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3564{
3565 Py_CLEAR(self->pickler);
3566 return 0;
3567}
3568
/* Type object for _pickle.PicklerMemoProxy, the object returned by the
   Pickler.memo getter.  The initializer is positional: every line fills the
   slot named in its comment.  Slots after tp_methods are omitted and are
   therefore zero-initialized. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /* tp_name */
    sizeof(PicklerMemoProxyObject),             /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
3599
3600static PyObject *
3601PicklerMemoProxy_New(PicklerObject *pickler)
3602{
3603 PicklerMemoProxyObject *self;
3604
3605 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3606 if (self == NULL)
3607 return NULL;
3608 Py_INCREF(pickler);
3609 self->pickler = pickler;
3610 PyObject_GC_Track(self);
3611 return (PyObject *)self;
3612}
3613
3614/*****************************************************************************/
3615
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003616static PyObject *
3617Pickler_get_memo(PicklerObject *self)
3618{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003619 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003620}
3621
3622static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003623Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003624{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003625 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003626
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003627 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003628 PyErr_SetString(PyExc_TypeError,
3629 "attribute deletion is not supported");
3630 return -1;
3631 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003632
3633 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3634 PicklerObject *pickler =
3635 ((PicklerMemoProxyObject *)obj)->pickler;
3636
3637 new_memo = PyMemoTable_Copy(pickler->memo);
3638 if (new_memo == NULL)
3639 return -1;
3640 }
3641 else if (PyDict_Check(obj)) {
3642 Py_ssize_t i = 0;
3643 PyObject *key, *value;
3644
3645 new_memo = PyMemoTable_New();
3646 if (new_memo == NULL)
3647 return -1;
3648
3649 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003650 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003651 PyObject *memo_obj;
3652
3653 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3654 PyErr_SetString(PyExc_TypeError,
3655 "'memo' values must be 2-item tuples");
3656 goto error;
3657 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003658 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003659 if (memo_id == -1 && PyErr_Occurred())
3660 goto error;
3661 memo_obj = PyTuple_GET_ITEM(value, 1);
3662 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3663 goto error;
3664 }
3665 }
3666 else {
3667 PyErr_Format(PyExc_TypeError,
3668 "'memo' attribute must be an PicklerMemoProxy object"
3669 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003670 return -1;
3671 }
3672
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003673 PyMemoTable_Del(self->memo);
3674 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003675
3676 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003677
3678 error:
3679 if (new_memo)
3680 PyMemoTable_Del(new_memo);
3681 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003682}
3683
3684static PyObject *
3685Pickler_get_persid(PicklerObject *self)
3686{
3687 if (self->pers_func == NULL)
3688 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3689 else
3690 Py_INCREF(self->pers_func);
3691 return self->pers_func;
3692}
3693
3694static int
3695Pickler_set_persid(PicklerObject *self, PyObject *value)
3696{
3697 PyObject *tmp;
3698
3699 if (value == NULL) {
3700 PyErr_SetString(PyExc_TypeError,
3701 "attribute deletion is not supported");
3702 return -1;
3703 }
3704 if (!PyCallable_Check(value)) {
3705 PyErr_SetString(PyExc_TypeError,
3706 "persistent_id must be a callable taking one argument");
3707 return -1;
3708 }
3709
3710 tmp = self->pers_func;
3711 Py_INCREF(value);
3712 self->pers_func = value;
3713 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3714
3715 return 0;
3716}
3717
/* Plain struct members of PicklerObject exposed as Python attributes:
   'bin' and 'fast' are both C ints read directly at the given offsets.
   The {NULL} entry terminates the table. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}
};
3723
/* Computed attributes of Pickler: each entry pairs an attribute name with
   its getter and setter functions.  The {NULL} entry terminates the
   table. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
3731
/* Type object for _pickle.Pickler.  The initializer is positional: every
   line fills the slot named in its trailing comment, so entries must not be
   reordered or removed.  Slots given as 0 are left unset here. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3774
Victor Stinner121aab42011-09-29 23:40:53 +02003775/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003776
3777 XXX: It would be nice to able to avoid Python function call overhead, by
3778 using directly the C version of find_class(), when find_class() is not
3779 overridden by a subclass. Although, this could become rather hackish. A
3780 simpler optimization would be to call the C function when self is not a
3781 subclass instance. */
3782static PyObject *
3783find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3784{
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003785 _Py_identifier(find_class);
3786
3787 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3788 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003789}
3790
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003791static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003792marker(UnpicklerObject *self)
3793{
3794 if (self->num_marks < 1) {
3795 PyErr_SetString(UnpicklingError, "could not find MARK");
3796 return -1;
3797 }
3798
3799 return self->marks[--self->num_marks];
3800}
3801
3802static int
3803load_none(UnpicklerObject *self)
3804{
3805 PDATA_APPEND(self->stack, Py_None, -1);
3806 return 0;
3807}
3808
3809static int
3810bad_readline(void)
3811{
3812 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3813 return -1;
3814}
3815
3816static int
3817load_int(UnpicklerObject *self)
3818{
3819 PyObject *value;
3820 char *endptr, *s;
3821 Py_ssize_t len;
3822 long x;
3823
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003824 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003825 return -1;
3826 if (len < 2)
3827 return bad_readline();
3828
3829 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003830 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003831 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003832 x = strtol(s, &endptr, 0);
3833
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003834 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003835 /* Hm, maybe we've got something long. Let's try reading
3836 * it as a Python long object. */
3837 errno = 0;
3838 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003839 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003840 if (value == NULL) {
3841 PyErr_SetString(PyExc_ValueError,
3842 "could not convert string to int");
3843 return -1;
3844 }
3845 }
3846 else {
3847 if (len == 3 && (x == 0 || x == 1)) {
3848 if ((value = PyBool_FromLong(x)) == NULL)
3849 return -1;
3850 }
3851 else {
3852 if ((value = PyLong_FromLong(x)) == NULL)
3853 return -1;
3854 }
3855 }
3856
3857 PDATA_PUSH(self->stack, value, -1);
3858 return 0;
3859}
3860
3861static int
3862load_bool(UnpicklerObject *self, PyObject *boolean)
3863{
3864 assert(boolean == Py_True || boolean == Py_False);
3865 PDATA_APPEND(self->stack, boolean, -1);
3866 return 0;
3867}
3868
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003869/* s contains x bytes of an unsigned little-endian integer. Return its value
3870 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3871 */
3872static Py_ssize_t
3873calc_binsize(char *bytes, int size)
3874{
3875 unsigned char *s = (unsigned char *)bytes;
3876 size_t x = 0;
3877
3878 assert(size == 4);
3879
3880 x = (size_t) s[0];
3881 x |= (size_t) s[1] << 8;
3882 x |= (size_t) s[2] << 16;
3883 x |= (size_t) s[3] << 24;
3884
3885 if (x > PY_SSIZE_T_MAX)
3886 return -1;
3887 else
3888 return (Py_ssize_t) x;
3889}
3890
/* 'bytes' points at 'size' bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no shift ever
 * moves a bit into the sign position of a signed type (undefined behaviour
 * when long is 32 bits wide).  Sign extension of the 4-byte case is then
 * done arithmetically, which gives the same result whatever the width of
 * long.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  With bit 31 set the value is ux - 2**32, computed here
     * as -(2**32 - 1 - ux) - 1 so every intermediate fits in a long.
     */
    if (size == 4 && (ux & 0x80000000UL)) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
3917
3918static int
3919load_binintx(UnpicklerObject *self, char *s, int size)
3920{
3921 PyObject *value;
3922 long x;
3923
3924 x = calc_binint(s, size);
3925
3926 if ((value = PyLong_FromLong(x)) == NULL)
3927 return -1;
3928
3929 PDATA_PUSH(self->stack, value, -1);
3930 return 0;
3931}
3932
3933static int
3934load_binint(UnpicklerObject *self)
3935{
3936 char *s;
3937
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003938 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003939 return -1;
3940
3941 return load_binintx(self, s, 4);
3942}
3943
3944static int
3945load_binint1(UnpicklerObject *self)
3946{
3947 char *s;
3948
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003949 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003950 return -1;
3951
3952 return load_binintx(self, s, 1);
3953}
3954
3955static int
3956load_binint2(UnpicklerObject *self)
3957{
3958 char *s;
3959
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003960 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003961 return -1;
3962
3963 return load_binintx(self, s, 2);
3964}
3965
3966static int
3967load_long(UnpicklerObject *self)
3968{
3969 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003970 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003971 Py_ssize_t len;
3972
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003973 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003974 return -1;
3975 if (len < 2)
3976 return bad_readline();
3977
Mark Dickinson8dd05142009-01-20 20:43:58 +00003978 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3979 the 'L' before calling PyLong_FromString. In order to maintain
3980 compatibility with Python 3.0.0, we don't actually *require*
3981 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003982 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003983 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003984 /* XXX: Should the base argument explicitly set to 10? */
3985 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003986 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003987 return -1;
3988
3989 PDATA_PUSH(self->stack, value, -1);
3990 return 0;
3991}
3992
3993/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3994 * data following.
3995 */
3996static int
3997load_counted_long(UnpicklerObject *self, int size)
3998{
3999 PyObject *value;
4000 char *nbytes;
4001 char *pdata;
4002
4003 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004004 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004005 return -1;
4006
4007 size = calc_binint(nbytes, size);
4008 if (size < 0) {
4009 /* Corrupt or hostile pickle -- we never write one like this */
4010 PyErr_SetString(UnpicklingError,
4011 "LONG pickle has negative byte count");
4012 return -1;
4013 }
4014
4015 if (size == 0)
4016 value = PyLong_FromLong(0L);
4017 else {
4018 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004019 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004020 return -1;
4021 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4022 1 /* little endian */ , 1 /* signed */ );
4023 }
4024 if (value == NULL)
4025 return -1;
4026 PDATA_PUSH(self->stack, value, -1);
4027 return 0;
4028}
4029
4030static int
4031load_float(UnpicklerObject *self)
4032{
4033 PyObject *value;
4034 char *endptr, *s;
4035 Py_ssize_t len;
4036 double d;
4037
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004038 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004039 return -1;
4040 if (len < 2)
4041 return bad_readline();
4042
4043 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004044 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4045 if (d == -1.0 && PyErr_Occurred())
4046 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004047 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004048 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4049 return -1;
4050 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004051 value = PyFloat_FromDouble(d);
4052 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004053 return -1;
4054
4055 PDATA_PUSH(self->stack, value, -1);
4056 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004057}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004058
4059static int
4060load_binfloat(UnpicklerObject *self)
4061{
4062 PyObject *value;
4063 double x;
4064 char *s;
4065
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004066 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004067 return -1;
4068
4069 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4070 if (x == -1.0 && PyErr_Occurred())
4071 return -1;
4072
4073 if ((value = PyFloat_FromDouble(x)) == NULL)
4074 return -1;
4075
4076 PDATA_PUSH(self->stack, value, -1);
4077 return 0;
4078}
4079
4080static int
4081load_string(UnpicklerObject *self)
4082{
4083 PyObject *bytes;
4084 PyObject *str = NULL;
4085 Py_ssize_t len;
4086 char *s, *p;
4087
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004088 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004089 return -1;
4090 if (len < 3)
4091 return bad_readline();
4092 if ((s = strdup(s)) == NULL) {
4093 PyErr_NoMemory();
4094 return -1;
4095 }
4096
4097 /* Strip outermost quotes */
4098 while (s[len - 1] <= ' ')
4099 len--;
4100 if (s[0] == '"' && s[len - 1] == '"') {
4101 s[len - 1] = '\0';
4102 p = s + 1;
4103 len -= 2;
4104 }
4105 else if (s[0] == '\'' && s[len - 1] == '\'') {
4106 s[len - 1] = '\0';
4107 p = s + 1;
4108 len -= 2;
4109 }
4110 else {
4111 free(s);
4112 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4113 return -1;
4114 }
4115
4116 /* Use the PyBytes API to decode the string, since that is what is used
4117 to encode, and then coerce the result to Unicode. */
4118 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4119 free(s);
4120 if (bytes == NULL)
4121 return -1;
4122 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4123 Py_DECREF(bytes);
4124 if (str == NULL)
4125 return -1;
4126
4127 PDATA_PUSH(self->stack, str, -1);
4128 return 0;
4129}
4130
4131static int
4132load_binbytes(UnpicklerObject *self)
4133{
4134 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004135 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004136 char *s;
4137
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004138 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004139 return -1;
4140
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004141 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004142 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004143 PyErr_Format(PyExc_OverflowError,
4144 "BINBYTES exceeds system's maximum size of %zd bytes",
4145 PY_SSIZE_T_MAX
4146 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004147 return -1;
4148 }
4149
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004150 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004151 return -1;
4152 bytes = PyBytes_FromStringAndSize(s, x);
4153 if (bytes == NULL)
4154 return -1;
4155
4156 PDATA_PUSH(self->stack, bytes, -1);
4157 return 0;
4158}
4159
4160static int
4161load_short_binbytes(UnpicklerObject *self)
4162{
4163 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004164 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004165 char *s;
4166
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004167 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004168 return -1;
4169
4170 x = (unsigned char)s[0];
4171
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004172 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004173 return -1;
4174
4175 bytes = PyBytes_FromStringAndSize(s, x);
4176 if (bytes == NULL)
4177 return -1;
4178
4179 PDATA_PUSH(self->stack, bytes, -1);
4180 return 0;
4181}
4182
4183static int
4184load_binstring(UnpicklerObject *self)
4185{
4186 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004187 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004188 char *s;
4189
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004190 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004191 return -1;
4192
4193 x = calc_binint(s, 4);
4194 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004195 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004196 "BINSTRING pickle has negative byte count");
4197 return -1;
4198 }
4199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004200 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004201 return -1;
4202
4203 /* Convert Python 2.x strings to unicode. */
4204 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4205 if (str == NULL)
4206 return -1;
4207
4208 PDATA_PUSH(self->stack, str, -1);
4209 return 0;
4210}
4211
4212static int
4213load_short_binstring(UnpicklerObject *self)
4214{
4215 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004216 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004217 char *s;
4218
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004219 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004220 return -1;
4221
4222 x = (unsigned char)s[0];
4223
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004224 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004225 return -1;
4226
4227 /* Convert Python 2.x strings to unicode. */
4228 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4229 if (str == NULL)
4230 return -1;
4231
4232 PDATA_PUSH(self->stack, str, -1);
4233 return 0;
4234}
4235
4236static int
4237load_unicode(UnpicklerObject *self)
4238{
4239 PyObject *str;
4240 Py_ssize_t len;
4241 char *s;
4242
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004243 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004244 return -1;
4245 if (len < 1)
4246 return bad_readline();
4247
4248 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4249 if (str == NULL)
4250 return -1;
4251
4252 PDATA_PUSH(self->stack, str, -1);
4253 return 0;
4254}
4255
4256static int
4257load_binunicode(UnpicklerObject *self)
4258{
4259 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004260 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004261 char *s;
4262
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004263 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004264 return -1;
4265
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004266 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004267 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004268 PyErr_Format(PyExc_OverflowError,
4269 "BINUNICODE exceeds system's maximum size of %zd bytes",
4270 PY_SSIZE_T_MAX
4271 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004272 return -1;
4273 }
4274
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004276 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004277 return -1;
4278
Victor Stinner485fb562010-04-13 11:07:24 +00004279 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004280 if (str == NULL)
4281 return -1;
4282
4283 PDATA_PUSH(self->stack, str, -1);
4284 return 0;
4285}
4286
4287static int
4288load_tuple(UnpicklerObject *self)
4289{
4290 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004291 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004292
4293 if ((i = marker(self)) < 0)
4294 return -1;
4295
4296 tuple = Pdata_poptuple(self->stack, i);
4297 if (tuple == NULL)
4298 return -1;
4299 PDATA_PUSH(self->stack, tuple, -1);
4300 return 0;
4301}
4302
4303static int
4304load_counted_tuple(UnpicklerObject *self, int len)
4305{
4306 PyObject *tuple;
4307
4308 tuple = PyTuple_New(len);
4309 if (tuple == NULL)
4310 return -1;
4311
4312 while (--len >= 0) {
4313 PyObject *item;
4314
4315 PDATA_POP(self->stack, item);
4316 if (item == NULL)
4317 return -1;
4318 PyTuple_SET_ITEM(tuple, len, item);
4319 }
4320 PDATA_PUSH(self->stack, tuple, -1);
4321 return 0;
4322}
4323
4324static int
4325load_empty_list(UnpicklerObject *self)
4326{
4327 PyObject *list;
4328
4329 if ((list = PyList_New(0)) == NULL)
4330 return -1;
4331 PDATA_PUSH(self->stack, list, -1);
4332 return 0;
4333}
4334
4335static int
4336load_empty_dict(UnpicklerObject *self)
4337{
4338 PyObject *dict;
4339
4340 if ((dict = PyDict_New()) == NULL)
4341 return -1;
4342 PDATA_PUSH(self->stack, dict, -1);
4343 return 0;
4344}
4345
4346static int
4347load_list(UnpicklerObject *self)
4348{
4349 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004350 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004351
4352 if ((i = marker(self)) < 0)
4353 return -1;
4354
4355 list = Pdata_poplist(self->stack, i);
4356 if (list == NULL)
4357 return -1;
4358 PDATA_PUSH(self->stack, list, -1);
4359 return 0;
4360}
4361
4362static int
4363load_dict(UnpicklerObject *self)
4364{
4365 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004366 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004367
4368 if ((i = marker(self)) < 0)
4369 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004370 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004371
4372 if ((dict = PyDict_New()) == NULL)
4373 return -1;
4374
4375 for (k = i + 1; k < j; k += 2) {
4376 key = self->stack->data[k - 1];
4377 value = self->stack->data[k];
4378 if (PyDict_SetItem(dict, key, value) < 0) {
4379 Py_DECREF(dict);
4380 return -1;
4381 }
4382 }
4383 Pdata_clear(self->stack, i);
4384 PDATA_PUSH(self->stack, dict, -1);
4385 return 0;
4386}
4387
4388static PyObject *
4389instantiate(PyObject *cls, PyObject *args)
4390{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004391 PyObject *result = NULL;
4392 /* Caller must assure args are a tuple. Normally, args come from
4393 Pdata_poptuple which packs objects from the top of the stack
4394 into a newly created tuple. */
4395 assert(PyTuple_Check(args));
4396 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4397 PyObject_HasAttrString(cls, "__getinitargs__")) {
4398 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004399 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004400 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004401 _Py_identifier(__new__);
4402
4403 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004404 }
4405 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004406}
4407
4408static int
4409load_obj(UnpicklerObject *self)
4410{
4411 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004412 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004413
4414 if ((i = marker(self)) < 0)
4415 return -1;
4416
4417 args = Pdata_poptuple(self->stack, i + 1);
4418 if (args == NULL)
4419 return -1;
4420
4421 PDATA_POP(self->stack, cls);
4422 if (cls) {
4423 obj = instantiate(cls, args);
4424 Py_DECREF(cls);
4425 }
4426 Py_DECREF(args);
4427 if (obj == NULL)
4428 return -1;
4429
4430 PDATA_PUSH(self->stack, obj, -1);
4431 return 0;
4432}
4433
4434static int
4435load_inst(UnpicklerObject *self)
4436{
4437 PyObject *cls = NULL;
4438 PyObject *args = NULL;
4439 PyObject *obj = NULL;
4440 PyObject *module_name;
4441 PyObject *class_name;
4442 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004443 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004444 char *s;
4445
4446 if ((i = marker(self)) < 0)
4447 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004448 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004449 return -1;
4450 if (len < 2)
4451 return bad_readline();
4452
4453 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4454 identifiers are permitted in Python 3.0, since the INST opcode is only
4455 supported by older protocols on Python 2.x. */
4456 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4457 if (module_name == NULL)
4458 return -1;
4459
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004460 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004461 if (len < 2)
4462 return bad_readline();
4463 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004464 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004465 cls = find_class(self, module_name, class_name);
4466 Py_DECREF(class_name);
4467 }
4468 }
4469 Py_DECREF(module_name);
4470
4471 if (cls == NULL)
4472 return -1;
4473
4474 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4475 obj = instantiate(cls, args);
4476 Py_DECREF(args);
4477 }
4478 Py_DECREF(cls);
4479
4480 if (obj == NULL)
4481 return -1;
4482
4483 PDATA_PUSH(self->stack, obj, -1);
4484 return 0;
4485}
4486
4487static int
4488load_newobj(UnpicklerObject *self)
4489{
4490 PyObject *args = NULL;
4491 PyObject *clsraw = NULL;
4492 PyTypeObject *cls; /* clsraw cast to its true type */
4493 PyObject *obj;
4494
4495 /* Stack is ... cls argtuple, and we want to call
4496 * cls.__new__(cls, *argtuple).
4497 */
4498 PDATA_POP(self->stack, args);
4499 if (args == NULL)
4500 goto error;
4501 if (!PyTuple_Check(args)) {
4502 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4503 goto error;
4504 }
4505
4506 PDATA_POP(self->stack, clsraw);
4507 cls = (PyTypeObject *)clsraw;
4508 if (cls == NULL)
4509 goto error;
4510 if (!PyType_Check(cls)) {
4511 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4512 "isn't a type object");
4513 goto error;
4514 }
4515 if (cls->tp_new == NULL) {
4516 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4517 "has NULL tp_new");
4518 goto error;
4519 }
4520
4521 /* Call __new__. */
4522 obj = cls->tp_new(cls, args, NULL);
4523 if (obj == NULL)
4524 goto error;
4525
4526 Py_DECREF(args);
4527 Py_DECREF(clsraw);
4528 PDATA_PUSH(self->stack, obj, -1);
4529 return 0;
4530
4531 error:
4532 Py_XDECREF(args);
4533 Py_XDECREF(clsraw);
4534 return -1;
4535}
4536
4537static int
4538load_global(UnpicklerObject *self)
4539{
4540 PyObject *global = NULL;
4541 PyObject *module_name;
4542 PyObject *global_name;
4543 Py_ssize_t len;
4544 char *s;
4545
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004546 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004547 return -1;
4548 if (len < 2)
4549 return bad_readline();
4550 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4551 if (!module_name)
4552 return -1;
4553
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004554 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004555 if (len < 2) {
4556 Py_DECREF(module_name);
4557 return bad_readline();
4558 }
4559 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4560 if (global_name) {
4561 global = find_class(self, module_name, global_name);
4562 Py_DECREF(global_name);
4563 }
4564 }
4565 Py_DECREF(module_name);
4566
4567 if (global == NULL)
4568 return -1;
4569 PDATA_PUSH(self->stack, global, -1);
4570 return 0;
4571}
4572
4573static int
4574load_persid(UnpicklerObject *self)
4575{
4576 PyObject *pid;
4577 Py_ssize_t len;
4578 char *s;
4579
4580 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004581 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004582 return -1;
4583 if (len < 2)
4584 return bad_readline();
4585
4586 pid = PyBytes_FromStringAndSize(s, len - 1);
4587 if (pid == NULL)
4588 return -1;
4589
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004590 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004591 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004592 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004593 if (pid == NULL)
4594 return -1;
4595
4596 PDATA_PUSH(self->stack, pid, -1);
4597 return 0;
4598 }
4599 else {
4600 PyErr_SetString(UnpicklingError,
4601 "A load persistent id instruction was encountered,\n"
4602 "but no persistent_load function was specified.");
4603 return -1;
4604 }
4605}
4606
4607static int
4608load_binpersid(UnpicklerObject *self)
4609{
4610 PyObject *pid;
4611
4612 if (self->pers_func) {
4613 PDATA_POP(self->stack, pid);
4614 if (pid == NULL)
4615 return -1;
4616
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004617 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004618 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004619 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004620 if (pid == NULL)
4621 return -1;
4622
4623 PDATA_PUSH(self->stack, pid, -1);
4624 return 0;
4625 }
4626 else {
4627 PyErr_SetString(UnpicklingError,
4628 "A load persistent id instruction was encountered,\n"
4629 "but no persistent_load function was specified.");
4630 return -1;
4631 }
4632}
4633
4634static int
4635load_pop(UnpicklerObject *self)
4636{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004637 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004638
4639 /* Note that we split the (pickle.py) stack into two stacks,
4640 * an object stack and a mark stack. We have to be clever and
4641 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004642 * mark stack first, and only signalling a stack underflow if
4643 * the object stack is empty and the mark stack doesn't match
4644 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004645 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004646 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004647 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004648 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004649 len--;
4650 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004651 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004652 } else {
4653 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004654 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004655 return 0;
4656}
4657
4658static int
4659load_pop_mark(UnpicklerObject *self)
4660{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004661 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004662
4663 if ((i = marker(self)) < 0)
4664 return -1;
4665
4666 Pdata_clear(self->stack, i);
4667
4668 return 0;
4669}
4670
4671static int
4672load_dup(UnpicklerObject *self)
4673{
4674 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004675 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004676
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004677 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004678 return stack_underflow();
4679 last = self->stack->data[len - 1];
4680 PDATA_APPEND(self->stack, last, -1);
4681 return 0;
4682}
4683
4684static int
4685load_get(UnpicklerObject *self)
4686{
4687 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004688 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004689 Py_ssize_t len;
4690 char *s;
4691
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004692 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004693 return -1;
4694 if (len < 2)
4695 return bad_readline();
4696
4697 key = PyLong_FromString(s, NULL, 10);
4698 if (key == NULL)
4699 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004700 idx = PyLong_AsSsize_t(key);
4701 if (idx == -1 && PyErr_Occurred()) {
4702 Py_DECREF(key);
4703 return -1;
4704 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004705
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004706 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004707 if (value == NULL) {
4708 if (!PyErr_Occurred())
4709 PyErr_SetObject(PyExc_KeyError, key);
4710 Py_DECREF(key);
4711 return -1;
4712 }
4713 Py_DECREF(key);
4714
4715 PDATA_APPEND(self->stack, value, -1);
4716 return 0;
4717}
4718
4719static int
4720load_binget(UnpicklerObject *self)
4721{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004722 PyObject *value;
4723 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004724 char *s;
4725
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004726 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727 return -1;
4728
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004729 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004730
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004731 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004732 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004733 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004734 if (!PyErr_Occurred())
4735 PyErr_SetObject(PyExc_KeyError, key);
4736 Py_DECREF(key);
4737 return -1;
4738 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004739
4740 PDATA_APPEND(self->stack, value, -1);
4741 return 0;
4742}
4743
4744static int
4745load_long_binget(UnpicklerObject *self)
4746{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004747 PyObject *value;
4748 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004749 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004750
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004751 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752 return -1;
4753
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004754 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004756 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004758 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 if (!PyErr_Occurred())
4760 PyErr_SetObject(PyExc_KeyError, key);
4761 Py_DECREF(key);
4762 return -1;
4763 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764
4765 PDATA_APPEND(self->stack, value, -1);
4766 return 0;
4767}
4768
4769/* Push an object from the extension registry (EXT[124]). nbytes is
4770 * the number of bytes following the opcode, holding the index (code) value.
4771 */
4772static int
4773load_extension(UnpicklerObject *self, int nbytes)
4774{
4775 char *codebytes; /* the nbytes bytes after the opcode */
4776 long code; /* calc_binint returns long */
4777 PyObject *py_code; /* code as a Python int */
4778 PyObject *obj; /* the object to push */
4779 PyObject *pair; /* (module_name, class_name) */
4780 PyObject *module_name, *class_name;
4781
4782 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004783 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004784 return -1;
4785 code = calc_binint(codebytes, nbytes);
4786 if (code <= 0) { /* note that 0 is forbidden */
4787 /* Corrupt or hostile pickle. */
4788 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4789 return -1;
4790 }
4791
4792 /* Look for the code in the cache. */
4793 py_code = PyLong_FromLong(code);
4794 if (py_code == NULL)
4795 return -1;
4796 obj = PyDict_GetItem(extension_cache, py_code);
4797 if (obj != NULL) {
4798 /* Bingo. */
4799 Py_DECREF(py_code);
4800 PDATA_APPEND(self->stack, obj, -1);
4801 return 0;
4802 }
4803
4804 /* Look up the (module_name, class_name) pair. */
4805 pair = PyDict_GetItem(inverted_registry, py_code);
4806 if (pair == NULL) {
4807 Py_DECREF(py_code);
4808 PyErr_Format(PyExc_ValueError, "unregistered extension "
4809 "code %ld", code);
4810 return -1;
4811 }
4812 /* Since the extension registry is manipulable via Python code,
4813 * confirm that pair is really a 2-tuple of strings.
4814 */
4815 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4816 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4817 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4818 Py_DECREF(py_code);
4819 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4820 "isn't a 2-tuple of strings", code);
4821 return -1;
4822 }
4823 /* Load the object. */
4824 obj = find_class(self, module_name, class_name);
4825 if (obj == NULL) {
4826 Py_DECREF(py_code);
4827 return -1;
4828 }
4829 /* Cache code -> obj. */
4830 code = PyDict_SetItem(extension_cache, py_code, obj);
4831 Py_DECREF(py_code);
4832 if (code < 0) {
4833 Py_DECREF(obj);
4834 return -1;
4835 }
4836 PDATA_PUSH(self->stack, obj, -1);
4837 return 0;
4838}
4839
4840static int
4841load_put(UnpicklerObject *self)
4842{
4843 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004844 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004845 Py_ssize_t len;
4846 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004847
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004848 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004849 return -1;
4850 if (len < 2)
4851 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004852 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004853 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004854 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004855
4856 key = PyLong_FromString(s, NULL, 10);
4857 if (key == NULL)
4858 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004860 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004861 if (idx < 0) {
4862 if (!PyErr_Occurred())
4863 PyErr_SetString(PyExc_ValueError,
4864 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004865 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004866 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004867
4868 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004869}
4870
4871static int
4872load_binput(UnpicklerObject *self)
4873{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004874 PyObject *value;
4875 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004876 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004877
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004879 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004880
4881 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004882 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004883 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004884
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004885 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004886
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004887 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004888}
4889
4890static int
4891load_long_binput(UnpicklerObject *self)
4892{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004893 PyObject *value;
4894 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004895 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004896
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004897 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004898 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004899
4900 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004901 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004902 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004903
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004904 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004905 if (idx < 0) {
4906 PyErr_SetString(PyExc_ValueError,
4907 "negative LONG_BINPUT argument");
4908 return -1;
4909 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004911 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004912}
4913
4914static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004915do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004916{
4917 PyObject *value;
4918 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004919 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004920
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004921 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004922 if (x > len || x <= 0)
4923 return stack_underflow();
4924 if (len == x) /* nothing to do */
4925 return 0;
4926
4927 list = self->stack->data[x - 1];
4928
4929 if (PyList_Check(list)) {
4930 PyObject *slice;
4931 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004932 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004933
4934 slice = Pdata_poplist(self->stack, x);
4935 if (!slice)
4936 return -1;
4937 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004938 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004939 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004940 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004941 }
4942 else {
4943 PyObject *append_func;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02004944 _Py_identifier(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004945
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02004946 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004947 if (append_func == NULL)
4948 return -1;
4949 for (i = x; i < len; i++) {
4950 PyObject *result;
4951
4952 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004953 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004954 if (result == NULL) {
4955 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004956 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004957 return -1;
4958 }
4959 Py_DECREF(result);
4960 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004961 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004962 }
4963
4964 return 0;
4965}
4966
4967static int
4968load_append(UnpicklerObject *self)
4969{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004970 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004971}
4972
4973static int
4974load_appends(UnpicklerObject *self)
4975{
4976 return do_append(self, marker(self));
4977}
4978
4979static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004980do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004981{
4982 PyObject *value, *key;
4983 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004984 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004985 int status = 0;
4986
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004987 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004988 if (x > len || x <= 0)
4989 return stack_underflow();
4990 if (len == x) /* nothing to do */
4991 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004992 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004993 /* Currupt or hostile pickle -- we never write one like this. */
4994 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4995 return -1;
4996 }
4997
4998 /* Here, dict does not actually need to be a PyDict; it could be anything
4999 that supports the __setitem__ attribute. */
5000 dict = self->stack->data[x - 1];
5001
5002 for (i = x + 1; i < len; i += 2) {
5003 key = self->stack->data[i - 1];
5004 value = self->stack->data[i];
5005 if (PyObject_SetItem(dict, key, value) < 0) {
5006 status = -1;
5007 break;
5008 }
5009 }
5010
5011 Pdata_clear(self->stack, x);
5012 return status;
5013}
5014
5015static int
5016load_setitem(UnpicklerObject *self)
5017{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005018 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005019}
5020
5021static int
5022load_setitems(UnpicklerObject *self)
5023{
5024 return do_setitems(self, marker(self));
5025}
5026
5027static int
5028load_build(UnpicklerObject *self)
5029{
5030 PyObject *state, *inst, *slotstate;
5031 PyObject *setstate;
5032 int status = 0;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005033 _Py_identifier(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005034
5035 /* Stack is ... instance, state. We want to leave instance at
5036 * the stack top, possibly mutated via instance.__setstate__(state).
5037 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005038 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005039 return stack_underflow();
5040
5041 PDATA_POP(self->stack, state);
5042 if (state == NULL)
5043 return -1;
5044
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005045 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005046
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005047 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005048 if (setstate == NULL) {
5049 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5050 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005051 else {
5052 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005053 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005054 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005055 }
5056 else {
5057 PyObject *result;
5058
5059 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005060 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005061 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005062 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005063 Py_DECREF(setstate);
5064 if (result == NULL)
5065 return -1;
5066 Py_DECREF(result);
5067 return 0;
5068 }
5069
5070 /* A default __setstate__. First see whether state embeds a
5071 * slot state dict too (a proto 2 addition).
5072 */
5073 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5074 PyObject *tmp = state;
5075
5076 state = PyTuple_GET_ITEM(tmp, 0);
5077 slotstate = PyTuple_GET_ITEM(tmp, 1);
5078 Py_INCREF(state);
5079 Py_INCREF(slotstate);
5080 Py_DECREF(tmp);
5081 }
5082 else
5083 slotstate = NULL;
5084
5085 /* Set inst.__dict__ from the state dict (if any). */
5086 if (state != Py_None) {
5087 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005088 PyObject *d_key, *d_value;
5089 Py_ssize_t i;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005090 _Py_identifier(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005091
5092 if (!PyDict_Check(state)) {
5093 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5094 goto error;
5095 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005096 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005097 if (dict == NULL)
5098 goto error;
5099
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005100 i = 0;
5101 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5102 /* normally the keys for instance attributes are
5103 interned. we should try to do that here. */
5104 Py_INCREF(d_key);
5105 if (PyUnicode_CheckExact(d_key))
5106 PyUnicode_InternInPlace(&d_key);
5107 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5108 Py_DECREF(d_key);
5109 goto error;
5110 }
5111 Py_DECREF(d_key);
5112 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005113 Py_DECREF(dict);
5114 }
5115
5116 /* Also set instance attributes from the slotstate dict (if any). */
5117 if (slotstate != NULL) {
5118 PyObject *d_key, *d_value;
5119 Py_ssize_t i;
5120
5121 if (!PyDict_Check(slotstate)) {
5122 PyErr_SetString(UnpicklingError,
5123 "slot state is not a dictionary");
5124 goto error;
5125 }
5126 i = 0;
5127 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5128 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5129 goto error;
5130 }
5131 }
5132
5133 if (0) {
5134 error:
5135 status = -1;
5136 }
5137
5138 Py_DECREF(state);
5139 Py_XDECREF(slotstate);
5140 return status;
5141}
5142
5143static int
5144load_mark(UnpicklerObject *self)
5145{
5146
5147 /* Note that we split the (pickle.py) stack into two stacks, an
5148 * object stack and a mark stack. Here we push a mark onto the
5149 * mark stack.
5150 */
5151
5152 if ((self->num_marks + 1) >= self->marks_size) {
5153 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005154 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005155
5156 /* Use the size_t type to check for overflow. */
5157 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005158 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005159 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005160 PyErr_NoMemory();
5161 return -1;
5162 }
5163
5164 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005165 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005166 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005167 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5168 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005169 if (marks == NULL) {
5170 PyErr_NoMemory();
5171 return -1;
5172 }
5173 self->marks = marks;
5174 self->marks_size = (Py_ssize_t)alloc;
5175 }
5176
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005177 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005178
5179 return 0;
5180}
5181
5182static int
5183load_reduce(UnpicklerObject *self)
5184{
5185 PyObject *callable = NULL;
5186 PyObject *argtup = NULL;
5187 PyObject *obj = NULL;
5188
5189 PDATA_POP(self->stack, argtup);
5190 if (argtup == NULL)
5191 return -1;
5192 PDATA_POP(self->stack, callable);
5193 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005194 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005195 Py_DECREF(callable);
5196 }
5197 Py_DECREF(argtup);
5198
5199 if (obj == NULL)
5200 return -1;
5201
5202 PDATA_PUSH(self->stack, obj, -1);
5203 return 0;
5204}
5205
5206/* Just raises an error if we don't know the protocol specified. PROTO
5207 * is the first opcode for protocols >= 2.
5208 */
5209static int
5210load_proto(UnpicklerObject *self)
5211{
5212 char *s;
5213 int i;
5214
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005215 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005216 return -1;
5217
5218 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005219 if (i <= HIGHEST_PROTOCOL) {
5220 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005221 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005222 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005223
5224 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5225 return -1;
5226}
5227
5228static PyObject *
5229load(UnpicklerObject *self)
5230{
5231 PyObject *err;
5232 PyObject *value = NULL;
5233 char *s;
5234
5235 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005236 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005237 Pdata_clear(self->stack, 0);
5238
5239 /* Convenient macros for the dispatch while-switch loop just below. */
5240#define OP(opcode, load_func) \
5241 case opcode: if (load_func(self) < 0) break; continue;
5242
5243#define OP_ARG(opcode, load_func, arg) \
5244 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5245
5246 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005247 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005248 break;
5249
5250 switch ((enum opcode)s[0]) {
5251 OP(NONE, load_none)
5252 OP(BININT, load_binint)
5253 OP(BININT1, load_binint1)
5254 OP(BININT2, load_binint2)
5255 OP(INT, load_int)
5256 OP(LONG, load_long)
5257 OP_ARG(LONG1, load_counted_long, 1)
5258 OP_ARG(LONG4, load_counted_long, 4)
5259 OP(FLOAT, load_float)
5260 OP(BINFLOAT, load_binfloat)
5261 OP(BINBYTES, load_binbytes)
5262 OP(SHORT_BINBYTES, load_short_binbytes)
5263 OP(BINSTRING, load_binstring)
5264 OP(SHORT_BINSTRING, load_short_binstring)
5265 OP(STRING, load_string)
5266 OP(UNICODE, load_unicode)
5267 OP(BINUNICODE, load_binunicode)
5268 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5269 OP_ARG(TUPLE1, load_counted_tuple, 1)
5270 OP_ARG(TUPLE2, load_counted_tuple, 2)
5271 OP_ARG(TUPLE3, load_counted_tuple, 3)
5272 OP(TUPLE, load_tuple)
5273 OP(EMPTY_LIST, load_empty_list)
5274 OP(LIST, load_list)
5275 OP(EMPTY_DICT, load_empty_dict)
5276 OP(DICT, load_dict)
5277 OP(OBJ, load_obj)
5278 OP(INST, load_inst)
5279 OP(NEWOBJ, load_newobj)
5280 OP(GLOBAL, load_global)
5281 OP(APPEND, load_append)
5282 OP(APPENDS, load_appends)
5283 OP(BUILD, load_build)
5284 OP(DUP, load_dup)
5285 OP(BINGET, load_binget)
5286 OP(LONG_BINGET, load_long_binget)
5287 OP(GET, load_get)
5288 OP(MARK, load_mark)
5289 OP(BINPUT, load_binput)
5290 OP(LONG_BINPUT, load_long_binput)
5291 OP(PUT, load_put)
5292 OP(POP, load_pop)
5293 OP(POP_MARK, load_pop_mark)
5294 OP(SETITEM, load_setitem)
5295 OP(SETITEMS, load_setitems)
5296 OP(PERSID, load_persid)
5297 OP(BINPERSID, load_binpersid)
5298 OP(REDUCE, load_reduce)
5299 OP(PROTO, load_proto)
5300 OP_ARG(EXT1, load_extension, 1)
5301 OP_ARG(EXT2, load_extension, 2)
5302 OP_ARG(EXT4, load_extension, 4)
5303 OP_ARG(NEWTRUE, load_bool, Py_True)
5304 OP_ARG(NEWFALSE, load_bool, Py_False)
5305
5306 case STOP:
5307 break;
5308
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005309 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005310 if (s[0] == '\0')
5311 PyErr_SetNone(PyExc_EOFError);
5312 else
5313 PyErr_Format(UnpicklingError,
5314 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005315 return NULL;
5316 }
5317
5318 break; /* and we are done! */
5319 }
5320
Antoine Pitrou04248a82010-10-12 20:51:21 +00005321 if (_Unpickler_SkipConsumed(self) < 0)
5322 return NULL;
5323
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005324 /* XXX: It is not clear what this is actually for. */
5325 if ((err = PyErr_Occurred())) {
5326 if (err == PyExc_EOFError) {
5327 PyErr_SetNone(PyExc_EOFError);
5328 }
5329 return NULL;
5330 }
5331
5332 PDATA_POP(self->stack, value);
5333 return value;
5334}
5335
5336PyDoc_STRVAR(Unpickler_load_doc,
5337"load() -> object. Load a pickle."
5338"\n"
5339"Read a pickled object representation from the open file object given in\n"
5340"the constructor, and return the reconstituted object hierarchy specified\n"
5341"therein.\n");
5342
5343static PyObject *
5344Unpickler_load(UnpicklerObject *self)
5345{
5346 /* Check whether the Unpickler was initialized correctly. This prevents
5347 segfaulting if a subclass overridden __init__ with a function that does
5348 not call Unpickler.__init__(). Here, we simply ensure that self->read
5349 is not NULL. */
5350 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005351 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005352 "Unpickler.__init__() was not called by %s.__init__()",
5353 Py_TYPE(self)->tp_name);
5354 return NULL;
5355 }
5356
5357 return load(self);
5358}
5359
5360/* The name of find_class() is misleading. In newer pickle protocols, this
5361 function is used for loading any global (i.e., functions), not just
5362 classes. The name is kept only for backward compatibility. */
5363
5364PyDoc_STRVAR(Unpickler_find_class_doc,
5365"find_class(module_name, global_name) -> object.\n"
5366"\n"
5367"Return an object from a specified module, importing the module if\n"
5368"necessary. Subclasses may override this method (e.g. to restrict\n"
5369"unpickling of arbitrary classes and functions).\n"
5370"\n"
5371"This method is called whenever a class or a function object is\n"
5372"needed. Both arguments passed are str objects.\n");
5373
5374static PyObject *
5375Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5376{
5377 PyObject *global;
5378 PyObject *modules_dict;
5379 PyObject *module;
5380 PyObject *module_name, *global_name;
5381
5382 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5383 &module_name, &global_name))
5384 return NULL;
5385
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005386 /* Try to map the old names used in Python 2.x to the new ones used in
5387 Python 3.x. We do this only with old pickle protocols and when the
5388 user has not disabled the feature. */
5389 if (self->proto < 3 && self->fix_imports) {
5390 PyObject *key;
5391 PyObject *item;
5392
5393 /* Check if the global (i.e., a function or a class) was renamed
5394 or moved to another module. */
5395 key = PyTuple_Pack(2, module_name, global_name);
5396 if (key == NULL)
5397 return NULL;
5398 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5399 Py_DECREF(key);
5400 if (item) {
5401 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5402 PyErr_Format(PyExc_RuntimeError,
5403 "_compat_pickle.NAME_MAPPING values should be "
5404 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5405 return NULL;
5406 }
5407 module_name = PyTuple_GET_ITEM(item, 0);
5408 global_name = PyTuple_GET_ITEM(item, 1);
5409 if (!PyUnicode_Check(module_name) ||
5410 !PyUnicode_Check(global_name)) {
5411 PyErr_Format(PyExc_RuntimeError,
5412 "_compat_pickle.NAME_MAPPING values should be "
5413 "pairs of str, not (%.200s, %.200s)",
5414 Py_TYPE(module_name)->tp_name,
5415 Py_TYPE(global_name)->tp_name);
5416 return NULL;
5417 }
5418 }
5419 else if (PyErr_Occurred()) {
5420 return NULL;
5421 }
5422
5423 /* Check if the module was renamed. */
5424 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5425 if (item) {
5426 if (!PyUnicode_Check(item)) {
5427 PyErr_Format(PyExc_RuntimeError,
5428 "_compat_pickle.IMPORT_MAPPING values should be "
5429 "strings, not %.200s", Py_TYPE(item)->tp_name);
5430 return NULL;
5431 }
5432 module_name = item;
5433 }
5434 else if (PyErr_Occurred()) {
5435 return NULL;
5436 }
5437 }
5438
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005439 modules_dict = PySys_GetObject("modules");
5440 if (modules_dict == NULL)
5441 return NULL;
5442
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005443 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005444 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005445 if (PyErr_Occurred())
5446 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005447 module = PyImport_Import(module_name);
5448 if (module == NULL)
5449 return NULL;
5450 global = PyObject_GetAttr(module, global_name);
5451 Py_DECREF(module);
5452 }
Victor Stinner121aab42011-09-29 23:40:53 +02005453 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454 global = PyObject_GetAttr(module, global_name);
5455 }
5456 return global;
5457}
5458
/* Python-visible methods of Unpickler objects: load() takes no arguments,
   find_class() takes positional arguments.  The all-NULL row ends the
   table. */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
5466
/* tp_dealloc slot for Unpickler: drops every object reference held in the
   struct, releases the input buffer, frees the auxiliary C allocations and
   finally hands the memory back through tp_free. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Untrack before tearing anything down so the collector never looks at
       a partially destroyed object. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    /* Only release a buffer that was actually acquired. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    /* NOTE(review): encoding/errors are released with free() rather than
       PyMem_Free() -- presumably they come from malloc()/strdup(); confirm
       against _Unpickler_SetInputEncoding. */
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
5490
5491static int
5492Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5493{
5494 Py_VISIT(self->readline);
5495 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005496 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005497 Py_VISIT(self->stack);
5498 Py_VISIT(self->pers_func);
5499 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005500 return 0;
5501}
5502
/* tp_clear slot for Unpickler (also called by Unpickler_init() when the
   object is re-initialised).  Releases every held reference and C
   allocation and resets the corresponding fields to NULL so the object can
   be safely cleared again or re-initialised.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    /* Only release a buffer that was actually acquired. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    /* Reset each pointer right after freeing it so a later clear or
       dealloc does not free it twice. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    free(self->encoding);
    self->encoding = NULL;
    free(self->errors);
    self->errors = NULL;

    return 0;
}
5529
5530PyDoc_STRVAR(Unpickler_doc,
5531"Unpickler(file, *, encoding='ASCII', errors='strict')"
5532"\n"
5533"This takes a binary file for reading a pickle data stream.\n"
5534"\n"
5535"The protocol version of the pickle is detected automatically, so no\n"
5536"proto argument is needed.\n"
5537"\n"
5538"The file-like object must have two methods, a read() method\n"
5539"that takes an integer argument, and a readline() method that\n"
5540"requires no arguments. Both methods should return bytes.\n"
5541"Thus file-like object can be a binary file object opened for\n"
5542"reading, a BytesIO object, or any other custom object that\n"
5543"meets this interface.\n"
5544"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005545"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5546"which are used to control compatiblity support for pickle stream\n"
5547"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5548"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5549"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5550"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5551"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005552
5553static int
5554Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5555{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005556 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005557 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005558 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005559 char *encoding = NULL;
5560 char *errors = NULL;
5561
5562 /* XXX: That is an horrible error message. But, I don't know how to do
5563 better... */
5564 if (Py_SIZE(args) != 1) {
5565 PyErr_Format(PyExc_TypeError,
5566 "%s takes exactly one positional argument (%zd given)",
5567 Py_TYPE(self)->tp_name, Py_SIZE(args));
5568 return -1;
5569 }
5570
5571 /* Arguments parsing needs to be done in the __init__() method to allow
5572 subclasses to define their own __init__() method, which may (or may
5573 not) support Unpickler arguments. However, this means we need to be
5574 extra careful in the other Unpickler methods, since a subclass could
5575 forget to call Unpickler.__init__() thus breaking our internal
5576 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005577 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005578 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005579 return -1;
5580
5581 /* In case of multiple __init__() calls, clear previous content. */
5582 if (self->read != NULL)
5583 (void)Unpickler_clear(self);
5584
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005585 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005586 return -1;
5587
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005588 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005589 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005590
5591 self->fix_imports = PyObject_IsTrue(fix_imports);
5592 if (self->fix_imports == -1)
5593 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005594
5595 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005596 _Py_identifier(persistent_load);
5597 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5598 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005599 if (self->pers_func == NULL)
5600 return -1;
5601 }
5602 else {
5603 self->pers_func = NULL;
5604 }
5605
5606 self->stack = (Pdata *)Pdata_New();
5607 if (self->stack == NULL)
5608 return -1;
5609
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005610 self->memo_size = 32;
5611 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005612 if (self->memo == NULL)
5613 return -1;
5614
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005615 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005616 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005617
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005618 return 0;
5619}
5620
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005621/* Define a proxy object for the Unpickler's internal memo object. This is to
5622 * avoid breaking code like:
5623 * unpickler.memo.clear()
5624 * and
5625 * unpickler.memo = saved_memo
5626 * Is this a good idea? Not really, but we don't want to break code that uses
5627 * it. Note that we don't implement the entire mapping API here. This is
5628 * intentional, as these should be treated as black-box implementation details.
5629 *
5630 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005631 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005632 */
5633
/* Instance layout of the memo proxy: a thin handle on the Unpickler whose
   memo it exposes. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Strong reference to the owning Unpickler. */
} UnpicklerMemoProxyObject;
5638
5639PyDoc_STRVAR(ump_clear_doc,
5640"memo.clear() -> None. Remove all items from memo.");
5641
5642static PyObject *
5643ump_clear(UnpicklerMemoProxyObject *self)
5644{
5645 _Unpickler_MemoCleanup(self->unpickler);
5646 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5647 if (self->unpickler->memo == NULL)
5648 return NULL;
5649 Py_RETURN_NONE;
5650}
5651
5652PyDoc_STRVAR(ump_copy_doc,
5653"memo.copy() -> new_memo. Copy the memo to a new object.");
5654
5655static PyObject *
5656ump_copy(UnpicklerMemoProxyObject *self)
5657{
5658 Py_ssize_t i;
5659 PyObject *new_memo = PyDict_New();
5660 if (new_memo == NULL)
5661 return NULL;
5662
5663 for (i = 0; i < self->unpickler->memo_size; i++) {
5664 int status;
5665 PyObject *key, *value;
5666
5667 value = self->unpickler->memo[i];
5668 if (value == NULL)
5669 continue;
5670
5671 key = PyLong_FromSsize_t(i);
5672 if (key == NULL)
5673 goto error;
5674 status = PyDict_SetItem(new_memo, key, value);
5675 Py_DECREF(key);
5676 if (status < 0)
5677 goto error;
5678 }
5679 return new_memo;
5680
5681error:
5682 Py_DECREF(new_memo);
5683 return NULL;
5684}
5685
5686PyDoc_STRVAR(ump_reduce_doc,
5687"memo.__reduce__(). Pickling support.");
5688
5689static PyObject *
5690ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5691{
5692 PyObject *reduce_value;
5693 PyObject *constructor_args;
5694 PyObject *contents = ump_copy(self);
5695 if (contents == NULL)
5696 return NULL;
5697
5698 reduce_value = PyTuple_New(2);
5699 if (reduce_value == NULL) {
5700 Py_DECREF(contents);
5701 return NULL;
5702 }
5703 constructor_args = PyTuple_New(1);
5704 if (constructor_args == NULL) {
5705 Py_DECREF(contents);
5706 Py_DECREF(reduce_value);
5707 return NULL;
5708 }
5709 PyTuple_SET_ITEM(constructor_args, 0, contents);
5710 Py_INCREF((PyObject *)&PyDict_Type);
5711 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5712 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5713 return reduce_value;
5714}
5715
/* Methods exposed by the memo proxy.  Only clear(), copy() and __reduce__()
   are provided; the rest of the mapping API is deliberately absent. */
static PyMethodDef unpicklerproxy_methods[] = {
    {"clear",      (PyCFunction)ump_clear,  METH_NOARGS,  ump_clear_doc},
    {"copy",       (PyCFunction)ump_copy,   METH_NOARGS,  ump_copy_doc},
    {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
    {NULL, NULL}    /* sentinel */
};
5722
/* tp_dealloc slot for the memo proxy: stop GC tracking, drop the reference
   to the wrapped Unpickler and free the object. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    /* XDECREF: the field is NULL if tp_clear already ran. */
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
5730
/* tp_traverse slot for the memo proxy: the wrapped Unpickler is the only
   object reference it holds. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
5738
/* tp_clear slot for the memo proxy: release the wrapped Unpickler and leave
   the field NULL.  Always returns 0. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
5745
/* Type object of the memo proxy: a GC-tracked, unhashable type whose only
   behaviour comes from unpicklerproxy_methods.  Slots after tp_methods are
   left zero-initialised. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
5776
5777static PyObject *
5778UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5779{
5780 UnpicklerMemoProxyObject *self;
5781
5782 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5783 &UnpicklerMemoProxyType);
5784 if (self == NULL)
5785 return NULL;
5786 Py_INCREF(unpickler);
5787 self->unpickler = unpickler;
5788 PyObject_GC_Track(self);
5789 return (PyObject *)self;
5790}
5791
5792/*****************************************************************************/
5793
5794
/* Getter for Unpickler.memo: every access returns a new proxy object bound
   to this unpickler (NULL if the proxy cannot be allocated). */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self)
{
    return UnpicklerMemoProxy_New(self);
}
5800
5801static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005802Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005803{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005804 PyObject **new_memo;
5805 Py_ssize_t new_memo_size = 0;
5806 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005807
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005808 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005809 PyErr_SetString(PyExc_TypeError,
5810 "attribute deletion is not supported");
5811 return -1;
5812 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005813
5814 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5815 UnpicklerObject *unpickler =
5816 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5817
5818 new_memo_size = unpickler->memo_size;
5819 new_memo = _Unpickler_NewMemo(new_memo_size);
5820 if (new_memo == NULL)
5821 return -1;
5822
5823 for (i = 0; i < new_memo_size; i++) {
5824 Py_XINCREF(unpickler->memo[i]);
5825 new_memo[i] = unpickler->memo[i];
5826 }
5827 }
5828 else if (PyDict_Check(obj)) {
5829 Py_ssize_t i = 0;
5830 PyObject *key, *value;
5831
5832 new_memo_size = PyDict_Size(obj);
5833 new_memo = _Unpickler_NewMemo(new_memo_size);
5834 if (new_memo == NULL)
5835 return -1;
5836
5837 while (PyDict_Next(obj, &i, &key, &value)) {
5838 Py_ssize_t idx;
5839 if (!PyLong_Check(key)) {
5840 PyErr_SetString(PyExc_TypeError,
5841 "memo key must be integers");
5842 goto error;
5843 }
5844 idx = PyLong_AsSsize_t(key);
5845 if (idx == -1 && PyErr_Occurred())
5846 goto error;
5847 if (_Unpickler_MemoPut(self, idx, value) < 0)
5848 goto error;
5849 }
5850 }
5851 else {
5852 PyErr_Format(PyExc_TypeError,
5853 "'memo' attribute must be an UnpicklerMemoProxy object"
5854 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005855 return -1;
5856 }
5857
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005858 _Unpickler_MemoCleanup(self);
5859 self->memo_size = new_memo_size;
5860 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005861
5862 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005863
5864 error:
5865 if (new_memo_size) {
5866 i = new_memo_size;
5867 while (--i >= 0) {
5868 Py_XDECREF(new_memo[i]);
5869 }
5870 PyMem_FREE(new_memo);
5871 }
5872 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005873}
5874
5875static PyObject *
5876Unpickler_get_persload(UnpicklerObject *self)
5877{
5878 if (self->pers_func == NULL)
5879 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5880 else
5881 Py_INCREF(self->pers_func);
5882 return self->pers_func;
5883}
5884
5885static int
5886Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5887{
5888 PyObject *tmp;
5889
5890 if (value == NULL) {
5891 PyErr_SetString(PyExc_TypeError,
5892 "attribute deletion is not supported");
5893 return -1;
5894 }
5895 if (!PyCallable_Check(value)) {
5896 PyErr_SetString(PyExc_TypeError,
5897 "persistent_load must be a callable taking "
5898 "one argument");
5899 return -1;
5900 }
5901
5902 tmp = self->pers_func;
5903 Py_INCREF(value);
5904 self->pers_func = value;
5905 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5906
5907 return 0;
5908}
5909
/* Computed attributes of Unpickler objects: "memo" (exposed through a proxy
   object) and "persistent_load".  The {NULL} row ends the table. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
     (setter)Unpickler_set_persload},
    {NULL}
};
5916
/* Type object for _pickle.Unpickler: a subclassable, GC-tracked type.
   Instances are allocated by the generic tp_alloc/tp_new and configured by
   Unpickler_init(). */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5959
/* Docstring of the module-level dump() function (see pickle_dump()). */
PyDoc_STRVAR(pickle_dump_doc,
"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
"\n"
"Write a pickled representation of obj to the open file object file. This\n"
"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
"efficient.\n"
"\n"
"The optional protocol argument tells the pickler to use the given protocol;\n"
"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
"backward-incompatible protocol designed for Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest protocol version\n"
"supported. The higher the protocol used, the more recent the version of\n"
"Python needed to read the pickle produced.\n"
"\n"
"The file argument must have a write() method that accepts a single bytes\n"
"argument. It can thus be a file object opened for binary writing, a\n"
"io.BytesIO instance, or any other custom object that meets this interface.\n"
"\n"
"If fix_imports is True and protocol is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");
5982
5983static PyObject *
5984pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5985{
5986 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5987 PyObject *obj;
5988 PyObject *file;
5989 PyObject *proto = NULL;
5990 PyObject *fix_imports = Py_True;
5991 PicklerObject *pickler;
5992
5993 /* fix_imports is a keyword-only argument. */
5994 if (Py_SIZE(args) > 3) {
5995 PyErr_Format(PyExc_TypeError,
5996 "pickle.dump() takes at most 3 positional "
5997 "argument (%zd given)", Py_SIZE(args));
5998 return NULL;
5999 }
6000
6001 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6002 &obj, &file, &proto, &fix_imports))
6003 return NULL;
6004
6005 pickler = _Pickler_New();
6006 if (pickler == NULL)
6007 return NULL;
6008
6009 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6010 goto error;
6011
6012 if (_Pickler_SetOutputStream(pickler, file) < 0)
6013 goto error;
6014
6015 if (dump(pickler, obj) < 0)
6016 goto error;
6017
6018 if (_Pickler_FlushToFile(pickler) < 0)
6019 goto error;
6020
6021 Py_DECREF(pickler);
6022 Py_RETURN_NONE;
6023
6024 error:
6025 Py_XDECREF(pickler);
6026 return NULL;
6027}
6028
/* Docstring of the module-level dumps() function (see pickle_dumps()). */
PyDoc_STRVAR(pickle_dumps_doc,
"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
"\n"
"Return the pickled representation of the object as a bytes\n"
"object, instead of writing it to a file.\n"
"\n"
"The optional protocol argument tells the pickler to use the given protocol;\n"
"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
"backward-incompatible protocol designed for Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest protocol version\n"
"supported. The higher the protocol used, the more recent the version of\n"
"Python needed to read the pickle produced.\n"
"\n"
"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");
6046
6047static PyObject *
6048pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6049{
6050 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6051 PyObject *obj;
6052 PyObject *proto = NULL;
6053 PyObject *result;
6054 PyObject *fix_imports = Py_True;
6055 PicklerObject *pickler;
6056
6057 /* fix_imports is a keyword-only argument. */
6058 if (Py_SIZE(args) > 2) {
6059 PyErr_Format(PyExc_TypeError,
6060 "pickle.dumps() takes at most 2 positional "
6061 "argument (%zd given)", Py_SIZE(args));
6062 return NULL;
6063 }
6064
6065 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6066 &obj, &proto, &fix_imports))
6067 return NULL;
6068
6069 pickler = _Pickler_New();
6070 if (pickler == NULL)
6071 return NULL;
6072
6073 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6074 goto error;
6075
6076 if (dump(pickler, obj) < 0)
6077 goto error;
6078
6079 result = _Pickler_GetString(pickler);
6080 Py_DECREF(pickler);
6081 return result;
6082
6083 error:
6084 Py_XDECREF(pickler);
6085 return NULL;
6086}
6087
6088PyDoc_STRVAR(pickle_load_doc,
6089"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6090"\n"
6091"Read a pickled object representation from the open file object file and\n"
6092"return the reconstituted object hierarchy specified therein. This is\n"
6093"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6094"\n"
6095"The protocol version of the pickle is detected automatically, so no protocol\n"
6096"argument is needed. Bytes past the pickled object's representation are\n"
6097"ignored.\n"
6098"\n"
6099"The argument file must have two methods, a read() method that takes an\n"
6100"integer argument, and a readline() method that requires no arguments. Both\n"
6101"methods should return bytes. Thus *file* can be a binary file object opened\n"
6102"for reading, a BytesIO object, or any other custom object that meets this\n"
6103"interface.\n"
6104"\n"
6105"Optional keyword arguments are fix_imports, encoding and errors,\n"
6106"which are used to control compatiblity support for pickle stream generated\n"
6107"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6108"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6109"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6110"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6111
6112static PyObject *
6113pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6114{
6115 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6116 PyObject *file;
6117 PyObject *fix_imports = Py_True;
6118 PyObject *result;
6119 char *encoding = NULL;
6120 char *errors = NULL;
6121 UnpicklerObject *unpickler;
6122
6123 /* fix_imports, encoding and errors are a keyword-only argument. */
6124 if (Py_SIZE(args) != 1) {
6125 PyErr_Format(PyExc_TypeError,
6126 "pickle.load() takes exactly one positional "
6127 "argument (%zd given)", Py_SIZE(args));
6128 return NULL;
6129 }
6130
6131 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6132 &file, &fix_imports, &encoding, &errors))
6133 return NULL;
6134
6135 unpickler = _Unpickler_New();
6136 if (unpickler == NULL)
6137 return NULL;
6138
6139 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6140 goto error;
6141
6142 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6143 goto error;
6144
6145 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6146 if (unpickler->fix_imports == -1)
6147 goto error;
6148
6149 result = load(unpickler);
6150 Py_DECREF(unpickler);
6151 return result;
6152
6153 error:
6154 Py_XDECREF(unpickler);
6155 return NULL;
6156}
6157
6158PyDoc_STRVAR(pickle_loads_doc,
6159"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6160"\n"
6161"Read a pickled object hierarchy from a bytes object and return the\n"
6162"reconstituted object hierarchy specified therein\n"
6163"\n"
6164"The protocol version of the pickle is detected automatically, so no protocol\n"
6165"argument is needed. Bytes past the pickled object's representation are\n"
6166"ignored.\n"
6167"\n"
6168"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6169"are used to control compatiblity support for pickle stream generated\n"
6170"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6171"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6172"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6173"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6174
6175static PyObject *
6176pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6177{
6178 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6179 PyObject *input;
6180 PyObject *fix_imports = Py_True;
6181 PyObject *result;
6182 char *encoding = NULL;
6183 char *errors = NULL;
6184 UnpicklerObject *unpickler;
6185
6186 /* fix_imports, encoding and errors are a keyword-only argument. */
6187 if (Py_SIZE(args) != 1) {
6188 PyErr_Format(PyExc_TypeError,
6189 "pickle.loads() takes exactly one positional "
6190 "argument (%zd given)", Py_SIZE(args));
6191 return NULL;
6192 }
6193
6194 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6195 &input, &fix_imports, &encoding, &errors))
6196 return NULL;
6197
6198 unpickler = _Unpickler_New();
6199 if (unpickler == NULL)
6200 return NULL;
6201
6202 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6203 goto error;
6204
6205 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6206 goto error;
6207
6208 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6209 if (unpickler->fix_imports == -1)
6210 goto error;
6211
6212 result = load(unpickler);
6213 Py_DECREF(unpickler);
6214 return result;
6215
6216 error:
6217 Py_XDECREF(unpickler);
6218 return NULL;
6219}
6220
6221
/* Module-level functions of _pickle.  All four accept positional and
   keyword arguments; the all-NULL row ends the table. */
static struct PyMethodDef pickle_methods[] = {
    {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
     pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
     pickle_dumps_doc},
    {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
     pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
     pickle_loads_doc},
    {NULL, NULL} /* sentinel */
};
6233
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006234static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006235initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006236{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006237 PyObject *copyreg = NULL;
6238 PyObject *compat_pickle = NULL;
6239
6240 /* XXX: We should ensure that the types of the dictionaries imported are
6241 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6242 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006243
6244 copyreg = PyImport_ImportModule("copyreg");
6245 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006246 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006247 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6248 if (!dispatch_table)
6249 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006250 extension_registry = \
6251 PyObject_GetAttrString(copyreg, "_extension_registry");
6252 if (!extension_registry)
6253 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006254 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6255 if (!inverted_registry)
6256 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006257 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6258 if (!extension_cache)
6259 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006260 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006261
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006262 /* Load the 2.x -> 3.x stdlib module mapping tables */
6263 compat_pickle = PyImport_ImportModule("_compat_pickle");
6264 if (!compat_pickle)
6265 goto error;
6266 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6267 if (!name_mapping_2to3)
6268 goto error;
6269 if (!PyDict_CheckExact(name_mapping_2to3)) {
6270 PyErr_Format(PyExc_RuntimeError,
6271 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6272 Py_TYPE(name_mapping_2to3)->tp_name);
6273 goto error;
6274 }
6275 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6276 "IMPORT_MAPPING");
6277 if (!import_mapping_2to3)
6278 goto error;
6279 if (!PyDict_CheckExact(import_mapping_2to3)) {
6280 PyErr_Format(PyExc_RuntimeError,
6281 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6282 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6283 goto error;
6284 }
6285 /* ... and the 3.x -> 2.x mapping tables */
6286 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6287 "REVERSE_NAME_MAPPING");
6288 if (!name_mapping_3to2)
6289 goto error;
6290 if (!PyDict_CheckExact(name_mapping_3to2)) {
6291 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006292 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006293 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6294 goto error;
6295 }
6296 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6297 "REVERSE_IMPORT_MAPPING");
6298 if (!import_mapping_3to2)
6299 goto error;
6300 if (!PyDict_CheckExact(import_mapping_3to2)) {
6301 PyErr_Format(PyExc_RuntimeError,
6302 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6303 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6304 goto error;
6305 }
6306 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006307
6308 empty_tuple = PyTuple_New(0);
6309 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006310 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006311 two_tuple = PyTuple_New(2);
6312 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006313 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006314 /* We use this temp container with no regard to refcounts, or to
6315 * keeping containees alive. Exempt from GC, because we don't
6316 * want anything looking at two_tuple() by magic.
6317 */
6318 PyObject_GC_UnTrack(two_tuple);
6319
6320 return 0;
6321
6322 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006323 Py_CLEAR(copyreg);
6324 Py_CLEAR(dispatch_table);
6325 Py_CLEAR(extension_registry);
6326 Py_CLEAR(inverted_registry);
6327 Py_CLEAR(extension_cache);
6328 Py_CLEAR(compat_pickle);
6329 Py_CLEAR(name_mapping_2to3);
6330 Py_CLEAR(import_mapping_2to3);
6331 Py_CLEAR(name_mapping_3to2);
6332 Py_CLEAR(import_mapping_3to2);
6333 Py_CLEAR(empty_tuple);
6334 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006335 return -1;
6336}
6337
6338static struct PyModuleDef _picklemodule = {
6339 PyModuleDef_HEAD_INIT,
6340 "_pickle",
6341 pickle_module_doc,
6342 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006343 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006344 NULL,
6345 NULL,
6346 NULL,
6347 NULL
6348};
6349
6350PyMODINIT_FUNC
6351PyInit__pickle(void)
6352{
6353 PyObject *m;
6354
6355 if (PyType_Ready(&Unpickler_Type) < 0)
6356 return NULL;
6357 if (PyType_Ready(&Pickler_Type) < 0)
6358 return NULL;
6359 if (PyType_Ready(&Pdata_Type) < 0)
6360 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006361 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6362 return NULL;
6363 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6364 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006365
6366 /* Create the module and add the functions. */
6367 m = PyModule_Create(&_picklemodule);
6368 if (m == NULL)
6369 return NULL;
6370
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006371 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006372 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6373 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006374 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006375 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6376 return NULL;
6377
6378 /* Initialize the exceptions. */
6379 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6380 if (PickleError == NULL)
6381 return NULL;
6382 PicklingError = \
6383 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6384 if (PicklingError == NULL)
6385 return NULL;
6386 UnpicklingError = \
6387 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6388 if (UnpicklingError == NULL)
6389 return NULL;
6390
6391 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6392 return NULL;
6393 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6394 return NULL;
6395 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6396 return NULL;
6397
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006398 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006399 return NULL;
6400
6401 return m;
6402}