blob: 822d03c49affd9b9fd1b862a3b2c61a388ad7bcd [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.  Bump these when new
   opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol this module can produce */
    DEFAULT_PROTOCOL = 3    /* protocol used when the caller gives none */
};
12
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   These must be kept updated with pickle.py; extensive docs are in
   pickletools.py. */
enum opcode {
    /* Protocols 0 and 1. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* TRUE and FALSE are not opcodes.  They are the textual INT encodings used
 * to pickle bools before protocol 2: unpicklers written before bools were
 * introduced load them as ints, while newer unpicklers can recognize that
 * bools were intended.  Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants shared by the Pickler and Unpickler. */
enum {
    /* How many elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE;
       nothing will break if this gets out of synch with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000156 int i = Py_SIZE(self);
157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
193Pdata_clear(Pdata *self, int clearto)
194{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000195 int i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
261/* Push an object on stack, transferring its ownership to the stack. */
262#define PDATA_PUSH(D, O, ER) do { \
263 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
264
265/* Push an object on stack, adding a new reference to the object. */
266#define PDATA_APPEND(D, O, ER) do { \
267 Py_INCREF((O)); \
268 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
306 long me_value;
307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000331 int buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
372 int *marks; /* Mark stack, used for unpickling container
373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000388/*************************************************************************
389 A custom hashtable mapping void* to longs. This is used by the pickler for
390 memoization. Using a custom hashtable rather than PyDict allows us to skip
391 a bunch of unnecessary object creation. This makes a huge performance
392 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000394#define MT_MINSIZE 8
395#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
559static long *
560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
570PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers for creating the argument tuple passed to functions.  The 1-tuple
   is cached in (self)->arg, which has the performance advantage of calling
   PyTuple_New() only once.

   ARG_TUP stores `obj` in slot 0 of the cached tuple (creating the tuple on
   first use) and takes over the caller's reference to `obj`; any object
   previously held in the slot is released.  If the tuple cannot be created,
   `obj` is released and (self)->arg stays NULL.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Drop the cached tuple when something else also holds a reference to it
   (reference count above 1); otherwise keep it for the next call. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1) {                       \
            Py_CLEAR((self)->arg);                              \
        }                                                       \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provide any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either way, the loading time of a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
703static int
704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
738 self->max_output_len = (self->output_len + n) * 2;
739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
1037 *result = self->input_buffer;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->next_read_idx = num_read;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 return num_read;
1040 }
1041
1042 /* If we get here, we've run off the end of the input string. Return the
1043 remaining string and let the caller figure it out. */
1044 *result = self->input_buffer + self->next_read_idx;
1045 num_read = i - self->next_read_idx;
1046 self->next_read_idx = i;
1047 return num_read;
1048}
1049
1050/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1051 will be modified in place. */
1052static int
1053_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1054{
1055 Py_ssize_t i;
1056 PyObject **memo;
1057
1058 assert(new_size > self->memo_size);
1059
1060 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1061 if (memo == NULL) {
1062 PyErr_NoMemory();
1063 return -1;
1064 }
1065 self->memo = memo;
1066 for (i = self->memo_size; i < new_size; i++)
1067 self->memo[i] = NULL;
1068 self->memo_size = new_size;
1069 return 0;
1070}
1071
1072/* Returns NULL if idx is out of bounds. */
1073static PyObject *
1074_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1075{
1076 if (idx < 0 || idx >= self->memo_size)
1077 return NULL;
1078
1079 return self->memo[idx];
1080}
1081
1082/* Returns -1 (with an exception set) on failure, 0 on success.
1083 This takes its own reference to `value`. */
1084static int
1085_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1086{
1087 PyObject *old_item;
1088
1089 if (idx >= self->memo_size) {
1090 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1091 return -1;
1092 assert(idx < self->memo_size);
1093 }
1094 Py_INCREF(value);
1095 old_item = self->memo[idx];
1096 self->memo[idx] = value;
1097 Py_XDECREF(old_item);
1098 return 0;
1099}
1100
1101static PyObject **
1102_Unpickler_NewMemo(Py_ssize_t new_size)
1103{
1104 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1105 if (memo == NULL)
1106 return NULL;
1107 memset(memo, 0, new_size * sizeof(PyObject *));
1108 return memo;
1109}
1110
1111/* Free the unpickler's memo, taking care to decref any items left in it. */
1112static void
1113_Unpickler_MemoCleanup(UnpicklerObject *self)
1114{
1115 Py_ssize_t i;
1116 PyObject **memo = self->memo;
1117
1118 if (self->memo == NULL)
1119 return;
1120 self->memo = NULL;
1121 i = self->memo_size;
1122 while (--i >= 0) {
1123 Py_XDECREF(memo[i]);
1124 }
1125 PyMem_FREE(memo);
1126}
1127
1128static UnpicklerObject *
1129_Unpickler_New(void)
1130{
1131 UnpicklerObject *self;
1132
1133 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1134 if (self == NULL)
1135 return NULL;
1136
1137 self->stack = (Pdata *)Pdata_New();
1138 if (self->stack == NULL) {
1139 Py_DECREF(self);
1140 return NULL;
1141 }
1142 memset(&self->buffer, 0, sizeof(Py_buffer));
1143
1144 self->memo_size = 32;
1145 self->memo = _Unpickler_NewMemo(self->memo_size);
1146 if (self->memo == NULL) {
1147 Py_DECREF(self);
1148 return NULL;
1149 }
1150
1151 self->arg = NULL;
1152 self->pers_func = NULL;
1153 self->input_buffer = NULL;
1154 self->input_line = NULL;
1155 self->input_len = 0;
1156 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001157 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158 self->read = NULL;
1159 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001160 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001161 self->encoding = NULL;
1162 self->errors = NULL;
1163 self->marks = NULL;
1164 self->num_marks = 0;
1165 self->marks_size = 0;
1166 self->proto = 0;
1167 self->fix_imports = 0;
1168
1169 return self;
1170}
1171
1172/* Returns -1 (with an exception set) on failure, 0 on success. This may
1173 be called once on a freshly created Pickler. */
1174static int
1175_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1176{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001177 self->peek = PyObject_GetAttrString(file, "peek");
1178 if (self->peek == NULL) {
1179 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1180 PyErr_Clear();
1181 else
1182 return -1;
1183 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001184 self->read = PyObject_GetAttrString(file, "read");
1185 self->readline = PyObject_GetAttrString(file, "readline");
1186 if (self->readline == NULL || self->read == NULL) {
1187 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1188 PyErr_SetString(PyExc_TypeError,
1189 "file must have 'read' and 'readline' attributes");
1190 Py_CLEAR(self->read);
1191 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001192 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001193 return -1;
1194 }
1195 return 0;
1196}
1197
1198/* Returns -1 (with an exception set) on failure, 0 on success. This may
1199 be called once on a freshly created Pickler. */
1200static int
1201_Unpickler_SetInputEncoding(UnpicklerObject *self,
1202 const char *encoding,
1203 const char *errors)
1204{
1205 if (encoding == NULL)
1206 encoding = "ASCII";
1207 if (errors == NULL)
1208 errors = "strict";
1209
1210 self->encoding = strdup(encoding);
1211 self->errors = strdup(errors);
1212 if (self->encoding == NULL || self->errors == NULL) {
1213 PyErr_NoMemory();
1214 return -1;
1215 }
1216 return 0;
1217}
1218
1219/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001220static int
1221memo_get(PicklerObject *self, PyObject *key)
1222{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001223 long *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001224 char pdata[30];
1225 int len;
1226
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001227 value = PyMemoTable_Get(self->memo, key);
1228 if (value == NULL) {
1229 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230 return -1;
1231 }
1232
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 if (!self->bin) {
1234 pdata[0] = GET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001235 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001236 len = (int)strlen(pdata);
1237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001270 long x;
1271 char pdata[30];
1272 int len;
1273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
1284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
1285 len = strlen(pdata);
1286 }
1287 else {
1288 if (x < 256) {
1289 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001290 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001291 len = 2;
1292 }
1293 else if (x <= 0xffffffffL) {
1294 pdata[0] = LONG_BINPUT;
1295 pdata[1] = (unsigned char)(x & 0xff);
1296 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1297 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1298 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1299 len = 5;
1300 }
1301 else { /* unlikely */
1302 PyErr_SetString(PicklingError,
1303 "memo id too large for LONG_BINPUT");
1304 return -1;
1305 }
1306 }
1307
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001308 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001309 goto error;
1310
1311 if (0) {
1312 error:
1313 status = -1;
1314 }
1315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001316 return status;
1317}
1318
1319static PyObject *
1320whichmodule(PyObject *global, PyObject *global_name)
1321{
1322 Py_ssize_t i, j;
1323 static PyObject *module_str = NULL;
1324 static PyObject *main_str = NULL;
1325 PyObject *module_name;
1326 PyObject *modules_dict;
1327 PyObject *module;
1328 PyObject *obj;
1329
1330 if (module_str == NULL) {
1331 module_str = PyUnicode_InternFromString("__module__");
1332 if (module_str == NULL)
1333 return NULL;
1334 main_str = PyUnicode_InternFromString("__main__");
1335 if (main_str == NULL)
1336 return NULL;
1337 }
1338
1339 module_name = PyObject_GetAttr(global, module_str);
1340
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001341 /* In some rare cases (e.g., bound methods of extension types),
1342 __module__ can be None. If it is so, then search sys.modules
1343 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001344 if (module_name == Py_None) {
1345 Py_DECREF(module_name);
1346 goto search;
1347 }
1348
1349 if (module_name) {
1350 return module_name;
1351 }
1352 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1353 PyErr_Clear();
1354 else
1355 return NULL;
1356
1357 search:
1358 modules_dict = PySys_GetObject("modules");
1359 if (modules_dict == NULL)
1360 return NULL;
1361
1362 i = 0;
1363 module_name = NULL;
1364 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001365 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001366 continue;
1367
1368 obj = PyObject_GetAttr(module, global_name);
1369 if (obj == NULL) {
1370 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1371 PyErr_Clear();
1372 else
1373 return NULL;
1374 continue;
1375 }
1376
1377 if (obj != global) {
1378 Py_DECREF(obj);
1379 continue;
1380 }
1381
1382 Py_DECREF(obj);
1383 break;
1384 }
1385
1386 /* If no module is found, use __main__. */
1387 if (!j) {
1388 module_name = main_str;
1389 }
1390
1391 Py_INCREF(module_name);
1392 return module_name;
1393}
1394
1395/* fast_save_enter() and fast_save_leave() are guards against recursive
1396 objects when Pickler is used with the "fast mode" (i.e., with object
1397 memoization disabled). If the nesting of a list or dict object exceed
1398 FAST_NESTING_LIMIT, these guards will start keeping an internal
1399 reference to the seen list or dict objects and check whether these objects
1400 are recursive. These are not strictly necessary, since save() has a
1401 hard-coded recursion limit, but they give a nicer error message than the
1402 typical RuntimeError. */
1403static int
1404fast_save_enter(PicklerObject *self, PyObject *obj)
1405{
1406 /* if fast_nesting < 0, we're doing an error exit. */
1407 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1408 PyObject *key = NULL;
1409 if (self->fast_memo == NULL) {
1410 self->fast_memo = PyDict_New();
1411 if (self->fast_memo == NULL) {
1412 self->fast_nesting = -1;
1413 return 0;
1414 }
1415 }
1416 key = PyLong_FromVoidPtr(obj);
1417 if (key == NULL)
1418 return 0;
1419 if (PyDict_GetItem(self->fast_memo, key)) {
1420 Py_DECREF(key);
1421 PyErr_Format(PyExc_ValueError,
1422 "fast mode: can't pickle cyclic objects "
1423 "including object type %.200s at %p",
1424 obj->ob_type->tp_name, obj);
1425 self->fast_nesting = -1;
1426 return 0;
1427 }
1428 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1429 Py_DECREF(key);
1430 self->fast_nesting = -1;
1431 return 0;
1432 }
1433 Py_DECREF(key);
1434 }
1435 return 1;
1436}
1437
1438static int
1439fast_save_leave(PicklerObject *self, PyObject *obj)
1440{
1441 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1442 PyObject *key = PyLong_FromVoidPtr(obj);
1443 if (key == NULL)
1444 return 0;
1445 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1446 Py_DECREF(key);
1447 return 0;
1448 }
1449 Py_DECREF(key);
1450 }
1451 return 1;
1452}
1453
1454static int
1455save_none(PicklerObject *self, PyObject *obj)
1456{
1457 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001458 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001459 return -1;
1460
1461 return 0;
1462}
1463
1464static int
1465save_bool(PicklerObject *self, PyObject *obj)
1466{
1467 static const char *buf[2] = { FALSE, TRUE };
1468 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1469 int p = (obj == Py_True);
1470
1471 if (self->proto >= 2) {
1472 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001473 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001474 return -1;
1475 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001476 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001477 return -1;
1478
1479 return 0;
1480}
1481
1482static int
1483save_int(PicklerObject *self, long x)
1484{
1485 char pdata[32];
1486 int len = 0;
1487
1488 if (!self->bin
1489#if SIZEOF_LONG > 4
1490 || x > 0x7fffffffL || x < -0x80000000L
1491#endif
1492 ) {
1493 /* Text-mode pickle, or long too big to fit in the 4-byte
1494 * signed BININT format: store as a string.
1495 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001496 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1497 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001498 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499 return -1;
1500 }
1501 else {
1502 /* Binary pickle and x fits in a signed 4-byte int. */
1503 pdata[1] = (unsigned char)(x & 0xff);
1504 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1505 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1506 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1507
1508 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1509 if (pdata[2] == 0) {
1510 pdata[0] = BININT1;
1511 len = 2;
1512 }
1513 else {
1514 pdata[0] = BININT2;
1515 len = 3;
1516 }
1517 }
1518 else {
1519 pdata[0] = BININT;
1520 len = 5;
1521 }
1522
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001523 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001524 return -1;
1525 }
1526
1527 return 0;
1528}
1529
1530static int
1531save_long(PicklerObject *self, PyObject *obj)
1532{
1533 PyObject *repr = NULL;
1534 Py_ssize_t size;
1535 long val = PyLong_AsLong(obj);
1536 int status = 0;
1537
1538 const char long_op = LONG;
1539
1540 if (val == -1 && PyErr_Occurred()) {
1541 /* out of range for int pickling */
1542 PyErr_Clear();
1543 }
1544 else
1545 return save_int(self, val);
1546
1547 if (self->proto >= 2) {
1548 /* Linear-time pickling. */
1549 size_t nbits;
1550 size_t nbytes;
1551 unsigned char *pdata;
1552 char header[5];
1553 int i;
1554 int sign = _PyLong_Sign(obj);
1555
1556 if (sign == 0) {
1557 header[0] = LONG1;
1558 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001559 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001560 goto error;
1561 return 0;
1562 }
1563 nbits = _PyLong_NumBits(obj);
1564 if (nbits == (size_t)-1 && PyErr_Occurred())
1565 goto error;
1566 /* How many bytes do we need? There are nbits >> 3 full
1567 * bytes of data, and nbits & 7 leftover bits. If there
1568 * are any leftover bits, then we clearly need another
1569 * byte. Wnat's not so obvious is that we *probably*
1570 * need another byte even if there aren't any leftovers:
1571 * the most-significant bit of the most-significant byte
1572 * acts like a sign bit, and it's usually got a sense
1573 * opposite of the one we need. The exception is longs
1574 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1575 * its own 256's-complement, so has the right sign bit
1576 * even without the extra byte. That's a pain to check
1577 * for in advance, though, so we always grab an extra
1578 * byte at the start, and cut it back later if possible.
1579 */
1580 nbytes = (nbits >> 3) + 1;
1581 if (nbytes > INT_MAX) {
1582 PyErr_SetString(PyExc_OverflowError,
1583 "long too large to pickle");
1584 goto error;
1585 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001586 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001587 if (repr == NULL)
1588 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001589 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001590 i = _PyLong_AsByteArray((PyLongObject *)obj,
1591 pdata, nbytes,
1592 1 /* little endian */ , 1 /* signed */ );
1593 if (i < 0)
1594 goto error;
1595 /* If the long is negative, this may be a byte more than
1596 * needed. This is so iff the MSB is all redundant sign
1597 * bits.
1598 */
1599 if (sign < 0 &&
1600 nbytes > 1 &&
1601 pdata[nbytes - 1] == 0xff &&
1602 (pdata[nbytes - 2] & 0x80) != 0) {
1603 nbytes--;
1604 }
1605
1606 if (nbytes < 256) {
1607 header[0] = LONG1;
1608 header[1] = (unsigned char)nbytes;
1609 size = 2;
1610 }
1611 else {
1612 header[0] = LONG4;
1613 size = (int)nbytes;
1614 for (i = 1; i < 5; i++) {
1615 header[i] = (unsigned char)(size & 0xff);
1616 size >>= 8;
1617 }
1618 size = 5;
1619 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001620 if (_Pickler_Write(self, header, size) < 0 ||
1621 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001622 goto error;
1623 }
1624 else {
1625 char *string;
1626
Mark Dickinson8dd05142009-01-20 20:43:58 +00001627 /* proto < 2: write the repr and newline. This is quadratic-time (in
1628 the number of digits), in both directions. We add a trailing 'L'
1629 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001630
1631 repr = PyObject_Repr(obj);
1632 if (repr == NULL)
1633 goto error;
1634
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001635 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001636 if (string == NULL)
1637 goto error;
1638
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001639 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1640 _Pickler_Write(self, string, size) < 0 ||
1641 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001642 goto error;
1643 }
1644
1645 if (0) {
1646 error:
1647 status = -1;
1648 }
1649 Py_XDECREF(repr);
1650
1651 return status;
1652}
1653
1654static int
1655save_float(PicklerObject *self, PyObject *obj)
1656{
1657 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1658
1659 if (self->bin) {
1660 char pdata[9];
1661 pdata[0] = BINFLOAT;
1662 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1663 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001664 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001665 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001666 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001668 int result = -1;
1669 char *buf = NULL;
1670 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001672 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001673 goto done;
1674
Mark Dickinson3e09f432009-04-17 08:41:23 +00001675 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001676 if (!buf) {
1677 PyErr_NoMemory();
1678 goto done;
1679 }
1680
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001681 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001682 goto done;
1683
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001684 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001685 goto done;
1686
1687 result = 0;
1688done:
1689 PyMem_Free(buf);
1690 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001691 }
1692
1693 return 0;
1694}
1695
1696static int
1697save_bytes(PicklerObject *self, PyObject *obj)
1698{
1699 if (self->proto < 3) {
1700 /* Older pickle protocols do not have an opcode for pickling bytes
1701 objects. Therefore, we need to fake the copy protocol (i.e.,
1702 the __reduce__ method) to permit bytes object unpickling. */
1703 PyObject *reduce_value = NULL;
1704 PyObject *bytelist = NULL;
1705 int status;
1706
1707 bytelist = PySequence_List(obj);
1708 if (bytelist == NULL)
1709 return -1;
1710
1711 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1712 bytelist);
1713 if (reduce_value == NULL) {
1714 Py_DECREF(bytelist);
1715 return -1;
1716 }
1717
1718 /* save_reduce() will memoize the object automatically. */
1719 status = save_reduce(self, reduce_value, obj);
1720 Py_DECREF(reduce_value);
1721 Py_DECREF(bytelist);
1722 return status;
1723 }
1724 else {
1725 Py_ssize_t size;
1726 char header[5];
1727 int len;
1728
1729 size = PyBytes_Size(obj);
1730 if (size < 0)
1731 return -1;
1732
1733 if (size < 256) {
1734 header[0] = SHORT_BINBYTES;
1735 header[1] = (unsigned char)size;
1736 len = 2;
1737 }
1738 else if (size <= 0xffffffffL) {
1739 header[0] = BINBYTES;
1740 header[1] = (unsigned char)(size & 0xff);
1741 header[2] = (unsigned char)((size >> 8) & 0xff);
1742 header[3] = (unsigned char)((size >> 16) & 0xff);
1743 header[4] = (unsigned char)((size >> 24) & 0xff);
1744 len = 5;
1745 }
1746 else {
1747 return -1; /* string too large */
1748 }
1749
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001750 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001751 return -1;
1752
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001753 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001754 return -1;
1755
1756 if (memo_put(self, obj) < 0)
1757 return -1;
1758
1759 return 0;
1760 }
1761}
1762
1763/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1764 backslash and newline characters to \uXXXX escapes. */
1765static PyObject *
1766raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1767{
1768 PyObject *repr, *result;
1769 char *p;
1770 char *q;
1771
1772 static const char *hexdigits = "0123456789abcdef";
1773
1774#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001775 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001776#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001777 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001779
1780 if (size > PY_SSIZE_T_MAX / expandsize)
1781 return PyErr_NoMemory();
1782
1783 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001784 if (repr == NULL)
1785 return NULL;
1786 if (size == 0)
1787 goto done;
1788
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001789 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001790 while (size-- > 0) {
1791 Py_UNICODE ch = *s++;
1792#ifdef Py_UNICODE_WIDE
1793 /* Map 32-bit characters to '\Uxxxxxxxx' */
1794 if (ch >= 0x10000) {
1795 *p++ = '\\';
1796 *p++ = 'U';
1797 *p++ = hexdigits[(ch >> 28) & 0xf];
1798 *p++ = hexdigits[(ch >> 24) & 0xf];
1799 *p++ = hexdigits[(ch >> 20) & 0xf];
1800 *p++ = hexdigits[(ch >> 16) & 0xf];
1801 *p++ = hexdigits[(ch >> 12) & 0xf];
1802 *p++ = hexdigits[(ch >> 8) & 0xf];
1803 *p++ = hexdigits[(ch >> 4) & 0xf];
1804 *p++ = hexdigits[ch & 15];
1805 }
1806 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001807#else
1808 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1809 if (ch >= 0xD800 && ch < 0xDC00) {
1810 Py_UNICODE ch2;
1811 Py_UCS4 ucs;
1812
1813 ch2 = *s++;
1814 size--;
1815 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1816 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1817 *p++ = '\\';
1818 *p++ = 'U';
1819 *p++ = hexdigits[(ucs >> 28) & 0xf];
1820 *p++ = hexdigits[(ucs >> 24) & 0xf];
1821 *p++ = hexdigits[(ucs >> 20) & 0xf];
1822 *p++ = hexdigits[(ucs >> 16) & 0xf];
1823 *p++ = hexdigits[(ucs >> 12) & 0xf];
1824 *p++ = hexdigits[(ucs >> 8) & 0xf];
1825 *p++ = hexdigits[(ucs >> 4) & 0xf];
1826 *p++ = hexdigits[ucs & 0xf];
1827 continue;
1828 }
1829 /* Fall through: isolated surrogates are copied as-is */
1830 s--;
1831 size++;
1832 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001833#endif
1834 /* Map 16-bit characters to '\uxxxx' */
1835 if (ch >= 256 || ch == '\\' || ch == '\n') {
1836 *p++ = '\\';
1837 *p++ = 'u';
1838 *p++ = hexdigits[(ch >> 12) & 0xf];
1839 *p++ = hexdigits[(ch >> 8) & 0xf];
1840 *p++ = hexdigits[(ch >> 4) & 0xf];
1841 *p++ = hexdigits[ch & 15];
1842 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001843 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001844 else
1845 *p++ = (char) ch;
1846 }
1847 size = p - q;
1848
1849 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001850 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001851 Py_DECREF(repr);
1852 return result;
1853}
1854
1855static int
1856save_unicode(PicklerObject *self, PyObject *obj)
1857{
1858 Py_ssize_t size;
1859 PyObject *encoded = NULL;
1860
1861 if (self->bin) {
1862 char pdata[5];
1863
Victor Stinner485fb562010-04-13 11:07:24 +00001864 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1865 PyUnicode_GET_SIZE(obj),
1866 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001867 if (encoded == NULL)
1868 goto error;
1869
1870 size = PyBytes_GET_SIZE(encoded);
1871 if (size < 0 || size > 0xffffffffL)
1872 goto error; /* string too large */
1873
1874 pdata[0] = BINUNICODE;
1875 pdata[1] = (unsigned char)(size & 0xff);
1876 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1877 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1878 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1879
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001880 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001881 goto error;
1882
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001883 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001884 goto error;
1885 }
1886 else {
1887 const char unicode_op = UNICODE;
1888
1889 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1890 PyUnicode_GET_SIZE(obj));
1891 if (encoded == NULL)
1892 goto error;
1893
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001894 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001895 goto error;
1896
1897 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001898 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001899 goto error;
1900
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001901 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902 goto error;
1903 }
1904 if (memo_put(self, obj) < 0)
1905 goto error;
1906
1907 Py_DECREF(encoded);
1908 return 0;
1909
1910 error:
1911 Py_XDECREF(encoded);
1912 return -1;
1913}
1914
1915/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1916static int
1917store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1918{
1919 int i;
1920
1921 assert(PyTuple_Size(t) == len);
1922
1923 for (i = 0; i < len; i++) {
1924 PyObject *element = PyTuple_GET_ITEM(t, i);
1925
1926 if (element == NULL)
1927 return -1;
1928 if (save(self, element, 0) < 0)
1929 return -1;
1930 }
1931
1932 return 0;
1933}
1934
1935/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1936 * used across protocols to minimize the space needed to pickle them.
1937 * Tuples are also the only builtin immutable type that can be recursive
1938 * (a tuple can be reached from itself), and that requires some subtle
1939 * magic so that it works in all cases. IOW, this is a long routine.
1940 */
1941static int
1942save_tuple(PicklerObject *self, PyObject *obj)
1943{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001944 int len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001945
1946 const char mark_op = MARK;
1947 const char tuple_op = TUPLE;
1948 const char pop_op = POP;
1949 const char pop_mark_op = POP_MARK;
1950 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1951
1952 if ((len = PyTuple_Size(obj)) < 0)
1953 return -1;
1954
1955 if (len == 0) {
1956 char pdata[2];
1957
1958 if (self->proto) {
1959 pdata[0] = EMPTY_TUPLE;
1960 len = 1;
1961 }
1962 else {
1963 pdata[0] = MARK;
1964 pdata[1] = TUPLE;
1965 len = 2;
1966 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001967 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001968 return -1;
1969 return 0;
1970 }
1971
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001972 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 * saving the tuple elements, the tuple must be recursive, in
1974 * which case we'll pop everything we put on the stack, and fetch
1975 * its value from the memo.
1976 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001977 if (len <= 3 && self->proto >= 2) {
1978 /* Use TUPLE{1,2,3} opcodes. */
1979 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001980 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001981
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001982 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001983 /* pop the len elements */
1984 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001985 if (_Pickler_Write(self, &pop_op, 1) < 0)
1986 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001987 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001988 if (memo_get(self, obj) < 0)
1989 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001991 return 0;
1992 }
1993 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1995 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996 }
1997 goto memoize;
1998 }
1999
2000 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2001 * Generate MARK e1 e2 ... TUPLE
2002 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002003 if (_Pickler_Write(self, &mark_op, 1) < 0)
2004 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005
2006 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002007 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002009 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002010 /* pop the stack stuff we pushed */
2011 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002012 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 }
2015 else {
2016 /* Note that we pop one more than len, to remove
2017 * the MARK too.
2018 */
2019 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002020 if (_Pickler_Write(self, &pop_op, 1) < 0)
2021 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002022 }
2023 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002024 if (memo_get(self, obj) < 0)
2025 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return 0;
2028 }
2029 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002030 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2031 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 }
2033
2034 memoize:
2035 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002036 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002037
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002038 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002039}
2040
2041/* iter is an iterator giving items, and we batch up chunks of
2042 * MARK item item ... item APPENDS
2043 * opcode sequences. Calling code should have arranged to first create an
2044 * empty list, or list-like object, for the APPENDS to operate on.
2045 * Returns 0 on success, <0 on error.
2046 */
2047static int
2048batch_list(PicklerObject *self, PyObject *iter)
2049{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002050 PyObject *obj = NULL;
2051 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002052 int i, n;
2053
2054 const char mark_op = MARK;
2055 const char append_op = APPEND;
2056 const char appends_op = APPENDS;
2057
2058 assert(iter != NULL);
2059
2060 /* XXX: I think this function could be made faster by avoiding the
2061 iterator interface and fetching objects directly from list using
2062 PyList_GET_ITEM.
2063 */
2064
2065 if (self->proto == 0) {
2066 /* APPENDS isn't available; do one at a time. */
2067 for (;;) {
2068 obj = PyIter_Next(iter);
2069 if (obj == NULL) {
2070 if (PyErr_Occurred())
2071 return -1;
2072 break;
2073 }
2074 i = save(self, obj, 0);
2075 Py_DECREF(obj);
2076 if (i < 0)
2077 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002078 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002079 return -1;
2080 }
2081 return 0;
2082 }
2083
2084 /* proto > 0: write in batches of BATCHSIZE. */
2085 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002086 /* Get first item */
2087 firstitem = PyIter_Next(iter);
2088 if (firstitem == NULL) {
2089 if (PyErr_Occurred())
2090 goto error;
2091
2092 /* nothing more to add */
2093 break;
2094 }
2095
2096 /* Try to get a second item */
2097 obj = PyIter_Next(iter);
2098 if (obj == NULL) {
2099 if (PyErr_Occurred())
2100 goto error;
2101
2102 /* Only one item to write */
2103 if (save(self, firstitem, 0) < 0)
2104 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002105 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002106 goto error;
2107 Py_CLEAR(firstitem);
2108 break;
2109 }
2110
2111 /* More than one item to write */
2112
2113 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002114 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002115 goto error;
2116
2117 if (save(self, firstitem, 0) < 0)
2118 goto error;
2119 Py_CLEAR(firstitem);
2120 n = 1;
2121
2122 /* Fetch and save up to BATCHSIZE items */
2123 while (obj) {
2124 if (save(self, obj, 0) < 0)
2125 goto error;
2126 Py_CLEAR(obj);
2127 n += 1;
2128
2129 if (n == BATCHSIZE)
2130 break;
2131
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002132 obj = PyIter_Next(iter);
2133 if (obj == NULL) {
2134 if (PyErr_Occurred())
2135 goto error;
2136 break;
2137 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002138 }
2139
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002140 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002141 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002142
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002143 } while (n == BATCHSIZE);
2144 return 0;
2145
2146 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002147 Py_XDECREF(firstitem);
2148 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002149 return -1;
2150}
2151
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002152/* This is a variant of batch_list() above, specialized for lists (with no
2153 * support for list subclasses). Like batch_list(), we batch up chunks of
2154 * MARK item item ... item APPENDS
2155 * opcode sequences. Calling code should have arranged to first create an
2156 * empty list, or list-like object, for the APPENDS to operate on.
2157 * Returns 0 on success, -1 on error.
2158 *
2159 * This version is considerably faster than batch_list(), if less general.
2160 *
2161 * Note that this only works for protocols > 0.
2162 */
2163static int
2164batch_list_exact(PicklerObject *self, PyObject *obj)
2165{
2166 PyObject *item = NULL;
2167 int this_batch, total;
2168
2169 const char append_op = APPEND;
2170 const char appends_op = APPENDS;
2171 const char mark_op = MARK;
2172
2173 assert(obj != NULL);
2174 assert(self->proto > 0);
2175 assert(PyList_CheckExact(obj));
2176
2177 if (PyList_GET_SIZE(obj) == 1) {
2178 item = PyList_GET_ITEM(obj, 0);
2179 if (save(self, item, 0) < 0)
2180 return -1;
2181 if (_Pickler_Write(self, &append_op, 1) < 0)
2182 return -1;
2183 return 0;
2184 }
2185
2186 /* Write in batches of BATCHSIZE. */
2187 total = 0;
2188 do {
2189 this_batch = 0;
2190 if (_Pickler_Write(self, &mark_op, 1) < 0)
2191 return -1;
2192 while (total < PyList_GET_SIZE(obj)) {
2193 item = PyList_GET_ITEM(obj, total);
2194 if (save(self, item, 0) < 0)
2195 return -1;
2196 total++;
2197 if (++this_batch == BATCHSIZE)
2198 break;
2199 }
2200 if (_Pickler_Write(self, &appends_op, 1) < 0)
2201 return -1;
2202
2203 } while (total < PyList_GET_SIZE(obj));
2204
2205 return 0;
2206}
2207
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002208static int
2209save_list(PicklerObject *self, PyObject *obj)
2210{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002211 char header[3];
2212 int len;
2213 int status = 0;
2214
2215 if (self->fast && !fast_save_enter(self, obj))
2216 goto error;
2217
2218 /* Create an empty list. */
2219 if (self->bin) {
2220 header[0] = EMPTY_LIST;
2221 len = 1;
2222 }
2223 else {
2224 header[0] = MARK;
2225 header[1] = LIST;
2226 len = 2;
2227 }
2228
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002229 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002230 goto error;
2231
2232 /* Get list length, and bow out early if empty. */
2233 if ((len = PyList_Size(obj)) < 0)
2234 goto error;
2235
2236 if (memo_put(self, obj) < 0)
2237 goto error;
2238
2239 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002240 /* Materialize the list elements. */
2241 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002242 if (Py_EnterRecursiveCall(" while pickling an object"))
2243 goto error;
2244 status = batch_list_exact(self, obj);
2245 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002246 } else {
2247 PyObject *iter = PyObject_GetIter(obj);
2248 if (iter == NULL)
2249 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002250
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002251 if (Py_EnterRecursiveCall(" while pickling an object")) {
2252 Py_DECREF(iter);
2253 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002254 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002255 status = batch_list(self, iter);
2256 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002257 Py_DECREF(iter);
2258 }
2259 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002260 if (0) {
2261 error:
2262 status = -1;
2263 }
2264
2265 if (self->fast && !fast_save_leave(self, obj))
2266 status = -1;
2267
2268 return status;
2269}
2270
2271/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2272 * MARK key value ... key value SETITEMS
2273 * opcode sequences. Calling code should have arranged to first create an
2274 * empty dict, or dict-like object, for the SETITEMS to operate on.
2275 * Returns 0 on success, <0 on error.
2276 *
2277 * This is very much like batch_list(). The difference between saving
2278 * elements directly, and picking apart two-tuples, is so long-winded at
2279 * the C level, though, that attempts to combine these routines were too
2280 * ugly to bear.
2281 */
2282static int
2283batch_dict(PicklerObject *self, PyObject *iter)
2284{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002285 PyObject *obj = NULL;
2286 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002287 int i, n;
2288
2289 const char mark_op = MARK;
2290 const char setitem_op = SETITEM;
2291 const char setitems_op = SETITEMS;
2292
2293 assert(iter != NULL);
2294
2295 if (self->proto == 0) {
2296 /* SETITEMS isn't available; do one at a time. */
2297 for (;;) {
2298 obj = PyIter_Next(iter);
2299 if (obj == NULL) {
2300 if (PyErr_Occurred())
2301 return -1;
2302 break;
2303 }
2304 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2305 PyErr_SetString(PyExc_TypeError, "dict items "
2306 "iterator must return 2-tuples");
2307 return -1;
2308 }
2309 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2310 if (i >= 0)
2311 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2312 Py_DECREF(obj);
2313 if (i < 0)
2314 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002315 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002316 return -1;
2317 }
2318 return 0;
2319 }
2320
2321 /* proto > 0: write in batches of BATCHSIZE. */
2322 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002323 /* Get first item */
2324 firstitem = PyIter_Next(iter);
2325 if (firstitem == NULL) {
2326 if (PyErr_Occurred())
2327 goto error;
2328
2329 /* nothing more to add */
2330 break;
2331 }
2332 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2333 PyErr_SetString(PyExc_TypeError, "dict items "
2334 "iterator must return 2-tuples");
2335 goto error;
2336 }
2337
2338 /* Try to get a second item */
2339 obj = PyIter_Next(iter);
2340 if (obj == NULL) {
2341 if (PyErr_Occurred())
2342 goto error;
2343
2344 /* Only one item to write */
2345 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2346 goto error;
2347 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2348 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002349 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002350 goto error;
2351 Py_CLEAR(firstitem);
2352 break;
2353 }
2354
2355 /* More than one item to write */
2356
2357 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002358 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002359 goto error;
2360
2361 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2362 goto error;
2363 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2364 goto error;
2365 Py_CLEAR(firstitem);
2366 n = 1;
2367
2368 /* Fetch and save up to BATCHSIZE items */
2369 while (obj) {
2370 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2371 PyErr_SetString(PyExc_TypeError, "dict items "
2372 "iterator must return 2-tuples");
2373 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002374 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002375 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2376 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2377 goto error;
2378 Py_CLEAR(obj);
2379 n += 1;
2380
2381 if (n == BATCHSIZE)
2382 break;
2383
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002384 obj = PyIter_Next(iter);
2385 if (obj == NULL) {
2386 if (PyErr_Occurred())
2387 goto error;
2388 break;
2389 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002390 }
2391
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002392 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002393 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002394
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002395 } while (n == BATCHSIZE);
2396 return 0;
2397
2398 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002399 Py_XDECREF(firstitem);
2400 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002401 return -1;
2402}
2403
Collin Winter5c9b02d2009-05-25 05:43:30 +00002404/* This is a variant of batch_dict() above that specializes for dicts, with no
2405 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2406 * MARK key value ... key value SETITEMS
2407 * opcode sequences. Calling code should have arranged to first create an
2408 * empty dict, or dict-like object, for the SETITEMS to operate on.
2409 * Returns 0 on success, -1 on error.
2410 *
2411 * Note that this currently doesn't work for protocol 0.
2412 */
2413static int
2414batch_dict_exact(PicklerObject *self, PyObject *obj)
2415{
2416 PyObject *key = NULL, *value = NULL;
2417 int i;
2418 Py_ssize_t dict_size, ppos = 0;
2419
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002420 const char mark_op = MARK;
2421 const char setitem_op = SETITEM;
2422 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002423
2424 assert(obj != NULL);
2425 assert(self->proto > 0);
2426
2427 dict_size = PyDict_Size(obj);
2428
2429 /* Special-case len(d) == 1 to save space. */
2430 if (dict_size == 1) {
2431 PyDict_Next(obj, &ppos, &key, &value);
2432 if (save(self, key, 0) < 0)
2433 return -1;
2434 if (save(self, value, 0) < 0)
2435 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002436 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002437 return -1;
2438 return 0;
2439 }
2440
2441 /* Write in batches of BATCHSIZE. */
2442 do {
2443 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002444 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002445 return -1;
2446 while (PyDict_Next(obj, &ppos, &key, &value)) {
2447 if (save(self, key, 0) < 0)
2448 return -1;
2449 if (save(self, value, 0) < 0)
2450 return -1;
2451 if (++i == BATCHSIZE)
2452 break;
2453 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002454 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002455 return -1;
2456 if (PyDict_Size(obj) != dict_size) {
2457 PyErr_Format(
2458 PyExc_RuntimeError,
2459 "dictionary changed size during iteration");
2460 return -1;
2461 }
2462
2463 } while (i == BATCHSIZE);
2464 return 0;
2465}
2466
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002467static int
2468save_dict(PicklerObject *self, PyObject *obj)
2469{
2470 PyObject *items, *iter;
2471 char header[3];
2472 int len;
2473 int status = 0;
2474
2475 if (self->fast && !fast_save_enter(self, obj))
2476 goto error;
2477
2478 /* Create an empty dict. */
2479 if (self->bin) {
2480 header[0] = EMPTY_DICT;
2481 len = 1;
2482 }
2483 else {
2484 header[0] = MARK;
2485 header[1] = DICT;
2486 len = 2;
2487 }
2488
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002489 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002490 goto error;
2491
2492 /* Get dict size, and bow out early if empty. */
2493 if ((len = PyDict_Size(obj)) < 0)
2494 goto error;
2495
2496 if (memo_put(self, obj) < 0)
2497 goto error;
2498
2499 if (len != 0) {
2500 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002501 if (PyDict_CheckExact(obj) && self->proto > 0) {
2502 /* We can take certain shortcuts if we know this is a dict and
2503 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002504 if (Py_EnterRecursiveCall(" while pickling an object"))
2505 goto error;
2506 status = batch_dict_exact(self, obj);
2507 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002508 } else {
2509 items = PyObject_CallMethod(obj, "items", "()");
2510 if (items == NULL)
2511 goto error;
2512 iter = PyObject_GetIter(items);
2513 Py_DECREF(items);
2514 if (iter == NULL)
2515 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002516 if (Py_EnterRecursiveCall(" while pickling an object")) {
2517 Py_DECREF(iter);
2518 goto error;
2519 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002520 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002521 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002522 Py_DECREF(iter);
2523 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002524 }
2525
2526 if (0) {
2527 error:
2528 status = -1;
2529 }
2530
2531 if (self->fast && !fast_save_leave(self, obj))
2532 status = -1;
2533
2534 return status;
2535}
2536
2537static int
2538save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2539{
2540 static PyObject *name_str = NULL;
2541 PyObject *global_name = NULL;
2542 PyObject *module_name = NULL;
2543 PyObject *module = NULL;
2544 PyObject *cls;
2545 int status = 0;
2546
2547 const char global_op = GLOBAL;
2548
2549 if (name_str == NULL) {
2550 name_str = PyUnicode_InternFromString("__name__");
2551 if (name_str == NULL)
2552 goto error;
2553 }
2554
2555 if (name) {
2556 global_name = name;
2557 Py_INCREF(global_name);
2558 }
2559 else {
2560 global_name = PyObject_GetAttr(obj, name_str);
2561 if (global_name == NULL)
2562 goto error;
2563 }
2564
2565 module_name = whichmodule(obj, global_name);
2566 if (module_name == NULL)
2567 goto error;
2568
2569 /* XXX: Change to use the import C API directly with level=0 to disallow
2570 relative imports.
2571
2572 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2573 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2574 custom import functions (IMHO, this would be a nice security
2575 feature). The import C API would need to be extended to support the
2576 extra parameters of __import__ to fix that. */
2577 module = PyImport_Import(module_name);
2578 if (module == NULL) {
2579 PyErr_Format(PicklingError,
2580 "Can't pickle %R: import of module %R failed",
2581 obj, module_name);
2582 goto error;
2583 }
2584 cls = PyObject_GetAttr(module, global_name);
2585 if (cls == NULL) {
2586 PyErr_Format(PicklingError,
2587 "Can't pickle %R: attribute lookup %S.%S failed",
2588 obj, module_name, global_name);
2589 goto error;
2590 }
2591 if (cls != obj) {
2592 Py_DECREF(cls);
2593 PyErr_Format(PicklingError,
2594 "Can't pickle %R: it's not the same object as %S.%S",
2595 obj, module_name, global_name);
2596 goto error;
2597 }
2598 Py_DECREF(cls);
2599
2600 if (self->proto >= 2) {
2601 /* See whether this is in the extension registry, and if
2602 * so generate an EXT opcode.
2603 */
2604 PyObject *code_obj; /* extension code as Python object */
2605 long code; /* extension code as C value */
2606 char pdata[5];
2607 int n;
2608
2609 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2610 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2611 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2612 /* The object is not registered in the extension registry.
2613 This is the most likely code path. */
2614 if (code_obj == NULL)
2615 goto gen_global;
2616
2617 /* XXX: pickle.py doesn't check neither the type, nor the range
2618 of the value returned by the extension_registry. It should for
2619 consistency. */
2620
2621 /* Verify code_obj has the right type and value. */
2622 if (!PyLong_Check(code_obj)) {
2623 PyErr_Format(PicklingError,
2624 "Can't pickle %R: extension code %R isn't an integer",
2625 obj, code_obj);
2626 goto error;
2627 }
2628 code = PyLong_AS_LONG(code_obj);
2629 if (code <= 0 || code > 0x7fffffffL) {
2630 PyErr_Format(PicklingError,
2631 "Can't pickle %R: extension code %ld is out of range",
2632 obj, code);
2633 goto error;
2634 }
2635
2636 /* Generate an EXT opcode. */
2637 if (code <= 0xff) {
2638 pdata[0] = EXT1;
2639 pdata[1] = (unsigned char)code;
2640 n = 2;
2641 }
2642 else if (code <= 0xffff) {
2643 pdata[0] = EXT2;
2644 pdata[1] = (unsigned char)(code & 0xff);
2645 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2646 n = 3;
2647 }
2648 else {
2649 pdata[0] = EXT4;
2650 pdata[1] = (unsigned char)(code & 0xff);
2651 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2652 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2653 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2654 n = 5;
2655 }
2656
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002657 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002658 goto error;
2659 }
2660 else {
2661 /* Generate a normal global opcode if we are using a pickle
2662 protocol <= 2, or if the object is not registered in the
2663 extension registry. */
2664 PyObject *encoded;
2665 PyObject *(*unicode_encoder)(PyObject *);
2666
2667 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002668 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002669 goto error;
2670
2671 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2672 the module name and the global name using UTF-8. We do so only when
2673 we are using the pickle protocol newer than version 3. This is to
2674 ensure compatibility with older Unpickler running on Python 2.x. */
2675 if (self->proto >= 3) {
2676 unicode_encoder = PyUnicode_AsUTF8String;
2677 }
2678 else {
2679 unicode_encoder = PyUnicode_AsASCIIString;
2680 }
2681
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002682 /* For protocol < 3 and if the user didn't request against doing so,
2683 we convert module names to the old 2.x module names. */
2684 if (self->fix_imports) {
2685 PyObject *key;
2686 PyObject *item;
2687
2688 key = PyTuple_Pack(2, module_name, global_name);
2689 if (key == NULL)
2690 goto error;
2691 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2692 Py_DECREF(key);
2693 if (item) {
2694 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2695 PyErr_Format(PyExc_RuntimeError,
2696 "_compat_pickle.REVERSE_NAME_MAPPING values "
2697 "should be 2-tuples, not %.200s",
2698 Py_TYPE(item)->tp_name);
2699 goto error;
2700 }
2701 Py_CLEAR(module_name);
2702 Py_CLEAR(global_name);
2703 module_name = PyTuple_GET_ITEM(item, 0);
2704 global_name = PyTuple_GET_ITEM(item, 1);
2705 if (!PyUnicode_Check(module_name) ||
2706 !PyUnicode_Check(global_name)) {
2707 PyErr_Format(PyExc_RuntimeError,
2708 "_compat_pickle.REVERSE_NAME_MAPPING values "
2709 "should be pairs of str, not (%.200s, %.200s)",
2710 Py_TYPE(module_name)->tp_name,
2711 Py_TYPE(global_name)->tp_name);
2712 goto error;
2713 }
2714 Py_INCREF(module_name);
2715 Py_INCREF(global_name);
2716 }
2717 else if (PyErr_Occurred()) {
2718 goto error;
2719 }
2720
2721 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2722 if (item) {
2723 if (!PyUnicode_Check(item)) {
2724 PyErr_Format(PyExc_RuntimeError,
2725 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2726 "should be strings, not %.200s",
2727 Py_TYPE(item)->tp_name);
2728 goto error;
2729 }
2730 Py_CLEAR(module_name);
2731 module_name = item;
2732 Py_INCREF(module_name);
2733 }
2734 else if (PyErr_Occurred()) {
2735 goto error;
2736 }
2737 }
2738
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002739 /* Save the name of the module. */
2740 encoded = unicode_encoder(module_name);
2741 if (encoded == NULL) {
2742 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2743 PyErr_Format(PicklingError,
2744 "can't pickle module identifier '%S' using "
2745 "pickle protocol %i", module_name, self->proto);
2746 goto error;
2747 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002748 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002749 PyBytes_GET_SIZE(encoded)) < 0) {
2750 Py_DECREF(encoded);
2751 goto error;
2752 }
2753 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002754 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002755 goto error;
2756
2757 /* Save the name of the module. */
2758 encoded = unicode_encoder(global_name);
2759 if (encoded == NULL) {
2760 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2761 PyErr_Format(PicklingError,
2762 "can't pickle global identifier '%S' using "
2763 "pickle protocol %i", global_name, self->proto);
2764 goto error;
2765 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002766 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002767 PyBytes_GET_SIZE(encoded)) < 0) {
2768 Py_DECREF(encoded);
2769 goto error;
2770 }
2771 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002772 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002773 goto error;
2774
2775 /* Memoize the object. */
2776 if (memo_put(self, obj) < 0)
2777 goto error;
2778 }
2779
2780 if (0) {
2781 error:
2782 status = -1;
2783 }
2784 Py_XDECREF(module_name);
2785 Py_XDECREF(global_name);
2786 Py_XDECREF(module);
2787
2788 return status;
2789}
2790
2791static int
2792save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2793{
2794 PyObject *pid = NULL;
2795 int status = 0;
2796
2797 const char persid_op = PERSID;
2798 const char binpersid_op = BINPERSID;
2799
2800 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002801 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002802 if (pid == NULL)
2803 return -1;
2804
2805 if (pid != Py_None) {
2806 if (self->bin) {
2807 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002808 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002809 goto error;
2810 }
2811 else {
2812 PyObject *pid_str = NULL;
2813 char *pid_ascii_bytes;
2814 Py_ssize_t size;
2815
2816 pid_str = PyObject_Str(pid);
2817 if (pid_str == NULL)
2818 goto error;
2819
2820 /* XXX: Should it check whether the persistent id only contains
2821 ASCII characters? And what if the pid contains embedded
2822 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002823 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002824 Py_DECREF(pid_str);
2825 if (pid_ascii_bytes == NULL)
2826 goto error;
2827
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002828 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2829 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2830 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002831 goto error;
2832 }
2833 status = 1;
2834 }
2835
2836 if (0) {
2837 error:
2838 status = -1;
2839 }
2840 Py_XDECREF(pid);
2841
2842 return status;
2843}
2844
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002845static PyObject *
2846get_class(PyObject *obj)
2847{
2848 PyObject *cls;
2849 static PyObject *str_class;
2850
2851 if (str_class == NULL) {
2852 str_class = PyUnicode_InternFromString("__class__");
2853 if (str_class == NULL)
2854 return NULL;
2855 }
2856 cls = PyObject_GetAttr(obj, str_class);
2857 if (cls == NULL) {
2858 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2859 PyErr_Clear();
2860 cls = (PyObject *) Py_TYPE(obj);
2861 Py_INCREF(cls);
2862 }
2863 }
2864 return cls;
2865}
2866
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002867/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2868 * appropriate __reduce__ method for obj.
2869 */
2870static int
2871save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2872{
2873 PyObject *callable;
2874 PyObject *argtup;
2875 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002876 PyObject *listitems = Py_None;
2877 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002878 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002879
2880 int use_newobj = self->proto >= 2;
2881
2882 const char reduce_op = REDUCE;
2883 const char build_op = BUILD;
2884 const char newobj_op = NEWOBJ;
2885
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002886 size = PyTuple_Size(args);
2887 if (size < 2 || size > 5) {
2888 PyErr_SetString(PicklingError, "tuple returned by "
2889 "__reduce__ must contain 2 through 5 elements");
2890 return -1;
2891 }
2892
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002893 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2894 &callable, &argtup, &state, &listitems, &dictitems))
2895 return -1;
2896
2897 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002898 PyErr_SetString(PicklingError, "first item of the tuple "
2899 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002900 return -1;
2901 }
2902 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002903 PyErr_SetString(PicklingError, "second item of the tuple "
2904 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002905 return -1;
2906 }
2907
2908 if (state == Py_None)
2909 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002910
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002911 if (listitems == Py_None)
2912 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002913 else if (!PyIter_Check(listitems)) {
2914 PyErr_Format(PicklingError, "Fourth element of tuple"
2915 "returned by __reduce__ must be an iterator, not %s",
2916 Py_TYPE(listitems)->tp_name);
2917 return -1;
2918 }
2919
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002920 if (dictitems == Py_None)
2921 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002922 else if (!PyIter_Check(dictitems)) {
2923 PyErr_Format(PicklingError, "Fifth element of tuple"
2924 "returned by __reduce__ must be an iterator, not %s",
2925 Py_TYPE(dictitems)->tp_name);
2926 return -1;
2927 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002928
2929 /* Protocol 2 special case: if callable's name is __newobj__, use
2930 NEWOBJ. */
2931 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002932 static PyObject *newobj_str = NULL, *name_str = NULL;
2933 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002934
2935 if (newobj_str == NULL) {
2936 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002937 name_str = PyUnicode_InternFromString("__name__");
2938 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002939 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002940 }
2941
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002942 name = PyObject_GetAttr(callable, name_str);
2943 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002944 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2945 PyErr_Clear();
2946 else
2947 return -1;
2948 use_newobj = 0;
2949 }
2950 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002951 use_newobj = PyUnicode_Check(name) &&
2952 PyUnicode_Compare(name, newobj_str) == 0;
2953 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002954 }
2955 }
2956 if (use_newobj) {
2957 PyObject *cls;
2958 PyObject *newargtup;
2959 PyObject *obj_class;
2960 int p;
2961
2962 /* Sanity checks. */
2963 if (Py_SIZE(argtup) < 1) {
2964 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2965 return -1;
2966 }
2967
2968 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002969 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002970 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002971 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002972 return -1;
2973 }
2974
2975 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002976 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002977 p = obj_class != cls; /* true iff a problem */
2978 Py_DECREF(obj_class);
2979 if (p) {
2980 PyErr_SetString(PicklingError, "args[0] from "
2981 "__newobj__ args has the wrong class");
2982 return -1;
2983 }
2984 }
2985 /* XXX: These calls save() are prone to infinite recursion. Imagine
2986 what happen if the value returned by the __reduce__() method of
2987 some extension type contains another object of the same type. Ouch!
2988
2989 Here is a quick example, that I ran into, to illustrate what I
2990 mean:
2991
2992 >>> import pickle, copyreg
2993 >>> copyreg.dispatch_table.pop(complex)
2994 >>> pickle.dumps(1+2j)
2995 Traceback (most recent call last):
2996 ...
2997 RuntimeError: maximum recursion depth exceeded
2998
2999 Removing the complex class from copyreg.dispatch_table made the
3000 __reduce_ex__() method emit another complex object:
3001
3002 >>> (1+1j).__reduce_ex__(2)
3003 (<function __newobj__ at 0xb7b71c3c>,
3004 (<class 'complex'>, (1+1j)), None, None, None)
3005
3006 Thus when save() was called on newargstup (the 2nd item) recursion
3007 ensued. Of course, the bug was in the complex class which had a
3008 broken __getnewargs__() that emitted another complex object. But,
3009 the point, here, is it is quite easy to end up with a broken reduce
3010 function. */
3011
3012 /* Save the class and its __new__ arguments. */
3013 if (save(self, cls, 0) < 0)
3014 return -1;
3015
3016 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3017 if (newargtup == NULL)
3018 return -1;
3019
3020 p = save(self, newargtup, 0);
3021 Py_DECREF(newargtup);
3022 if (p < 0)
3023 return -1;
3024
3025 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003026 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003027 return -1;
3028 }
3029 else { /* Not using NEWOBJ. */
3030 if (save(self, callable, 0) < 0 ||
3031 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003032 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003033 return -1;
3034 }
3035
3036 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3037 the caller do not want to memoize the object. Not particularly useful,
3038 but that is to mimic the behavior save_reduce() in pickle.py when
3039 obj is None. */
3040 if (obj && memo_put(self, obj) < 0)
3041 return -1;
3042
3043 if (listitems && batch_list(self, listitems) < 0)
3044 return -1;
3045
3046 if (dictitems && batch_dict(self, dictitems) < 0)
3047 return -1;
3048
3049 if (state) {
3050 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003051 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003052 return -1;
3053 }
3054
3055 return 0;
3056}
3057
3058static int
3059save(PicklerObject *self, PyObject *obj, int pers_save)
3060{
3061 PyTypeObject *type;
3062 PyObject *reduce_func = NULL;
3063 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003064 int status = 0;
3065
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003066 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003067 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003068
3069 /* The extra pers_save argument is necessary to avoid calling save_pers()
3070 on its returned object. */
3071 if (!pers_save && self->pers_func) {
3072 /* save_pers() returns:
3073 -1 to signal an error;
3074 0 if it did nothing successfully;
3075 1 if a persistent id was saved.
3076 */
3077 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3078 goto done;
3079 }
3080
3081 type = Py_TYPE(obj);
3082
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003083 /* The old cPickle had an optimization that used switch-case statement
3084 dispatching on the first letter of the type name. This has was removed
3085 since benchmarks shown that this optimization was actually slowing
3086 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003087
3088 /* Atom types; these aren't memoized, so don't check the memo. */
3089
3090 if (obj == Py_None) {
3091 status = save_none(self, obj);
3092 goto done;
3093 }
3094 else if (obj == Py_False || obj == Py_True) {
3095 status = save_bool(self, obj);
3096 goto done;
3097 }
3098 else if (type == &PyLong_Type) {
3099 status = save_long(self, obj);
3100 goto done;
3101 }
3102 else if (type == &PyFloat_Type) {
3103 status = save_float(self, obj);
3104 goto done;
3105 }
3106
3107 /* Check the memo to see if it has the object. If so, generate
3108 a GET (or BINGET) opcode, instead of pickling the object
3109 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003110 if (PyMemoTable_Get(self->memo, obj)) {
3111 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003112 goto error;
3113 goto done;
3114 }
3115
3116 if (type == &PyBytes_Type) {
3117 status = save_bytes(self, obj);
3118 goto done;
3119 }
3120 else if (type == &PyUnicode_Type) {
3121 status = save_unicode(self, obj);
3122 goto done;
3123 }
3124 else if (type == &PyDict_Type) {
3125 status = save_dict(self, obj);
3126 goto done;
3127 }
3128 else if (type == &PyList_Type) {
3129 status = save_list(self, obj);
3130 goto done;
3131 }
3132 else if (type == &PyTuple_Type) {
3133 status = save_tuple(self, obj);
3134 goto done;
3135 }
3136 else if (type == &PyType_Type) {
3137 status = save_global(self, obj, NULL);
3138 goto done;
3139 }
3140 else if (type == &PyFunction_Type) {
3141 status = save_global(self, obj, NULL);
3142 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3143 /* fall back to reduce */
3144 PyErr_Clear();
3145 }
3146 else {
3147 goto done;
3148 }
3149 }
3150 else if (type == &PyCFunction_Type) {
3151 status = save_global(self, obj, NULL);
3152 goto done;
3153 }
3154 else if (PyType_IsSubtype(type, &PyType_Type)) {
3155 status = save_global(self, obj, NULL);
3156 goto done;
3157 }
3158
3159 /* XXX: This part needs some unit tests. */
3160
3161 /* Get a reduction callable, and call it. This may come from
3162 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3163 * or the object's __reduce__ method.
3164 */
3165 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3166 if (reduce_func != NULL) {
3167 /* Here, the reference count of the reduce_func object returned by
3168 PyDict_GetItem needs to be increased to be consistent with the one
3169 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3170 reduce_func at the end of the save() routine.
3171 */
3172 Py_INCREF(reduce_func);
3173 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003174 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003175 }
3176 else {
3177 static PyObject *reduce_str = NULL;
3178 static PyObject *reduce_ex_str = NULL;
3179
3180 /* Cache the name of the reduce methods. */
3181 if (reduce_str == NULL) {
3182 reduce_str = PyUnicode_InternFromString("__reduce__");
3183 if (reduce_str == NULL)
3184 goto error;
3185 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3186 if (reduce_ex_str == NULL)
3187 goto error;
3188 }
3189
3190 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3191 automatically defined as __reduce__. While this is convenient, this
3192 make it impossible to know which method was actually called. Of
3193 course, this is not a big deal. But still, it would be nice to let
3194 the user know which method was called when something go
3195 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3196 don't actually have to check for a __reduce__ method. */
3197
3198 /* Check for a __reduce_ex__ method. */
3199 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3200 if (reduce_func != NULL) {
3201 PyObject *proto;
3202 proto = PyLong_FromLong(self->proto);
3203 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003204 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003205 }
3206 }
3207 else {
3208 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3209 PyErr_Clear();
3210 else
3211 goto error;
3212 /* Check for a __reduce__ method. */
3213 reduce_func = PyObject_GetAttr(obj, reduce_str);
3214 if (reduce_func != NULL) {
3215 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3216 }
3217 else {
3218 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3219 type->tp_name, obj);
3220 goto error;
3221 }
3222 }
3223 }
3224
3225 if (reduce_value == NULL)
3226 goto error;
3227
3228 if (PyUnicode_Check(reduce_value)) {
3229 status = save_global(self, obj, reduce_value);
3230 goto done;
3231 }
3232
3233 if (!PyTuple_Check(reduce_value)) {
3234 PyErr_SetString(PicklingError,
3235 "__reduce__ must return a string or tuple");
3236 goto error;
3237 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003238
3239 status = save_reduce(self, reduce_value, obj);
3240
3241 if (0) {
3242 error:
3243 status = -1;
3244 }
3245 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003246 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003247 Py_XDECREF(reduce_func);
3248 Py_XDECREF(reduce_value);
3249
3250 return status;
3251}
3252
3253static int
3254dump(PicklerObject *self, PyObject *obj)
3255{
3256 const char stop_op = STOP;
3257
3258 if (self->proto >= 2) {
3259 char header[2];
3260
3261 header[0] = PROTO;
3262 assert(self->proto >= 0 && self->proto < 256);
3263 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003264 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003265 return -1;
3266 }
3267
3268 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003269 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003270 return -1;
3271
3272 return 0;
3273}
3274
3275PyDoc_STRVAR(Pickler_clear_memo_doc,
3276"clear_memo() -> None. Clears the pickler's \"memo\"."
3277"\n"
3278"The memo is the data structure that remembers which objects the\n"
3279"pickler has already seen, so that shared or recursive objects are\n"
3280"pickled by reference and not by value. This method is useful when\n"
3281"re-using picklers.");
3282
3283static PyObject *
3284Pickler_clear_memo(PicklerObject *self)
3285{
3286 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003287 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003288
3289 Py_RETURN_NONE;
3290}
3291
3292PyDoc_STRVAR(Pickler_dump_doc,
3293"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3294
3295static PyObject *
3296Pickler_dump(PicklerObject *self, PyObject *args)
3297{
3298 PyObject *obj;
3299
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003300 /* Check whether the Pickler was initialized correctly (issue3664).
3301 Developers often forget to call __init__() in their subclasses, which
3302 would trigger a segfault without this check. */
3303 if (self->write == NULL) {
3304 PyErr_Format(PicklingError,
3305 "Pickler.__init__() was not called by %s.__init__()",
3306 Py_TYPE(self)->tp_name);
3307 return NULL;
3308 }
3309
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003310 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3311 return NULL;
3312
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003313 if (_Pickler_ClearBuffer(self) < 0)
3314 return NULL;
3315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003316 if (dump(self, obj) < 0)
3317 return NULL;
3318
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003319 if (_Pickler_FlushToFile(self) < 0)
3320 return NULL;
3321
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003322 Py_RETURN_NONE;
3323}
3324
3325static struct PyMethodDef Pickler_methods[] = {
3326 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3327 Pickler_dump_doc},
3328 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3329 Pickler_clear_memo_doc},
3330 {NULL, NULL} /* sentinel */
3331};
3332
3333static void
3334Pickler_dealloc(PicklerObject *self)
3335{
3336 PyObject_GC_UnTrack(self);
3337
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003338 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003339 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003340 Py_XDECREF(self->pers_func);
3341 Py_XDECREF(self->arg);
3342 Py_XDECREF(self->fast_memo);
3343
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003344 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003345
3346 Py_TYPE(self)->tp_free((PyObject *)self);
3347}
3348
3349static int
3350Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3351{
3352 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003353 Py_VISIT(self->pers_func);
3354 Py_VISIT(self->arg);
3355 Py_VISIT(self->fast_memo);
3356 return 0;
3357}
3358
3359static int
3360Pickler_clear(PicklerObject *self)
3361{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003362 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003363 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003364 Py_CLEAR(self->pers_func);
3365 Py_CLEAR(self->arg);
3366 Py_CLEAR(self->fast_memo);
3367
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003368 if (self->memo != NULL) {
3369 PyMemoTable *memo = self->memo;
3370 self->memo = NULL;
3371 PyMemoTable_Del(memo);
3372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003373 return 0;
3374}
3375
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003376
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003377PyDoc_STRVAR(Pickler_doc,
3378"Pickler(file, protocol=None)"
3379"\n"
3380"This takes a binary file for writing a pickle data stream.\n"
3381"\n"
3382"The optional protocol argument tells the pickler to use the\n"
3383"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3384"protocol is 3; a backward-incompatible protocol designed for\n"
3385"Python 3.0.\n"
3386"\n"
3387"Specifying a negative protocol version selects the highest\n"
3388"protocol version supported. The higher the protocol used, the\n"
3389"more recent the version of Python needed to read the pickle\n"
3390"produced.\n"
3391"\n"
3392"The file argument must have a write() method that accepts a single\n"
3393"bytes argument. It can thus be a file object opened for binary\n"
3394"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003395"meets this interface.\n"
3396"\n"
3397"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3398"map the new Python 3.x names to the old module names used in Python\n"
3399"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003400
3401static int
3402Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3403{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003404 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003405 PyObject *file;
3406 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003407 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003408
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003409 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003410 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003411 return -1;
3412
3413 /* In case of multiple __init__() calls, clear previous content. */
3414 if (self->write != NULL)
3415 (void)Pickler_clear(self);
3416
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003417 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3418 return -1;
3419
3420 if (_Pickler_SetOutputStream(self, file) < 0)
3421 return -1;
3422
3423 /* memo and output_buffer may have already been created in _Pickler_New */
3424 if (self->memo == NULL) {
3425 self->memo = PyMemoTable_New();
3426 if (self->memo == NULL)
3427 return -1;
3428 }
3429 self->output_len = 0;
3430 if (self->output_buffer == NULL) {
3431 self->max_output_len = WRITE_BUF_SIZE;
3432 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3433 self->max_output_len);
3434 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003435 return -1;
3436 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003437
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003438 self->arg = NULL;
3439 self->fast = 0;
3440 self->fast_nesting = 0;
3441 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003442 self->pers_func = NULL;
3443 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3444 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3445 "persistent_id");
3446 if (self->pers_func == NULL)
3447 return -1;
3448 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003449 return 0;
3450}
3451
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003452/* Define a proxy object for the Pickler's internal memo object. This is to
3453 * avoid breaking code like:
3454 * pickler.memo.clear()
3455 * and
3456 * pickler.memo = saved_memo
3457 * Is this a good idea? Not really, but we don't want to break code that uses
3458 * it. Note that we don't implement the entire mapping API here. This is
3459 * intentional, as these should be treated as black-box implementation details.
3460 */
3461
3462typedef struct {
3463 PyObject_HEAD
3464 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3465} PicklerMemoProxyObject;
3466
3467PyDoc_STRVAR(pmp_clear_doc,
3468"memo.clear() -> None. Remove all items from memo.");
3469
3470static PyObject *
3471pmp_clear(PicklerMemoProxyObject *self)
3472{
3473 if (self->pickler->memo)
3474 PyMemoTable_Clear(self->pickler->memo);
3475 Py_RETURN_NONE;
3476}
3477
3478PyDoc_STRVAR(pmp_copy_doc,
3479"memo.copy() -> new_memo. Copy the memo to a new object.");
3480
3481static PyObject *
3482pmp_copy(PicklerMemoProxyObject *self)
3483{
3484 Py_ssize_t i;
3485 PyMemoTable *memo;
3486 PyObject *new_memo = PyDict_New();
3487 if (new_memo == NULL)
3488 return NULL;
3489
3490 memo = self->pickler->memo;
3491 for (i = 0; i < memo->mt_allocated; ++i) {
3492 PyMemoEntry entry = memo->mt_table[i];
3493 if (entry.me_key != NULL) {
3494 int status;
3495 PyObject *key, *value;
3496
3497 key = PyLong_FromVoidPtr(entry.me_key);
3498 value = Py_BuildValue("lO", entry.me_value, entry.me_key);
3499
3500 if (key == NULL || value == NULL) {
3501 Py_XDECREF(key);
3502 Py_XDECREF(value);
3503 goto error;
3504 }
3505 status = PyDict_SetItem(new_memo, key, value);
3506 Py_DECREF(key);
3507 Py_DECREF(value);
3508 if (status < 0)
3509 goto error;
3510 }
3511 }
3512 return new_memo;
3513
3514 error:
3515 Py_XDECREF(new_memo);
3516 return NULL;
3517}
3518
3519PyDoc_STRVAR(pmp_reduce_doc,
3520"memo.__reduce__(). Pickling support.");
3521
3522static PyObject *
3523pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3524{
3525 PyObject *reduce_value, *dict_args;
3526 PyObject *contents = pmp_copy(self);
3527 if (contents == NULL)
3528 return NULL;
3529
3530 reduce_value = PyTuple_New(2);
3531 if (reduce_value == NULL) {
3532 Py_DECREF(contents);
3533 return NULL;
3534 }
3535 dict_args = PyTuple_New(1);
3536 if (dict_args == NULL) {
3537 Py_DECREF(contents);
3538 Py_DECREF(reduce_value);
3539 return NULL;
3540 }
3541 PyTuple_SET_ITEM(dict_args, 0, contents);
3542 Py_INCREF((PyObject *)&PyDict_Type);
3543 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3544 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3545 return reduce_value;
3546}
3547
3548static PyMethodDef picklerproxy_methods[] = {
3549 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3550 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3551 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3552 {NULL, NULL} /* sentinel */
3553};
3554
3555static void
3556PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3557{
3558 PyObject_GC_UnTrack(self);
3559 Py_XDECREF(self->pickler);
3560 PyObject_GC_Del((PyObject *)self);
3561}
3562
3563static int
3564PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3565 visitproc visit, void *arg)
3566{
3567 Py_VISIT(self->pickler);
3568 return 0;
3569}
3570
3571static int
3572PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3573{
3574 Py_CLEAR(self->pickler);
3575 return 0;
3576}
3577
3578static PyTypeObject PicklerMemoProxyType = {
3579 PyVarObject_HEAD_INIT(NULL, 0)
3580 "_pickle.PicklerMemoProxy", /*tp_name*/
3581 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3582 0,
3583 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3584 0, /* tp_print */
3585 0, /* tp_getattr */
3586 0, /* tp_setattr */
3587 0, /* tp_compare */
3588 0, /* tp_repr */
3589 0, /* tp_as_number */
3590 0, /* tp_as_sequence */
3591 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003592 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003593 0, /* tp_call */
3594 0, /* tp_str */
3595 PyObject_GenericGetAttr, /* tp_getattro */
3596 PyObject_GenericSetAttr, /* tp_setattro */
3597 0, /* tp_as_buffer */
3598 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3599 0, /* tp_doc */
3600 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3601 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3602 0, /* tp_richcompare */
3603 0, /* tp_weaklistoffset */
3604 0, /* tp_iter */
3605 0, /* tp_iternext */
3606 picklerproxy_methods, /* tp_methods */
3607};
3608
3609static PyObject *
3610PicklerMemoProxy_New(PicklerObject *pickler)
3611{
3612 PicklerMemoProxyObject *self;
3613
3614 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3615 if (self == NULL)
3616 return NULL;
3617 Py_INCREF(pickler);
3618 self->pickler = pickler;
3619 PyObject_GC_Track(self);
3620 return (PyObject *)self;
3621}
3622
3623/*****************************************************************************/
3624
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003625static PyObject *
3626Pickler_get_memo(PicklerObject *self)
3627{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003628 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003629}
3630
3631static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003632Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003633{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003634 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003635
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003636 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003637 PyErr_SetString(PyExc_TypeError,
3638 "attribute deletion is not supported");
3639 return -1;
3640 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003641
3642 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3643 PicklerObject *pickler =
3644 ((PicklerMemoProxyObject *)obj)->pickler;
3645
3646 new_memo = PyMemoTable_Copy(pickler->memo);
3647 if (new_memo == NULL)
3648 return -1;
3649 }
3650 else if (PyDict_Check(obj)) {
3651 Py_ssize_t i = 0;
3652 PyObject *key, *value;
3653
3654 new_memo = PyMemoTable_New();
3655 if (new_memo == NULL)
3656 return -1;
3657
3658 while (PyDict_Next(obj, &i, &key, &value)) {
3659 long memo_id;
3660 PyObject *memo_obj;
3661
3662 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3663 PyErr_SetString(PyExc_TypeError,
3664 "'memo' values must be 2-item tuples");
3665 goto error;
3666 }
3667 memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
3668 if (memo_id == -1 && PyErr_Occurred())
3669 goto error;
3670 memo_obj = PyTuple_GET_ITEM(value, 1);
3671 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3672 goto error;
3673 }
3674 }
3675 else {
3676 PyErr_Format(PyExc_TypeError,
3677 "'memo' attribute must be an PicklerMemoProxy object"
3678 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003679 return -1;
3680 }
3681
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003682 PyMemoTable_Del(self->memo);
3683 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003684
3685 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003686
3687 error:
3688 if (new_memo)
3689 PyMemoTable_Del(new_memo);
3690 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003691}
3692
3693static PyObject *
3694Pickler_get_persid(PicklerObject *self)
3695{
3696 if (self->pers_func == NULL)
3697 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3698 else
3699 Py_INCREF(self->pers_func);
3700 return self->pers_func;
3701}
3702
3703static int
3704Pickler_set_persid(PicklerObject *self, PyObject *value)
3705{
3706 PyObject *tmp;
3707
3708 if (value == NULL) {
3709 PyErr_SetString(PyExc_TypeError,
3710 "attribute deletion is not supported");
3711 return -1;
3712 }
3713 if (!PyCallable_Check(value)) {
3714 PyErr_SetString(PyExc_TypeError,
3715 "persistent_id must be a callable taking one argument");
3716 return -1;
3717 }
3718
3719 tmp = self->pers_func;
3720 Py_INCREF(value);
3721 self->pers_func = value;
3722 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3723
3724 return 0;
3725}
3726
3727static PyMemberDef Pickler_members[] = {
3728 {"bin", T_INT, offsetof(PicklerObject, bin)},
3729 {"fast", T_INT, offsetof(PicklerObject, fast)},
3730 {NULL}
3731};
3732
/* Computed attributes of Pickler: "memo" and "persistent_id", each wired to
   its getter/setter pair.  The {NULL} entry terminates the table. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
3740
/* Type object for _pickle.Pickler.  The initializer is positional: every
   entry is labelled with the PyTypeObject slot it fills, and a 0 leaves the
   slot unset.  The flags make the type subclassable and GC-tracked, which is
   why tp_traverse, tp_clear and PyObject_GC_Del are supplied. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3783
3784/* Temporary helper for calling self.find_class().
3785
3786 XXX: It would be nice to able to avoid Python function call overhead, by
3787 using directly the C version of find_class(), when find_class() is not
3788 overridden by a subclass. Although, this could become rather hackish. A
3789 simpler optimization would be to call the C function when self is not a
3790 subclass instance. */
3791static PyObject *
3792find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3793{
3794 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3795 module_name, global_name);
3796}
3797
3798static int
3799marker(UnpicklerObject *self)
3800{
3801 if (self->num_marks < 1) {
3802 PyErr_SetString(UnpicklingError, "could not find MARK");
3803 return -1;
3804 }
3805
3806 return self->marks[--self->num_marks];
3807}
3808
3809static int
3810load_none(UnpicklerObject *self)
3811{
3812 PDATA_APPEND(self->stack, Py_None, -1);
3813 return 0;
3814}
3815
3816static int
3817bad_readline(void)
3818{
3819 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3820 return -1;
3821}
3822
3823static int
3824load_int(UnpicklerObject *self)
3825{
3826 PyObject *value;
3827 char *endptr, *s;
3828 Py_ssize_t len;
3829 long x;
3830
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003831 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003832 return -1;
3833 if (len < 2)
3834 return bad_readline();
3835
3836 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003837 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3838 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003839 x = strtol(s, &endptr, 0);
3840
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003841 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003842 /* Hm, maybe we've got something long. Let's try reading
3843 * it as a Python long object. */
3844 errno = 0;
3845 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003846 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003847 if (value == NULL) {
3848 PyErr_SetString(PyExc_ValueError,
3849 "could not convert string to int");
3850 return -1;
3851 }
3852 }
3853 else {
3854 if (len == 3 && (x == 0 || x == 1)) {
3855 if ((value = PyBool_FromLong(x)) == NULL)
3856 return -1;
3857 }
3858 else {
3859 if ((value = PyLong_FromLong(x)) == NULL)
3860 return -1;
3861 }
3862 }
3863
3864 PDATA_PUSH(self->stack, value, -1);
3865 return 0;
3866}
3867
3868static int
3869load_bool(UnpicklerObject *self, PyObject *boolean)
3870{
3871 assert(boolean == Py_True || boolean == Py_False);
3872 PDATA_APPEND(self->stack, boolean, -1);
3873 return 0;
3874}
3875
/* Decode the first `size` bytes at `bytes` as a little-endian integer and
 * return the value as a C long.
 *
 * Obscure: when size is 1 or 2 the value is an unsigned little-endian int,
 * but when size is 4 it is a signed one.  This is an historical source of
 * cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that moving the top byte
 * into place never left-shifts into the sign bit of a signed type (which is
 * undefined behaviour where long is 32 bits wide).  `size` is expected to be
 * at most 4; a size of 0 yields 0.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.
     */
    if (sizeof(long) > 4 && size == 4 && (x & (1UL << 31)) != 0) {
        x |= ~0xFFFFFFFFUL;
    }

    return (long)x;
}
3902
3903static int
3904load_binintx(UnpicklerObject *self, char *s, int size)
3905{
3906 PyObject *value;
3907 long x;
3908
3909 x = calc_binint(s, size);
3910
3911 if ((value = PyLong_FromLong(x)) == NULL)
3912 return -1;
3913
3914 PDATA_PUSH(self->stack, value, -1);
3915 return 0;
3916}
3917
3918static int
3919load_binint(UnpicklerObject *self)
3920{
3921 char *s;
3922
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003923 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003924 return -1;
3925
3926 return load_binintx(self, s, 4);
3927}
3928
3929static int
3930load_binint1(UnpicklerObject *self)
3931{
3932 char *s;
3933
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003934 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003935 return -1;
3936
3937 return load_binintx(self, s, 1);
3938}
3939
3940static int
3941load_binint2(UnpicklerObject *self)
3942{
3943 char *s;
3944
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003945 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003946 return -1;
3947
3948 return load_binintx(self, s, 2);
3949}
3950
3951static int
3952load_long(UnpicklerObject *self)
3953{
3954 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003955 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003956 Py_ssize_t len;
3957
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003958 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003959 return -1;
3960 if (len < 2)
3961 return bad_readline();
3962
Mark Dickinson8dd05142009-01-20 20:43:58 +00003963 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3964 the 'L' before calling PyLong_FromString. In order to maintain
3965 compatibility with Python 3.0.0, we don't actually *require*
3966 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003967 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003968 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003969 /* XXX: Should the base argument explicitly set to 10? */
3970 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003971 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003972 return -1;
3973
3974 PDATA_PUSH(self->stack, value, -1);
3975 return 0;
3976}
3977
3978/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3979 * data following.
3980 */
3981static int
3982load_counted_long(UnpicklerObject *self, int size)
3983{
3984 PyObject *value;
3985 char *nbytes;
3986 char *pdata;
3987
3988 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003989 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003990 return -1;
3991
3992 size = calc_binint(nbytes, size);
3993 if (size < 0) {
3994 /* Corrupt or hostile pickle -- we never write one like this */
3995 PyErr_SetString(UnpicklingError,
3996 "LONG pickle has negative byte count");
3997 return -1;
3998 }
3999
4000 if (size == 0)
4001 value = PyLong_FromLong(0L);
4002 else {
4003 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004004 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004005 return -1;
4006 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4007 1 /* little endian */ , 1 /* signed */ );
4008 }
4009 if (value == NULL)
4010 return -1;
4011 PDATA_PUSH(self->stack, value, -1);
4012 return 0;
4013}
4014
4015static int
4016load_float(UnpicklerObject *self)
4017{
4018 PyObject *value;
4019 char *endptr, *s;
4020 Py_ssize_t len;
4021 double d;
4022
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004023 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004024 return -1;
4025 if (len < 2)
4026 return bad_readline();
4027
4028 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004029 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4030 if (d == -1.0 && PyErr_Occurred())
4031 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004032 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004033 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4034 return -1;
4035 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004036 value = PyFloat_FromDouble(d);
4037 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004038 return -1;
4039
4040 PDATA_PUSH(self->stack, value, -1);
4041 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004042}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004043
4044static int
4045load_binfloat(UnpicklerObject *self)
4046{
4047 PyObject *value;
4048 double x;
4049 char *s;
4050
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004051 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004052 return -1;
4053
4054 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4055 if (x == -1.0 && PyErr_Occurred())
4056 return -1;
4057
4058 if ((value = PyFloat_FromDouble(x)) == NULL)
4059 return -1;
4060
4061 PDATA_PUSH(self->stack, value, -1);
4062 return 0;
4063}
4064
4065static int
4066load_string(UnpicklerObject *self)
4067{
4068 PyObject *bytes;
4069 PyObject *str = NULL;
4070 Py_ssize_t len;
4071 char *s, *p;
4072
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004073 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004074 return -1;
4075 if (len < 3)
4076 return bad_readline();
4077 if ((s = strdup(s)) == NULL) {
4078 PyErr_NoMemory();
4079 return -1;
4080 }
4081
4082 /* Strip outermost quotes */
4083 while (s[len - 1] <= ' ')
4084 len--;
4085 if (s[0] == '"' && s[len - 1] == '"') {
4086 s[len - 1] = '\0';
4087 p = s + 1;
4088 len -= 2;
4089 }
4090 else if (s[0] == '\'' && s[len - 1] == '\'') {
4091 s[len - 1] = '\0';
4092 p = s + 1;
4093 len -= 2;
4094 }
4095 else {
4096 free(s);
4097 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4098 return -1;
4099 }
4100
4101 /* Use the PyBytes API to decode the string, since that is what is used
4102 to encode, and then coerce the result to Unicode. */
4103 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4104 free(s);
4105 if (bytes == NULL)
4106 return -1;
4107 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4108 Py_DECREF(bytes);
4109 if (str == NULL)
4110 return -1;
4111
4112 PDATA_PUSH(self->stack, str, -1);
4113 return 0;
4114}
4115
4116static int
4117load_binbytes(UnpicklerObject *self)
4118{
4119 PyObject *bytes;
4120 long x;
4121 char *s;
4122
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004123 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004124 return -1;
4125
4126 x = calc_binint(s, 4);
4127 if (x < 0) {
4128 PyErr_SetString(UnpicklingError,
4129 "BINBYTES pickle has negative byte count");
4130 return -1;
4131 }
4132
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004133 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004134 return -1;
4135 bytes = PyBytes_FromStringAndSize(s, x);
4136 if (bytes == NULL)
4137 return -1;
4138
4139 PDATA_PUSH(self->stack, bytes, -1);
4140 return 0;
4141}
4142
4143static int
4144load_short_binbytes(UnpicklerObject *self)
4145{
4146 PyObject *bytes;
4147 unsigned char x;
4148 char *s;
4149
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004150 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004151 return -1;
4152
4153 x = (unsigned char)s[0];
4154
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004155 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004156 return -1;
4157
4158 bytes = PyBytes_FromStringAndSize(s, x);
4159 if (bytes == NULL)
4160 return -1;
4161
4162 PDATA_PUSH(self->stack, bytes, -1);
4163 return 0;
4164}
4165
4166static int
4167load_binstring(UnpicklerObject *self)
4168{
4169 PyObject *str;
4170 long x;
4171 char *s;
4172
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004173 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004174 return -1;
4175
4176 x = calc_binint(s, 4);
4177 if (x < 0) {
4178 PyErr_SetString(UnpicklingError,
4179 "BINSTRING pickle has negative byte count");
4180 return -1;
4181 }
4182
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004183 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004184 return -1;
4185
4186 /* Convert Python 2.x strings to unicode. */
4187 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4188 if (str == NULL)
4189 return -1;
4190
4191 PDATA_PUSH(self->stack, str, -1);
4192 return 0;
4193}
4194
4195static int
4196load_short_binstring(UnpicklerObject *self)
4197{
4198 PyObject *str;
4199 unsigned char x;
4200 char *s;
4201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004202 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004203 return -1;
4204
4205 x = (unsigned char)s[0];
4206
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004207 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004208 return -1;
4209
4210 /* Convert Python 2.x strings to unicode. */
4211 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4212 if (str == NULL)
4213 return -1;
4214
4215 PDATA_PUSH(self->stack, str, -1);
4216 return 0;
4217}
4218
4219static int
4220load_unicode(UnpicklerObject *self)
4221{
4222 PyObject *str;
4223 Py_ssize_t len;
4224 char *s;
4225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004226 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004227 return -1;
4228 if (len < 1)
4229 return bad_readline();
4230
4231 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4232 if (str == NULL)
4233 return -1;
4234
4235 PDATA_PUSH(self->stack, str, -1);
4236 return 0;
4237}
4238
4239static int
4240load_binunicode(UnpicklerObject *self)
4241{
4242 PyObject *str;
4243 long size;
4244 char *s;
4245
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004246 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004247 return -1;
4248
4249 size = calc_binint(s, 4);
4250 if (size < 0) {
4251 PyErr_SetString(UnpicklingError,
4252 "BINUNICODE pickle has negative byte count");
4253 return -1;
4254 }
4255
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004256 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004257 return -1;
4258
Victor Stinner485fb562010-04-13 11:07:24 +00004259 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004260 if (str == NULL)
4261 return -1;
4262
4263 PDATA_PUSH(self->stack, str, -1);
4264 return 0;
4265}
4266
4267static int
4268load_tuple(UnpicklerObject *self)
4269{
4270 PyObject *tuple;
4271 int i;
4272
4273 if ((i = marker(self)) < 0)
4274 return -1;
4275
4276 tuple = Pdata_poptuple(self->stack, i);
4277 if (tuple == NULL)
4278 return -1;
4279 PDATA_PUSH(self->stack, tuple, -1);
4280 return 0;
4281}
4282
4283static int
4284load_counted_tuple(UnpicklerObject *self, int len)
4285{
4286 PyObject *tuple;
4287
4288 tuple = PyTuple_New(len);
4289 if (tuple == NULL)
4290 return -1;
4291
4292 while (--len >= 0) {
4293 PyObject *item;
4294
4295 PDATA_POP(self->stack, item);
4296 if (item == NULL)
4297 return -1;
4298 PyTuple_SET_ITEM(tuple, len, item);
4299 }
4300 PDATA_PUSH(self->stack, tuple, -1);
4301 return 0;
4302}
4303
4304static int
4305load_empty_list(UnpicklerObject *self)
4306{
4307 PyObject *list;
4308
4309 if ((list = PyList_New(0)) == NULL)
4310 return -1;
4311 PDATA_PUSH(self->stack, list, -1);
4312 return 0;
4313}
4314
4315static int
4316load_empty_dict(UnpicklerObject *self)
4317{
4318 PyObject *dict;
4319
4320 if ((dict = PyDict_New()) == NULL)
4321 return -1;
4322 PDATA_PUSH(self->stack, dict, -1);
4323 return 0;
4324}
4325
4326static int
4327load_list(UnpicklerObject *self)
4328{
4329 PyObject *list;
4330 int i;
4331
4332 if ((i = marker(self)) < 0)
4333 return -1;
4334
4335 list = Pdata_poplist(self->stack, i);
4336 if (list == NULL)
4337 return -1;
4338 PDATA_PUSH(self->stack, list, -1);
4339 return 0;
4340}
4341
4342static int
4343load_dict(UnpicklerObject *self)
4344{
4345 PyObject *dict, *key, *value;
4346 int i, j, k;
4347
4348 if ((i = marker(self)) < 0)
4349 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004350 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004351
4352 if ((dict = PyDict_New()) == NULL)
4353 return -1;
4354
4355 for (k = i + 1; k < j; k += 2) {
4356 key = self->stack->data[k - 1];
4357 value = self->stack->data[k];
4358 if (PyDict_SetItem(dict, key, value) < 0) {
4359 Py_DECREF(dict);
4360 return -1;
4361 }
4362 }
4363 Pdata_clear(self->stack, i);
4364 PDATA_PUSH(self->stack, dict, -1);
4365 return 0;
4366}
4367
4368static PyObject *
4369instantiate(PyObject *cls, PyObject *args)
4370{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004371 PyObject *result = NULL;
4372 /* Caller must assure args are a tuple. Normally, args come from
4373 Pdata_poptuple which packs objects from the top of the stack
4374 into a newly created tuple. */
4375 assert(PyTuple_Check(args));
4376 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4377 PyObject_HasAttrString(cls, "__getinitargs__")) {
4378 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004379 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004380 else {
4381 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4382 }
4383 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004384}
4385
4386static int
4387load_obj(UnpicklerObject *self)
4388{
4389 PyObject *cls, *args, *obj = NULL;
4390 int i;
4391
4392 if ((i = marker(self)) < 0)
4393 return -1;
4394
4395 args = Pdata_poptuple(self->stack, i + 1);
4396 if (args == NULL)
4397 return -1;
4398
4399 PDATA_POP(self->stack, cls);
4400 if (cls) {
4401 obj = instantiate(cls, args);
4402 Py_DECREF(cls);
4403 }
4404 Py_DECREF(args);
4405 if (obj == NULL)
4406 return -1;
4407
4408 PDATA_PUSH(self->stack, obj, -1);
4409 return 0;
4410}
4411
4412static int
4413load_inst(UnpicklerObject *self)
4414{
4415 PyObject *cls = NULL;
4416 PyObject *args = NULL;
4417 PyObject *obj = NULL;
4418 PyObject *module_name;
4419 PyObject *class_name;
4420 Py_ssize_t len;
4421 int i;
4422 char *s;
4423
4424 if ((i = marker(self)) < 0)
4425 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004426 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004427 return -1;
4428 if (len < 2)
4429 return bad_readline();
4430
4431 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4432 identifiers are permitted in Python 3.0, since the INST opcode is only
4433 supported by older protocols on Python 2.x. */
4434 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4435 if (module_name == NULL)
4436 return -1;
4437
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004438 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004439 if (len < 2)
4440 return bad_readline();
4441 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004442 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004443 cls = find_class(self, module_name, class_name);
4444 Py_DECREF(class_name);
4445 }
4446 }
4447 Py_DECREF(module_name);
4448
4449 if (cls == NULL)
4450 return -1;
4451
4452 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4453 obj = instantiate(cls, args);
4454 Py_DECREF(args);
4455 }
4456 Py_DECREF(cls);
4457
4458 if (obj == NULL)
4459 return -1;
4460
4461 PDATA_PUSH(self->stack, obj, -1);
4462 return 0;
4463}
4464
4465static int
4466load_newobj(UnpicklerObject *self)
4467{
4468 PyObject *args = NULL;
4469 PyObject *clsraw = NULL;
4470 PyTypeObject *cls; /* clsraw cast to its true type */
4471 PyObject *obj;
4472
4473 /* Stack is ... cls argtuple, and we want to call
4474 * cls.__new__(cls, *argtuple).
4475 */
4476 PDATA_POP(self->stack, args);
4477 if (args == NULL)
4478 goto error;
4479 if (!PyTuple_Check(args)) {
4480 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4481 goto error;
4482 }
4483
4484 PDATA_POP(self->stack, clsraw);
4485 cls = (PyTypeObject *)clsraw;
4486 if (cls == NULL)
4487 goto error;
4488 if (!PyType_Check(cls)) {
4489 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4490 "isn't a type object");
4491 goto error;
4492 }
4493 if (cls->tp_new == NULL) {
4494 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4495 "has NULL tp_new");
4496 goto error;
4497 }
4498
4499 /* Call __new__. */
4500 obj = cls->tp_new(cls, args, NULL);
4501 if (obj == NULL)
4502 goto error;
4503
4504 Py_DECREF(args);
4505 Py_DECREF(clsraw);
4506 PDATA_PUSH(self->stack, obj, -1);
4507 return 0;
4508
4509 error:
4510 Py_XDECREF(args);
4511 Py_XDECREF(clsraw);
4512 return -1;
4513}
4514
4515static int
4516load_global(UnpicklerObject *self)
4517{
4518 PyObject *global = NULL;
4519 PyObject *module_name;
4520 PyObject *global_name;
4521 Py_ssize_t len;
4522 char *s;
4523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004524 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004525 return -1;
4526 if (len < 2)
4527 return bad_readline();
4528 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4529 if (!module_name)
4530 return -1;
4531
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004532 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004533 if (len < 2) {
4534 Py_DECREF(module_name);
4535 return bad_readline();
4536 }
4537 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4538 if (global_name) {
4539 global = find_class(self, module_name, global_name);
4540 Py_DECREF(global_name);
4541 }
4542 }
4543 Py_DECREF(module_name);
4544
4545 if (global == NULL)
4546 return -1;
4547 PDATA_PUSH(self->stack, global, -1);
4548 return 0;
4549}
4550
4551static int
4552load_persid(UnpicklerObject *self)
4553{
4554 PyObject *pid;
4555 Py_ssize_t len;
4556 char *s;
4557
4558 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004559 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004560 return -1;
4561 if (len < 2)
4562 return bad_readline();
4563
4564 pid = PyBytes_FromStringAndSize(s, len - 1);
4565 if (pid == NULL)
4566 return -1;
4567
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004568 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004569 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004570 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004571 if (pid == NULL)
4572 return -1;
4573
4574 PDATA_PUSH(self->stack, pid, -1);
4575 return 0;
4576 }
4577 else {
4578 PyErr_SetString(UnpicklingError,
4579 "A load persistent id instruction was encountered,\n"
4580 "but no persistent_load function was specified.");
4581 return -1;
4582 }
4583}
4584
4585static int
4586load_binpersid(UnpicklerObject *self)
4587{
4588 PyObject *pid;
4589
4590 if (self->pers_func) {
4591 PDATA_POP(self->stack, pid);
4592 if (pid == NULL)
4593 return -1;
4594
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004595 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004596 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004597 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004598 if (pid == NULL)
4599 return -1;
4600
4601 PDATA_PUSH(self->stack, pid, -1);
4602 return 0;
4603 }
4604 else {
4605 PyErr_SetString(UnpicklingError,
4606 "A load persistent id instruction was encountered,\n"
4607 "but no persistent_load function was specified.");
4608 return -1;
4609 }
4610}
4611
4612static int
4613load_pop(UnpicklerObject *self)
4614{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004615 int len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004616
4617 /* Note that we split the (pickle.py) stack into two stacks,
4618 * an object stack and a mark stack. We have to be clever and
4619 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004620 * mark stack first, and only signalling a stack underflow if
4621 * the object stack is empty and the mark stack doesn't match
4622 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004623 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004624 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004626 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004627 len--;
4628 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004629 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004630 } else {
4631 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004632 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004633 return 0;
4634}
4635
4636static int
4637load_pop_mark(UnpicklerObject *self)
4638{
4639 int i;
4640
4641 if ((i = marker(self)) < 0)
4642 return -1;
4643
4644 Pdata_clear(self->stack, i);
4645
4646 return 0;
4647}
4648
4649static int
4650load_dup(UnpicklerObject *self)
4651{
4652 PyObject *last;
4653 int len;
4654
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004655 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004656 return stack_underflow();
4657 last = self->stack->data[len - 1];
4658 PDATA_APPEND(self->stack, last, -1);
4659 return 0;
4660}
4661
4662static int
4663load_get(UnpicklerObject *self)
4664{
4665 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004666 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004667 Py_ssize_t len;
4668 char *s;
4669
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004670 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004671 return -1;
4672 if (len < 2)
4673 return bad_readline();
4674
4675 key = PyLong_FromString(s, NULL, 10);
4676 if (key == NULL)
4677 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004678 idx = PyLong_AsSsize_t(key);
4679 if (idx == -1 && PyErr_Occurred()) {
4680 Py_DECREF(key);
4681 return -1;
4682 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004683
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004684 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004685 if (value == NULL) {
4686 if (!PyErr_Occurred())
4687 PyErr_SetObject(PyExc_KeyError, key);
4688 Py_DECREF(key);
4689 return -1;
4690 }
4691 Py_DECREF(key);
4692
4693 PDATA_APPEND(self->stack, value, -1);
4694 return 0;
4695}
4696
4697static int
4698load_binget(UnpicklerObject *self)
4699{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004700 PyObject *value;
4701 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004702 char *s;
4703
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004704 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004705 return -1;
4706
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004707 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004708
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004709 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004710 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004711 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004712 if (!PyErr_Occurred())
4713 PyErr_SetObject(PyExc_KeyError, key);
4714 Py_DECREF(key);
4715 return -1;
4716 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004717
4718 PDATA_APPEND(self->stack, value, -1);
4719 return 0;
4720}
4721
4722static int
4723load_long_binget(UnpicklerObject *self)
4724{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004725 PyObject *value;
4726 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004729 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004730 return -1;
4731
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004732 idx = (long)Py_CHARMASK(s[0]);
4733 idx |= (long)Py_CHARMASK(s[1]) << 8;
4734 idx |= (long)Py_CHARMASK(s[2]) << 16;
4735 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004736
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004737 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004738 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004739 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004740 if (!PyErr_Occurred())
4741 PyErr_SetObject(PyExc_KeyError, key);
4742 Py_DECREF(key);
4743 return -1;
4744 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004745
4746 PDATA_APPEND(self->stack, value, -1);
4747 return 0;
4748}
4749
4750/* Push an object from the extension registry (EXT[124]). nbytes is
4751 * the number of bytes following the opcode, holding the index (code) value.
4752 */
4753static int
4754load_extension(UnpicklerObject *self, int nbytes)
4755{
4756 char *codebytes; /* the nbytes bytes after the opcode */
4757 long code; /* calc_binint returns long */
4758 PyObject *py_code; /* code as a Python int */
4759 PyObject *obj; /* the object to push */
4760 PyObject *pair; /* (module_name, class_name) */
4761 PyObject *module_name, *class_name;
4762
4763 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004764 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765 return -1;
4766 code = calc_binint(codebytes, nbytes);
4767 if (code <= 0) { /* note that 0 is forbidden */
4768 /* Corrupt or hostile pickle. */
4769 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4770 return -1;
4771 }
4772
4773 /* Look for the code in the cache. */
4774 py_code = PyLong_FromLong(code);
4775 if (py_code == NULL)
4776 return -1;
4777 obj = PyDict_GetItem(extension_cache, py_code);
4778 if (obj != NULL) {
4779 /* Bingo. */
4780 Py_DECREF(py_code);
4781 PDATA_APPEND(self->stack, obj, -1);
4782 return 0;
4783 }
4784
4785 /* Look up the (module_name, class_name) pair. */
4786 pair = PyDict_GetItem(inverted_registry, py_code);
4787 if (pair == NULL) {
4788 Py_DECREF(py_code);
4789 PyErr_Format(PyExc_ValueError, "unregistered extension "
4790 "code %ld", code);
4791 return -1;
4792 }
4793 /* Since the extension registry is manipulable via Python code,
4794 * confirm that pair is really a 2-tuple of strings.
4795 */
4796 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4797 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4798 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4799 Py_DECREF(py_code);
4800 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4801 "isn't a 2-tuple of strings", code);
4802 return -1;
4803 }
4804 /* Load the object. */
4805 obj = find_class(self, module_name, class_name);
4806 if (obj == NULL) {
4807 Py_DECREF(py_code);
4808 return -1;
4809 }
4810 /* Cache code -> obj. */
4811 code = PyDict_SetItem(extension_cache, py_code, obj);
4812 Py_DECREF(py_code);
4813 if (code < 0) {
4814 Py_DECREF(obj);
4815 return -1;
4816 }
4817 PDATA_PUSH(self->stack, obj, -1);
4818 return 0;
4819}
4820
4821static int
4822load_put(UnpicklerObject *self)
4823{
4824 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004825 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004826 Py_ssize_t len;
4827 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004828
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004829 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004830 return -1;
4831 if (len < 2)
4832 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004833 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004834 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004835 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004836
4837 key = PyLong_FromString(s, NULL, 10);
4838 if (key == NULL)
4839 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004840 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004841 Py_DECREF(key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004842 if (idx == -1 && PyErr_Occurred())
4843 return -1;
4844
4845 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846}
4847
4848static int
4849load_binput(UnpicklerObject *self)
4850{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004851 PyObject *value;
4852 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004853 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004854
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004855 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004856 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004857
4858 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004859 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004860 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004863
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004864 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004865}
4866
4867static int
4868load_long_binput(UnpicklerObject *self)
4869{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004870 PyObject *value;
4871 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004873
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004874 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004875 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004876
4877 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004878 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004879 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004880
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004881 idx = (long)Py_CHARMASK(s[0]);
4882 idx |= (long)Py_CHARMASK(s[1]) << 8;
4883 idx |= (long)Py_CHARMASK(s[2]) << 16;
4884 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004885
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004886 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004887}
4888
4889static int
4890do_append(UnpicklerObject *self, int x)
4891{
4892 PyObject *value;
4893 PyObject *list;
4894 int len, i;
4895
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004896 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897 if (x > len || x <= 0)
4898 return stack_underflow();
4899 if (len == x) /* nothing to do */
4900 return 0;
4901
4902 list = self->stack->data[x - 1];
4903
4904 if (PyList_Check(list)) {
4905 PyObject *slice;
4906 Py_ssize_t list_len;
4907
4908 slice = Pdata_poplist(self->stack, x);
4909 if (!slice)
4910 return -1;
4911 list_len = PyList_GET_SIZE(list);
4912 i = PyList_SetSlice(list, list_len, list_len, slice);
4913 Py_DECREF(slice);
4914 return i;
4915 }
4916 else {
4917 PyObject *append_func;
4918
4919 append_func = PyObject_GetAttrString(list, "append");
4920 if (append_func == NULL)
4921 return -1;
4922 for (i = x; i < len; i++) {
4923 PyObject *result;
4924
4925 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004926 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927 if (result == NULL) {
4928 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004929 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004930 return -1;
4931 }
4932 Py_DECREF(result);
4933 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004934 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004935 }
4936
4937 return 0;
4938}
4939
4940static int
4941load_append(UnpicklerObject *self)
4942{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004943 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944}
4945
4946static int
4947load_appends(UnpicklerObject *self)
4948{
4949 return do_append(self, marker(self));
4950}
4951
4952static int
4953do_setitems(UnpicklerObject *self, int x)
4954{
4955 PyObject *value, *key;
4956 PyObject *dict;
4957 int len, i;
4958 int status = 0;
4959
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004960 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004961 if (x > len || x <= 0)
4962 return stack_underflow();
4963 if (len == x) /* nothing to do */
4964 return 0;
4965 if ((len - x) % 2 != 0) {
4966 /* Currupt or hostile pickle -- we never write one like this. */
4967 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4968 return -1;
4969 }
4970
4971 /* Here, dict does not actually need to be a PyDict; it could be anything
4972 that supports the __setitem__ attribute. */
4973 dict = self->stack->data[x - 1];
4974
4975 for (i = x + 1; i < len; i += 2) {
4976 key = self->stack->data[i - 1];
4977 value = self->stack->data[i];
4978 if (PyObject_SetItem(dict, key, value) < 0) {
4979 status = -1;
4980 break;
4981 }
4982 }
4983
4984 Pdata_clear(self->stack, x);
4985 return status;
4986}
4987
4988static int
4989load_setitem(UnpicklerObject *self)
4990{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004991 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004992}
4993
4994static int
4995load_setitems(UnpicklerObject *self)
4996{
4997 return do_setitems(self, marker(self));
4998}
4999
5000static int
5001load_build(UnpicklerObject *self)
5002{
5003 PyObject *state, *inst, *slotstate;
5004 PyObject *setstate;
5005 int status = 0;
5006
5007 /* Stack is ... instance, state. We want to leave instance at
5008 * the stack top, possibly mutated via instance.__setstate__(state).
5009 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005010 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005011 return stack_underflow();
5012
5013 PDATA_POP(self->stack, state);
5014 if (state == NULL)
5015 return -1;
5016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005017 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005018
5019 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005020 if (setstate == NULL) {
5021 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5022 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005023 else {
5024 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005025 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005026 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005027 }
5028 else {
5029 PyObject *result;
5030
5031 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005032 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005033 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005034 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005035 Py_DECREF(setstate);
5036 if (result == NULL)
5037 return -1;
5038 Py_DECREF(result);
5039 return 0;
5040 }
5041
5042 /* A default __setstate__. First see whether state embeds a
5043 * slot state dict too (a proto 2 addition).
5044 */
5045 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5046 PyObject *tmp = state;
5047
5048 state = PyTuple_GET_ITEM(tmp, 0);
5049 slotstate = PyTuple_GET_ITEM(tmp, 1);
5050 Py_INCREF(state);
5051 Py_INCREF(slotstate);
5052 Py_DECREF(tmp);
5053 }
5054 else
5055 slotstate = NULL;
5056
5057 /* Set inst.__dict__ from the state dict (if any). */
5058 if (state != Py_None) {
5059 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005060 PyObject *d_key, *d_value;
5061 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005062
5063 if (!PyDict_Check(state)) {
5064 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5065 goto error;
5066 }
5067 dict = PyObject_GetAttrString(inst, "__dict__");
5068 if (dict == NULL)
5069 goto error;
5070
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005071 i = 0;
5072 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5073 /* normally the keys for instance attributes are
5074 interned. we should try to do that here. */
5075 Py_INCREF(d_key);
5076 if (PyUnicode_CheckExact(d_key))
5077 PyUnicode_InternInPlace(&d_key);
5078 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5079 Py_DECREF(d_key);
5080 goto error;
5081 }
5082 Py_DECREF(d_key);
5083 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005084 Py_DECREF(dict);
5085 }
5086
5087 /* Also set instance attributes from the slotstate dict (if any). */
5088 if (slotstate != NULL) {
5089 PyObject *d_key, *d_value;
5090 Py_ssize_t i;
5091
5092 if (!PyDict_Check(slotstate)) {
5093 PyErr_SetString(UnpicklingError,
5094 "slot state is not a dictionary");
5095 goto error;
5096 }
5097 i = 0;
5098 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5099 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5100 goto error;
5101 }
5102 }
5103
5104 if (0) {
5105 error:
5106 status = -1;
5107 }
5108
5109 Py_DECREF(state);
5110 Py_XDECREF(slotstate);
5111 return status;
5112}
5113
5114static int
5115load_mark(UnpicklerObject *self)
5116{
5117
5118 /* Note that we split the (pickle.py) stack into two stacks, an
5119 * object stack and a mark stack. Here we push a mark onto the
5120 * mark stack.
5121 */
5122
5123 if ((self->num_marks + 1) >= self->marks_size) {
5124 size_t alloc;
5125 int *marks;
5126
5127 /* Use the size_t type to check for overflow. */
5128 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005129 if (alloc > PY_SSIZE_T_MAX ||
5130 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005131 PyErr_NoMemory();
5132 return -1;
5133 }
5134
5135 if (self->marks == NULL)
5136 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
5137 else
5138 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
5139 if (marks == NULL) {
5140 PyErr_NoMemory();
5141 return -1;
5142 }
5143 self->marks = marks;
5144 self->marks_size = (Py_ssize_t)alloc;
5145 }
5146
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005147 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005148
5149 return 0;
5150}
5151
5152static int
5153load_reduce(UnpicklerObject *self)
5154{
5155 PyObject *callable = NULL;
5156 PyObject *argtup = NULL;
5157 PyObject *obj = NULL;
5158
5159 PDATA_POP(self->stack, argtup);
5160 if (argtup == NULL)
5161 return -1;
5162 PDATA_POP(self->stack, callable);
5163 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005164 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005165 Py_DECREF(callable);
5166 }
5167 Py_DECREF(argtup);
5168
5169 if (obj == NULL)
5170 return -1;
5171
5172 PDATA_PUSH(self->stack, obj, -1);
5173 return 0;
5174}
5175
5176/* Just raises an error if we don't know the protocol specified. PROTO
5177 * is the first opcode for protocols >= 2.
5178 */
5179static int
5180load_proto(UnpicklerObject *self)
5181{
5182 char *s;
5183 int i;
5184
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005185 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005186 return -1;
5187
5188 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005189 if (i <= HIGHEST_PROTOCOL) {
5190 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005191 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005192 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005193
5194 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5195 return -1;
5196}
5197
/* Run the unpickling loop: read one opcode byte at a time and dispatch it
 * to the matching load_*() handler until STOP is read or a handler reports
 * failure.
 *
 * After the loop, NULL is returned if an exception is pending; otherwise
 * the object popped from the top of the stack is returned.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    /* Start from a clean slate: no marks and an empty object stack. */
    self->num_marks = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below. */
    /* In both macros a failing handler `break`s out of the switch and lands
       on the unconditional `break` that ends the while loop, whereas a
       successful handler `continue`s with the next opcode. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            break;

        /* The two cases below return directly, so they bypass the
           _Unpickler_SkipConsumed() call made after the loop. */
        case '\0':
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* Reached after STOP, after a failed read, and after a failed handler. */
    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    /* XXX: It is not clear what this is actually for. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    PDATA_POP(self->stack, value);
    return value;
}
5306
5307PyDoc_STRVAR(Unpickler_load_doc,
5308"load() -> object. Load a pickle."
5309"\n"
5310"Read a pickled object representation from the open file object given in\n"
5311"the constructor, and return the reconstituted object hierarchy specified\n"
5312"therein.\n");
5313
5314static PyObject *
5315Unpickler_load(UnpicklerObject *self)
5316{
5317 /* Check whether the Unpickler was initialized correctly. This prevents
5318 segfaulting if a subclass overridden __init__ with a function that does
5319 not call Unpickler.__init__(). Here, we simply ensure that self->read
5320 is not NULL. */
5321 if (self->read == NULL) {
5322 PyErr_Format(UnpicklingError,
5323 "Unpickler.__init__() was not called by %s.__init__()",
5324 Py_TYPE(self)->tp_name);
5325 return NULL;
5326 }
5327
5328 return load(self);
5329}
5330
5331/* The name of find_class() is misleading. In newer pickle protocols, this
5332 function is used for loading any global (i.e., functions), not just
5333 classes. The name is kept only for backward compatibility. */
5334
5335PyDoc_STRVAR(Unpickler_find_class_doc,
5336"find_class(module_name, global_name) -> object.\n"
5337"\n"
5338"Return an object from a specified module, importing the module if\n"
5339"necessary. Subclasses may override this method (e.g. to restrict\n"
5340"unpickling of arbitrary classes and functions).\n"
5341"\n"
5342"This method is called whenever a class or a function object is\n"
5343"needed. Both arguments passed are str objects.\n");
5344
5345static PyObject *
5346Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5347{
5348 PyObject *global;
5349 PyObject *modules_dict;
5350 PyObject *module;
5351 PyObject *module_name, *global_name;
5352
5353 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5354 &module_name, &global_name))
5355 return NULL;
5356
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005357 /* Try to map the old names used in Python 2.x to the new ones used in
5358 Python 3.x. We do this only with old pickle protocols and when the
5359 user has not disabled the feature. */
5360 if (self->proto < 3 && self->fix_imports) {
5361 PyObject *key;
5362 PyObject *item;
5363
5364 /* Check if the global (i.e., a function or a class) was renamed
5365 or moved to another module. */
5366 key = PyTuple_Pack(2, module_name, global_name);
5367 if (key == NULL)
5368 return NULL;
5369 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5370 Py_DECREF(key);
5371 if (item) {
5372 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5373 PyErr_Format(PyExc_RuntimeError,
5374 "_compat_pickle.NAME_MAPPING values should be "
5375 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5376 return NULL;
5377 }
5378 module_name = PyTuple_GET_ITEM(item, 0);
5379 global_name = PyTuple_GET_ITEM(item, 1);
5380 if (!PyUnicode_Check(module_name) ||
5381 !PyUnicode_Check(global_name)) {
5382 PyErr_Format(PyExc_RuntimeError,
5383 "_compat_pickle.NAME_MAPPING values should be "
5384 "pairs of str, not (%.200s, %.200s)",
5385 Py_TYPE(module_name)->tp_name,
5386 Py_TYPE(global_name)->tp_name);
5387 return NULL;
5388 }
5389 }
5390 else if (PyErr_Occurred()) {
5391 return NULL;
5392 }
5393
5394 /* Check if the module was renamed. */
5395 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5396 if (item) {
5397 if (!PyUnicode_Check(item)) {
5398 PyErr_Format(PyExc_RuntimeError,
5399 "_compat_pickle.IMPORT_MAPPING values should be "
5400 "strings, not %.200s", Py_TYPE(item)->tp_name);
5401 return NULL;
5402 }
5403 module_name = item;
5404 }
5405 else if (PyErr_Occurred()) {
5406 return NULL;
5407 }
5408 }
5409
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005410 modules_dict = PySys_GetObject("modules");
5411 if (modules_dict == NULL)
5412 return NULL;
5413
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005414 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005415 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005416 if (PyErr_Occurred())
5417 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005418 module = PyImport_Import(module_name);
5419 if (module == NULL)
5420 return NULL;
5421 global = PyObject_GetAttr(module, global_name);
5422 Py_DECREF(module);
5423 }
5424 else {
5425 global = PyObject_GetAttr(module, global_name);
5426 }
5427 return global;
5428}
5429
/* Method table mapping Python-level method names to the C implementations
 * above: load() takes no arguments, find_class() takes positional ones.
 */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
5437
/* Destructor for Unpickler objects: release every object reference and
 * every buffer the unpickler holds, then free the object itself.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking before the fields are torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    /* Release the buffer view only if one is currently held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    /* NOTE(review): encoding and errors are released with plain free(),
       unlike the PyMem_Free() calls above -- presumably they come from the
       C allocator; confirm against _Unpickler_SetInputEncoding(). */
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
5461
/* GC traversal: report the object references held directly in the
 * unpickler's fields.
 *
 * NOTE(review): the entries of self->memo also look like object references
 * (ump_copy() stores them into a dict) but are not visited here -- confirm
 * whether that is intentional.
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    return 0;
}
5473
/* Drop everything the unpickler holds and reset the freed pointers to NULL
 * so the object can be cleared or initialized again safely.  Also called by
 * Unpickler_init() when __init__() runs more than once.  Always returns 0.
 */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    /* Release the buffer view only if one is currently held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    /* Each raw buffer is freed and its pointer reset, so a later clear or
       dealloc does not free it twice. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    free(self->encoding);
    self->encoding = NULL;
    free(self->errors);
    self->errors = NULL;

    return 0;
}
5500
5501PyDoc_STRVAR(Unpickler_doc,
5502"Unpickler(file, *, encoding='ASCII', errors='strict')"
5503"\n"
5504"This takes a binary file for reading a pickle data stream.\n"
5505"\n"
5506"The protocol version of the pickle is detected automatically, so no\n"
5507"proto argument is needed.\n"
5508"\n"
5509"The file-like object must have two methods, a read() method\n"
5510"that takes an integer argument, and a readline() method that\n"
5511"requires no arguments. Both methods should return bytes.\n"
5512"Thus file-like object can be a binary file object opened for\n"
5513"reading, a BytesIO object, or any other custom object that\n"
5514"meets this interface.\n"
5515"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005516"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5517"which are used to control compatiblity support for pickle stream\n"
5518"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5519"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5520"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5521"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5522"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005523
5524static int
5525Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5526{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005527 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005528 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005529 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005530 char *encoding = NULL;
5531 char *errors = NULL;
5532
5533 /* XXX: That is an horrible error message. But, I don't know how to do
5534 better... */
5535 if (Py_SIZE(args) != 1) {
5536 PyErr_Format(PyExc_TypeError,
5537 "%s takes exactly one positional argument (%zd given)",
5538 Py_TYPE(self)->tp_name, Py_SIZE(args));
5539 return -1;
5540 }
5541
5542 /* Arguments parsing needs to be done in the __init__() method to allow
5543 subclasses to define their own __init__() method, which may (or may
5544 not) support Unpickler arguments. However, this means we need to be
5545 extra careful in the other Unpickler methods, since a subclass could
5546 forget to call Unpickler.__init__() thus breaking our internal
5547 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005548 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005549 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005550 return -1;
5551
5552 /* In case of multiple __init__() calls, clear previous content. */
5553 if (self->read != NULL)
5554 (void)Unpickler_clear(self);
5555
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005556 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005557 return -1;
5558
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005559 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005560 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005561
5562 self->fix_imports = PyObject_IsTrue(fix_imports);
5563 if (self->fix_imports == -1)
5564 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005565
5566 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5567 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5568 "persistent_load");
5569 if (self->pers_func == NULL)
5570 return -1;
5571 }
5572 else {
5573 self->pers_func = NULL;
5574 }
5575
5576 self->stack = (Pdata *)Pdata_New();
5577 if (self->stack == NULL)
5578 return -1;
5579
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005580 self->memo_size = 32;
5581 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005582 if (self->memo == NULL)
5583 return -1;
5584
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005585 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005586 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005587
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005588 return 0;
5589}
5590
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005591/* Define a proxy object for the Unpickler's internal memo object. This is to
5592 * avoid breaking code like:
5593 * unpickler.memo.clear()
5594 * and
5595 * unpickler.memo = saved_memo
5596 * Is this a good idea? Not really, but we don't want to break code that uses
5597 * it. Note that we don't implement the entire mapping API here. This is
5598 * intentional, as these should be treated as black-box implementation details.
5599 *
5600 * We do, however, have to implement pickling/unpickling support because of
5601 * real-world code like cvs2svn.
5602 */
5603
/* Proxy object exposing an Unpickler's memo to Python code. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler; /* Unpickler whose memo is proxied; a strong
                                   reference (released in dealloc/clear). */
} UnpicklerMemoProxyObject;
5608
5609PyDoc_STRVAR(ump_clear_doc,
5610"memo.clear() -> None. Remove all items from memo.");
5611
5612static PyObject *
5613ump_clear(UnpicklerMemoProxyObject *self)
5614{
5615 _Unpickler_MemoCleanup(self->unpickler);
5616 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5617 if (self->unpickler->memo == NULL)
5618 return NULL;
5619 Py_RETURN_NONE;
5620}
5621
5622PyDoc_STRVAR(ump_copy_doc,
5623"memo.copy() -> new_memo. Copy the memo to a new object.");
5624
5625static PyObject *
5626ump_copy(UnpicklerMemoProxyObject *self)
5627{
5628 Py_ssize_t i;
5629 PyObject *new_memo = PyDict_New();
5630 if (new_memo == NULL)
5631 return NULL;
5632
5633 for (i = 0; i < self->unpickler->memo_size; i++) {
5634 int status;
5635 PyObject *key, *value;
5636
5637 value = self->unpickler->memo[i];
5638 if (value == NULL)
5639 continue;
5640
5641 key = PyLong_FromSsize_t(i);
5642 if (key == NULL)
5643 goto error;
5644 status = PyDict_SetItem(new_memo, key, value);
5645 Py_DECREF(key);
5646 if (status < 0)
5647 goto error;
5648 }
5649 return new_memo;
5650
5651error:
5652 Py_DECREF(new_memo);
5653 return NULL;
5654}
5655
5656PyDoc_STRVAR(ump_reduce_doc,
5657"memo.__reduce__(). Pickling support.");
5658
5659static PyObject *
5660ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5661{
5662 PyObject *reduce_value;
5663 PyObject *constructor_args;
5664 PyObject *contents = ump_copy(self);
5665 if (contents == NULL)
5666 return NULL;
5667
5668 reduce_value = PyTuple_New(2);
5669 if (reduce_value == NULL) {
5670 Py_DECREF(contents);
5671 return NULL;
5672 }
5673 constructor_args = PyTuple_New(1);
5674 if (constructor_args == NULL) {
5675 Py_DECREF(contents);
5676 Py_DECREF(reduce_value);
5677 return NULL;
5678 }
5679 PyTuple_SET_ITEM(constructor_args, 0, contents);
5680 Py_INCREF((PyObject *)&PyDict_Type);
5681 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5682 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5683 return reduce_value;
5684}
5685
/* Methods of the memo proxy.  Only clear(), copy() and pickling support are
 * provided; the rest of the mapping API is not implemented.
 */
static PyMethodDef unpicklerproxy_methods[] = {
    {"clear",      (PyCFunction)ump_clear,  METH_NOARGS,  ump_clear_doc},
    {"copy",       (PyCFunction)ump_copy,   METH_NOARGS,  ump_copy_doc},
    {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
    {NULL, NULL}    /* sentinel */
};
5692
5693static void
5694UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5695{
5696 PyObject_GC_UnTrack(self);
5697 Py_XDECREF(self->unpickler);
5698 PyObject_GC_Del((PyObject *)self);
5699}
5700
5701static int
5702UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5703 visitproc visit, void *arg)
5704{
5705 Py_VISIT(self->unpickler);
5706 return 0;
5707}
5708
5709static int
5710UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5711{
5712 Py_CLEAR(self->unpickler);
5713 return 0;
5714}
5715
5716static PyTypeObject UnpicklerMemoProxyType = {
5717 PyVarObject_HEAD_INIT(NULL, 0)
5718 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5719 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5720 0,
5721 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5722 0, /* tp_print */
5723 0, /* tp_getattr */
5724 0, /* tp_setattr */
5725 0, /* tp_compare */
5726 0, /* tp_repr */
5727 0, /* tp_as_number */
5728 0, /* tp_as_sequence */
5729 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005730 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005731 0, /* tp_call */
5732 0, /* tp_str */
5733 PyObject_GenericGetAttr, /* tp_getattro */
5734 PyObject_GenericSetAttr, /* tp_setattro */
5735 0, /* tp_as_buffer */
5736 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5737 0, /* tp_doc */
5738 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5739 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5740 0, /* tp_richcompare */
5741 0, /* tp_weaklistoffset */
5742 0, /* tp_iter */
5743 0, /* tp_iternext */
5744 unpicklerproxy_methods, /* tp_methods */
5745};
5746
5747static PyObject *
5748UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5749{
5750 UnpicklerMemoProxyObject *self;
5751
5752 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5753 &UnpicklerMemoProxyType);
5754 if (self == NULL)
5755 return NULL;
5756 Py_INCREF(unpickler);
5757 self->unpickler = unpickler;
5758 PyObject_GC_Track(self);
5759 return (PyObject *)self;
5760}
5761
5762/*****************************************************************************/
5763
5764
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005765static PyObject *
5766Unpickler_get_memo(UnpicklerObject *self)
5767{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005768 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005769}
5770
5771static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005772Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005773{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005774 PyObject **new_memo;
5775 Py_ssize_t new_memo_size = 0;
5776 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005777
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005778 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005779 PyErr_SetString(PyExc_TypeError,
5780 "attribute deletion is not supported");
5781 return -1;
5782 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005783
5784 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5785 UnpicklerObject *unpickler =
5786 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5787
5788 new_memo_size = unpickler->memo_size;
5789 new_memo = _Unpickler_NewMemo(new_memo_size);
5790 if (new_memo == NULL)
5791 return -1;
5792
5793 for (i = 0; i < new_memo_size; i++) {
5794 Py_XINCREF(unpickler->memo[i]);
5795 new_memo[i] = unpickler->memo[i];
5796 }
5797 }
5798 else if (PyDict_Check(obj)) {
5799 Py_ssize_t i = 0;
5800 PyObject *key, *value;
5801
5802 new_memo_size = PyDict_Size(obj);
5803 new_memo = _Unpickler_NewMemo(new_memo_size);
5804 if (new_memo == NULL)
5805 return -1;
5806
5807 while (PyDict_Next(obj, &i, &key, &value)) {
5808 Py_ssize_t idx;
5809 if (!PyLong_Check(key)) {
5810 PyErr_SetString(PyExc_TypeError,
5811 "memo key must be integers");
5812 goto error;
5813 }
5814 idx = PyLong_AsSsize_t(key);
5815 if (idx == -1 && PyErr_Occurred())
5816 goto error;
5817 if (_Unpickler_MemoPut(self, idx, value) < 0)
5818 goto error;
5819 }
5820 }
5821 else {
5822 PyErr_Format(PyExc_TypeError,
5823 "'memo' attribute must be an UnpicklerMemoProxy object"
5824 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005825 return -1;
5826 }
5827
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005828 _Unpickler_MemoCleanup(self);
5829 self->memo_size = new_memo_size;
5830 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005831
5832 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005833
5834 error:
5835 if (new_memo_size) {
5836 i = new_memo_size;
5837 while (--i >= 0) {
5838 Py_XDECREF(new_memo[i]);
5839 }
5840 PyMem_FREE(new_memo);
5841 }
5842 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005843}
5844
5845static PyObject *
5846Unpickler_get_persload(UnpicklerObject *self)
5847{
5848 if (self->pers_func == NULL)
5849 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5850 else
5851 Py_INCREF(self->pers_func);
5852 return self->pers_func;
5853}
5854
5855static int
5856Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5857{
5858 PyObject *tmp;
5859
5860 if (value == NULL) {
5861 PyErr_SetString(PyExc_TypeError,
5862 "attribute deletion is not supported");
5863 return -1;
5864 }
5865 if (!PyCallable_Check(value)) {
5866 PyErr_SetString(PyExc_TypeError,
5867 "persistent_load must be a callable taking "
5868 "one argument");
5869 return -1;
5870 }
5871
5872 tmp = self->pers_func;
5873 Py_INCREF(value);
5874 self->pers_func = value;
5875 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5876
5877 return 0;
5878}
5879
5880static PyGetSetDef Unpickler_getsets[] = {
5881 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5882 {"persistent_load", (getter)Unpickler_get_persload,
5883 (setter)Unpickler_set_persload},
5884 {NULL}
5885};
5886
5887static PyTypeObject Unpickler_Type = {
5888 PyVarObject_HEAD_INIT(NULL, 0)
5889 "_pickle.Unpickler", /*tp_name*/
5890 sizeof(UnpicklerObject), /*tp_basicsize*/
5891 0, /*tp_itemsize*/
5892 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5893 0, /*tp_print*/
5894 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005895 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005896 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005897 0, /*tp_repr*/
5898 0, /*tp_as_number*/
5899 0, /*tp_as_sequence*/
5900 0, /*tp_as_mapping*/
5901 0, /*tp_hash*/
5902 0, /*tp_call*/
5903 0, /*tp_str*/
5904 0, /*tp_getattro*/
5905 0, /*tp_setattro*/
5906 0, /*tp_as_buffer*/
5907 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5908 Unpickler_doc, /*tp_doc*/
5909 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5910 (inquiry)Unpickler_clear, /*tp_clear*/
5911 0, /*tp_richcompare*/
5912 0, /*tp_weaklistoffset*/
5913 0, /*tp_iter*/
5914 0, /*tp_iternext*/
5915 Unpickler_methods, /*tp_methods*/
5916 0, /*tp_members*/
5917 Unpickler_getsets, /*tp_getset*/
5918 0, /*tp_base*/
5919 0, /*tp_dict*/
5920 0, /*tp_descr_get*/
5921 0, /*tp_descr_set*/
5922 0, /*tp_dictoffset*/
5923 (initproc)Unpickler_init, /*tp_init*/
5924 PyType_GenericAlloc, /*tp_alloc*/
5925 PyType_GenericNew, /*tp_new*/
5926 PyObject_GC_Del, /*tp_free*/
5927 0, /*tp_is_gc*/
5928};
5929
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005930PyDoc_STRVAR(pickle_dump_doc,
5931"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5932"\n"
5933"Write a pickled representation of obj to the open file object file. This\n"
5934"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5935"efficient.\n"
5936"\n"
5937"The optional protocol argument tells the pickler to use the given protocol;\n"
5938"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5939"backward-incompatible protocol designed for Python 3.0.\n"
5940"\n"
5941"Specifying a negative protocol version selects the highest protocol version\n"
5942"supported. The higher the protocol used, the more recent the version of\n"
5943"Python needed to read the pickle produced.\n"
5944"\n"
5945"The file argument must have a write() method that accepts a single bytes\n"
5946"argument. It can thus be a file object opened for binary writing, a\n"
5947"io.BytesIO instance, or any other custom object that meets this interface.\n"
5948"\n"
5949"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5950"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5951"so that the pickle data stream is readable with Python 2.x.\n");
5952
5953static PyObject *
5954pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5955{
5956 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5957 PyObject *obj;
5958 PyObject *file;
5959 PyObject *proto = NULL;
5960 PyObject *fix_imports = Py_True;
5961 PicklerObject *pickler;
5962
5963 /* fix_imports is a keyword-only argument. */
5964 if (Py_SIZE(args) > 3) {
5965 PyErr_Format(PyExc_TypeError,
5966 "pickle.dump() takes at most 3 positional "
5967 "argument (%zd given)", Py_SIZE(args));
5968 return NULL;
5969 }
5970
5971 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5972 &obj, &file, &proto, &fix_imports))
5973 return NULL;
5974
5975 pickler = _Pickler_New();
5976 if (pickler == NULL)
5977 return NULL;
5978
5979 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5980 goto error;
5981
5982 if (_Pickler_SetOutputStream(pickler, file) < 0)
5983 goto error;
5984
5985 if (dump(pickler, obj) < 0)
5986 goto error;
5987
5988 if (_Pickler_FlushToFile(pickler) < 0)
5989 goto error;
5990
5991 Py_DECREF(pickler);
5992 Py_RETURN_NONE;
5993
5994 error:
5995 Py_XDECREF(pickler);
5996 return NULL;
5997}
5998
5999PyDoc_STRVAR(pickle_dumps_doc,
6000"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6001"\n"
6002"Return the pickled representation of the object as a bytes\n"
6003"object, instead of writing it to a file.\n"
6004"\n"
6005"The optional protocol argument tells the pickler to use the given protocol;\n"
6006"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6007"backward-incompatible protocol designed for Python 3.0.\n"
6008"\n"
6009"Specifying a negative protocol version selects the highest protocol version\n"
6010"supported. The higher the protocol used, the more recent the version of\n"
6011"Python needed to read the pickle produced.\n"
6012"\n"
6013"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6014"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6015"so that the pickle data stream is readable with Python 2.x.\n");
6016
6017static PyObject *
6018pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6019{
6020 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6021 PyObject *obj;
6022 PyObject *proto = NULL;
6023 PyObject *result;
6024 PyObject *fix_imports = Py_True;
6025 PicklerObject *pickler;
6026
6027 /* fix_imports is a keyword-only argument. */
6028 if (Py_SIZE(args) > 2) {
6029 PyErr_Format(PyExc_TypeError,
6030 "pickle.dumps() takes at most 2 positional "
6031 "argument (%zd given)", Py_SIZE(args));
6032 return NULL;
6033 }
6034
6035 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6036 &obj, &proto, &fix_imports))
6037 return NULL;
6038
6039 pickler = _Pickler_New();
6040 if (pickler == NULL)
6041 return NULL;
6042
6043 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6044 goto error;
6045
6046 if (dump(pickler, obj) < 0)
6047 goto error;
6048
6049 result = _Pickler_GetString(pickler);
6050 Py_DECREF(pickler);
6051 return result;
6052
6053 error:
6054 Py_XDECREF(pickler);
6055 return NULL;
6056}
6057
6058PyDoc_STRVAR(pickle_load_doc,
6059"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6060"\n"
6061"Read a pickled object representation from the open file object file and\n"
6062"return the reconstituted object hierarchy specified therein. This is\n"
6063"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6064"\n"
6065"The protocol version of the pickle is detected automatically, so no protocol\n"
6066"argument is needed. Bytes past the pickled object's representation are\n"
6067"ignored.\n"
6068"\n"
6069"The argument file must have two methods, a read() method that takes an\n"
6070"integer argument, and a readline() method that requires no arguments. Both\n"
6071"methods should return bytes. Thus *file* can be a binary file object opened\n"
6072"for reading, a BytesIO object, or any other custom object that meets this\n"
6073"interface.\n"
6074"\n"
6075"Optional keyword arguments are fix_imports, encoding and errors,\n"
6076"which are used to control compatiblity support for pickle stream generated\n"
6077"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6078"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6079"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6080"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6081
6082static PyObject *
6083pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6084{
6085 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6086 PyObject *file;
6087 PyObject *fix_imports = Py_True;
6088 PyObject *result;
6089 char *encoding = NULL;
6090 char *errors = NULL;
6091 UnpicklerObject *unpickler;
6092
6093 /* fix_imports, encoding and errors are a keyword-only argument. */
6094 if (Py_SIZE(args) != 1) {
6095 PyErr_Format(PyExc_TypeError,
6096 "pickle.load() takes exactly one positional "
6097 "argument (%zd given)", Py_SIZE(args));
6098 return NULL;
6099 }
6100
6101 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6102 &file, &fix_imports, &encoding, &errors))
6103 return NULL;
6104
6105 unpickler = _Unpickler_New();
6106 if (unpickler == NULL)
6107 return NULL;
6108
6109 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6110 goto error;
6111
6112 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6113 goto error;
6114
6115 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6116 if (unpickler->fix_imports == -1)
6117 goto error;
6118
6119 result = load(unpickler);
6120 Py_DECREF(unpickler);
6121 return result;
6122
6123 error:
6124 Py_XDECREF(unpickler);
6125 return NULL;
6126}
6127
6128PyDoc_STRVAR(pickle_loads_doc,
6129"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6130"\n"
6131"Read a pickled object hierarchy from a bytes object and return the\n"
6132"reconstituted object hierarchy specified therein\n"
6133"\n"
6134"The protocol version of the pickle is detected automatically, so no protocol\n"
6135"argument is needed. Bytes past the pickled object's representation are\n"
6136"ignored.\n"
6137"\n"
6138"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6139"are used to control compatiblity support for pickle stream generated\n"
6140"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6141"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6142"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6143"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6144
6145static PyObject *
6146pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6147{
6148 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6149 PyObject *input;
6150 PyObject *fix_imports = Py_True;
6151 PyObject *result;
6152 char *encoding = NULL;
6153 char *errors = NULL;
6154 UnpicklerObject *unpickler;
6155
6156 /* fix_imports, encoding and errors are a keyword-only argument. */
6157 if (Py_SIZE(args) != 1) {
6158 PyErr_Format(PyExc_TypeError,
6159 "pickle.loads() takes exactly one positional "
6160 "argument (%zd given)", Py_SIZE(args));
6161 return NULL;
6162 }
6163
6164 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6165 &input, &fix_imports, &encoding, &errors))
6166 return NULL;
6167
6168 unpickler = _Unpickler_New();
6169 if (unpickler == NULL)
6170 return NULL;
6171
6172 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6173 goto error;
6174
6175 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6176 goto error;
6177
6178 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6179 if (unpickler->fix_imports == -1)
6180 goto error;
6181
6182 result = load(unpickler);
6183 Py_DECREF(unpickler);
6184 return result;
6185
6186 error:
6187 Py_XDECREF(unpickler);
6188 return NULL;
6189}
6190
6191
6192static struct PyMethodDef pickle_methods[] = {
6193 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6194 pickle_dump_doc},
6195 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6196 pickle_dumps_doc},
6197 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6198 pickle_load_doc},
6199 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6200 pickle_loads_doc},
6201 {NULL, NULL} /* sentinel */
6202};
6203
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006204static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006205initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006206{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006207 PyObject *copyreg = NULL;
6208 PyObject *compat_pickle = NULL;
6209
6210 /* XXX: We should ensure that the types of the dictionaries imported are
6211 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6212 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006213
6214 copyreg = PyImport_ImportModule("copyreg");
6215 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006216 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006217 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6218 if (!dispatch_table)
6219 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006220 extension_registry = \
6221 PyObject_GetAttrString(copyreg, "_extension_registry");
6222 if (!extension_registry)
6223 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006224 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6225 if (!inverted_registry)
6226 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006227 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6228 if (!extension_cache)
6229 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006230 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006231
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006232 /* Load the 2.x -> 3.x stdlib module mapping tables */
6233 compat_pickle = PyImport_ImportModule("_compat_pickle");
6234 if (!compat_pickle)
6235 goto error;
6236 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6237 if (!name_mapping_2to3)
6238 goto error;
6239 if (!PyDict_CheckExact(name_mapping_2to3)) {
6240 PyErr_Format(PyExc_RuntimeError,
6241 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6242 Py_TYPE(name_mapping_2to3)->tp_name);
6243 goto error;
6244 }
6245 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6246 "IMPORT_MAPPING");
6247 if (!import_mapping_2to3)
6248 goto error;
6249 if (!PyDict_CheckExact(import_mapping_2to3)) {
6250 PyErr_Format(PyExc_RuntimeError,
6251 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6252 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6253 goto error;
6254 }
6255 /* ... and the 3.x -> 2.x mapping tables */
6256 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6257 "REVERSE_NAME_MAPPING");
6258 if (!name_mapping_3to2)
6259 goto error;
6260 if (!PyDict_CheckExact(name_mapping_3to2)) {
6261 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006262 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006263 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6264 goto error;
6265 }
6266 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6267 "REVERSE_IMPORT_MAPPING");
6268 if (!import_mapping_3to2)
6269 goto error;
6270 if (!PyDict_CheckExact(import_mapping_3to2)) {
6271 PyErr_Format(PyExc_RuntimeError,
6272 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6273 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6274 goto error;
6275 }
6276 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006277
6278 empty_tuple = PyTuple_New(0);
6279 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006280 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006281 two_tuple = PyTuple_New(2);
6282 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006283 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006284 /* We use this temp container with no regard to refcounts, or to
6285 * keeping containees alive. Exempt from GC, because we don't
6286 * want anything looking at two_tuple() by magic.
6287 */
6288 PyObject_GC_UnTrack(two_tuple);
6289
6290 return 0;
6291
6292 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006293 Py_CLEAR(copyreg);
6294 Py_CLEAR(dispatch_table);
6295 Py_CLEAR(extension_registry);
6296 Py_CLEAR(inverted_registry);
6297 Py_CLEAR(extension_cache);
6298 Py_CLEAR(compat_pickle);
6299 Py_CLEAR(name_mapping_2to3);
6300 Py_CLEAR(import_mapping_2to3);
6301 Py_CLEAR(name_mapping_3to2);
6302 Py_CLEAR(import_mapping_3to2);
6303 Py_CLEAR(empty_tuple);
6304 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006305 return -1;
6306}
6307
6308static struct PyModuleDef _picklemodule = {
6309 PyModuleDef_HEAD_INIT,
6310 "_pickle",
6311 pickle_module_doc,
6312 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006313 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006314 NULL,
6315 NULL,
6316 NULL,
6317 NULL
6318};
6319
6320PyMODINIT_FUNC
6321PyInit__pickle(void)
6322{
6323 PyObject *m;
6324
6325 if (PyType_Ready(&Unpickler_Type) < 0)
6326 return NULL;
6327 if (PyType_Ready(&Pickler_Type) < 0)
6328 return NULL;
6329 if (PyType_Ready(&Pdata_Type) < 0)
6330 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006331 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6332 return NULL;
6333 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6334 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006335
6336 /* Create the module and add the functions. */
6337 m = PyModule_Create(&_picklemodule);
6338 if (m == NULL)
6339 return NULL;
6340
6341 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6342 return NULL;
6343 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6344 return NULL;
6345
6346 /* Initialize the exceptions. */
6347 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6348 if (PickleError == NULL)
6349 return NULL;
6350 PicklingError = \
6351 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6352 if (PicklingError == NULL)
6353 return NULL;
6354 UnpicklingError = \
6355 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6356 if (UnpicklingError == NULL)
6357 return NULL;
6358
6359 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6360 return NULL;
6361 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6362 return NULL;
6363 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6364 return NULL;
6365
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006366 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006367 return NULL;
6368
6369 return m;
6370}