blob: adc35f18bd41863522e207d042e175b92e33af57 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version limits.  HIGHEST_PROTOCOL must be bumped whenever
   new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes.  This list must be kept in sync with pickle.py; each
   opcode is documented extensively in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* TRUE and FALSE are not opcodes.  They are the byte sequences used to
 * pickle bools before protocol 2: an unpickler that predates bools reads
 * them back as ints, while a newer unpickler can recognize that a bool was
 * intended.  Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE  "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before emitting
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two diverge, but it is unclear that would help
       anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler's write buffer when pickling to a stream.
       Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000156 int i = Py_SIZE(self);
157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
193Pdata_clear(Pdata *self, int clearto)
194{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000195 int i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): on failure the macro returns without releasing O, so the
   caller's reference is not dropped here -- confirm callers account for
   that. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference taken here is released again before the
   enclosing function returns ER, so the object's refcount is unchanged. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
306 long me_value;
307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000331 int buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
372 int *marks; /* Mark stack, used for unpickling container
373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping void* to longs, used by the pickler for
 memoization.  Using a custom hashtable instead of a PyDict lets us skip a
 lot of unnecessary object creation, which makes a huge performance
 difference. */

/* Smallest table size; table sizes are always a power of two. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted by on each probe. */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
559static long *
560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
570PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers that build the argument tuple passed to functions.  A single
   1-tuple is cached in self->arg and reused, so PyTuple_New() is normally
   called only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* Put `obj` into slot 0 of the cached tuple, creating the tuple on first
   use and releasing whatever slot 0 held before.  The reference to `obj` is
   consumed: it is either stored in the tuple or, if the tuple cannot be
   created, released (self->arg is then NULL). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Drop the cached tuple if anything besides `self` still references it, so
   that the next call starts from a fresh tuple. */
#define FREE_ARG_TUP(self) do {                                 \
        if (Py_REFCNT((self)->arg) > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
703static int
704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
738 self->max_output_len = (self->output_len + n) * 2;
739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
1040
1041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001222 long *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
1224 int len;
1225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001235 len = (int)strlen(pdata);
1236 }
1237 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001238 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001239 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001240 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001241 len = 2;
1242 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001243 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001244 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001245 pdata[1] = (unsigned char)(*value & 0xff);
1246 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1247 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1248 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001249 len = 5;
1250 }
1251 else { /* unlikely */
1252 PyErr_SetString(PicklingError,
1253 "memo id too large for LONG_BINGET");
1254 return -1;
1255 }
1256 }
1257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001258 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001259 return -1;
1260
1261 return 0;
1262}
1263
1264/* Store an object in the memo, assign it a new unique ID based on the number
1265 of objects currently stored in the memo and generate a PUT opcode. */
1266static int
1267memo_put(PicklerObject *self, PyObject *obj)
1268{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001269 long x;
1270 char pdata[30];
1271 int len;
1272 int status = 0;
1273
1274 if (self->fast)
1275 return 0;
1276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001277 x = PyMemoTable_Size(self->memo);
1278 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001279 goto error;
1280
1281 if (!self->bin) {
1282 pdata[0] = PUT;
1283 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
1284 len = strlen(pdata);
1285 }
1286 else {
1287 if (x < 256) {
1288 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001289 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001290 len = 2;
1291 }
1292 else if (x <= 0xffffffffL) {
1293 pdata[0] = LONG_BINPUT;
1294 pdata[1] = (unsigned char)(x & 0xff);
1295 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1296 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1297 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1298 len = 5;
1299 }
1300 else { /* unlikely */
1301 PyErr_SetString(PicklingError,
1302 "memo id too large for LONG_BINPUT");
1303 return -1;
1304 }
1305 }
1306
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001307 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001308 goto error;
1309
1310 if (0) {
1311 error:
1312 status = -1;
1313 }
1314
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001315 return status;
1316}
1317
1318static PyObject *
1319whichmodule(PyObject *global, PyObject *global_name)
1320{
1321 Py_ssize_t i, j;
1322 static PyObject *module_str = NULL;
1323 static PyObject *main_str = NULL;
1324 PyObject *module_name;
1325 PyObject *modules_dict;
1326 PyObject *module;
1327 PyObject *obj;
1328
1329 if (module_str == NULL) {
1330 module_str = PyUnicode_InternFromString("__module__");
1331 if (module_str == NULL)
1332 return NULL;
1333 main_str = PyUnicode_InternFromString("__main__");
1334 if (main_str == NULL)
1335 return NULL;
1336 }
1337
1338 module_name = PyObject_GetAttr(global, module_str);
1339
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001340 /* In some rare cases (e.g., bound methods of extension types),
1341 __module__ can be None. If it is so, then search sys.modules
1342 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001343 if (module_name == Py_None) {
1344 Py_DECREF(module_name);
1345 goto search;
1346 }
1347
1348 if (module_name) {
1349 return module_name;
1350 }
1351 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1352 PyErr_Clear();
1353 else
1354 return NULL;
1355
1356 search:
1357 modules_dict = PySys_GetObject("modules");
1358 if (modules_dict == NULL)
1359 return NULL;
1360
1361 i = 0;
1362 module_name = NULL;
1363 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001364 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001365 continue;
1366
1367 obj = PyObject_GetAttr(module, global_name);
1368 if (obj == NULL) {
1369 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1370 PyErr_Clear();
1371 else
1372 return NULL;
1373 continue;
1374 }
1375
1376 if (obj != global) {
1377 Py_DECREF(obj);
1378 continue;
1379 }
1380
1381 Py_DECREF(obj);
1382 break;
1383 }
1384
1385 /* If no module is found, use __main__. */
1386 if (!j) {
1387 module_name = main_str;
1388 }
1389
1390 Py_INCREF(module_name);
1391 return module_name;
1392}
1393
1394/* fast_save_enter() and fast_save_leave() are guards against recursive
1395 objects when Pickler is used with the "fast mode" (i.e., with object
1396 memoization disabled). If the nesting of a list or dict object exceed
1397 FAST_NESTING_LIMIT, these guards will start keeping an internal
1398 reference to the seen list or dict objects and check whether these objects
1399 are recursive. These are not strictly necessary, since save() has a
1400 hard-coded recursion limit, but they give a nicer error message than the
1401 typical RuntimeError. */
1402static int
1403fast_save_enter(PicklerObject *self, PyObject *obj)
1404{
1405 /* if fast_nesting < 0, we're doing an error exit. */
1406 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1407 PyObject *key = NULL;
1408 if (self->fast_memo == NULL) {
1409 self->fast_memo = PyDict_New();
1410 if (self->fast_memo == NULL) {
1411 self->fast_nesting = -1;
1412 return 0;
1413 }
1414 }
1415 key = PyLong_FromVoidPtr(obj);
1416 if (key == NULL)
1417 return 0;
1418 if (PyDict_GetItem(self->fast_memo, key)) {
1419 Py_DECREF(key);
1420 PyErr_Format(PyExc_ValueError,
1421 "fast mode: can't pickle cyclic objects "
1422 "including object type %.200s at %p",
1423 obj->ob_type->tp_name, obj);
1424 self->fast_nesting = -1;
1425 return 0;
1426 }
1427 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1428 Py_DECREF(key);
1429 self->fast_nesting = -1;
1430 return 0;
1431 }
1432 Py_DECREF(key);
1433 }
1434 return 1;
1435}
1436
1437static int
1438fast_save_leave(PicklerObject *self, PyObject *obj)
1439{
1440 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1441 PyObject *key = PyLong_FromVoidPtr(obj);
1442 if (key == NULL)
1443 return 0;
1444 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1445 Py_DECREF(key);
1446 return 0;
1447 }
1448 Py_DECREF(key);
1449 }
1450 return 1;
1451}
1452
1453static int
1454save_none(PicklerObject *self, PyObject *obj)
1455{
1456 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001457 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001458 return -1;
1459
1460 return 0;
1461}
1462
1463static int
1464save_bool(PicklerObject *self, PyObject *obj)
1465{
1466 static const char *buf[2] = { FALSE, TRUE };
1467 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1468 int p = (obj == Py_True);
1469
1470 if (self->proto >= 2) {
1471 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001472 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001473 return -1;
1474 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001475 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001476 return -1;
1477
1478 return 0;
1479}
1480
1481static int
1482save_int(PicklerObject *self, long x)
1483{
1484 char pdata[32];
1485 int len = 0;
1486
1487 if (!self->bin
1488#if SIZEOF_LONG > 4
1489 || x > 0x7fffffffL || x < -0x80000000L
1490#endif
1491 ) {
1492 /* Text-mode pickle, or long too big to fit in the 4-byte
1493 * signed BININT format: store as a string.
1494 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001495 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1496 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001497 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001498 return -1;
1499 }
1500 else {
1501 /* Binary pickle and x fits in a signed 4-byte int. */
1502 pdata[1] = (unsigned char)(x & 0xff);
1503 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1504 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1505 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1506
1507 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1508 if (pdata[2] == 0) {
1509 pdata[0] = BININT1;
1510 len = 2;
1511 }
1512 else {
1513 pdata[0] = BININT2;
1514 len = 3;
1515 }
1516 }
1517 else {
1518 pdata[0] = BININT;
1519 len = 5;
1520 }
1521
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001522 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001523 return -1;
1524 }
1525
1526 return 0;
1527}
1528
1529static int
1530save_long(PicklerObject *self, PyObject *obj)
1531{
1532 PyObject *repr = NULL;
1533 Py_ssize_t size;
1534 long val = PyLong_AsLong(obj);
1535 int status = 0;
1536
1537 const char long_op = LONG;
1538
1539 if (val == -1 && PyErr_Occurred()) {
1540 /* out of range for int pickling */
1541 PyErr_Clear();
1542 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001543 else
1544#if SIZEOF_LONG > 4
1545 if (val <= 0x7fffffffL && val >= -0x80000000L)
1546#endif
1547 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001548
1549 if (self->proto >= 2) {
1550 /* Linear-time pickling. */
1551 size_t nbits;
1552 size_t nbytes;
1553 unsigned char *pdata;
1554 char header[5];
1555 int i;
1556 int sign = _PyLong_Sign(obj);
1557
1558 if (sign == 0) {
1559 header[0] = LONG1;
1560 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001561 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001562 goto error;
1563 return 0;
1564 }
1565 nbits = _PyLong_NumBits(obj);
1566 if (nbits == (size_t)-1 && PyErr_Occurred())
1567 goto error;
1568 /* How many bytes do we need? There are nbits >> 3 full
1569 * bytes of data, and nbits & 7 leftover bits. If there
1570 * are any leftover bits, then we clearly need another
1571 * byte. Wnat's not so obvious is that we *probably*
1572 * need another byte even if there aren't any leftovers:
1573 * the most-significant bit of the most-significant byte
1574 * acts like a sign bit, and it's usually got a sense
1575 * opposite of the one we need. The exception is longs
1576 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1577 * its own 256's-complement, so has the right sign bit
1578 * even without the extra byte. That's a pain to check
1579 * for in advance, though, so we always grab an extra
1580 * byte at the start, and cut it back later if possible.
1581 */
1582 nbytes = (nbits >> 3) + 1;
1583 if (nbytes > INT_MAX) {
1584 PyErr_SetString(PyExc_OverflowError,
1585 "long too large to pickle");
1586 goto error;
1587 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001588 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001589 if (repr == NULL)
1590 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001591 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001592 i = _PyLong_AsByteArray((PyLongObject *)obj,
1593 pdata, nbytes,
1594 1 /* little endian */ , 1 /* signed */ );
1595 if (i < 0)
1596 goto error;
1597 /* If the long is negative, this may be a byte more than
1598 * needed. This is so iff the MSB is all redundant sign
1599 * bits.
1600 */
1601 if (sign < 0 &&
1602 nbytes > 1 &&
1603 pdata[nbytes - 1] == 0xff &&
1604 (pdata[nbytes - 2] & 0x80) != 0) {
1605 nbytes--;
1606 }
1607
1608 if (nbytes < 256) {
1609 header[0] = LONG1;
1610 header[1] = (unsigned char)nbytes;
1611 size = 2;
1612 }
1613 else {
1614 header[0] = LONG4;
1615 size = (int)nbytes;
1616 for (i = 1; i < 5; i++) {
1617 header[i] = (unsigned char)(size & 0xff);
1618 size >>= 8;
1619 }
1620 size = 5;
1621 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001622 if (_Pickler_Write(self, header, size) < 0 ||
1623 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001624 goto error;
1625 }
1626 else {
1627 char *string;
1628
Mark Dickinson8dd05142009-01-20 20:43:58 +00001629 /* proto < 2: write the repr and newline. This is quadratic-time (in
1630 the number of digits), in both directions. We add a trailing 'L'
1631 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001632
1633 repr = PyObject_Repr(obj);
1634 if (repr == NULL)
1635 goto error;
1636
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001637 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001638 if (string == NULL)
1639 goto error;
1640
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001641 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1642 _Pickler_Write(self, string, size) < 0 ||
1643 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001644 goto error;
1645 }
1646
1647 if (0) {
1648 error:
1649 status = -1;
1650 }
1651 Py_XDECREF(repr);
1652
1653 return status;
1654}
1655
1656static int
1657save_float(PicklerObject *self, PyObject *obj)
1658{
1659 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1660
1661 if (self->bin) {
1662 char pdata[9];
1663 pdata[0] = BINFLOAT;
1664 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1665 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001666 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001668 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001669 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001670 int result = -1;
1671 char *buf = NULL;
1672 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001674 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001675 goto done;
1676
Mark Dickinson3e09f432009-04-17 08:41:23 +00001677 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001678 if (!buf) {
1679 PyErr_NoMemory();
1680 goto done;
1681 }
1682
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001683 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001684 goto done;
1685
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001686 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001687 goto done;
1688
1689 result = 0;
1690done:
1691 PyMem_Free(buf);
1692 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001693 }
1694
1695 return 0;
1696}
1697
1698static int
1699save_bytes(PicklerObject *self, PyObject *obj)
1700{
1701 if (self->proto < 3) {
1702 /* Older pickle protocols do not have an opcode for pickling bytes
1703 objects. Therefore, we need to fake the copy protocol (i.e.,
1704 the __reduce__ method) to permit bytes object unpickling. */
1705 PyObject *reduce_value = NULL;
1706 PyObject *bytelist = NULL;
1707 int status;
1708
1709 bytelist = PySequence_List(obj);
1710 if (bytelist == NULL)
1711 return -1;
1712
1713 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1714 bytelist);
1715 if (reduce_value == NULL) {
1716 Py_DECREF(bytelist);
1717 return -1;
1718 }
1719
1720 /* save_reduce() will memoize the object automatically. */
1721 status = save_reduce(self, reduce_value, obj);
1722 Py_DECREF(reduce_value);
1723 Py_DECREF(bytelist);
1724 return status;
1725 }
1726 else {
1727 Py_ssize_t size;
1728 char header[5];
1729 int len;
1730
1731 size = PyBytes_Size(obj);
1732 if (size < 0)
1733 return -1;
1734
1735 if (size < 256) {
1736 header[0] = SHORT_BINBYTES;
1737 header[1] = (unsigned char)size;
1738 len = 2;
1739 }
1740 else if (size <= 0xffffffffL) {
1741 header[0] = BINBYTES;
1742 header[1] = (unsigned char)(size & 0xff);
1743 header[2] = (unsigned char)((size >> 8) & 0xff);
1744 header[3] = (unsigned char)((size >> 16) & 0xff);
1745 header[4] = (unsigned char)((size >> 24) & 0xff);
1746 len = 5;
1747 }
1748 else {
1749 return -1; /* string too large */
1750 }
1751
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001752 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 return -1;
1754
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001755 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001756 return -1;
1757
1758 if (memo_put(self, obj) < 0)
1759 return -1;
1760
1761 return 0;
1762 }
1763}
1764
1765/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1766 backslash and newline characters to \uXXXX escapes. */
1767static PyObject *
1768raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1769{
1770 PyObject *repr, *result;
1771 char *p;
1772 char *q;
1773
1774 static const char *hexdigits = "0123456789abcdef";
1775
1776#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001777 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001779 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001781
1782 if (size > PY_SSIZE_T_MAX / expandsize)
1783 return PyErr_NoMemory();
1784
1785 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001786 if (repr == NULL)
1787 return NULL;
1788 if (size == 0)
1789 goto done;
1790
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001791 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001792 while (size-- > 0) {
1793 Py_UNICODE ch = *s++;
1794#ifdef Py_UNICODE_WIDE
1795 /* Map 32-bit characters to '\Uxxxxxxxx' */
1796 if (ch >= 0x10000) {
1797 *p++ = '\\';
1798 *p++ = 'U';
1799 *p++ = hexdigits[(ch >> 28) & 0xf];
1800 *p++ = hexdigits[(ch >> 24) & 0xf];
1801 *p++ = hexdigits[(ch >> 20) & 0xf];
1802 *p++ = hexdigits[(ch >> 16) & 0xf];
1803 *p++ = hexdigits[(ch >> 12) & 0xf];
1804 *p++ = hexdigits[(ch >> 8) & 0xf];
1805 *p++ = hexdigits[(ch >> 4) & 0xf];
1806 *p++ = hexdigits[ch & 15];
1807 }
1808 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001809#else
1810 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1811 if (ch >= 0xD800 && ch < 0xDC00) {
1812 Py_UNICODE ch2;
1813 Py_UCS4 ucs;
1814
1815 ch2 = *s++;
1816 size--;
1817 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1818 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1819 *p++ = '\\';
1820 *p++ = 'U';
1821 *p++ = hexdigits[(ucs >> 28) & 0xf];
1822 *p++ = hexdigits[(ucs >> 24) & 0xf];
1823 *p++ = hexdigits[(ucs >> 20) & 0xf];
1824 *p++ = hexdigits[(ucs >> 16) & 0xf];
1825 *p++ = hexdigits[(ucs >> 12) & 0xf];
1826 *p++ = hexdigits[(ucs >> 8) & 0xf];
1827 *p++ = hexdigits[(ucs >> 4) & 0xf];
1828 *p++ = hexdigits[ucs & 0xf];
1829 continue;
1830 }
1831 /* Fall through: isolated surrogates are copied as-is */
1832 s--;
1833 size++;
1834 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001835#endif
1836 /* Map 16-bit characters to '\uxxxx' */
1837 if (ch >= 256 || ch == '\\' || ch == '\n') {
1838 *p++ = '\\';
1839 *p++ = 'u';
1840 *p++ = hexdigits[(ch >> 12) & 0xf];
1841 *p++ = hexdigits[(ch >> 8) & 0xf];
1842 *p++ = hexdigits[(ch >> 4) & 0xf];
1843 *p++ = hexdigits[ch & 15];
1844 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001845 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001846 else
1847 *p++ = (char) ch;
1848 }
1849 size = p - q;
1850
1851 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001852 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001853 Py_DECREF(repr);
1854 return result;
1855}
1856
1857static int
1858save_unicode(PicklerObject *self, PyObject *obj)
1859{
1860 Py_ssize_t size;
1861 PyObject *encoded = NULL;
1862
1863 if (self->bin) {
1864 char pdata[5];
1865
Victor Stinner485fb562010-04-13 11:07:24 +00001866 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1867 PyUnicode_GET_SIZE(obj),
1868 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001869 if (encoded == NULL)
1870 goto error;
1871
1872 size = PyBytes_GET_SIZE(encoded);
1873 if (size < 0 || size > 0xffffffffL)
1874 goto error; /* string too large */
1875
1876 pdata[0] = BINUNICODE;
1877 pdata[1] = (unsigned char)(size & 0xff);
1878 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1879 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1880 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1881
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001882 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001883 goto error;
1884
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001885 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001886 goto error;
1887 }
1888 else {
1889 const char unicode_op = UNICODE;
1890
1891 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1892 PyUnicode_GET_SIZE(obj));
1893 if (encoded == NULL)
1894 goto error;
1895
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001896 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001897 goto error;
1898
1899 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001900 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001901 goto error;
1902
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001903 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001904 goto error;
1905 }
1906 if (memo_put(self, obj) < 0)
1907 goto error;
1908
1909 Py_DECREF(encoded);
1910 return 0;
1911
1912 error:
1913 Py_XDECREF(encoded);
1914 return -1;
1915}
1916
1917/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1918static int
1919store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1920{
1921 int i;
1922
1923 assert(PyTuple_Size(t) == len);
1924
1925 for (i = 0; i < len; i++) {
1926 PyObject *element = PyTuple_GET_ITEM(t, i);
1927
1928 if (element == NULL)
1929 return -1;
1930 if (save(self, element, 0) < 0)
1931 return -1;
1932 }
1933
1934 return 0;
1935}
1936
1937/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1938 * used across protocols to minimize the space needed to pickle them.
1939 * Tuples are also the only builtin immutable type that can be recursive
1940 * (a tuple can be reached from itself), and that requires some subtle
1941 * magic so that it works in all cases. IOW, this is a long routine.
1942 */
1943static int
1944save_tuple(PicklerObject *self, PyObject *obj)
1945{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001946 int len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001947
1948 const char mark_op = MARK;
1949 const char tuple_op = TUPLE;
1950 const char pop_op = POP;
1951 const char pop_mark_op = POP_MARK;
1952 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1953
1954 if ((len = PyTuple_Size(obj)) < 0)
1955 return -1;
1956
1957 if (len == 0) {
1958 char pdata[2];
1959
1960 if (self->proto) {
1961 pdata[0] = EMPTY_TUPLE;
1962 len = 1;
1963 }
1964 else {
1965 pdata[0] = MARK;
1966 pdata[1] = TUPLE;
1967 len = 2;
1968 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001969 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001970 return -1;
1971 return 0;
1972 }
1973
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001974 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001975 * saving the tuple elements, the tuple must be recursive, in
1976 * which case we'll pop everything we put on the stack, and fetch
1977 * its value from the memo.
1978 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001979 if (len <= 3 && self->proto >= 2) {
1980 /* Use TUPLE{1,2,3} opcodes. */
1981 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001982 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001983
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001984 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001985 /* pop the len elements */
1986 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001987 if (_Pickler_Write(self, &pop_op, 1) < 0)
1988 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001989 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001990 if (memo_get(self, obj) < 0)
1991 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001993 return 0;
1994 }
1995 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001996 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1997 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001998 }
1999 goto memoize;
2000 }
2001
2002 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2003 * Generate MARK e1 e2 ... TUPLE
2004 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002005 if (_Pickler_Write(self, &mark_op, 1) < 0)
2006 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002007
2008 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002009 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002010
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002011 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002012 /* pop the stack stuff we pushed */
2013 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002014 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2015 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002016 }
2017 else {
2018 /* Note that we pop one more than len, to remove
2019 * the MARK too.
2020 */
2021 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002022 if (_Pickler_Write(self, &pop_op, 1) < 0)
2023 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002024 }
2025 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002026 if (memo_get(self, obj) < 0)
2027 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002029 return 0;
2030 }
2031 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002032 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2033 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 }
2035
2036 memoize:
2037 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002038 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002039
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002040 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002041}
2042
2043/* iter is an iterator giving items, and we batch up chunks of
2044 * MARK item item ... item APPENDS
2045 * opcode sequences. Calling code should have arranged to first create an
2046 * empty list, or list-like object, for the APPENDS to operate on.
2047 * Returns 0 on success, <0 on error.
2048 */
2049static int
2050batch_list(PicklerObject *self, PyObject *iter)
2051{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002052 PyObject *obj = NULL;
2053 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002054 int i, n;
2055
2056 const char mark_op = MARK;
2057 const char append_op = APPEND;
2058 const char appends_op = APPENDS;
2059
2060 assert(iter != NULL);
2061
2062 /* XXX: I think this function could be made faster by avoiding the
2063 iterator interface and fetching objects directly from list using
2064 PyList_GET_ITEM.
2065 */
2066
2067 if (self->proto == 0) {
2068 /* APPENDS isn't available; do one at a time. */
2069 for (;;) {
2070 obj = PyIter_Next(iter);
2071 if (obj == NULL) {
2072 if (PyErr_Occurred())
2073 return -1;
2074 break;
2075 }
2076 i = save(self, obj, 0);
2077 Py_DECREF(obj);
2078 if (i < 0)
2079 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002080 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002081 return -1;
2082 }
2083 return 0;
2084 }
2085
2086 /* proto > 0: write in batches of BATCHSIZE. */
2087 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002088 /* Get first item */
2089 firstitem = PyIter_Next(iter);
2090 if (firstitem == NULL) {
2091 if (PyErr_Occurred())
2092 goto error;
2093
2094 /* nothing more to add */
2095 break;
2096 }
2097
2098 /* Try to get a second item */
2099 obj = PyIter_Next(iter);
2100 if (obj == NULL) {
2101 if (PyErr_Occurred())
2102 goto error;
2103
2104 /* Only one item to write */
2105 if (save(self, firstitem, 0) < 0)
2106 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002107 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002108 goto error;
2109 Py_CLEAR(firstitem);
2110 break;
2111 }
2112
2113 /* More than one item to write */
2114
2115 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002116 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002117 goto error;
2118
2119 if (save(self, firstitem, 0) < 0)
2120 goto error;
2121 Py_CLEAR(firstitem);
2122 n = 1;
2123
2124 /* Fetch and save up to BATCHSIZE items */
2125 while (obj) {
2126 if (save(self, obj, 0) < 0)
2127 goto error;
2128 Py_CLEAR(obj);
2129 n += 1;
2130
2131 if (n == BATCHSIZE)
2132 break;
2133
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002134 obj = PyIter_Next(iter);
2135 if (obj == NULL) {
2136 if (PyErr_Occurred())
2137 goto error;
2138 break;
2139 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002140 }
2141
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002142 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002143 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002144
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002145 } while (n == BATCHSIZE);
2146 return 0;
2147
2148 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002149 Py_XDECREF(firstitem);
2150 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002151 return -1;
2152}
2153
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002154/* This is a variant of batch_list() above, specialized for lists (with no
2155 * support for list subclasses). Like batch_list(), we batch up chunks of
2156 * MARK item item ... item APPENDS
2157 * opcode sequences. Calling code should have arranged to first create an
2158 * empty list, or list-like object, for the APPENDS to operate on.
2159 * Returns 0 on success, -1 on error.
2160 *
2161 * This version is considerably faster than batch_list(), if less general.
2162 *
2163 * Note that this only works for protocols > 0.
2164 */
2165static int
2166batch_list_exact(PicklerObject *self, PyObject *obj)
2167{
2168 PyObject *item = NULL;
2169 int this_batch, total;
2170
2171 const char append_op = APPEND;
2172 const char appends_op = APPENDS;
2173 const char mark_op = MARK;
2174
2175 assert(obj != NULL);
2176 assert(self->proto > 0);
2177 assert(PyList_CheckExact(obj));
2178
2179 if (PyList_GET_SIZE(obj) == 1) {
2180 item = PyList_GET_ITEM(obj, 0);
2181 if (save(self, item, 0) < 0)
2182 return -1;
2183 if (_Pickler_Write(self, &append_op, 1) < 0)
2184 return -1;
2185 return 0;
2186 }
2187
2188 /* Write in batches of BATCHSIZE. */
2189 total = 0;
2190 do {
2191 this_batch = 0;
2192 if (_Pickler_Write(self, &mark_op, 1) < 0)
2193 return -1;
2194 while (total < PyList_GET_SIZE(obj)) {
2195 item = PyList_GET_ITEM(obj, total);
2196 if (save(self, item, 0) < 0)
2197 return -1;
2198 total++;
2199 if (++this_batch == BATCHSIZE)
2200 break;
2201 }
2202 if (_Pickler_Write(self, &appends_op, 1) < 0)
2203 return -1;
2204
2205 } while (total < PyList_GET_SIZE(obj));
2206
2207 return 0;
2208}
2209
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002210static int
2211save_list(PicklerObject *self, PyObject *obj)
2212{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002213 char header[3];
2214 int len;
2215 int status = 0;
2216
2217 if (self->fast && !fast_save_enter(self, obj))
2218 goto error;
2219
2220 /* Create an empty list. */
2221 if (self->bin) {
2222 header[0] = EMPTY_LIST;
2223 len = 1;
2224 }
2225 else {
2226 header[0] = MARK;
2227 header[1] = LIST;
2228 len = 2;
2229 }
2230
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002231 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002232 goto error;
2233
2234 /* Get list length, and bow out early if empty. */
2235 if ((len = PyList_Size(obj)) < 0)
2236 goto error;
2237
2238 if (memo_put(self, obj) < 0)
2239 goto error;
2240
2241 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002242 /* Materialize the list elements. */
2243 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002244 if (Py_EnterRecursiveCall(" while pickling an object"))
2245 goto error;
2246 status = batch_list_exact(self, obj);
2247 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002248 } else {
2249 PyObject *iter = PyObject_GetIter(obj);
2250 if (iter == NULL)
2251 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002252
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002253 if (Py_EnterRecursiveCall(" while pickling an object")) {
2254 Py_DECREF(iter);
2255 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002256 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002257 status = batch_list(self, iter);
2258 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002259 Py_DECREF(iter);
2260 }
2261 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002262 if (0) {
2263 error:
2264 status = -1;
2265 }
2266
2267 if (self->fast && !fast_save_leave(self, obj))
2268 status = -1;
2269
2270 return status;
2271}
2272
2273/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2274 * MARK key value ... key value SETITEMS
2275 * opcode sequences. Calling code should have arranged to first create an
2276 * empty dict, or dict-like object, for the SETITEMS to operate on.
2277 * Returns 0 on success, <0 on error.
2278 *
2279 * This is very much like batch_list(). The difference between saving
2280 * elements directly, and picking apart two-tuples, is so long-winded at
2281 * the C level, though, that attempts to combine these routines were too
2282 * ugly to bear.
2283 */
2284static int
2285batch_dict(PicklerObject *self, PyObject *iter)
2286{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002287 PyObject *obj = NULL;
2288 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002289 int i, n;
2290
2291 const char mark_op = MARK;
2292 const char setitem_op = SETITEM;
2293 const char setitems_op = SETITEMS;
2294
2295 assert(iter != NULL);
2296
2297 if (self->proto == 0) {
2298 /* SETITEMS isn't available; do one at a time. */
2299 for (;;) {
2300 obj = PyIter_Next(iter);
2301 if (obj == NULL) {
2302 if (PyErr_Occurred())
2303 return -1;
2304 break;
2305 }
2306 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2307 PyErr_SetString(PyExc_TypeError, "dict items "
2308 "iterator must return 2-tuples");
2309 return -1;
2310 }
2311 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2312 if (i >= 0)
2313 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2314 Py_DECREF(obj);
2315 if (i < 0)
2316 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002317 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002318 return -1;
2319 }
2320 return 0;
2321 }
2322
2323 /* proto > 0: write in batches of BATCHSIZE. */
2324 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002325 /* Get first item */
2326 firstitem = PyIter_Next(iter);
2327 if (firstitem == NULL) {
2328 if (PyErr_Occurred())
2329 goto error;
2330
2331 /* nothing more to add */
2332 break;
2333 }
2334 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2335 PyErr_SetString(PyExc_TypeError, "dict items "
2336 "iterator must return 2-tuples");
2337 goto error;
2338 }
2339
2340 /* Try to get a second item */
2341 obj = PyIter_Next(iter);
2342 if (obj == NULL) {
2343 if (PyErr_Occurred())
2344 goto error;
2345
2346 /* Only one item to write */
2347 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2348 goto error;
2349 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2350 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002351 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002352 goto error;
2353 Py_CLEAR(firstitem);
2354 break;
2355 }
2356
2357 /* More than one item to write */
2358
2359 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002360 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002361 goto error;
2362
2363 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2364 goto error;
2365 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2366 goto error;
2367 Py_CLEAR(firstitem);
2368 n = 1;
2369
2370 /* Fetch and save up to BATCHSIZE items */
2371 while (obj) {
2372 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2373 PyErr_SetString(PyExc_TypeError, "dict items "
2374 "iterator must return 2-tuples");
2375 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002376 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002377 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2378 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2379 goto error;
2380 Py_CLEAR(obj);
2381 n += 1;
2382
2383 if (n == BATCHSIZE)
2384 break;
2385
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002386 obj = PyIter_Next(iter);
2387 if (obj == NULL) {
2388 if (PyErr_Occurred())
2389 goto error;
2390 break;
2391 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002392 }
2393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002394 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002395 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002396
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002397 } while (n == BATCHSIZE);
2398 return 0;
2399
2400 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002401 Py_XDECREF(firstitem);
2402 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002403 return -1;
2404}
2405
Collin Winter5c9b02d2009-05-25 05:43:30 +00002406/* This is a variant of batch_dict() above that specializes for dicts, with no
2407 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2408 * MARK key value ... key value SETITEMS
2409 * opcode sequences. Calling code should have arranged to first create an
2410 * empty dict, or dict-like object, for the SETITEMS to operate on.
2411 * Returns 0 on success, -1 on error.
2412 *
2413 * Note that this currently doesn't work for protocol 0.
2414 */
2415static int
2416batch_dict_exact(PicklerObject *self, PyObject *obj)
2417{
2418 PyObject *key = NULL, *value = NULL;
2419 int i;
2420 Py_ssize_t dict_size, ppos = 0;
2421
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002422 const char mark_op = MARK;
2423 const char setitem_op = SETITEM;
2424 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002425
2426 assert(obj != NULL);
2427 assert(self->proto > 0);
2428
2429 dict_size = PyDict_Size(obj);
2430
2431 /* Special-case len(d) == 1 to save space. */
2432 if (dict_size == 1) {
2433 PyDict_Next(obj, &ppos, &key, &value);
2434 if (save(self, key, 0) < 0)
2435 return -1;
2436 if (save(self, value, 0) < 0)
2437 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002438 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002439 return -1;
2440 return 0;
2441 }
2442
2443 /* Write in batches of BATCHSIZE. */
2444 do {
2445 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002446 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002447 return -1;
2448 while (PyDict_Next(obj, &ppos, &key, &value)) {
2449 if (save(self, key, 0) < 0)
2450 return -1;
2451 if (save(self, value, 0) < 0)
2452 return -1;
2453 if (++i == BATCHSIZE)
2454 break;
2455 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002456 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002457 return -1;
2458 if (PyDict_Size(obj) != dict_size) {
2459 PyErr_Format(
2460 PyExc_RuntimeError,
2461 "dictionary changed size during iteration");
2462 return -1;
2463 }
2464
2465 } while (i == BATCHSIZE);
2466 return 0;
2467}
2468
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002469static int
2470save_dict(PicklerObject *self, PyObject *obj)
2471{
2472 PyObject *items, *iter;
2473 char header[3];
2474 int len;
2475 int status = 0;
2476
2477 if (self->fast && !fast_save_enter(self, obj))
2478 goto error;
2479
2480 /* Create an empty dict. */
2481 if (self->bin) {
2482 header[0] = EMPTY_DICT;
2483 len = 1;
2484 }
2485 else {
2486 header[0] = MARK;
2487 header[1] = DICT;
2488 len = 2;
2489 }
2490
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002491 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002492 goto error;
2493
2494 /* Get dict size, and bow out early if empty. */
2495 if ((len = PyDict_Size(obj)) < 0)
2496 goto error;
2497
2498 if (memo_put(self, obj) < 0)
2499 goto error;
2500
2501 if (len != 0) {
2502 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002503 if (PyDict_CheckExact(obj) && self->proto > 0) {
2504 /* We can take certain shortcuts if we know this is a dict and
2505 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002506 if (Py_EnterRecursiveCall(" while pickling an object"))
2507 goto error;
2508 status = batch_dict_exact(self, obj);
2509 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002510 } else {
2511 items = PyObject_CallMethod(obj, "items", "()");
2512 if (items == NULL)
2513 goto error;
2514 iter = PyObject_GetIter(items);
2515 Py_DECREF(items);
2516 if (iter == NULL)
2517 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002518 if (Py_EnterRecursiveCall(" while pickling an object")) {
2519 Py_DECREF(iter);
2520 goto error;
2521 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002522 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002523 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002524 Py_DECREF(iter);
2525 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002526 }
2527
2528 if (0) {
2529 error:
2530 status = -1;
2531 }
2532
2533 if (self->fast && !fast_save_leave(self, obj))
2534 status = -1;
2535
2536 return status;
2537}
2538
2539static int
2540save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2541{
2542 static PyObject *name_str = NULL;
2543 PyObject *global_name = NULL;
2544 PyObject *module_name = NULL;
2545 PyObject *module = NULL;
2546 PyObject *cls;
2547 int status = 0;
2548
2549 const char global_op = GLOBAL;
2550
2551 if (name_str == NULL) {
2552 name_str = PyUnicode_InternFromString("__name__");
2553 if (name_str == NULL)
2554 goto error;
2555 }
2556
2557 if (name) {
2558 global_name = name;
2559 Py_INCREF(global_name);
2560 }
2561 else {
2562 global_name = PyObject_GetAttr(obj, name_str);
2563 if (global_name == NULL)
2564 goto error;
2565 }
2566
2567 module_name = whichmodule(obj, global_name);
2568 if (module_name == NULL)
2569 goto error;
2570
2571 /* XXX: Change to use the import C API directly with level=0 to disallow
2572 relative imports.
2573
2574 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2575 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2576 custom import functions (IMHO, this would be a nice security
2577 feature). The import C API would need to be extended to support the
2578 extra parameters of __import__ to fix that. */
2579 module = PyImport_Import(module_name);
2580 if (module == NULL) {
2581 PyErr_Format(PicklingError,
2582 "Can't pickle %R: import of module %R failed",
2583 obj, module_name);
2584 goto error;
2585 }
2586 cls = PyObject_GetAttr(module, global_name);
2587 if (cls == NULL) {
2588 PyErr_Format(PicklingError,
2589 "Can't pickle %R: attribute lookup %S.%S failed",
2590 obj, module_name, global_name);
2591 goto error;
2592 }
2593 if (cls != obj) {
2594 Py_DECREF(cls);
2595 PyErr_Format(PicklingError,
2596 "Can't pickle %R: it's not the same object as %S.%S",
2597 obj, module_name, global_name);
2598 goto error;
2599 }
2600 Py_DECREF(cls);
2601
2602 if (self->proto >= 2) {
2603 /* See whether this is in the extension registry, and if
2604 * so generate an EXT opcode.
2605 */
2606 PyObject *code_obj; /* extension code as Python object */
2607 long code; /* extension code as C value */
2608 char pdata[5];
2609 int n;
2610
2611 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2612 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2613 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2614 /* The object is not registered in the extension registry.
2615 This is the most likely code path. */
2616 if (code_obj == NULL)
2617 goto gen_global;
2618
2619 /* XXX: pickle.py doesn't check neither the type, nor the range
2620 of the value returned by the extension_registry. It should for
2621 consistency. */
2622
2623 /* Verify code_obj has the right type and value. */
2624 if (!PyLong_Check(code_obj)) {
2625 PyErr_Format(PicklingError,
2626 "Can't pickle %R: extension code %R isn't an integer",
2627 obj, code_obj);
2628 goto error;
2629 }
2630 code = PyLong_AS_LONG(code_obj);
2631 if (code <= 0 || code > 0x7fffffffL) {
2632 PyErr_Format(PicklingError,
2633 "Can't pickle %R: extension code %ld is out of range",
2634 obj, code);
2635 goto error;
2636 }
2637
2638 /* Generate an EXT opcode. */
2639 if (code <= 0xff) {
2640 pdata[0] = EXT1;
2641 pdata[1] = (unsigned char)code;
2642 n = 2;
2643 }
2644 else if (code <= 0xffff) {
2645 pdata[0] = EXT2;
2646 pdata[1] = (unsigned char)(code & 0xff);
2647 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2648 n = 3;
2649 }
2650 else {
2651 pdata[0] = EXT4;
2652 pdata[1] = (unsigned char)(code & 0xff);
2653 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2654 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2655 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2656 n = 5;
2657 }
2658
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002659 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002660 goto error;
2661 }
2662 else {
2663 /* Generate a normal global opcode if we are using a pickle
2664 protocol <= 2, or if the object is not registered in the
2665 extension registry. */
2666 PyObject *encoded;
2667 PyObject *(*unicode_encoder)(PyObject *);
2668
2669 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002670 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002671 goto error;
2672
2673 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2674 the module name and the global name using UTF-8. We do so only when
2675 we are using the pickle protocol newer than version 3. This is to
2676 ensure compatibility with older Unpickler running on Python 2.x. */
2677 if (self->proto >= 3) {
2678 unicode_encoder = PyUnicode_AsUTF8String;
2679 }
2680 else {
2681 unicode_encoder = PyUnicode_AsASCIIString;
2682 }
2683
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002684 /* For protocol < 3 and if the user didn't request against doing so,
2685 we convert module names to the old 2.x module names. */
2686 if (self->fix_imports) {
2687 PyObject *key;
2688 PyObject *item;
2689
2690 key = PyTuple_Pack(2, module_name, global_name);
2691 if (key == NULL)
2692 goto error;
2693 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2694 Py_DECREF(key);
2695 if (item) {
2696 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2697 PyErr_Format(PyExc_RuntimeError,
2698 "_compat_pickle.REVERSE_NAME_MAPPING values "
2699 "should be 2-tuples, not %.200s",
2700 Py_TYPE(item)->tp_name);
2701 goto error;
2702 }
2703 Py_CLEAR(module_name);
2704 Py_CLEAR(global_name);
2705 module_name = PyTuple_GET_ITEM(item, 0);
2706 global_name = PyTuple_GET_ITEM(item, 1);
2707 if (!PyUnicode_Check(module_name) ||
2708 !PyUnicode_Check(global_name)) {
2709 PyErr_Format(PyExc_RuntimeError,
2710 "_compat_pickle.REVERSE_NAME_MAPPING values "
2711 "should be pairs of str, not (%.200s, %.200s)",
2712 Py_TYPE(module_name)->tp_name,
2713 Py_TYPE(global_name)->tp_name);
2714 goto error;
2715 }
2716 Py_INCREF(module_name);
2717 Py_INCREF(global_name);
2718 }
2719 else if (PyErr_Occurred()) {
2720 goto error;
2721 }
2722
2723 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2724 if (item) {
2725 if (!PyUnicode_Check(item)) {
2726 PyErr_Format(PyExc_RuntimeError,
2727 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2728 "should be strings, not %.200s",
2729 Py_TYPE(item)->tp_name);
2730 goto error;
2731 }
2732 Py_CLEAR(module_name);
2733 module_name = item;
2734 Py_INCREF(module_name);
2735 }
2736 else if (PyErr_Occurred()) {
2737 goto error;
2738 }
2739 }
2740
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002741 /* Save the name of the module. */
2742 encoded = unicode_encoder(module_name);
2743 if (encoded == NULL) {
2744 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2745 PyErr_Format(PicklingError,
2746 "can't pickle module identifier '%S' using "
2747 "pickle protocol %i", module_name, self->proto);
2748 goto error;
2749 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002750 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002751 PyBytes_GET_SIZE(encoded)) < 0) {
2752 Py_DECREF(encoded);
2753 goto error;
2754 }
2755 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002756 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002757 goto error;
2758
2759 /* Save the name of the module. */
2760 encoded = unicode_encoder(global_name);
2761 if (encoded == NULL) {
2762 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2763 PyErr_Format(PicklingError,
2764 "can't pickle global identifier '%S' using "
2765 "pickle protocol %i", global_name, self->proto);
2766 goto error;
2767 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002768 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002769 PyBytes_GET_SIZE(encoded)) < 0) {
2770 Py_DECREF(encoded);
2771 goto error;
2772 }
2773 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002774 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002775 goto error;
2776
2777 /* Memoize the object. */
2778 if (memo_put(self, obj) < 0)
2779 goto error;
2780 }
2781
2782 if (0) {
2783 error:
2784 status = -1;
2785 }
2786 Py_XDECREF(module_name);
2787 Py_XDECREF(global_name);
2788 Py_XDECREF(module);
2789
2790 return status;
2791}
2792
2793static int
2794save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2795{
2796 PyObject *pid = NULL;
2797 int status = 0;
2798
2799 const char persid_op = PERSID;
2800 const char binpersid_op = BINPERSID;
2801
2802 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002803 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002804 if (pid == NULL)
2805 return -1;
2806
2807 if (pid != Py_None) {
2808 if (self->bin) {
2809 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002810 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002811 goto error;
2812 }
2813 else {
2814 PyObject *pid_str = NULL;
2815 char *pid_ascii_bytes;
2816 Py_ssize_t size;
2817
2818 pid_str = PyObject_Str(pid);
2819 if (pid_str == NULL)
2820 goto error;
2821
2822 /* XXX: Should it check whether the persistent id only contains
2823 ASCII characters? And what if the pid contains embedded
2824 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002825 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002826 Py_DECREF(pid_str);
2827 if (pid_ascii_bytes == NULL)
2828 goto error;
2829
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002830 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2831 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2832 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002833 goto error;
2834 }
2835 status = 1;
2836 }
2837
2838 if (0) {
2839 error:
2840 status = -1;
2841 }
2842 Py_XDECREF(pid);
2843
2844 return status;
2845}
2846
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002847static PyObject *
2848get_class(PyObject *obj)
2849{
2850 PyObject *cls;
2851 static PyObject *str_class;
2852
2853 if (str_class == NULL) {
2854 str_class = PyUnicode_InternFromString("__class__");
2855 if (str_class == NULL)
2856 return NULL;
2857 }
2858 cls = PyObject_GetAttr(obj, str_class);
2859 if (cls == NULL) {
2860 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2861 PyErr_Clear();
2862 cls = (PyObject *) Py_TYPE(obj);
2863 Py_INCREF(cls);
2864 }
2865 }
2866 return cls;
2867}
2868
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002869/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2870 * appropriate __reduce__ method for obj.
2871 */
2872static int
2873save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2874{
2875 PyObject *callable;
2876 PyObject *argtup;
2877 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002878 PyObject *listitems = Py_None;
2879 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002880 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002881
2882 int use_newobj = self->proto >= 2;
2883
2884 const char reduce_op = REDUCE;
2885 const char build_op = BUILD;
2886 const char newobj_op = NEWOBJ;
2887
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002888 size = PyTuple_Size(args);
2889 if (size < 2 || size > 5) {
2890 PyErr_SetString(PicklingError, "tuple returned by "
2891 "__reduce__ must contain 2 through 5 elements");
2892 return -1;
2893 }
2894
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002895 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2896 &callable, &argtup, &state, &listitems, &dictitems))
2897 return -1;
2898
2899 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002900 PyErr_SetString(PicklingError, "first item of the tuple "
2901 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002902 return -1;
2903 }
2904 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002905 PyErr_SetString(PicklingError, "second item of the tuple "
2906 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002907 return -1;
2908 }
2909
2910 if (state == Py_None)
2911 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002912
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002913 if (listitems == Py_None)
2914 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002915 else if (!PyIter_Check(listitems)) {
2916 PyErr_Format(PicklingError, "Fourth element of tuple"
2917 "returned by __reduce__ must be an iterator, not %s",
2918 Py_TYPE(listitems)->tp_name);
2919 return -1;
2920 }
2921
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002922 if (dictitems == Py_None)
2923 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002924 else if (!PyIter_Check(dictitems)) {
2925 PyErr_Format(PicklingError, "Fifth element of tuple"
2926 "returned by __reduce__ must be an iterator, not %s",
2927 Py_TYPE(dictitems)->tp_name);
2928 return -1;
2929 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002930
2931 /* Protocol 2 special case: if callable's name is __newobj__, use
2932 NEWOBJ. */
2933 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002934 static PyObject *newobj_str = NULL, *name_str = NULL;
2935 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002936
2937 if (newobj_str == NULL) {
2938 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002939 name_str = PyUnicode_InternFromString("__name__");
2940 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002941 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002942 }
2943
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002944 name = PyObject_GetAttr(callable, name_str);
2945 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002946 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2947 PyErr_Clear();
2948 else
2949 return -1;
2950 use_newobj = 0;
2951 }
2952 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002953 use_newobj = PyUnicode_Check(name) &&
2954 PyUnicode_Compare(name, newobj_str) == 0;
2955 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002956 }
2957 }
2958 if (use_newobj) {
2959 PyObject *cls;
2960 PyObject *newargtup;
2961 PyObject *obj_class;
2962 int p;
2963
2964 /* Sanity checks. */
2965 if (Py_SIZE(argtup) < 1) {
2966 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2967 return -1;
2968 }
2969
2970 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002971 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002972 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002973 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002974 return -1;
2975 }
2976
2977 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002978 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002979 p = obj_class != cls; /* true iff a problem */
2980 Py_DECREF(obj_class);
2981 if (p) {
2982 PyErr_SetString(PicklingError, "args[0] from "
2983 "__newobj__ args has the wrong class");
2984 return -1;
2985 }
2986 }
2987 /* XXX: These calls save() are prone to infinite recursion. Imagine
2988 what happen if the value returned by the __reduce__() method of
2989 some extension type contains another object of the same type. Ouch!
2990
2991 Here is a quick example, that I ran into, to illustrate what I
2992 mean:
2993
2994 >>> import pickle, copyreg
2995 >>> copyreg.dispatch_table.pop(complex)
2996 >>> pickle.dumps(1+2j)
2997 Traceback (most recent call last):
2998 ...
2999 RuntimeError: maximum recursion depth exceeded
3000
3001 Removing the complex class from copyreg.dispatch_table made the
3002 __reduce_ex__() method emit another complex object:
3003
3004 >>> (1+1j).__reduce_ex__(2)
3005 (<function __newobj__ at 0xb7b71c3c>,
3006 (<class 'complex'>, (1+1j)), None, None, None)
3007
3008 Thus when save() was called on newargstup (the 2nd item) recursion
3009 ensued. Of course, the bug was in the complex class which had a
3010 broken __getnewargs__() that emitted another complex object. But,
3011 the point, here, is it is quite easy to end up with a broken reduce
3012 function. */
3013
3014 /* Save the class and its __new__ arguments. */
3015 if (save(self, cls, 0) < 0)
3016 return -1;
3017
3018 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3019 if (newargtup == NULL)
3020 return -1;
3021
3022 p = save(self, newargtup, 0);
3023 Py_DECREF(newargtup);
3024 if (p < 0)
3025 return -1;
3026
3027 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003028 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003029 return -1;
3030 }
3031 else { /* Not using NEWOBJ. */
3032 if (save(self, callable, 0) < 0 ||
3033 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003034 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003035 return -1;
3036 }
3037
3038 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3039 the caller do not want to memoize the object. Not particularly useful,
3040 but that is to mimic the behavior save_reduce() in pickle.py when
3041 obj is None. */
3042 if (obj && memo_put(self, obj) < 0)
3043 return -1;
3044
3045 if (listitems && batch_list(self, listitems) < 0)
3046 return -1;
3047
3048 if (dictitems && batch_dict(self, dictitems) < 0)
3049 return -1;
3050
3051 if (state) {
3052 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003053 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003054 return -1;
3055 }
3056
3057 return 0;
3058}
3059
3060static int
3061save(PicklerObject *self, PyObject *obj, int pers_save)
3062{
3063 PyTypeObject *type;
3064 PyObject *reduce_func = NULL;
3065 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003066 int status = 0;
3067
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003068 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003069 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003070
3071 /* The extra pers_save argument is necessary to avoid calling save_pers()
3072 on its returned object. */
3073 if (!pers_save && self->pers_func) {
3074 /* save_pers() returns:
3075 -1 to signal an error;
3076 0 if it did nothing successfully;
3077 1 if a persistent id was saved.
3078 */
3079 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3080 goto done;
3081 }
3082
3083 type = Py_TYPE(obj);
3084
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003085 /* The old cPickle had an optimization that used switch-case statement
3086 dispatching on the first letter of the type name. This has was removed
3087 since benchmarks shown that this optimization was actually slowing
3088 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003089
3090 /* Atom types; these aren't memoized, so don't check the memo. */
3091
3092 if (obj == Py_None) {
3093 status = save_none(self, obj);
3094 goto done;
3095 }
3096 else if (obj == Py_False || obj == Py_True) {
3097 status = save_bool(self, obj);
3098 goto done;
3099 }
3100 else if (type == &PyLong_Type) {
3101 status = save_long(self, obj);
3102 goto done;
3103 }
3104 else if (type == &PyFloat_Type) {
3105 status = save_float(self, obj);
3106 goto done;
3107 }
3108
3109 /* Check the memo to see if it has the object. If so, generate
3110 a GET (or BINGET) opcode, instead of pickling the object
3111 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003112 if (PyMemoTable_Get(self->memo, obj)) {
3113 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003114 goto error;
3115 goto done;
3116 }
3117
3118 if (type == &PyBytes_Type) {
3119 status = save_bytes(self, obj);
3120 goto done;
3121 }
3122 else if (type == &PyUnicode_Type) {
3123 status = save_unicode(self, obj);
3124 goto done;
3125 }
3126 else if (type == &PyDict_Type) {
3127 status = save_dict(self, obj);
3128 goto done;
3129 }
3130 else if (type == &PyList_Type) {
3131 status = save_list(self, obj);
3132 goto done;
3133 }
3134 else if (type == &PyTuple_Type) {
3135 status = save_tuple(self, obj);
3136 goto done;
3137 }
3138 else if (type == &PyType_Type) {
3139 status = save_global(self, obj, NULL);
3140 goto done;
3141 }
3142 else if (type == &PyFunction_Type) {
3143 status = save_global(self, obj, NULL);
3144 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3145 /* fall back to reduce */
3146 PyErr_Clear();
3147 }
3148 else {
3149 goto done;
3150 }
3151 }
3152 else if (type == &PyCFunction_Type) {
3153 status = save_global(self, obj, NULL);
3154 goto done;
3155 }
3156 else if (PyType_IsSubtype(type, &PyType_Type)) {
3157 status = save_global(self, obj, NULL);
3158 goto done;
3159 }
3160
3161 /* XXX: This part needs some unit tests. */
3162
3163 /* Get a reduction callable, and call it. This may come from
3164 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3165 * or the object's __reduce__ method.
3166 */
3167 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3168 if (reduce_func != NULL) {
3169 /* Here, the reference count of the reduce_func object returned by
3170 PyDict_GetItem needs to be increased to be consistent with the one
3171 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3172 reduce_func at the end of the save() routine.
3173 */
3174 Py_INCREF(reduce_func);
3175 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003176 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003177 }
3178 else {
3179 static PyObject *reduce_str = NULL;
3180 static PyObject *reduce_ex_str = NULL;
3181
3182 /* Cache the name of the reduce methods. */
3183 if (reduce_str == NULL) {
3184 reduce_str = PyUnicode_InternFromString("__reduce__");
3185 if (reduce_str == NULL)
3186 goto error;
3187 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3188 if (reduce_ex_str == NULL)
3189 goto error;
3190 }
3191
3192 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3193 automatically defined as __reduce__. While this is convenient, this
3194 make it impossible to know which method was actually called. Of
3195 course, this is not a big deal. But still, it would be nice to let
3196 the user know which method was called when something go
3197 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3198 don't actually have to check for a __reduce__ method. */
3199
3200 /* Check for a __reduce_ex__ method. */
3201 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3202 if (reduce_func != NULL) {
3203 PyObject *proto;
3204 proto = PyLong_FromLong(self->proto);
3205 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003206 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003207 }
3208 }
3209 else {
3210 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3211 PyErr_Clear();
3212 else
3213 goto error;
3214 /* Check for a __reduce__ method. */
3215 reduce_func = PyObject_GetAttr(obj, reduce_str);
3216 if (reduce_func != NULL) {
3217 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3218 }
3219 else {
3220 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3221 type->tp_name, obj);
3222 goto error;
3223 }
3224 }
3225 }
3226
3227 if (reduce_value == NULL)
3228 goto error;
3229
3230 if (PyUnicode_Check(reduce_value)) {
3231 status = save_global(self, obj, reduce_value);
3232 goto done;
3233 }
3234
3235 if (!PyTuple_Check(reduce_value)) {
3236 PyErr_SetString(PicklingError,
3237 "__reduce__ must return a string or tuple");
3238 goto error;
3239 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003240
3241 status = save_reduce(self, reduce_value, obj);
3242
3243 if (0) {
3244 error:
3245 status = -1;
3246 }
3247 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003248 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003249 Py_XDECREF(reduce_func);
3250 Py_XDECREF(reduce_value);
3251
3252 return status;
3253}
3254
3255static int
3256dump(PicklerObject *self, PyObject *obj)
3257{
3258 const char stop_op = STOP;
3259
3260 if (self->proto >= 2) {
3261 char header[2];
3262
3263 header[0] = PROTO;
3264 assert(self->proto >= 0 && self->proto < 256);
3265 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003266 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003267 return -1;
3268 }
3269
3270 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003271 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003272 return -1;
3273
3274 return 0;
3275}
3276
3277PyDoc_STRVAR(Pickler_clear_memo_doc,
3278"clear_memo() -> None. Clears the pickler's \"memo\"."
3279"\n"
3280"The memo is the data structure that remembers which objects the\n"
3281"pickler has already seen, so that shared or recursive objects are\n"
3282"pickled by reference and not by value. This method is useful when\n"
3283"re-using picklers.");
3284
3285static PyObject *
3286Pickler_clear_memo(PicklerObject *self)
3287{
3288 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003289 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003290
3291 Py_RETURN_NONE;
3292}
3293
3294PyDoc_STRVAR(Pickler_dump_doc,
3295"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3296
3297static PyObject *
3298Pickler_dump(PicklerObject *self, PyObject *args)
3299{
3300 PyObject *obj;
3301
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003302 /* Check whether the Pickler was initialized correctly (issue3664).
3303 Developers often forget to call __init__() in their subclasses, which
3304 would trigger a segfault without this check. */
3305 if (self->write == NULL) {
3306 PyErr_Format(PicklingError,
3307 "Pickler.__init__() was not called by %s.__init__()",
3308 Py_TYPE(self)->tp_name);
3309 return NULL;
3310 }
3311
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003312 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3313 return NULL;
3314
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003315 if (_Pickler_ClearBuffer(self) < 0)
3316 return NULL;
3317
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003318 if (dump(self, obj) < 0)
3319 return NULL;
3320
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003321 if (_Pickler_FlushToFile(self) < 0)
3322 return NULL;
3323
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003324 Py_RETURN_NONE;
3325}
3326
3327static struct PyMethodDef Pickler_methods[] = {
3328 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3329 Pickler_dump_doc},
3330 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3331 Pickler_clear_memo_doc},
3332 {NULL, NULL} /* sentinel */
3333};
3334
3335static void
3336Pickler_dealloc(PicklerObject *self)
3337{
3338 PyObject_GC_UnTrack(self);
3339
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003340 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003341 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003342 Py_XDECREF(self->pers_func);
3343 Py_XDECREF(self->arg);
3344 Py_XDECREF(self->fast_memo);
3345
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003346 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003347
3348 Py_TYPE(self)->tp_free((PyObject *)self);
3349}
3350
3351static int
3352Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3353{
3354 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003355 Py_VISIT(self->pers_func);
3356 Py_VISIT(self->arg);
3357 Py_VISIT(self->fast_memo);
3358 return 0;
3359}
3360
3361static int
3362Pickler_clear(PicklerObject *self)
3363{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003364 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003365 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003366 Py_CLEAR(self->pers_func);
3367 Py_CLEAR(self->arg);
3368 Py_CLEAR(self->fast_memo);
3369
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003370 if (self->memo != NULL) {
3371 PyMemoTable *memo = self->memo;
3372 self->memo = NULL;
3373 PyMemoTable_Del(memo);
3374 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003375 return 0;
3376}
3377
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003378
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003379PyDoc_STRVAR(Pickler_doc,
3380"Pickler(file, protocol=None)"
3381"\n"
3382"This takes a binary file for writing a pickle data stream.\n"
3383"\n"
3384"The optional protocol argument tells the pickler to use the\n"
3385"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3386"protocol is 3; a backward-incompatible protocol designed for\n"
3387"Python 3.0.\n"
3388"\n"
3389"Specifying a negative protocol version selects the highest\n"
3390"protocol version supported. The higher the protocol used, the\n"
3391"more recent the version of Python needed to read the pickle\n"
3392"produced.\n"
3393"\n"
3394"The file argument must have a write() method that accepts a single\n"
3395"bytes argument. It can thus be a file object opened for binary\n"
3396"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003397"meets this interface.\n"
3398"\n"
3399"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3400"map the new Python 3.x names to the old module names used in Python\n"
3401"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003402
3403static int
3404Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3405{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003406 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003407 PyObject *file;
3408 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003409 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003410
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003411 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003412 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003413 return -1;
3414
3415 /* In case of multiple __init__() calls, clear previous content. */
3416 if (self->write != NULL)
3417 (void)Pickler_clear(self);
3418
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003419 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3420 return -1;
3421
3422 if (_Pickler_SetOutputStream(self, file) < 0)
3423 return -1;
3424
3425 /* memo and output_buffer may have already been created in _Pickler_New */
3426 if (self->memo == NULL) {
3427 self->memo = PyMemoTable_New();
3428 if (self->memo == NULL)
3429 return -1;
3430 }
3431 self->output_len = 0;
3432 if (self->output_buffer == NULL) {
3433 self->max_output_len = WRITE_BUF_SIZE;
3434 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3435 self->max_output_len);
3436 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003437 return -1;
3438 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003439
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003440 self->arg = NULL;
3441 self->fast = 0;
3442 self->fast_nesting = 0;
3443 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003444 self->pers_func = NULL;
3445 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3446 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3447 "persistent_id");
3448 if (self->pers_func == NULL)
3449 return -1;
3450 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003451 return 0;
3452}
3453
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003454/* Define a proxy object for the Pickler's internal memo object. This is to
3455 * avoid breaking code like:
3456 * pickler.memo.clear()
3457 * and
3458 * pickler.memo = saved_memo
3459 * Is this a good idea? Not really, but we don't want to break code that uses
3460 * it. Note that we don't implement the entire mapping API here. This is
3461 * intentional, as these should be treated as black-box implementation details.
3462 */
3463
3464typedef struct {
3465 PyObject_HEAD
3466 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3467} PicklerMemoProxyObject;
3468
3469PyDoc_STRVAR(pmp_clear_doc,
3470"memo.clear() -> None. Remove all items from memo.");
3471
3472static PyObject *
3473pmp_clear(PicklerMemoProxyObject *self)
3474{
3475 if (self->pickler->memo)
3476 PyMemoTable_Clear(self->pickler->memo);
3477 Py_RETURN_NONE;
3478}
3479
3480PyDoc_STRVAR(pmp_copy_doc,
3481"memo.copy() -> new_memo. Copy the memo to a new object.");
3482
3483static PyObject *
3484pmp_copy(PicklerMemoProxyObject *self)
3485{
3486 Py_ssize_t i;
3487 PyMemoTable *memo;
3488 PyObject *new_memo = PyDict_New();
3489 if (new_memo == NULL)
3490 return NULL;
3491
3492 memo = self->pickler->memo;
3493 for (i = 0; i < memo->mt_allocated; ++i) {
3494 PyMemoEntry entry = memo->mt_table[i];
3495 if (entry.me_key != NULL) {
3496 int status;
3497 PyObject *key, *value;
3498
3499 key = PyLong_FromVoidPtr(entry.me_key);
3500 value = Py_BuildValue("lO", entry.me_value, entry.me_key);
3501
3502 if (key == NULL || value == NULL) {
3503 Py_XDECREF(key);
3504 Py_XDECREF(value);
3505 goto error;
3506 }
3507 status = PyDict_SetItem(new_memo, key, value);
3508 Py_DECREF(key);
3509 Py_DECREF(value);
3510 if (status < 0)
3511 goto error;
3512 }
3513 }
3514 return new_memo;
3515
3516 error:
3517 Py_XDECREF(new_memo);
3518 return NULL;
3519}
3520
3521PyDoc_STRVAR(pmp_reduce_doc,
3522"memo.__reduce__(). Pickling support.");
3523
3524static PyObject *
3525pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3526{
3527 PyObject *reduce_value, *dict_args;
3528 PyObject *contents = pmp_copy(self);
3529 if (contents == NULL)
3530 return NULL;
3531
3532 reduce_value = PyTuple_New(2);
3533 if (reduce_value == NULL) {
3534 Py_DECREF(contents);
3535 return NULL;
3536 }
3537 dict_args = PyTuple_New(1);
3538 if (dict_args == NULL) {
3539 Py_DECREF(contents);
3540 Py_DECREF(reduce_value);
3541 return NULL;
3542 }
3543 PyTuple_SET_ITEM(dict_args, 0, contents);
3544 Py_INCREF((PyObject *)&PyDict_Type);
3545 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3546 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3547 return reduce_value;
3548}
3549
3550static PyMethodDef picklerproxy_methods[] = {
3551 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3552 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3553 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3554 {NULL, NULL} /* sentinel */
3555};
3556
3557static void
3558PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3559{
3560 PyObject_GC_UnTrack(self);
3561 Py_XDECREF(self->pickler);
3562 PyObject_GC_Del((PyObject *)self);
3563}
3564
3565static int
3566PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3567 visitproc visit, void *arg)
3568{
3569 Py_VISIT(self->pickler);
3570 return 0;
3571}
3572
3573static int
3574PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3575{
3576 Py_CLEAR(self->pickler);
3577 return 0;
3578}
3579
3580static PyTypeObject PicklerMemoProxyType = {
3581 PyVarObject_HEAD_INIT(NULL, 0)
3582 "_pickle.PicklerMemoProxy", /*tp_name*/
3583 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3584 0,
3585 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3586 0, /* tp_print */
3587 0, /* tp_getattr */
3588 0, /* tp_setattr */
3589 0, /* tp_compare */
3590 0, /* tp_repr */
3591 0, /* tp_as_number */
3592 0, /* tp_as_sequence */
3593 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003594 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003595 0, /* tp_call */
3596 0, /* tp_str */
3597 PyObject_GenericGetAttr, /* tp_getattro */
3598 PyObject_GenericSetAttr, /* tp_setattro */
3599 0, /* tp_as_buffer */
3600 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3601 0, /* tp_doc */
3602 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3603 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3604 0, /* tp_richcompare */
3605 0, /* tp_weaklistoffset */
3606 0, /* tp_iter */
3607 0, /* tp_iternext */
3608 picklerproxy_methods, /* tp_methods */
3609};
3610
3611static PyObject *
3612PicklerMemoProxy_New(PicklerObject *pickler)
3613{
3614 PicklerMemoProxyObject *self;
3615
3616 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3617 if (self == NULL)
3618 return NULL;
3619 Py_INCREF(pickler);
3620 self->pickler = pickler;
3621 PyObject_GC_Track(self);
3622 return (PyObject *)self;
3623}
3624
3625/*****************************************************************************/
3626
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003627static PyObject *
3628Pickler_get_memo(PicklerObject *self)
3629{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003630 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003631}
3632
3633static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003634Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003635{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003636 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003637
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003638 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003639 PyErr_SetString(PyExc_TypeError,
3640 "attribute deletion is not supported");
3641 return -1;
3642 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003643
3644 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3645 PicklerObject *pickler =
3646 ((PicklerMemoProxyObject *)obj)->pickler;
3647
3648 new_memo = PyMemoTable_Copy(pickler->memo);
3649 if (new_memo == NULL)
3650 return -1;
3651 }
3652 else if (PyDict_Check(obj)) {
3653 Py_ssize_t i = 0;
3654 PyObject *key, *value;
3655
3656 new_memo = PyMemoTable_New();
3657 if (new_memo == NULL)
3658 return -1;
3659
3660 while (PyDict_Next(obj, &i, &key, &value)) {
3661 long memo_id;
3662 PyObject *memo_obj;
3663
3664 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3665 PyErr_SetString(PyExc_TypeError,
3666 "'memo' values must be 2-item tuples");
3667 goto error;
3668 }
3669 memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
3670 if (memo_id == -1 && PyErr_Occurred())
3671 goto error;
3672 memo_obj = PyTuple_GET_ITEM(value, 1);
3673 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3674 goto error;
3675 }
3676 }
3677 else {
3678 PyErr_Format(PyExc_TypeError,
3679 "'memo' attribute must be an PicklerMemoProxy object"
3680 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003681 return -1;
3682 }
3683
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003684 PyMemoTable_Del(self->memo);
3685 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003686
3687 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003688
3689 error:
3690 if (new_memo)
3691 PyMemoTable_Del(new_memo);
3692 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003693}
3694
3695static PyObject *
3696Pickler_get_persid(PicklerObject *self)
3697{
3698 if (self->pers_func == NULL)
3699 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3700 else
3701 Py_INCREF(self->pers_func);
3702 return self->pers_func;
3703}
3704
3705static int
3706Pickler_set_persid(PicklerObject *self, PyObject *value)
3707{
3708 PyObject *tmp;
3709
3710 if (value == NULL) {
3711 PyErr_SetString(PyExc_TypeError,
3712 "attribute deletion is not supported");
3713 return -1;
3714 }
3715 if (!PyCallable_Check(value)) {
3716 PyErr_SetString(PyExc_TypeError,
3717 "persistent_id must be a callable taking one argument");
3718 return -1;
3719 }
3720
3721 tmp = self->pers_func;
3722 Py_INCREF(value);
3723 self->pers_func = value;
3724 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3725
3726 return 0;
3727}
3728
3729static PyMemberDef Pickler_members[] = {
3730 {"bin", T_INT, offsetof(PicklerObject, bin)},
3731 {"fast", T_INT, offsetof(PicklerObject, fast)},
3732 {NULL}
3733};
3734
3735static PyGetSetDef Pickler_getsets[] = {
3736 {"memo", (getter)Pickler_get_memo,
3737 (setter)Pickler_set_memo},
3738 {"persistent_id", (getter)Pickler_get_persid,
3739 (setter)Pickler_set_persid},
3740 {NULL}
3741};
3742
3743static PyTypeObject Pickler_Type = {
3744 PyVarObject_HEAD_INIT(NULL, 0)
3745 "_pickle.Pickler" , /*tp_name*/
3746 sizeof(PicklerObject), /*tp_basicsize*/
3747 0, /*tp_itemsize*/
3748 (destructor)Pickler_dealloc, /*tp_dealloc*/
3749 0, /*tp_print*/
3750 0, /*tp_getattr*/
3751 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003752 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003753 0, /*tp_repr*/
3754 0, /*tp_as_number*/
3755 0, /*tp_as_sequence*/
3756 0, /*tp_as_mapping*/
3757 0, /*tp_hash*/
3758 0, /*tp_call*/
3759 0, /*tp_str*/
3760 0, /*tp_getattro*/
3761 0, /*tp_setattro*/
3762 0, /*tp_as_buffer*/
3763 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3764 Pickler_doc, /*tp_doc*/
3765 (traverseproc)Pickler_traverse, /*tp_traverse*/
3766 (inquiry)Pickler_clear, /*tp_clear*/
3767 0, /*tp_richcompare*/
3768 0, /*tp_weaklistoffset*/
3769 0, /*tp_iter*/
3770 0, /*tp_iternext*/
3771 Pickler_methods, /*tp_methods*/
3772 Pickler_members, /*tp_members*/
3773 Pickler_getsets, /*tp_getset*/
3774 0, /*tp_base*/
3775 0, /*tp_dict*/
3776 0, /*tp_descr_get*/
3777 0, /*tp_descr_set*/
3778 0, /*tp_dictoffset*/
3779 (initproc)Pickler_init, /*tp_init*/
3780 PyType_GenericAlloc, /*tp_alloc*/
3781 PyType_GenericNew, /*tp_new*/
3782 PyObject_GC_Del, /*tp_free*/
3783 0, /*tp_is_gc*/
3784};
3785
3786/* Temporary helper for calling self.find_class().
3787
3788 XXX: It would be nice to able to avoid Python function call overhead, by
3789 using directly the C version of find_class(), when find_class() is not
3790 overridden by a subclass. Although, this could become rather hackish. A
3791 simpler optimization would be to call the C function when self is not a
3792 subclass instance. */
3793static PyObject *
3794find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3795{
3796 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3797 module_name, global_name);
3798}
3799
3800static int
3801marker(UnpicklerObject *self)
3802{
3803 if (self->num_marks < 1) {
3804 PyErr_SetString(UnpicklingError, "could not find MARK");
3805 return -1;
3806 }
3807
3808 return self->marks[--self->num_marks];
3809}
3810
3811static int
3812load_none(UnpicklerObject *self)
3813{
3814 PDATA_APPEND(self->stack, Py_None, -1);
3815 return 0;
3816}
3817
3818static int
3819bad_readline(void)
3820{
3821 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3822 return -1;
3823}
3824
3825static int
3826load_int(UnpicklerObject *self)
3827{
3828 PyObject *value;
3829 char *endptr, *s;
3830 Py_ssize_t len;
3831 long x;
3832
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003833 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003834 return -1;
3835 if (len < 2)
3836 return bad_readline();
3837
3838 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003839 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3840 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003841 x = strtol(s, &endptr, 0);
3842
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003843 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003844 /* Hm, maybe we've got something long. Let's try reading
3845 * it as a Python long object. */
3846 errno = 0;
3847 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003848 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003849 if (value == NULL) {
3850 PyErr_SetString(PyExc_ValueError,
3851 "could not convert string to int");
3852 return -1;
3853 }
3854 }
3855 else {
3856 if (len == 3 && (x == 0 || x == 1)) {
3857 if ((value = PyBool_FromLong(x)) == NULL)
3858 return -1;
3859 }
3860 else {
3861 if ((value = PyLong_FromLong(x)) == NULL)
3862 return -1;
3863 }
3864 }
3865
3866 PDATA_PUSH(self->stack, value, -1);
3867 return 0;
3868}
3869
3870static int
3871load_bool(UnpicklerObject *self, PyObject *boolean)
3872{
3873 assert(boolean == Py_True || boolean == Py_False);
3874 PDATA_APPEND(self->stack, boolean, -1);
3875 return 0;
3876}
3877
/* Decode 'size' bytes at 'bytes' as a little-endian integer and return it
 * as a C long.
 *
 * Obscure: when size is 1 or 2 the value is unsigned, but when size is 4 it
 * is a signed two's-complement value (BININT is really "BININT4").  This is
 * a historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no shift can move a
 * bit into the sign position of a signed type (undefined behaviour where
 * long is 32 bits wide).  The sign of a 4-byte value is then applied
 * arithmetically, which gives the same result whatever the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    if (size == 4 && (ux & 0x80000000UL) != 0) {
        /* ux is in [2**31, 2**32 - 1]; compute ux - 2**32 without ever
         * leaving the range of a 32-bit long. */
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
3904
3905static int
3906load_binintx(UnpicklerObject *self, char *s, int size)
3907{
3908 PyObject *value;
3909 long x;
3910
3911 x = calc_binint(s, size);
3912
3913 if ((value = PyLong_FromLong(x)) == NULL)
3914 return -1;
3915
3916 PDATA_PUSH(self->stack, value, -1);
3917 return 0;
3918}
3919
3920static int
3921load_binint(UnpicklerObject *self)
3922{
3923 char *s;
3924
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003925 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003926 return -1;
3927
3928 return load_binintx(self, s, 4);
3929}
3930
3931static int
3932load_binint1(UnpicklerObject *self)
3933{
3934 char *s;
3935
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003936 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003937 return -1;
3938
3939 return load_binintx(self, s, 1);
3940}
3941
3942static int
3943load_binint2(UnpicklerObject *self)
3944{
3945 char *s;
3946
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003947 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003948 return -1;
3949
3950 return load_binintx(self, s, 2);
3951}
3952
3953static int
3954load_long(UnpicklerObject *self)
3955{
3956 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003957 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003958 Py_ssize_t len;
3959
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003960 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003961 return -1;
3962 if (len < 2)
3963 return bad_readline();
3964
Mark Dickinson8dd05142009-01-20 20:43:58 +00003965 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3966 the 'L' before calling PyLong_FromString. In order to maintain
3967 compatibility with Python 3.0.0, we don't actually *require*
3968 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003969 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003970 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003971 /* XXX: Should the base argument explicitly set to 10? */
3972 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003973 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003974 return -1;
3975
3976 PDATA_PUSH(self->stack, value, -1);
3977 return 0;
3978}
3979
3980/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3981 * data following.
3982 */
3983static int
3984load_counted_long(UnpicklerObject *self, int size)
3985{
3986 PyObject *value;
3987 char *nbytes;
3988 char *pdata;
3989
3990 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003991 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003992 return -1;
3993
3994 size = calc_binint(nbytes, size);
3995 if (size < 0) {
3996 /* Corrupt or hostile pickle -- we never write one like this */
3997 PyErr_SetString(UnpicklingError,
3998 "LONG pickle has negative byte count");
3999 return -1;
4000 }
4001
4002 if (size == 0)
4003 value = PyLong_FromLong(0L);
4004 else {
4005 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004006 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004007 return -1;
4008 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4009 1 /* little endian */ , 1 /* signed */ );
4010 }
4011 if (value == NULL)
4012 return -1;
4013 PDATA_PUSH(self->stack, value, -1);
4014 return 0;
4015}
4016
4017static int
4018load_float(UnpicklerObject *self)
4019{
4020 PyObject *value;
4021 char *endptr, *s;
4022 Py_ssize_t len;
4023 double d;
4024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004025 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004026 return -1;
4027 if (len < 2)
4028 return bad_readline();
4029
4030 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004031 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4032 if (d == -1.0 && PyErr_Occurred())
4033 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004034 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004035 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4036 return -1;
4037 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004038 value = PyFloat_FromDouble(d);
4039 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004040 return -1;
4041
4042 PDATA_PUSH(self->stack, value, -1);
4043 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004044}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004045
4046static int
4047load_binfloat(UnpicklerObject *self)
4048{
4049 PyObject *value;
4050 double x;
4051 char *s;
4052
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004053 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004054 return -1;
4055
4056 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4057 if (x == -1.0 && PyErr_Occurred())
4058 return -1;
4059
4060 if ((value = PyFloat_FromDouble(x)) == NULL)
4061 return -1;
4062
4063 PDATA_PUSH(self->stack, value, -1);
4064 return 0;
4065}
4066
4067static int
4068load_string(UnpicklerObject *self)
4069{
4070 PyObject *bytes;
4071 PyObject *str = NULL;
4072 Py_ssize_t len;
4073 char *s, *p;
4074
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004075 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004076 return -1;
4077 if (len < 3)
4078 return bad_readline();
4079 if ((s = strdup(s)) == NULL) {
4080 PyErr_NoMemory();
4081 return -1;
4082 }
4083
4084 /* Strip outermost quotes */
4085 while (s[len - 1] <= ' ')
4086 len--;
4087 if (s[0] == '"' && s[len - 1] == '"') {
4088 s[len - 1] = '\0';
4089 p = s + 1;
4090 len -= 2;
4091 }
4092 else if (s[0] == '\'' && s[len - 1] == '\'') {
4093 s[len - 1] = '\0';
4094 p = s + 1;
4095 len -= 2;
4096 }
4097 else {
4098 free(s);
4099 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4100 return -1;
4101 }
4102
4103 /* Use the PyBytes API to decode the string, since that is what is used
4104 to encode, and then coerce the result to Unicode. */
4105 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4106 free(s);
4107 if (bytes == NULL)
4108 return -1;
4109 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4110 Py_DECREF(bytes);
4111 if (str == NULL)
4112 return -1;
4113
4114 PDATA_PUSH(self->stack, str, -1);
4115 return 0;
4116}
4117
4118static int
4119load_binbytes(UnpicklerObject *self)
4120{
4121 PyObject *bytes;
4122 long x;
4123 char *s;
4124
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004125 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004126 return -1;
4127
4128 x = calc_binint(s, 4);
4129 if (x < 0) {
4130 PyErr_SetString(UnpicklingError,
4131 "BINBYTES pickle has negative byte count");
4132 return -1;
4133 }
4134
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004135 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004136 return -1;
4137 bytes = PyBytes_FromStringAndSize(s, x);
4138 if (bytes == NULL)
4139 return -1;
4140
4141 PDATA_PUSH(self->stack, bytes, -1);
4142 return 0;
4143}
4144
4145static int
4146load_short_binbytes(UnpicklerObject *self)
4147{
4148 PyObject *bytes;
4149 unsigned char x;
4150 char *s;
4151
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004152 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004153 return -1;
4154
4155 x = (unsigned char)s[0];
4156
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004157 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004158 return -1;
4159
4160 bytes = PyBytes_FromStringAndSize(s, x);
4161 if (bytes == NULL)
4162 return -1;
4163
4164 PDATA_PUSH(self->stack, bytes, -1);
4165 return 0;
4166}
4167
4168static int
4169load_binstring(UnpicklerObject *self)
4170{
4171 PyObject *str;
4172 long x;
4173 char *s;
4174
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004175 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004176 return -1;
4177
4178 x = calc_binint(s, 4);
4179 if (x < 0) {
4180 PyErr_SetString(UnpicklingError,
4181 "BINSTRING pickle has negative byte count");
4182 return -1;
4183 }
4184
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004185 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004186 return -1;
4187
4188 /* Convert Python 2.x strings to unicode. */
4189 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4190 if (str == NULL)
4191 return -1;
4192
4193 PDATA_PUSH(self->stack, str, -1);
4194 return 0;
4195}
4196
4197static int
4198load_short_binstring(UnpicklerObject *self)
4199{
4200 PyObject *str;
4201 unsigned char x;
4202 char *s;
4203
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004204 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004205 return -1;
4206
4207 x = (unsigned char)s[0];
4208
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004209 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004210 return -1;
4211
4212 /* Convert Python 2.x strings to unicode. */
4213 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4214 if (str == NULL)
4215 return -1;
4216
4217 PDATA_PUSH(self->stack, str, -1);
4218 return 0;
4219}
4220
4221static int
4222load_unicode(UnpicklerObject *self)
4223{
4224 PyObject *str;
4225 Py_ssize_t len;
4226 char *s;
4227
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004228 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004229 return -1;
4230 if (len < 1)
4231 return bad_readline();
4232
4233 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4234 if (str == NULL)
4235 return -1;
4236
4237 PDATA_PUSH(self->stack, str, -1);
4238 return 0;
4239}
4240
4241static int
4242load_binunicode(UnpicklerObject *self)
4243{
4244 PyObject *str;
4245 long size;
4246 char *s;
4247
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004248 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004249 return -1;
4250
4251 size = calc_binint(s, 4);
4252 if (size < 0) {
4253 PyErr_SetString(UnpicklingError,
4254 "BINUNICODE pickle has negative byte count");
4255 return -1;
4256 }
4257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004258 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004259 return -1;
4260
Victor Stinner485fb562010-04-13 11:07:24 +00004261 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004262 if (str == NULL)
4263 return -1;
4264
4265 PDATA_PUSH(self->stack, str, -1);
4266 return 0;
4267}
4268
4269static int
4270load_tuple(UnpicklerObject *self)
4271{
4272 PyObject *tuple;
4273 int i;
4274
4275 if ((i = marker(self)) < 0)
4276 return -1;
4277
4278 tuple = Pdata_poptuple(self->stack, i);
4279 if (tuple == NULL)
4280 return -1;
4281 PDATA_PUSH(self->stack, tuple, -1);
4282 return 0;
4283}
4284
4285static int
4286load_counted_tuple(UnpicklerObject *self, int len)
4287{
4288 PyObject *tuple;
4289
4290 tuple = PyTuple_New(len);
4291 if (tuple == NULL)
4292 return -1;
4293
4294 while (--len >= 0) {
4295 PyObject *item;
4296
4297 PDATA_POP(self->stack, item);
4298 if (item == NULL)
4299 return -1;
4300 PyTuple_SET_ITEM(tuple, len, item);
4301 }
4302 PDATA_PUSH(self->stack, tuple, -1);
4303 return 0;
4304}
4305
4306static int
4307load_empty_list(UnpicklerObject *self)
4308{
4309 PyObject *list;
4310
4311 if ((list = PyList_New(0)) == NULL)
4312 return -1;
4313 PDATA_PUSH(self->stack, list, -1);
4314 return 0;
4315}
4316
4317static int
4318load_empty_dict(UnpicklerObject *self)
4319{
4320 PyObject *dict;
4321
4322 if ((dict = PyDict_New()) == NULL)
4323 return -1;
4324 PDATA_PUSH(self->stack, dict, -1);
4325 return 0;
4326}
4327
4328static int
4329load_list(UnpicklerObject *self)
4330{
4331 PyObject *list;
4332 int i;
4333
4334 if ((i = marker(self)) < 0)
4335 return -1;
4336
4337 list = Pdata_poplist(self->stack, i);
4338 if (list == NULL)
4339 return -1;
4340 PDATA_PUSH(self->stack, list, -1);
4341 return 0;
4342}
4343
4344static int
4345load_dict(UnpicklerObject *self)
4346{
4347 PyObject *dict, *key, *value;
4348 int i, j, k;
4349
4350 if ((i = marker(self)) < 0)
4351 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004352 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004353
4354 if ((dict = PyDict_New()) == NULL)
4355 return -1;
4356
4357 for (k = i + 1; k < j; k += 2) {
4358 key = self->stack->data[k - 1];
4359 value = self->stack->data[k];
4360 if (PyDict_SetItem(dict, key, value) < 0) {
4361 Py_DECREF(dict);
4362 return -1;
4363 }
4364 }
4365 Pdata_clear(self->stack, i);
4366 PDATA_PUSH(self->stack, dict, -1);
4367 return 0;
4368}
4369
4370static PyObject *
4371instantiate(PyObject *cls, PyObject *args)
4372{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004373 PyObject *result = NULL;
4374 /* Caller must assure args are a tuple. Normally, args come from
4375 Pdata_poptuple which packs objects from the top of the stack
4376 into a newly created tuple. */
4377 assert(PyTuple_Check(args));
4378 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4379 PyObject_HasAttrString(cls, "__getinitargs__")) {
4380 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004381 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004382 else {
4383 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4384 }
4385 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004386}
4387
4388static int
4389load_obj(UnpicklerObject *self)
4390{
4391 PyObject *cls, *args, *obj = NULL;
4392 int i;
4393
4394 if ((i = marker(self)) < 0)
4395 return -1;
4396
4397 args = Pdata_poptuple(self->stack, i + 1);
4398 if (args == NULL)
4399 return -1;
4400
4401 PDATA_POP(self->stack, cls);
4402 if (cls) {
4403 obj = instantiate(cls, args);
4404 Py_DECREF(cls);
4405 }
4406 Py_DECREF(args);
4407 if (obj == NULL)
4408 return -1;
4409
4410 PDATA_PUSH(self->stack, obj, -1);
4411 return 0;
4412}
4413
4414static int
4415load_inst(UnpicklerObject *self)
4416{
4417 PyObject *cls = NULL;
4418 PyObject *args = NULL;
4419 PyObject *obj = NULL;
4420 PyObject *module_name;
4421 PyObject *class_name;
4422 Py_ssize_t len;
4423 int i;
4424 char *s;
4425
4426 if ((i = marker(self)) < 0)
4427 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004428 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004429 return -1;
4430 if (len < 2)
4431 return bad_readline();
4432
4433 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4434 identifiers are permitted in Python 3.0, since the INST opcode is only
4435 supported by older protocols on Python 2.x. */
4436 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4437 if (module_name == NULL)
4438 return -1;
4439
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004440 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004441 if (len < 2)
4442 return bad_readline();
4443 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004444 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004445 cls = find_class(self, module_name, class_name);
4446 Py_DECREF(class_name);
4447 }
4448 }
4449 Py_DECREF(module_name);
4450
4451 if (cls == NULL)
4452 return -1;
4453
4454 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4455 obj = instantiate(cls, args);
4456 Py_DECREF(args);
4457 }
4458 Py_DECREF(cls);
4459
4460 if (obj == NULL)
4461 return -1;
4462
4463 PDATA_PUSH(self->stack, obj, -1);
4464 return 0;
4465}
4466
4467static int
4468load_newobj(UnpicklerObject *self)
4469{
4470 PyObject *args = NULL;
4471 PyObject *clsraw = NULL;
4472 PyTypeObject *cls; /* clsraw cast to its true type */
4473 PyObject *obj;
4474
4475 /* Stack is ... cls argtuple, and we want to call
4476 * cls.__new__(cls, *argtuple).
4477 */
4478 PDATA_POP(self->stack, args);
4479 if (args == NULL)
4480 goto error;
4481 if (!PyTuple_Check(args)) {
4482 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4483 goto error;
4484 }
4485
4486 PDATA_POP(self->stack, clsraw);
4487 cls = (PyTypeObject *)clsraw;
4488 if (cls == NULL)
4489 goto error;
4490 if (!PyType_Check(cls)) {
4491 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4492 "isn't a type object");
4493 goto error;
4494 }
4495 if (cls->tp_new == NULL) {
4496 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4497 "has NULL tp_new");
4498 goto error;
4499 }
4500
4501 /* Call __new__. */
4502 obj = cls->tp_new(cls, args, NULL);
4503 if (obj == NULL)
4504 goto error;
4505
4506 Py_DECREF(args);
4507 Py_DECREF(clsraw);
4508 PDATA_PUSH(self->stack, obj, -1);
4509 return 0;
4510
4511 error:
4512 Py_XDECREF(args);
4513 Py_XDECREF(clsraw);
4514 return -1;
4515}
4516
4517static int
4518load_global(UnpicklerObject *self)
4519{
4520 PyObject *global = NULL;
4521 PyObject *module_name;
4522 PyObject *global_name;
4523 Py_ssize_t len;
4524 char *s;
4525
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004526 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004527 return -1;
4528 if (len < 2)
4529 return bad_readline();
4530 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4531 if (!module_name)
4532 return -1;
4533
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004534 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004535 if (len < 2) {
4536 Py_DECREF(module_name);
4537 return bad_readline();
4538 }
4539 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4540 if (global_name) {
4541 global = find_class(self, module_name, global_name);
4542 Py_DECREF(global_name);
4543 }
4544 }
4545 Py_DECREF(module_name);
4546
4547 if (global == NULL)
4548 return -1;
4549 PDATA_PUSH(self->stack, global, -1);
4550 return 0;
4551}
4552
4553static int
4554load_persid(UnpicklerObject *self)
4555{
4556 PyObject *pid;
4557 Py_ssize_t len;
4558 char *s;
4559
4560 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004561 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004562 return -1;
4563 if (len < 2)
4564 return bad_readline();
4565
4566 pid = PyBytes_FromStringAndSize(s, len - 1);
4567 if (pid == NULL)
4568 return -1;
4569
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004570 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004571 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004572 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004573 if (pid == NULL)
4574 return -1;
4575
4576 PDATA_PUSH(self->stack, pid, -1);
4577 return 0;
4578 }
4579 else {
4580 PyErr_SetString(UnpicklingError,
4581 "A load persistent id instruction was encountered,\n"
4582 "but no persistent_load function was specified.");
4583 return -1;
4584 }
4585}
4586
4587static int
4588load_binpersid(UnpicklerObject *self)
4589{
4590 PyObject *pid;
4591
4592 if (self->pers_func) {
4593 PDATA_POP(self->stack, pid);
4594 if (pid == NULL)
4595 return -1;
4596
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004597 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004598 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004599 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004600 if (pid == NULL)
4601 return -1;
4602
4603 PDATA_PUSH(self->stack, pid, -1);
4604 return 0;
4605 }
4606 else {
4607 PyErr_SetString(UnpicklingError,
4608 "A load persistent id instruction was encountered,\n"
4609 "but no persistent_load function was specified.");
4610 return -1;
4611 }
4612}
4613
4614static int
4615load_pop(UnpicklerObject *self)
4616{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004617 int len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004618
4619 /* Note that we split the (pickle.py) stack into two stacks,
4620 * an object stack and a mark stack. We have to be clever and
4621 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004622 * mark stack first, and only signalling a stack underflow if
4623 * the object stack is empty and the mark stack doesn't match
4624 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004626 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004627 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004628 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004629 len--;
4630 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004631 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004632 } else {
4633 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004634 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635 return 0;
4636}
4637
4638static int
4639load_pop_mark(UnpicklerObject *self)
4640{
4641 int i;
4642
4643 if ((i = marker(self)) < 0)
4644 return -1;
4645
4646 Pdata_clear(self->stack, i);
4647
4648 return 0;
4649}
4650
4651static int
4652load_dup(UnpicklerObject *self)
4653{
4654 PyObject *last;
4655 int len;
4656
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004657 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004658 return stack_underflow();
4659 last = self->stack->data[len - 1];
4660 PDATA_APPEND(self->stack, last, -1);
4661 return 0;
4662}
4663
4664static int
4665load_get(UnpicklerObject *self)
4666{
4667 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004668 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004669 Py_ssize_t len;
4670 char *s;
4671
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004672 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004673 return -1;
4674 if (len < 2)
4675 return bad_readline();
4676
4677 key = PyLong_FromString(s, NULL, 10);
4678 if (key == NULL)
4679 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004680 idx = PyLong_AsSsize_t(key);
4681 if (idx == -1 && PyErr_Occurred()) {
4682 Py_DECREF(key);
4683 return -1;
4684 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004685
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004686 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004687 if (value == NULL) {
4688 if (!PyErr_Occurred())
4689 PyErr_SetObject(PyExc_KeyError, key);
4690 Py_DECREF(key);
4691 return -1;
4692 }
4693 Py_DECREF(key);
4694
4695 PDATA_APPEND(self->stack, value, -1);
4696 return 0;
4697}
4698
4699static int
4700load_binget(UnpicklerObject *self)
4701{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004702 PyObject *value;
4703 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004704 char *s;
4705
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004706 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004707 return -1;
4708
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004709 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004710
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004711 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004712 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004713 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004714 if (!PyErr_Occurred())
4715 PyErr_SetObject(PyExc_KeyError, key);
4716 Py_DECREF(key);
4717 return -1;
4718 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004719
4720 PDATA_APPEND(self->stack, value, -1);
4721 return 0;
4722}
4723
4724static int
4725load_long_binget(UnpicklerObject *self)
4726{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004727 PyObject *value;
4728 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004730
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004731 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004732 return -1;
4733
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004734 idx = (long)Py_CHARMASK(s[0]);
4735 idx |= (long)Py_CHARMASK(s[1]) << 8;
4736 idx |= (long)Py_CHARMASK(s[2]) << 16;
4737 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004738
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004739 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004740 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004741 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004742 if (!PyErr_Occurred())
4743 PyErr_SetObject(PyExc_KeyError, key);
4744 Py_DECREF(key);
4745 return -1;
4746 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004747
4748 PDATA_APPEND(self->stack, value, -1);
4749 return 0;
4750}
4751
4752/* Push an object from the extension registry (EXT[124]). nbytes is
4753 * the number of bytes following the opcode, holding the index (code) value.
4754 */
4755static int
4756load_extension(UnpicklerObject *self, int nbytes)
4757{
4758 char *codebytes; /* the nbytes bytes after the opcode */
4759 long code; /* calc_binint returns long */
4760 PyObject *py_code; /* code as a Python int */
4761 PyObject *obj; /* the object to push */
4762 PyObject *pair; /* (module_name, class_name) */
4763 PyObject *module_name, *class_name;
4764
4765 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004766 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004767 return -1;
4768 code = calc_binint(codebytes, nbytes);
4769 if (code <= 0) { /* note that 0 is forbidden */
4770 /* Corrupt or hostile pickle. */
4771 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4772 return -1;
4773 }
4774
4775 /* Look for the code in the cache. */
4776 py_code = PyLong_FromLong(code);
4777 if (py_code == NULL)
4778 return -1;
4779 obj = PyDict_GetItem(extension_cache, py_code);
4780 if (obj != NULL) {
4781 /* Bingo. */
4782 Py_DECREF(py_code);
4783 PDATA_APPEND(self->stack, obj, -1);
4784 return 0;
4785 }
4786
4787 /* Look up the (module_name, class_name) pair. */
4788 pair = PyDict_GetItem(inverted_registry, py_code);
4789 if (pair == NULL) {
4790 Py_DECREF(py_code);
4791 PyErr_Format(PyExc_ValueError, "unregistered extension "
4792 "code %ld", code);
4793 return -1;
4794 }
4795 /* Since the extension registry is manipulable via Python code,
4796 * confirm that pair is really a 2-tuple of strings.
4797 */
4798 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4799 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4800 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4801 Py_DECREF(py_code);
4802 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4803 "isn't a 2-tuple of strings", code);
4804 return -1;
4805 }
4806 /* Load the object. */
4807 obj = find_class(self, module_name, class_name);
4808 if (obj == NULL) {
4809 Py_DECREF(py_code);
4810 return -1;
4811 }
4812 /* Cache code -> obj. */
4813 code = PyDict_SetItem(extension_cache, py_code, obj);
4814 Py_DECREF(py_code);
4815 if (code < 0) {
4816 Py_DECREF(obj);
4817 return -1;
4818 }
4819 PDATA_PUSH(self->stack, obj, -1);
4820 return 0;
4821}
4822
4823static int
4824load_put(UnpicklerObject *self)
4825{
4826 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004827 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004828 Py_ssize_t len;
4829 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004830
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004831 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004832 return -1;
4833 if (len < 2)
4834 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004835 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004836 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004837 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004838
4839 key = PyLong_FromString(s, NULL, 10);
4840 if (key == NULL)
4841 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004842 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843 Py_DECREF(key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004844 if (idx == -1 && PyErr_Occurred())
4845 return -1;
4846
4847 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848}
4849
4850static int
4851load_binput(UnpicklerObject *self)
4852{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004853 PyObject *value;
4854 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004855 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004856
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004857 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004858 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859
4860 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004863
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004864 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004865
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004866 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004867}
4868
4869static int
4870load_long_binput(UnpicklerObject *self)
4871{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004872 PyObject *value;
4873 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004875
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004876 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004877 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878
4879 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004880 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004881 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004882
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004883 idx = (long)Py_CHARMASK(s[0]);
4884 idx |= (long)Py_CHARMASK(s[1]) << 8;
4885 idx |= (long)Py_CHARMASK(s[2]) << 16;
4886 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004887
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004888 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004889}
4890
4891static int
4892do_append(UnpicklerObject *self, int x)
4893{
4894 PyObject *value;
4895 PyObject *list;
4896 int len, i;
4897
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004898 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004899 if (x > len || x <= 0)
4900 return stack_underflow();
4901 if (len == x) /* nothing to do */
4902 return 0;
4903
4904 list = self->stack->data[x - 1];
4905
4906 if (PyList_Check(list)) {
4907 PyObject *slice;
4908 Py_ssize_t list_len;
4909
4910 slice = Pdata_poplist(self->stack, x);
4911 if (!slice)
4912 return -1;
4913 list_len = PyList_GET_SIZE(list);
4914 i = PyList_SetSlice(list, list_len, list_len, slice);
4915 Py_DECREF(slice);
4916 return i;
4917 }
4918 else {
4919 PyObject *append_func;
4920
4921 append_func = PyObject_GetAttrString(list, "append");
4922 if (append_func == NULL)
4923 return -1;
4924 for (i = x; i < len; i++) {
4925 PyObject *result;
4926
4927 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004928 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929 if (result == NULL) {
4930 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004931 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004932 return -1;
4933 }
4934 Py_DECREF(result);
4935 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004936 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004937 }
4938
4939 return 0;
4940}
4941
4942static int
4943load_append(UnpicklerObject *self)
4944{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004945 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004946}
4947
4948static int
4949load_appends(UnpicklerObject *self)
4950{
4951 return do_append(self, marker(self));
4952}
4953
4954static int
4955do_setitems(UnpicklerObject *self, int x)
4956{
4957 PyObject *value, *key;
4958 PyObject *dict;
4959 int len, i;
4960 int status = 0;
4961
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004962 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004963 if (x > len || x <= 0)
4964 return stack_underflow();
4965 if (len == x) /* nothing to do */
4966 return 0;
4967 if ((len - x) % 2 != 0) {
4968 /* Currupt or hostile pickle -- we never write one like this. */
4969 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4970 return -1;
4971 }
4972
4973 /* Here, dict does not actually need to be a PyDict; it could be anything
4974 that supports the __setitem__ attribute. */
4975 dict = self->stack->data[x - 1];
4976
4977 for (i = x + 1; i < len; i += 2) {
4978 key = self->stack->data[i - 1];
4979 value = self->stack->data[i];
4980 if (PyObject_SetItem(dict, key, value) < 0) {
4981 status = -1;
4982 break;
4983 }
4984 }
4985
4986 Pdata_clear(self->stack, x);
4987 return status;
4988}
4989
4990static int
4991load_setitem(UnpicklerObject *self)
4992{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004994}
4995
4996static int
4997load_setitems(UnpicklerObject *self)
4998{
4999 return do_setitems(self, marker(self));
5000}
5001
5002static int
5003load_build(UnpicklerObject *self)
5004{
5005 PyObject *state, *inst, *slotstate;
5006 PyObject *setstate;
5007 int status = 0;
5008
5009 /* Stack is ... instance, state. We want to leave instance at
5010 * the stack top, possibly mutated via instance.__setstate__(state).
5011 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005012 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005013 return stack_underflow();
5014
5015 PDATA_POP(self->stack, state);
5016 if (state == NULL)
5017 return -1;
5018
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005019 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005020
5021 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005022 if (setstate == NULL) {
5023 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5024 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005025 else {
5026 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005027 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005028 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005029 }
5030 else {
5031 PyObject *result;
5032
5033 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005034 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005035 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005036 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005037 Py_DECREF(setstate);
5038 if (result == NULL)
5039 return -1;
5040 Py_DECREF(result);
5041 return 0;
5042 }
5043
5044 /* A default __setstate__. First see whether state embeds a
5045 * slot state dict too (a proto 2 addition).
5046 */
5047 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5048 PyObject *tmp = state;
5049
5050 state = PyTuple_GET_ITEM(tmp, 0);
5051 slotstate = PyTuple_GET_ITEM(tmp, 1);
5052 Py_INCREF(state);
5053 Py_INCREF(slotstate);
5054 Py_DECREF(tmp);
5055 }
5056 else
5057 slotstate = NULL;
5058
5059 /* Set inst.__dict__ from the state dict (if any). */
5060 if (state != Py_None) {
5061 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005062 PyObject *d_key, *d_value;
5063 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005064
5065 if (!PyDict_Check(state)) {
5066 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5067 goto error;
5068 }
5069 dict = PyObject_GetAttrString(inst, "__dict__");
5070 if (dict == NULL)
5071 goto error;
5072
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005073 i = 0;
5074 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5075 /* normally the keys for instance attributes are
5076 interned. we should try to do that here. */
5077 Py_INCREF(d_key);
5078 if (PyUnicode_CheckExact(d_key))
5079 PyUnicode_InternInPlace(&d_key);
5080 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5081 Py_DECREF(d_key);
5082 goto error;
5083 }
5084 Py_DECREF(d_key);
5085 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005086 Py_DECREF(dict);
5087 }
5088
5089 /* Also set instance attributes from the slotstate dict (if any). */
5090 if (slotstate != NULL) {
5091 PyObject *d_key, *d_value;
5092 Py_ssize_t i;
5093
5094 if (!PyDict_Check(slotstate)) {
5095 PyErr_SetString(UnpicklingError,
5096 "slot state is not a dictionary");
5097 goto error;
5098 }
5099 i = 0;
5100 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5101 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5102 goto error;
5103 }
5104 }
5105
5106 if (0) {
5107 error:
5108 status = -1;
5109 }
5110
5111 Py_DECREF(state);
5112 Py_XDECREF(slotstate);
5113 return status;
5114}
5115
5116static int
5117load_mark(UnpicklerObject *self)
5118{
5119
5120 /* Note that we split the (pickle.py) stack into two stacks, an
5121 * object stack and a mark stack. Here we push a mark onto the
5122 * mark stack.
5123 */
5124
5125 if ((self->num_marks + 1) >= self->marks_size) {
5126 size_t alloc;
5127 int *marks;
5128
5129 /* Use the size_t type to check for overflow. */
5130 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005131 if (alloc > PY_SSIZE_T_MAX ||
5132 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005133 PyErr_NoMemory();
5134 return -1;
5135 }
5136
5137 if (self->marks == NULL)
5138 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
5139 else
5140 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
5141 if (marks == NULL) {
5142 PyErr_NoMemory();
5143 return -1;
5144 }
5145 self->marks = marks;
5146 self->marks_size = (Py_ssize_t)alloc;
5147 }
5148
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005149 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005150
5151 return 0;
5152}
5153
5154static int
5155load_reduce(UnpicklerObject *self)
5156{
5157 PyObject *callable = NULL;
5158 PyObject *argtup = NULL;
5159 PyObject *obj = NULL;
5160
5161 PDATA_POP(self->stack, argtup);
5162 if (argtup == NULL)
5163 return -1;
5164 PDATA_POP(self->stack, callable);
5165 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005166 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005167 Py_DECREF(callable);
5168 }
5169 Py_DECREF(argtup);
5170
5171 if (obj == NULL)
5172 return -1;
5173
5174 PDATA_PUSH(self->stack, obj, -1);
5175 return 0;
5176}
5177
5178/* Just raises an error if we don't know the protocol specified. PROTO
5179 * is the first opcode for protocols >= 2.
5180 */
5181static int
5182load_proto(UnpicklerObject *self)
5183{
5184 char *s;
5185 int i;
5186
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005187 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005188 return -1;
5189
5190 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005191 if (i <= HIGHEST_PROTOCOL) {
5192 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005193 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005194 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005195
5196 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5197 return -1;
5198}
5199
5200static PyObject *
5201load(UnpicklerObject *self)
5202{
5203 PyObject *err;
5204 PyObject *value = NULL;
5205 char *s;
5206
5207 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005208 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005209 Pdata_clear(self->stack, 0);
5210
5211 /* Convenient macros for the dispatch while-switch loop just below. */
5212#define OP(opcode, load_func) \
5213 case opcode: if (load_func(self) < 0) break; continue;
5214
5215#define OP_ARG(opcode, load_func, arg) \
5216 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5217
5218 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005219 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005220 break;
5221
5222 switch ((enum opcode)s[0]) {
5223 OP(NONE, load_none)
5224 OP(BININT, load_binint)
5225 OP(BININT1, load_binint1)
5226 OP(BININT2, load_binint2)
5227 OP(INT, load_int)
5228 OP(LONG, load_long)
5229 OP_ARG(LONG1, load_counted_long, 1)
5230 OP_ARG(LONG4, load_counted_long, 4)
5231 OP(FLOAT, load_float)
5232 OP(BINFLOAT, load_binfloat)
5233 OP(BINBYTES, load_binbytes)
5234 OP(SHORT_BINBYTES, load_short_binbytes)
5235 OP(BINSTRING, load_binstring)
5236 OP(SHORT_BINSTRING, load_short_binstring)
5237 OP(STRING, load_string)
5238 OP(UNICODE, load_unicode)
5239 OP(BINUNICODE, load_binunicode)
5240 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5241 OP_ARG(TUPLE1, load_counted_tuple, 1)
5242 OP_ARG(TUPLE2, load_counted_tuple, 2)
5243 OP_ARG(TUPLE3, load_counted_tuple, 3)
5244 OP(TUPLE, load_tuple)
5245 OP(EMPTY_LIST, load_empty_list)
5246 OP(LIST, load_list)
5247 OP(EMPTY_DICT, load_empty_dict)
5248 OP(DICT, load_dict)
5249 OP(OBJ, load_obj)
5250 OP(INST, load_inst)
5251 OP(NEWOBJ, load_newobj)
5252 OP(GLOBAL, load_global)
5253 OP(APPEND, load_append)
5254 OP(APPENDS, load_appends)
5255 OP(BUILD, load_build)
5256 OP(DUP, load_dup)
5257 OP(BINGET, load_binget)
5258 OP(LONG_BINGET, load_long_binget)
5259 OP(GET, load_get)
5260 OP(MARK, load_mark)
5261 OP(BINPUT, load_binput)
5262 OP(LONG_BINPUT, load_long_binput)
5263 OP(PUT, load_put)
5264 OP(POP, load_pop)
5265 OP(POP_MARK, load_pop_mark)
5266 OP(SETITEM, load_setitem)
5267 OP(SETITEMS, load_setitems)
5268 OP(PERSID, load_persid)
5269 OP(BINPERSID, load_binpersid)
5270 OP(REDUCE, load_reduce)
5271 OP(PROTO, load_proto)
5272 OP_ARG(EXT1, load_extension, 1)
5273 OP_ARG(EXT2, load_extension, 2)
5274 OP_ARG(EXT4, load_extension, 4)
5275 OP_ARG(NEWTRUE, load_bool, Py_True)
5276 OP_ARG(NEWFALSE, load_bool, Py_False)
5277
5278 case STOP:
5279 break;
5280
5281 case '\0':
5282 PyErr_SetNone(PyExc_EOFError);
5283 return NULL;
5284
5285 default:
5286 PyErr_Format(UnpicklingError,
5287 "invalid load key, '%c'.", s[0]);
5288 return NULL;
5289 }
5290
5291 break; /* and we are done! */
5292 }
5293
Antoine Pitrou04248a82010-10-12 20:51:21 +00005294 if (_Unpickler_SkipConsumed(self) < 0)
5295 return NULL;
5296
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005297 /* XXX: It is not clear what this is actually for. */
5298 if ((err = PyErr_Occurred())) {
5299 if (err == PyExc_EOFError) {
5300 PyErr_SetNone(PyExc_EOFError);
5301 }
5302 return NULL;
5303 }
5304
5305 PDATA_POP(self->stack, value);
5306 return value;
5307}
5308
5309PyDoc_STRVAR(Unpickler_load_doc,
5310"load() -> object. Load a pickle."
5311"\n"
5312"Read a pickled object representation from the open file object given in\n"
5313"the constructor, and return the reconstituted object hierarchy specified\n"
5314"therein.\n");
5315
5316static PyObject *
5317Unpickler_load(UnpicklerObject *self)
5318{
5319 /* Check whether the Unpickler was initialized correctly. This prevents
5320 segfaulting if a subclass overridden __init__ with a function that does
5321 not call Unpickler.__init__(). Here, we simply ensure that self->read
5322 is not NULL. */
5323 if (self->read == NULL) {
5324 PyErr_Format(UnpicklingError,
5325 "Unpickler.__init__() was not called by %s.__init__()",
5326 Py_TYPE(self)->tp_name);
5327 return NULL;
5328 }
5329
5330 return load(self);
5331}
5332
5333/* The name of find_class() is misleading. In newer pickle protocols, this
5334 function is used for loading any global (i.e., functions), not just
5335 classes. The name is kept only for backward compatibility. */
5336
5337PyDoc_STRVAR(Unpickler_find_class_doc,
5338"find_class(module_name, global_name) -> object.\n"
5339"\n"
5340"Return an object from a specified module, importing the module if\n"
5341"necessary. Subclasses may override this method (e.g. to restrict\n"
5342"unpickling of arbitrary classes and functions).\n"
5343"\n"
5344"This method is called whenever a class or a function object is\n"
5345"needed. Both arguments passed are str objects.\n");
5346
5347static PyObject *
5348Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5349{
5350 PyObject *global;
5351 PyObject *modules_dict;
5352 PyObject *module;
5353 PyObject *module_name, *global_name;
5354
5355 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5356 &module_name, &global_name))
5357 return NULL;
5358
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005359 /* Try to map the old names used in Python 2.x to the new ones used in
5360 Python 3.x. We do this only with old pickle protocols and when the
5361 user has not disabled the feature. */
5362 if (self->proto < 3 && self->fix_imports) {
5363 PyObject *key;
5364 PyObject *item;
5365
5366 /* Check if the global (i.e., a function or a class) was renamed
5367 or moved to another module. */
5368 key = PyTuple_Pack(2, module_name, global_name);
5369 if (key == NULL)
5370 return NULL;
5371 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5372 Py_DECREF(key);
5373 if (item) {
5374 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5375 PyErr_Format(PyExc_RuntimeError,
5376 "_compat_pickle.NAME_MAPPING values should be "
5377 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5378 return NULL;
5379 }
5380 module_name = PyTuple_GET_ITEM(item, 0);
5381 global_name = PyTuple_GET_ITEM(item, 1);
5382 if (!PyUnicode_Check(module_name) ||
5383 !PyUnicode_Check(global_name)) {
5384 PyErr_Format(PyExc_RuntimeError,
5385 "_compat_pickle.NAME_MAPPING values should be "
5386 "pairs of str, not (%.200s, %.200s)",
5387 Py_TYPE(module_name)->tp_name,
5388 Py_TYPE(global_name)->tp_name);
5389 return NULL;
5390 }
5391 }
5392 else if (PyErr_Occurred()) {
5393 return NULL;
5394 }
5395
5396 /* Check if the module was renamed. */
5397 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5398 if (item) {
5399 if (!PyUnicode_Check(item)) {
5400 PyErr_Format(PyExc_RuntimeError,
5401 "_compat_pickle.IMPORT_MAPPING values should be "
5402 "strings, not %.200s", Py_TYPE(item)->tp_name);
5403 return NULL;
5404 }
5405 module_name = item;
5406 }
5407 else if (PyErr_Occurred()) {
5408 return NULL;
5409 }
5410 }
5411
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005412 modules_dict = PySys_GetObject("modules");
5413 if (modules_dict == NULL)
5414 return NULL;
5415
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005416 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005417 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005418 if (PyErr_Occurred())
5419 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005420 module = PyImport_Import(module_name);
5421 if (module == NULL)
5422 return NULL;
5423 global = PyObject_GetAttr(module, global_name);
5424 Py_DECREF(module);
5425 }
5426 else {
5427 global = PyObject_GetAttr(module, global_name);
5428 }
5429 return global;
5430}
5431
5432static struct PyMethodDef Unpickler_methods[] = {
5433 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5434 Unpickler_load_doc},
5435 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5436 Unpickler_find_class_doc},
5437 {NULL, NULL} /* sentinel */
5438};
5439
5440static void
5441Unpickler_dealloc(UnpicklerObject *self)
5442{
5443 PyObject_GC_UnTrack((PyObject *)self);
5444 Py_XDECREF(self->readline);
5445 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005446 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005447 Py_XDECREF(self->stack);
5448 Py_XDECREF(self->pers_func);
5449 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005450 if (self->buffer.buf != NULL) {
5451 PyBuffer_Release(&self->buffer);
5452 self->buffer.buf = NULL;
5453 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005455 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005456 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005457 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005458 free(self->encoding);
5459 free(self->errors);
5460
5461 Py_TYPE(self)->tp_free((PyObject *)self);
5462}
5463
5464static int
5465Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5466{
5467 Py_VISIT(self->readline);
5468 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005469 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005470 Py_VISIT(self->stack);
5471 Py_VISIT(self->pers_func);
5472 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005473 return 0;
5474}
5475
5476static int
5477Unpickler_clear(UnpicklerObject *self)
5478{
5479 Py_CLEAR(self->readline);
5480 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005481 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005482 Py_CLEAR(self->stack);
5483 Py_CLEAR(self->pers_func);
5484 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005485 if (self->buffer.buf != NULL) {
5486 PyBuffer_Release(&self->buffer);
5487 self->buffer.buf = NULL;
5488 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005489
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005490 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005491 PyMem_Free(self->marks);
5492 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005493 PyMem_Free(self->input_line);
5494 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005495 free(self->encoding);
5496 self->encoding = NULL;
5497 free(self->errors);
5498 self->errors = NULL;
5499
5500 return 0;
5501}
5502
5503PyDoc_STRVAR(Unpickler_doc,
5504"Unpickler(file, *, encoding='ASCII', errors='strict')"
5505"\n"
5506"This takes a binary file for reading a pickle data stream.\n"
5507"\n"
5508"The protocol version of the pickle is detected automatically, so no\n"
5509"proto argument is needed.\n"
5510"\n"
5511"The file-like object must have two methods, a read() method\n"
5512"that takes an integer argument, and a readline() method that\n"
5513"requires no arguments. Both methods should return bytes.\n"
5514"Thus file-like object can be a binary file object opened for\n"
5515"reading, a BytesIO object, or any other custom object that\n"
5516"meets this interface.\n"
5517"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005518"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5519"which are used to control compatiblity support for pickle stream\n"
5520"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5521"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5522"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5523"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5524"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005525
5526static int
5527Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5528{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005529 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005530 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005531 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005532 char *encoding = NULL;
5533 char *errors = NULL;
5534
5535 /* XXX: That is an horrible error message. But, I don't know how to do
5536 better... */
5537 if (Py_SIZE(args) != 1) {
5538 PyErr_Format(PyExc_TypeError,
5539 "%s takes exactly one positional argument (%zd given)",
5540 Py_TYPE(self)->tp_name, Py_SIZE(args));
5541 return -1;
5542 }
5543
5544 /* Arguments parsing needs to be done in the __init__() method to allow
5545 subclasses to define their own __init__() method, which may (or may
5546 not) support Unpickler arguments. However, this means we need to be
5547 extra careful in the other Unpickler methods, since a subclass could
5548 forget to call Unpickler.__init__() thus breaking our internal
5549 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005550 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005551 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005552 return -1;
5553
5554 /* In case of multiple __init__() calls, clear previous content. */
5555 if (self->read != NULL)
5556 (void)Unpickler_clear(self);
5557
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005558 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005559 return -1;
5560
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005561 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005562 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005563
5564 self->fix_imports = PyObject_IsTrue(fix_imports);
5565 if (self->fix_imports == -1)
5566 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005567
5568 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5569 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5570 "persistent_load");
5571 if (self->pers_func == NULL)
5572 return -1;
5573 }
5574 else {
5575 self->pers_func = NULL;
5576 }
5577
5578 self->stack = (Pdata *)Pdata_New();
5579 if (self->stack == NULL)
5580 return -1;
5581
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005582 self->memo_size = 32;
5583 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005584 if (self->memo == NULL)
5585 return -1;
5586
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005587 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005588 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005589
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005590 return 0;
5591}
5592
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005593/* Define a proxy object for the Unpickler's internal memo object. This is to
5594 * avoid breaking code like:
5595 * unpickler.memo.clear()
5596 * and
5597 * unpickler.memo = saved_memo
5598 * Is this a good idea? Not really, but we don't want to break code that uses
5599 * it. Note that we don't implement the entire mapping API here. This is
5600 * intentional, as these should be treated as black-box implementation details.
5601 *
5602 * We do, however, have to implement pickling/unpickling support because of
5603 * real-world code like cvs2svn.
5604 */
5605
5606typedef struct {
5607 PyObject_HEAD
5608 UnpicklerObject *unpickler;
5609} UnpicklerMemoProxyObject;
5610
5611PyDoc_STRVAR(ump_clear_doc,
5612"memo.clear() -> None. Remove all items from memo.");
5613
5614static PyObject *
5615ump_clear(UnpicklerMemoProxyObject *self)
5616{
5617 _Unpickler_MemoCleanup(self->unpickler);
5618 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5619 if (self->unpickler->memo == NULL)
5620 return NULL;
5621 Py_RETURN_NONE;
5622}
5623
5624PyDoc_STRVAR(ump_copy_doc,
5625"memo.copy() -> new_memo. Copy the memo to a new object.");
5626
5627static PyObject *
5628ump_copy(UnpicklerMemoProxyObject *self)
5629{
5630 Py_ssize_t i;
5631 PyObject *new_memo = PyDict_New();
5632 if (new_memo == NULL)
5633 return NULL;
5634
5635 for (i = 0; i < self->unpickler->memo_size; i++) {
5636 int status;
5637 PyObject *key, *value;
5638
5639 value = self->unpickler->memo[i];
5640 if (value == NULL)
5641 continue;
5642
5643 key = PyLong_FromSsize_t(i);
5644 if (key == NULL)
5645 goto error;
5646 status = PyDict_SetItem(new_memo, key, value);
5647 Py_DECREF(key);
5648 if (status < 0)
5649 goto error;
5650 }
5651 return new_memo;
5652
5653error:
5654 Py_DECREF(new_memo);
5655 return NULL;
5656}
5657
5658PyDoc_STRVAR(ump_reduce_doc,
5659"memo.__reduce__(). Pickling support.");
5660
5661static PyObject *
5662ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5663{
5664 PyObject *reduce_value;
5665 PyObject *constructor_args;
5666 PyObject *contents = ump_copy(self);
5667 if (contents == NULL)
5668 return NULL;
5669
5670 reduce_value = PyTuple_New(2);
5671 if (reduce_value == NULL) {
5672 Py_DECREF(contents);
5673 return NULL;
5674 }
5675 constructor_args = PyTuple_New(1);
5676 if (constructor_args == NULL) {
5677 Py_DECREF(contents);
5678 Py_DECREF(reduce_value);
5679 return NULL;
5680 }
5681 PyTuple_SET_ITEM(constructor_args, 0, contents);
5682 Py_INCREF((PyObject *)&PyDict_Type);
5683 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5684 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5685 return reduce_value;
5686}
5687
5688static PyMethodDef unpicklerproxy_methods[] = {
5689 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5690 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5691 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5692 {NULL, NULL} /* sentinel */
5693};
5694
5695static void
5696UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5697{
5698 PyObject_GC_UnTrack(self);
5699 Py_XDECREF(self->unpickler);
5700 PyObject_GC_Del((PyObject *)self);
5701}
5702
5703static int
5704UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5705 visitproc visit, void *arg)
5706{
5707 Py_VISIT(self->unpickler);
5708 return 0;
5709}
5710
5711static int
5712UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5713{
5714 Py_CLEAR(self->unpickler);
5715 return 0;
5716}
5717
5718static PyTypeObject UnpicklerMemoProxyType = {
5719 PyVarObject_HEAD_INIT(NULL, 0)
5720 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5721 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5722 0,
5723 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5724 0, /* tp_print */
5725 0, /* tp_getattr */
5726 0, /* tp_setattr */
5727 0, /* tp_compare */
5728 0, /* tp_repr */
5729 0, /* tp_as_number */
5730 0, /* tp_as_sequence */
5731 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005732 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005733 0, /* tp_call */
5734 0, /* tp_str */
5735 PyObject_GenericGetAttr, /* tp_getattro */
5736 PyObject_GenericSetAttr, /* tp_setattro */
5737 0, /* tp_as_buffer */
5738 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5739 0, /* tp_doc */
5740 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5741 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5742 0, /* tp_richcompare */
5743 0, /* tp_weaklistoffset */
5744 0, /* tp_iter */
5745 0, /* tp_iternext */
5746 unpicklerproxy_methods, /* tp_methods */
5747};
5748
5749static PyObject *
5750UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5751{
5752 UnpicklerMemoProxyObject *self;
5753
5754 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5755 &UnpicklerMemoProxyType);
5756 if (self == NULL)
5757 return NULL;
5758 Py_INCREF(unpickler);
5759 self->unpickler = unpickler;
5760 PyObject_GC_Track(self);
5761 return (PyObject *)self;
5762}
5763
5764/*****************************************************************************/
5765
5766
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005767static PyObject *
5768Unpickler_get_memo(UnpicklerObject *self)
5769{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005770 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005771}
5772
5773static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005774Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005775{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005776 PyObject **new_memo;
5777 Py_ssize_t new_memo_size = 0;
5778 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005779
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005780 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005781 PyErr_SetString(PyExc_TypeError,
5782 "attribute deletion is not supported");
5783 return -1;
5784 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005785
5786 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5787 UnpicklerObject *unpickler =
5788 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5789
5790 new_memo_size = unpickler->memo_size;
5791 new_memo = _Unpickler_NewMemo(new_memo_size);
5792 if (new_memo == NULL)
5793 return -1;
5794
5795 for (i = 0; i < new_memo_size; i++) {
5796 Py_XINCREF(unpickler->memo[i]);
5797 new_memo[i] = unpickler->memo[i];
5798 }
5799 }
5800 else if (PyDict_Check(obj)) {
5801 Py_ssize_t i = 0;
5802 PyObject *key, *value;
5803
5804 new_memo_size = PyDict_Size(obj);
5805 new_memo = _Unpickler_NewMemo(new_memo_size);
5806 if (new_memo == NULL)
5807 return -1;
5808
5809 while (PyDict_Next(obj, &i, &key, &value)) {
5810 Py_ssize_t idx;
5811 if (!PyLong_Check(key)) {
5812 PyErr_SetString(PyExc_TypeError,
5813 "memo key must be integers");
5814 goto error;
5815 }
5816 idx = PyLong_AsSsize_t(key);
5817 if (idx == -1 && PyErr_Occurred())
5818 goto error;
5819 if (_Unpickler_MemoPut(self, idx, value) < 0)
5820 goto error;
5821 }
5822 }
5823 else {
5824 PyErr_Format(PyExc_TypeError,
5825 "'memo' attribute must be an UnpicklerMemoProxy object"
5826 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005827 return -1;
5828 }
5829
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005830 _Unpickler_MemoCleanup(self);
5831 self->memo_size = new_memo_size;
5832 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005833
5834 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005835
5836 error:
5837 if (new_memo_size) {
5838 i = new_memo_size;
5839 while (--i >= 0) {
5840 Py_XDECREF(new_memo[i]);
5841 }
5842 PyMem_FREE(new_memo);
5843 }
5844 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005845}
5846
5847static PyObject *
5848Unpickler_get_persload(UnpicklerObject *self)
5849{
5850 if (self->pers_func == NULL)
5851 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5852 else
5853 Py_INCREF(self->pers_func);
5854 return self->pers_func;
5855}
5856
5857static int
5858Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5859{
5860 PyObject *tmp;
5861
5862 if (value == NULL) {
5863 PyErr_SetString(PyExc_TypeError,
5864 "attribute deletion is not supported");
5865 return -1;
5866 }
5867 if (!PyCallable_Check(value)) {
5868 PyErr_SetString(PyExc_TypeError,
5869 "persistent_load must be a callable taking "
5870 "one argument");
5871 return -1;
5872 }
5873
5874 tmp = self->pers_func;
5875 Py_INCREF(value);
5876 self->pers_func = value;
5877 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5878
5879 return 0;
5880}
5881
5882static PyGetSetDef Unpickler_getsets[] = {
5883 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5884 {"persistent_load", (getter)Unpickler_get_persload,
5885 (setter)Unpickler_set_persload},
5886 {NULL}
5887};
5888
5889static PyTypeObject Unpickler_Type = {
5890 PyVarObject_HEAD_INIT(NULL, 0)
5891 "_pickle.Unpickler", /*tp_name*/
5892 sizeof(UnpicklerObject), /*tp_basicsize*/
5893 0, /*tp_itemsize*/
5894 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5895 0, /*tp_print*/
5896 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005897 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005898 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005899 0, /*tp_repr*/
5900 0, /*tp_as_number*/
5901 0, /*tp_as_sequence*/
5902 0, /*tp_as_mapping*/
5903 0, /*tp_hash*/
5904 0, /*tp_call*/
5905 0, /*tp_str*/
5906 0, /*tp_getattro*/
5907 0, /*tp_setattro*/
5908 0, /*tp_as_buffer*/
5909 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5910 Unpickler_doc, /*tp_doc*/
5911 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5912 (inquiry)Unpickler_clear, /*tp_clear*/
5913 0, /*tp_richcompare*/
5914 0, /*tp_weaklistoffset*/
5915 0, /*tp_iter*/
5916 0, /*tp_iternext*/
5917 Unpickler_methods, /*tp_methods*/
5918 0, /*tp_members*/
5919 Unpickler_getsets, /*tp_getset*/
5920 0, /*tp_base*/
5921 0, /*tp_dict*/
5922 0, /*tp_descr_get*/
5923 0, /*tp_descr_set*/
5924 0, /*tp_dictoffset*/
5925 (initproc)Unpickler_init, /*tp_init*/
5926 PyType_GenericAlloc, /*tp_alloc*/
5927 PyType_GenericNew, /*tp_new*/
5928 PyObject_GC_Del, /*tp_free*/
5929 0, /*tp_is_gc*/
5930};
5931
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005932PyDoc_STRVAR(pickle_dump_doc,
5933"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5934"\n"
5935"Write a pickled representation of obj to the open file object file. This\n"
5936"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5937"efficient.\n"
5938"\n"
5939"The optional protocol argument tells the pickler to use the given protocol;\n"
5940"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5941"backward-incompatible protocol designed for Python 3.0.\n"
5942"\n"
5943"Specifying a negative protocol version selects the highest protocol version\n"
5944"supported. The higher the protocol used, the more recent the version of\n"
5945"Python needed to read the pickle produced.\n"
5946"\n"
5947"The file argument must have a write() method that accepts a single bytes\n"
5948"argument. It can thus be a file object opened for binary writing, a\n"
5949"io.BytesIO instance, or any other custom object that meets this interface.\n"
5950"\n"
5951"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5952"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5953"so that the pickle data stream is readable with Python 2.x.\n");
5954
5955static PyObject *
5956pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5957{
5958 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5959 PyObject *obj;
5960 PyObject *file;
5961 PyObject *proto = NULL;
5962 PyObject *fix_imports = Py_True;
5963 PicklerObject *pickler;
5964
5965 /* fix_imports is a keyword-only argument. */
5966 if (Py_SIZE(args) > 3) {
5967 PyErr_Format(PyExc_TypeError,
5968 "pickle.dump() takes at most 3 positional "
5969 "argument (%zd given)", Py_SIZE(args));
5970 return NULL;
5971 }
5972
5973 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5974 &obj, &file, &proto, &fix_imports))
5975 return NULL;
5976
5977 pickler = _Pickler_New();
5978 if (pickler == NULL)
5979 return NULL;
5980
5981 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5982 goto error;
5983
5984 if (_Pickler_SetOutputStream(pickler, file) < 0)
5985 goto error;
5986
5987 if (dump(pickler, obj) < 0)
5988 goto error;
5989
5990 if (_Pickler_FlushToFile(pickler) < 0)
5991 goto error;
5992
5993 Py_DECREF(pickler);
5994 Py_RETURN_NONE;
5995
5996 error:
5997 Py_XDECREF(pickler);
5998 return NULL;
5999}
6000
6001PyDoc_STRVAR(pickle_dumps_doc,
6002"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6003"\n"
6004"Return the pickled representation of the object as a bytes\n"
6005"object, instead of writing it to a file.\n"
6006"\n"
6007"The optional protocol argument tells the pickler to use the given protocol;\n"
6008"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6009"backward-incompatible protocol designed for Python 3.0.\n"
6010"\n"
6011"Specifying a negative protocol version selects the highest protocol version\n"
6012"supported. The higher the protocol used, the more recent the version of\n"
6013"Python needed to read the pickle produced.\n"
6014"\n"
6015"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6016"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6017"so that the pickle data stream is readable with Python 2.x.\n");
6018
6019static PyObject *
6020pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6021{
6022 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6023 PyObject *obj;
6024 PyObject *proto = NULL;
6025 PyObject *result;
6026 PyObject *fix_imports = Py_True;
6027 PicklerObject *pickler;
6028
6029 /* fix_imports is a keyword-only argument. */
6030 if (Py_SIZE(args) > 2) {
6031 PyErr_Format(PyExc_TypeError,
6032 "pickle.dumps() takes at most 2 positional "
6033 "argument (%zd given)", Py_SIZE(args));
6034 return NULL;
6035 }
6036
6037 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6038 &obj, &proto, &fix_imports))
6039 return NULL;
6040
6041 pickler = _Pickler_New();
6042 if (pickler == NULL)
6043 return NULL;
6044
6045 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6046 goto error;
6047
6048 if (dump(pickler, obj) < 0)
6049 goto error;
6050
6051 result = _Pickler_GetString(pickler);
6052 Py_DECREF(pickler);
6053 return result;
6054
6055 error:
6056 Py_XDECREF(pickler);
6057 return NULL;
6058}
6059
6060PyDoc_STRVAR(pickle_load_doc,
6061"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6062"\n"
6063"Read a pickled object representation from the open file object file and\n"
6064"return the reconstituted object hierarchy specified therein. This is\n"
6065"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6066"\n"
6067"The protocol version of the pickle is detected automatically, so no protocol\n"
6068"argument is needed. Bytes past the pickled object's representation are\n"
6069"ignored.\n"
6070"\n"
6071"The argument file must have two methods, a read() method that takes an\n"
6072"integer argument, and a readline() method that requires no arguments. Both\n"
6073"methods should return bytes. Thus *file* can be a binary file object opened\n"
6074"for reading, a BytesIO object, or any other custom object that meets this\n"
6075"interface.\n"
6076"\n"
6077"Optional keyword arguments are fix_imports, encoding and errors,\n"
6078"which are used to control compatiblity support for pickle stream generated\n"
6079"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6080"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6081"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6082"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6083
6084static PyObject *
6085pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6086{
6087 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6088 PyObject *file;
6089 PyObject *fix_imports = Py_True;
6090 PyObject *result;
6091 char *encoding = NULL;
6092 char *errors = NULL;
6093 UnpicklerObject *unpickler;
6094
6095 /* fix_imports, encoding and errors are a keyword-only argument. */
6096 if (Py_SIZE(args) != 1) {
6097 PyErr_Format(PyExc_TypeError,
6098 "pickle.load() takes exactly one positional "
6099 "argument (%zd given)", Py_SIZE(args));
6100 return NULL;
6101 }
6102
6103 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6104 &file, &fix_imports, &encoding, &errors))
6105 return NULL;
6106
6107 unpickler = _Unpickler_New();
6108 if (unpickler == NULL)
6109 return NULL;
6110
6111 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6112 goto error;
6113
6114 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6115 goto error;
6116
6117 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6118 if (unpickler->fix_imports == -1)
6119 goto error;
6120
6121 result = load(unpickler);
6122 Py_DECREF(unpickler);
6123 return result;
6124
6125 error:
6126 Py_XDECREF(unpickler);
6127 return NULL;
6128}
6129
6130PyDoc_STRVAR(pickle_loads_doc,
6131"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6132"\n"
6133"Read a pickled object hierarchy from a bytes object and return the\n"
6134"reconstituted object hierarchy specified therein\n"
6135"\n"
6136"The protocol version of the pickle is detected automatically, so no protocol\n"
6137"argument is needed. Bytes past the pickled object's representation are\n"
6138"ignored.\n"
6139"\n"
6140"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6141"are used to control compatiblity support for pickle stream generated\n"
6142"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6143"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6144"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6145"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6146
6147static PyObject *
6148pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6149{
6150 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6151 PyObject *input;
6152 PyObject *fix_imports = Py_True;
6153 PyObject *result;
6154 char *encoding = NULL;
6155 char *errors = NULL;
6156 UnpicklerObject *unpickler;
6157
6158 /* fix_imports, encoding and errors are a keyword-only argument. */
6159 if (Py_SIZE(args) != 1) {
6160 PyErr_Format(PyExc_TypeError,
6161 "pickle.loads() takes exactly one positional "
6162 "argument (%zd given)", Py_SIZE(args));
6163 return NULL;
6164 }
6165
6166 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6167 &input, &fix_imports, &encoding, &errors))
6168 return NULL;
6169
6170 unpickler = _Unpickler_New();
6171 if (unpickler == NULL)
6172 return NULL;
6173
6174 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6175 goto error;
6176
6177 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6178 goto error;
6179
6180 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6181 if (unpickler->fix_imports == -1)
6182 goto error;
6183
6184 result = load(unpickler);
6185 Py_DECREF(unpickler);
6186 return result;
6187
6188 error:
6189 Py_XDECREF(unpickler);
6190 return NULL;
6191}
6192
6193
6194static struct PyMethodDef pickle_methods[] = {
6195 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6196 pickle_dump_doc},
6197 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6198 pickle_dumps_doc},
6199 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6200 pickle_load_doc},
6201 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6202 pickle_loads_doc},
6203 {NULL, NULL} /* sentinel */
6204};
6205
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006206static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006207initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006208{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006209 PyObject *copyreg = NULL;
6210 PyObject *compat_pickle = NULL;
6211
6212 /* XXX: We should ensure that the types of the dictionaries imported are
6213 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6214 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006215
6216 copyreg = PyImport_ImportModule("copyreg");
6217 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006218 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006219 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6220 if (!dispatch_table)
6221 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006222 extension_registry = \
6223 PyObject_GetAttrString(copyreg, "_extension_registry");
6224 if (!extension_registry)
6225 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006226 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6227 if (!inverted_registry)
6228 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006229 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6230 if (!extension_cache)
6231 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006232 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006233
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006234 /* Load the 2.x -> 3.x stdlib module mapping tables */
6235 compat_pickle = PyImport_ImportModule("_compat_pickle");
6236 if (!compat_pickle)
6237 goto error;
6238 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6239 if (!name_mapping_2to3)
6240 goto error;
6241 if (!PyDict_CheckExact(name_mapping_2to3)) {
6242 PyErr_Format(PyExc_RuntimeError,
6243 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6244 Py_TYPE(name_mapping_2to3)->tp_name);
6245 goto error;
6246 }
6247 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6248 "IMPORT_MAPPING");
6249 if (!import_mapping_2to3)
6250 goto error;
6251 if (!PyDict_CheckExact(import_mapping_2to3)) {
6252 PyErr_Format(PyExc_RuntimeError,
6253 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6254 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6255 goto error;
6256 }
6257 /* ... and the 3.x -> 2.x mapping tables */
6258 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6259 "REVERSE_NAME_MAPPING");
6260 if (!name_mapping_3to2)
6261 goto error;
6262 if (!PyDict_CheckExact(name_mapping_3to2)) {
6263 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006264 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006265 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6266 goto error;
6267 }
6268 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6269 "REVERSE_IMPORT_MAPPING");
6270 if (!import_mapping_3to2)
6271 goto error;
6272 if (!PyDict_CheckExact(import_mapping_3to2)) {
6273 PyErr_Format(PyExc_RuntimeError,
6274 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6275 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6276 goto error;
6277 }
6278 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006279
6280 empty_tuple = PyTuple_New(0);
6281 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006282 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006283 two_tuple = PyTuple_New(2);
6284 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006285 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006286 /* We use this temp container with no regard to refcounts, or to
6287 * keeping containees alive. Exempt from GC, because we don't
6288 * want anything looking at two_tuple() by magic.
6289 */
6290 PyObject_GC_UnTrack(two_tuple);
6291
6292 return 0;
6293
6294 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006295 Py_CLEAR(copyreg);
6296 Py_CLEAR(dispatch_table);
6297 Py_CLEAR(extension_registry);
6298 Py_CLEAR(inverted_registry);
6299 Py_CLEAR(extension_cache);
6300 Py_CLEAR(compat_pickle);
6301 Py_CLEAR(name_mapping_2to3);
6302 Py_CLEAR(import_mapping_2to3);
6303 Py_CLEAR(name_mapping_3to2);
6304 Py_CLEAR(import_mapping_3to2);
6305 Py_CLEAR(empty_tuple);
6306 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006307 return -1;
6308}
6309
6310static struct PyModuleDef _picklemodule = {
6311 PyModuleDef_HEAD_INIT,
6312 "_pickle",
6313 pickle_module_doc,
6314 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006315 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006316 NULL,
6317 NULL,
6318 NULL,
6319 NULL
6320};
6321
6322PyMODINIT_FUNC
6323PyInit__pickle(void)
6324{
6325 PyObject *m;
6326
6327 if (PyType_Ready(&Unpickler_Type) < 0)
6328 return NULL;
6329 if (PyType_Ready(&Pickler_Type) < 0)
6330 return NULL;
6331 if (PyType_Ready(&Pdata_Type) < 0)
6332 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006333 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6334 return NULL;
6335 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6336 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006337
6338 /* Create the module and add the functions. */
6339 m = PyModule_Create(&_picklemodule);
6340 if (m == NULL)
6341 return NULL;
6342
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006343 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006344 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6345 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006346 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006347 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6348 return NULL;
6349
6350 /* Initialize the exceptions. */
6351 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6352 if (PickleError == NULL)
6353 return NULL;
6354 PicklingError = \
6355 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6356 if (PicklingError == NULL)
6357 return NULL;
6358 UnpicklingError = \
6359 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6360 if (UnpicklingError == NULL)
6361 return NULL;
6362
6363 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6364 return NULL;
6365 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6366 return NULL;
6367 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6368 return NULL;
6369
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006370 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006371 return NULL;
6372
6373 return m;
6374}