blob: 5952c1bf7a319bf40b4428f9b7f9f7bb66325503 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions.  Bump these when new opcodes are added to the
   pickle protocol. */
enum {
    /* Largest protocol number the pickler accepts. */
    HIGHEST_PROTOCOL = 3,
    /* Protocol used when the caller does not specify one. */
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes: each one is a single byte in the pickle stream.
   These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py. */
enum opcode {
    /* Protocols 0 and 1. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the textual INT encodings used to pickle bools
 * before protocol 2.  Unpicklers that predate bools read them back as ints,
 * while newer unpicklers can tell that a bool was intended.  Protocol 2
 * added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE  "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE.
       Nothing will break if this gets out of synch with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size, in bytes, of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the write buffer size when pickling to a stream.
       Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Number of bytes prefetched when unpickling (disabled on unpeekable
       streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000156 int i = Py_SIZE(self);
157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
193Pdata_clear(Pdata *self, int clearto)
194{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000195 int i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
306 long me_value;
307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000331 int buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
372 int *marks; /* Mark stack, used for unpickling container
373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping void* to longs.  The pickler uses it for
 memoization.  Using a custom hashtable rather than a PyDict lets us skip a
 bunch of unnecessary object creation, which makes a huge performance
 difference. */

/* Smallest number of slots a memo table may have. */
#define MT_MINSIZE 8
/* Bits the perturbation value is shifted by after each probe. */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
559static long *
560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
570PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
/* The hashtable tuning macros are private to the memo table code above. */
#undef MT_MINSIZE
#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers for creating the argument tuple passed to functions.  The tuple
   is cached in (self)->arg, which has the performance advantage of calling
   PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* Put obj into slot 0 of the cached tuple, creating the tuple on first use
   and releasing whatever the slot held before.  The reference to obj is
   consumed: it is stored in the tuple or, when the tuple cannot be created,
   dropped, in which case (self)->arg stays NULL. */
#define ARG_TUP(self, obj) do {                              \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {   \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));    \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));         \
        }                                                    \
        else {                                               \
            Py_DECREF((obj));                                \
        }                                                    \
    } while (0)

/* Forget the cached tuple if its reference count is above one, i.e. if
   something besides (self)->arg still refers to it.  (self)->arg must not
   be NULL. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
703static int
704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
738 self->max_output_len = (self->output_len + n) * 2;
739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
1040
1041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001222 long *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
1224 int len;
1225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001235 len = (int)strlen(pdata);
1236 }
1237 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001238 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001239 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001240 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001241 len = 2;
1242 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001243 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001244 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001245 pdata[1] = (unsigned char)(*value & 0xff);
1246 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1247 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1248 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001249 len = 5;
1250 }
1251 else { /* unlikely */
1252 PyErr_SetString(PicklingError,
1253 "memo id too large for LONG_BINGET");
1254 return -1;
1255 }
1256 }
1257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001258 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001259 return -1;
1260
1261 return 0;
1262}
1263
1264/* Store an object in the memo, assign it a new unique ID based on the number
1265 of objects currently stored in the memo and generate a PUT opcode. */
1266static int
1267memo_put(PicklerObject *self, PyObject *obj)
1268{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001269 long x;
1270 char pdata[30];
1271 int len;
1272 int status = 0;
1273
1274 if (self->fast)
1275 return 0;
1276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001277 x = PyMemoTable_Size(self->memo);
1278 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001279 goto error;
1280
1281 if (!self->bin) {
1282 pdata[0] = PUT;
1283 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
1284 len = strlen(pdata);
1285 }
1286 else {
1287 if (x < 256) {
1288 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001289 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001290 len = 2;
1291 }
1292 else if (x <= 0xffffffffL) {
1293 pdata[0] = LONG_BINPUT;
1294 pdata[1] = (unsigned char)(x & 0xff);
1295 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1296 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1297 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1298 len = 5;
1299 }
1300 else { /* unlikely */
1301 PyErr_SetString(PicklingError,
1302 "memo id too large for LONG_BINPUT");
1303 return -1;
1304 }
1305 }
1306
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001307 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001308 goto error;
1309
1310 if (0) {
1311 error:
1312 status = -1;
1313 }
1314
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001315 return status;
1316}
1317
1318static PyObject *
1319whichmodule(PyObject *global, PyObject *global_name)
1320{
1321 Py_ssize_t i, j;
1322 static PyObject *module_str = NULL;
1323 static PyObject *main_str = NULL;
1324 PyObject *module_name;
1325 PyObject *modules_dict;
1326 PyObject *module;
1327 PyObject *obj;
1328
1329 if (module_str == NULL) {
1330 module_str = PyUnicode_InternFromString("__module__");
1331 if (module_str == NULL)
1332 return NULL;
1333 main_str = PyUnicode_InternFromString("__main__");
1334 if (main_str == NULL)
1335 return NULL;
1336 }
1337
1338 module_name = PyObject_GetAttr(global, module_str);
1339
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001340 /* In some rare cases (e.g., bound methods of extension types),
1341 __module__ can be None. If it is so, then search sys.modules
1342 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001343 if (module_name == Py_None) {
1344 Py_DECREF(module_name);
1345 goto search;
1346 }
1347
1348 if (module_name) {
1349 return module_name;
1350 }
1351 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1352 PyErr_Clear();
1353 else
1354 return NULL;
1355
1356 search:
1357 modules_dict = PySys_GetObject("modules");
1358 if (modules_dict == NULL)
1359 return NULL;
1360
1361 i = 0;
1362 module_name = NULL;
1363 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001364 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001365 continue;
1366
1367 obj = PyObject_GetAttr(module, global_name);
1368 if (obj == NULL) {
1369 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1370 PyErr_Clear();
1371 else
1372 return NULL;
1373 continue;
1374 }
1375
1376 if (obj != global) {
1377 Py_DECREF(obj);
1378 continue;
1379 }
1380
1381 Py_DECREF(obj);
1382 break;
1383 }
1384
1385 /* If no module is found, use __main__. */
1386 if (!j) {
1387 module_name = main_str;
1388 }
1389
1390 Py_INCREF(module_name);
1391 return module_name;
1392}
1393
1394/* fast_save_enter() and fast_save_leave() are guards against recursive
1395 objects when Pickler is used with the "fast mode" (i.e., with object
1396 memoization disabled). If the nesting of a list or dict object exceed
1397 FAST_NESTING_LIMIT, these guards will start keeping an internal
1398 reference to the seen list or dict objects and check whether these objects
1399 are recursive. These are not strictly necessary, since save() has a
1400 hard-coded recursion limit, but they give a nicer error message than the
1401 typical RuntimeError. */
1402static int
1403fast_save_enter(PicklerObject *self, PyObject *obj)
1404{
1405 /* if fast_nesting < 0, we're doing an error exit. */
1406 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1407 PyObject *key = NULL;
1408 if (self->fast_memo == NULL) {
1409 self->fast_memo = PyDict_New();
1410 if (self->fast_memo == NULL) {
1411 self->fast_nesting = -1;
1412 return 0;
1413 }
1414 }
1415 key = PyLong_FromVoidPtr(obj);
1416 if (key == NULL)
1417 return 0;
1418 if (PyDict_GetItem(self->fast_memo, key)) {
1419 Py_DECREF(key);
1420 PyErr_Format(PyExc_ValueError,
1421 "fast mode: can't pickle cyclic objects "
1422 "including object type %.200s at %p",
1423 obj->ob_type->tp_name, obj);
1424 self->fast_nesting = -1;
1425 return 0;
1426 }
1427 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1428 Py_DECREF(key);
1429 self->fast_nesting = -1;
1430 return 0;
1431 }
1432 Py_DECREF(key);
1433 }
1434 return 1;
1435}
1436
1437static int
1438fast_save_leave(PicklerObject *self, PyObject *obj)
1439{
1440 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1441 PyObject *key = PyLong_FromVoidPtr(obj);
1442 if (key == NULL)
1443 return 0;
1444 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1445 Py_DECREF(key);
1446 return 0;
1447 }
1448 Py_DECREF(key);
1449 }
1450 return 1;
1451}
1452
1453static int
1454save_none(PicklerObject *self, PyObject *obj)
1455{
1456 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001457 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001458 return -1;
1459
1460 return 0;
1461}
1462
1463static int
1464save_bool(PicklerObject *self, PyObject *obj)
1465{
1466 static const char *buf[2] = { FALSE, TRUE };
1467 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1468 int p = (obj == Py_True);
1469
1470 if (self->proto >= 2) {
1471 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001472 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001473 return -1;
1474 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001475 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001476 return -1;
1477
1478 return 0;
1479}
1480
1481static int
1482save_int(PicklerObject *self, long x)
1483{
1484 char pdata[32];
1485 int len = 0;
1486
1487 if (!self->bin
1488#if SIZEOF_LONG > 4
1489 || x > 0x7fffffffL || x < -0x80000000L
1490#endif
1491 ) {
1492 /* Text-mode pickle, or long too big to fit in the 4-byte
1493 * signed BININT format: store as a string.
1494 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001495 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1496 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001497 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001498 return -1;
1499 }
1500 else {
1501 /* Binary pickle and x fits in a signed 4-byte int. */
1502 pdata[1] = (unsigned char)(x & 0xff);
1503 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1504 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1505 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1506
1507 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1508 if (pdata[2] == 0) {
1509 pdata[0] = BININT1;
1510 len = 2;
1511 }
1512 else {
1513 pdata[0] = BININT2;
1514 len = 3;
1515 }
1516 }
1517 else {
1518 pdata[0] = BININT;
1519 len = 5;
1520 }
1521
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001522 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001523 return -1;
1524 }
1525
1526 return 0;
1527}
1528
1529static int
1530save_long(PicklerObject *self, PyObject *obj)
1531{
1532 PyObject *repr = NULL;
1533 Py_ssize_t size;
1534 long val = PyLong_AsLong(obj);
1535 int status = 0;
1536
1537 const char long_op = LONG;
1538
1539 if (val == -1 && PyErr_Occurred()) {
1540 /* out of range for int pickling */
1541 PyErr_Clear();
1542 }
Antoine Pitrou3c7e9282011-08-13 20:15:19 +02001543 else if (val <= 0x7fffffffL && val >= -0x80000000L)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001544 return save_int(self, val);
1545
1546 if (self->proto >= 2) {
1547 /* Linear-time pickling. */
1548 size_t nbits;
1549 size_t nbytes;
1550 unsigned char *pdata;
1551 char header[5];
1552 int i;
1553 int sign = _PyLong_Sign(obj);
1554
1555 if (sign == 0) {
1556 header[0] = LONG1;
1557 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001558 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001559 goto error;
1560 return 0;
1561 }
1562 nbits = _PyLong_NumBits(obj);
1563 if (nbits == (size_t)-1 && PyErr_Occurred())
1564 goto error;
1565 /* How many bytes do we need? There are nbits >> 3 full
1566 * bytes of data, and nbits & 7 leftover bits. If there
1567 * are any leftover bits, then we clearly need another
1568 * byte. Wnat's not so obvious is that we *probably*
1569 * need another byte even if there aren't any leftovers:
1570 * the most-significant bit of the most-significant byte
1571 * acts like a sign bit, and it's usually got a sense
1572 * opposite of the one we need. The exception is longs
1573 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1574 * its own 256's-complement, so has the right sign bit
1575 * even without the extra byte. That's a pain to check
1576 * for in advance, though, so we always grab an extra
1577 * byte at the start, and cut it back later if possible.
1578 */
1579 nbytes = (nbits >> 3) + 1;
1580 if (nbytes > INT_MAX) {
1581 PyErr_SetString(PyExc_OverflowError,
1582 "long too large to pickle");
1583 goto error;
1584 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001585 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001586 if (repr == NULL)
1587 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001588 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001589 i = _PyLong_AsByteArray((PyLongObject *)obj,
1590 pdata, nbytes,
1591 1 /* little endian */ , 1 /* signed */ );
1592 if (i < 0)
1593 goto error;
1594 /* If the long is negative, this may be a byte more than
1595 * needed. This is so iff the MSB is all redundant sign
1596 * bits.
1597 */
1598 if (sign < 0 &&
1599 nbytes > 1 &&
1600 pdata[nbytes - 1] == 0xff &&
1601 (pdata[nbytes - 2] & 0x80) != 0) {
1602 nbytes--;
1603 }
1604
1605 if (nbytes < 256) {
1606 header[0] = LONG1;
1607 header[1] = (unsigned char)nbytes;
1608 size = 2;
1609 }
1610 else {
1611 header[0] = LONG4;
1612 size = (int)nbytes;
1613 for (i = 1; i < 5; i++) {
1614 header[i] = (unsigned char)(size & 0xff);
1615 size >>= 8;
1616 }
1617 size = 5;
1618 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001619 if (_Pickler_Write(self, header, size) < 0 ||
1620 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001621 goto error;
1622 }
1623 else {
1624 char *string;
1625
Mark Dickinson8dd05142009-01-20 20:43:58 +00001626 /* proto < 2: write the repr and newline. This is quadratic-time (in
1627 the number of digits), in both directions. We add a trailing 'L'
1628 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001629
1630 repr = PyObject_Repr(obj);
1631 if (repr == NULL)
1632 goto error;
1633
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001634 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001635 if (string == NULL)
1636 goto error;
1637
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001638 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1639 _Pickler_Write(self, string, size) < 0 ||
1640 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001641 goto error;
1642 }
1643
1644 if (0) {
1645 error:
1646 status = -1;
1647 }
1648 Py_XDECREF(repr);
1649
1650 return status;
1651}
1652
1653static int
1654save_float(PicklerObject *self, PyObject *obj)
1655{
1656 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1657
1658 if (self->bin) {
1659 char pdata[9];
1660 pdata[0] = BINFLOAT;
1661 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1662 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001663 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001664 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001665 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001666 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001667 int result = -1;
1668 char *buf = NULL;
1669 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001670
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001671 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001672 goto done;
1673
Mark Dickinson3e09f432009-04-17 08:41:23 +00001674 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001675 if (!buf) {
1676 PyErr_NoMemory();
1677 goto done;
1678 }
1679
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001680 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001681 goto done;
1682
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001683 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001684 goto done;
1685
1686 result = 0;
1687done:
1688 PyMem_Free(buf);
1689 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001690 }
1691
1692 return 0;
1693}
1694
1695static int
1696save_bytes(PicklerObject *self, PyObject *obj)
1697{
1698 if (self->proto < 3) {
1699 /* Older pickle protocols do not have an opcode for pickling bytes
1700 objects. Therefore, we need to fake the copy protocol (i.e.,
1701 the __reduce__ method) to permit bytes object unpickling. */
1702 PyObject *reduce_value = NULL;
1703 PyObject *bytelist = NULL;
1704 int status;
1705
1706 bytelist = PySequence_List(obj);
1707 if (bytelist == NULL)
1708 return -1;
1709
1710 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1711 bytelist);
1712 if (reduce_value == NULL) {
1713 Py_DECREF(bytelist);
1714 return -1;
1715 }
1716
1717 /* save_reduce() will memoize the object automatically. */
1718 status = save_reduce(self, reduce_value, obj);
1719 Py_DECREF(reduce_value);
1720 Py_DECREF(bytelist);
1721 return status;
1722 }
1723 else {
1724 Py_ssize_t size;
1725 char header[5];
1726 int len;
1727
1728 size = PyBytes_Size(obj);
1729 if (size < 0)
1730 return -1;
1731
1732 if (size < 256) {
1733 header[0] = SHORT_BINBYTES;
1734 header[1] = (unsigned char)size;
1735 len = 2;
1736 }
1737 else if (size <= 0xffffffffL) {
1738 header[0] = BINBYTES;
1739 header[1] = (unsigned char)(size & 0xff);
1740 header[2] = (unsigned char)((size >> 8) & 0xff);
1741 header[3] = (unsigned char)((size >> 16) & 0xff);
1742 header[4] = (unsigned char)((size >> 24) & 0xff);
1743 len = 5;
1744 }
1745 else {
1746 return -1; /* string too large */
1747 }
1748
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001749 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001750 return -1;
1751
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001752 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 return -1;
1754
1755 if (memo_put(self, obj) < 0)
1756 return -1;
1757
1758 return 0;
1759 }
1760}
1761
1762/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1763 backslash and newline characters to \uXXXX escapes. */
1764static PyObject *
1765raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1766{
1767 PyObject *repr, *result;
1768 char *p;
1769 char *q;
1770
1771 static const char *hexdigits = "0123456789abcdef";
1772
1773#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001774 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001775#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001776 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001777#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001778
1779 if (size > PY_SSIZE_T_MAX / expandsize)
1780 return PyErr_NoMemory();
1781
1782 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001783 if (repr == NULL)
1784 return NULL;
1785 if (size == 0)
1786 goto done;
1787
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001788 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001789 while (size-- > 0) {
1790 Py_UNICODE ch = *s++;
1791#ifdef Py_UNICODE_WIDE
1792 /* Map 32-bit characters to '\Uxxxxxxxx' */
1793 if (ch >= 0x10000) {
1794 *p++ = '\\';
1795 *p++ = 'U';
1796 *p++ = hexdigits[(ch >> 28) & 0xf];
1797 *p++ = hexdigits[(ch >> 24) & 0xf];
1798 *p++ = hexdigits[(ch >> 20) & 0xf];
1799 *p++ = hexdigits[(ch >> 16) & 0xf];
1800 *p++ = hexdigits[(ch >> 12) & 0xf];
1801 *p++ = hexdigits[(ch >> 8) & 0xf];
1802 *p++ = hexdigits[(ch >> 4) & 0xf];
1803 *p++ = hexdigits[ch & 15];
1804 }
1805 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001806#else
1807 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1808 if (ch >= 0xD800 && ch < 0xDC00) {
1809 Py_UNICODE ch2;
1810 Py_UCS4 ucs;
1811
1812 ch2 = *s++;
1813 size--;
1814 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1815 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1816 *p++ = '\\';
1817 *p++ = 'U';
1818 *p++ = hexdigits[(ucs >> 28) & 0xf];
1819 *p++ = hexdigits[(ucs >> 24) & 0xf];
1820 *p++ = hexdigits[(ucs >> 20) & 0xf];
1821 *p++ = hexdigits[(ucs >> 16) & 0xf];
1822 *p++ = hexdigits[(ucs >> 12) & 0xf];
1823 *p++ = hexdigits[(ucs >> 8) & 0xf];
1824 *p++ = hexdigits[(ucs >> 4) & 0xf];
1825 *p++ = hexdigits[ucs & 0xf];
1826 continue;
1827 }
1828 /* Fall through: isolated surrogates are copied as-is */
1829 s--;
1830 size++;
1831 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001832#endif
1833 /* Map 16-bit characters to '\uxxxx' */
1834 if (ch >= 256 || ch == '\\' || ch == '\n') {
1835 *p++ = '\\';
1836 *p++ = 'u';
1837 *p++ = hexdigits[(ch >> 12) & 0xf];
1838 *p++ = hexdigits[(ch >> 8) & 0xf];
1839 *p++ = hexdigits[(ch >> 4) & 0xf];
1840 *p++ = hexdigits[ch & 15];
1841 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001842 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001843 else
1844 *p++ = (char) ch;
1845 }
1846 size = p - q;
1847
1848 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001849 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001850 Py_DECREF(repr);
1851 return result;
1852}
1853
1854static int
1855save_unicode(PicklerObject *self, PyObject *obj)
1856{
1857 Py_ssize_t size;
1858 PyObject *encoded = NULL;
1859
1860 if (self->bin) {
1861 char pdata[5];
1862
Victor Stinner485fb562010-04-13 11:07:24 +00001863 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1864 PyUnicode_GET_SIZE(obj),
1865 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001866 if (encoded == NULL)
1867 goto error;
1868
1869 size = PyBytes_GET_SIZE(encoded);
1870 if (size < 0 || size > 0xffffffffL)
1871 goto error; /* string too large */
1872
1873 pdata[0] = BINUNICODE;
1874 pdata[1] = (unsigned char)(size & 0xff);
1875 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1876 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1877 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1878
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001879 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001880 goto error;
1881
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001882 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001883 goto error;
1884 }
1885 else {
1886 const char unicode_op = UNICODE;
1887
1888 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1889 PyUnicode_GET_SIZE(obj));
1890 if (encoded == NULL)
1891 goto error;
1892
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001893 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001894 goto error;
1895
1896 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001897 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001898 goto error;
1899
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001900 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001901 goto error;
1902 }
1903 if (memo_put(self, obj) < 0)
1904 goto error;
1905
1906 Py_DECREF(encoded);
1907 return 0;
1908
1909 error:
1910 Py_XDECREF(encoded);
1911 return -1;
1912}
1913
1914/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1915static int
1916store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1917{
1918 int i;
1919
1920 assert(PyTuple_Size(t) == len);
1921
1922 for (i = 0; i < len; i++) {
1923 PyObject *element = PyTuple_GET_ITEM(t, i);
1924
1925 if (element == NULL)
1926 return -1;
1927 if (save(self, element, 0) < 0)
1928 return -1;
1929 }
1930
1931 return 0;
1932}
1933
1934/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1935 * used across protocols to minimize the space needed to pickle them.
1936 * Tuples are also the only builtin immutable type that can be recursive
1937 * (a tuple can be reached from itself), and that requires some subtle
1938 * magic so that it works in all cases. IOW, this is a long routine.
1939 */
1940static int
1941save_tuple(PicklerObject *self, PyObject *obj)
1942{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001943 int len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001944
1945 const char mark_op = MARK;
1946 const char tuple_op = TUPLE;
1947 const char pop_op = POP;
1948 const char pop_mark_op = POP_MARK;
1949 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1950
1951 if ((len = PyTuple_Size(obj)) < 0)
1952 return -1;
1953
1954 if (len == 0) {
1955 char pdata[2];
1956
1957 if (self->proto) {
1958 pdata[0] = EMPTY_TUPLE;
1959 len = 1;
1960 }
1961 else {
1962 pdata[0] = MARK;
1963 pdata[1] = TUPLE;
1964 len = 2;
1965 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001966 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001967 return -1;
1968 return 0;
1969 }
1970
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001971 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972 * saving the tuple elements, the tuple must be recursive, in
1973 * which case we'll pop everything we put on the stack, and fetch
1974 * its value from the memo.
1975 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001976 if (len <= 3 && self->proto >= 2) {
1977 /* Use TUPLE{1,2,3} opcodes. */
1978 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001979 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001980
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001981 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001982 /* pop the len elements */
1983 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001984 if (_Pickler_Write(self, &pop_op, 1) < 0)
1985 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001986 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001987 if (memo_get(self, obj) < 0)
1988 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001989
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990 return 0;
1991 }
1992 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001993 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1994 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001995 }
1996 goto memoize;
1997 }
1998
1999 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2000 * Generate MARK e1 e2 ... TUPLE
2001 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002002 if (_Pickler_Write(self, &mark_op, 1) < 0)
2003 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004
2005 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002006 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002007
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002008 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002009 /* pop the stack stuff we pushed */
2010 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002011 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2012 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002013 }
2014 else {
2015 /* Note that we pop one more than len, to remove
2016 * the MARK too.
2017 */
2018 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002019 if (_Pickler_Write(self, &pop_op, 1) < 0)
2020 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021 }
2022 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002023 if (memo_get(self, obj) < 0)
2024 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002025
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026 return 0;
2027 }
2028 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002029 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2030 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002031 }
2032
2033 memoize:
2034 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002035 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002036
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002037 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038}
2039
2040/* iter is an iterator giving items, and we batch up chunks of
2041 * MARK item item ... item APPENDS
2042 * opcode sequences. Calling code should have arranged to first create an
2043 * empty list, or list-like object, for the APPENDS to operate on.
2044 * Returns 0 on success, <0 on error.
2045 */
2046static int
2047batch_list(PicklerObject *self, PyObject *iter)
2048{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002049 PyObject *obj = NULL;
2050 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002051 int i, n;
2052
2053 const char mark_op = MARK;
2054 const char append_op = APPEND;
2055 const char appends_op = APPENDS;
2056
2057 assert(iter != NULL);
2058
2059 /* XXX: I think this function could be made faster by avoiding the
2060 iterator interface and fetching objects directly from list using
2061 PyList_GET_ITEM.
2062 */
2063
2064 if (self->proto == 0) {
2065 /* APPENDS isn't available; do one at a time. */
2066 for (;;) {
2067 obj = PyIter_Next(iter);
2068 if (obj == NULL) {
2069 if (PyErr_Occurred())
2070 return -1;
2071 break;
2072 }
2073 i = save(self, obj, 0);
2074 Py_DECREF(obj);
2075 if (i < 0)
2076 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002077 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002078 return -1;
2079 }
2080 return 0;
2081 }
2082
2083 /* proto > 0: write in batches of BATCHSIZE. */
2084 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002085 /* Get first item */
2086 firstitem = PyIter_Next(iter);
2087 if (firstitem == NULL) {
2088 if (PyErr_Occurred())
2089 goto error;
2090
2091 /* nothing more to add */
2092 break;
2093 }
2094
2095 /* Try to get a second item */
2096 obj = PyIter_Next(iter);
2097 if (obj == NULL) {
2098 if (PyErr_Occurred())
2099 goto error;
2100
2101 /* Only one item to write */
2102 if (save(self, firstitem, 0) < 0)
2103 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002104 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002105 goto error;
2106 Py_CLEAR(firstitem);
2107 break;
2108 }
2109
2110 /* More than one item to write */
2111
2112 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002113 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002114 goto error;
2115
2116 if (save(self, firstitem, 0) < 0)
2117 goto error;
2118 Py_CLEAR(firstitem);
2119 n = 1;
2120
2121 /* Fetch and save up to BATCHSIZE items */
2122 while (obj) {
2123 if (save(self, obj, 0) < 0)
2124 goto error;
2125 Py_CLEAR(obj);
2126 n += 1;
2127
2128 if (n == BATCHSIZE)
2129 break;
2130
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002131 obj = PyIter_Next(iter);
2132 if (obj == NULL) {
2133 if (PyErr_Occurred())
2134 goto error;
2135 break;
2136 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002137 }
2138
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002139 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002140 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002141
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002142 } while (n == BATCHSIZE);
2143 return 0;
2144
2145 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002146 Py_XDECREF(firstitem);
2147 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002148 return -1;
2149}
2150
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002151/* This is a variant of batch_list() above, specialized for lists (with no
2152 * support for list subclasses). Like batch_list(), we batch up chunks of
2153 * MARK item item ... item APPENDS
2154 * opcode sequences. Calling code should have arranged to first create an
2155 * empty list, or list-like object, for the APPENDS to operate on.
2156 * Returns 0 on success, -1 on error.
2157 *
2158 * This version is considerably faster than batch_list(), if less general.
2159 *
2160 * Note that this only works for protocols > 0.
2161 */
2162static int
2163batch_list_exact(PicklerObject *self, PyObject *obj)
2164{
2165 PyObject *item = NULL;
2166 int this_batch, total;
2167
2168 const char append_op = APPEND;
2169 const char appends_op = APPENDS;
2170 const char mark_op = MARK;
2171
2172 assert(obj != NULL);
2173 assert(self->proto > 0);
2174 assert(PyList_CheckExact(obj));
2175
2176 if (PyList_GET_SIZE(obj) == 1) {
2177 item = PyList_GET_ITEM(obj, 0);
2178 if (save(self, item, 0) < 0)
2179 return -1;
2180 if (_Pickler_Write(self, &append_op, 1) < 0)
2181 return -1;
2182 return 0;
2183 }
2184
2185 /* Write in batches of BATCHSIZE. */
2186 total = 0;
2187 do {
2188 this_batch = 0;
2189 if (_Pickler_Write(self, &mark_op, 1) < 0)
2190 return -1;
2191 while (total < PyList_GET_SIZE(obj)) {
2192 item = PyList_GET_ITEM(obj, total);
2193 if (save(self, item, 0) < 0)
2194 return -1;
2195 total++;
2196 if (++this_batch == BATCHSIZE)
2197 break;
2198 }
2199 if (_Pickler_Write(self, &appends_op, 1) < 0)
2200 return -1;
2201
2202 } while (total < PyList_GET_SIZE(obj));
2203
2204 return 0;
2205}
2206
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002207static int
2208save_list(PicklerObject *self, PyObject *obj)
2209{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002210 char header[3];
2211 int len;
2212 int status = 0;
2213
2214 if (self->fast && !fast_save_enter(self, obj))
2215 goto error;
2216
2217 /* Create an empty list. */
2218 if (self->bin) {
2219 header[0] = EMPTY_LIST;
2220 len = 1;
2221 }
2222 else {
2223 header[0] = MARK;
2224 header[1] = LIST;
2225 len = 2;
2226 }
2227
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002228 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002229 goto error;
2230
2231 /* Get list length, and bow out early if empty. */
2232 if ((len = PyList_Size(obj)) < 0)
2233 goto error;
2234
2235 if (memo_put(self, obj) < 0)
2236 goto error;
2237
2238 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002239 /* Materialize the list elements. */
2240 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002241 if (Py_EnterRecursiveCall(" while pickling an object"))
2242 goto error;
2243 status = batch_list_exact(self, obj);
2244 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002245 } else {
2246 PyObject *iter = PyObject_GetIter(obj);
2247 if (iter == NULL)
2248 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002249
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002250 if (Py_EnterRecursiveCall(" while pickling an object")) {
2251 Py_DECREF(iter);
2252 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002253 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002254 status = batch_list(self, iter);
2255 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002256 Py_DECREF(iter);
2257 }
2258 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002259 if (0) {
2260 error:
2261 status = -1;
2262 }
2263
2264 if (self->fast && !fast_save_leave(self, obj))
2265 status = -1;
2266
2267 return status;
2268}
2269
2270/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2271 * MARK key value ... key value SETITEMS
2272 * opcode sequences. Calling code should have arranged to first create an
2273 * empty dict, or dict-like object, for the SETITEMS to operate on.
2274 * Returns 0 on success, <0 on error.
2275 *
2276 * This is very much like batch_list(). The difference between saving
2277 * elements directly, and picking apart two-tuples, is so long-winded at
2278 * the C level, though, that attempts to combine these routines were too
2279 * ugly to bear.
2280 */
2281static int
2282batch_dict(PicklerObject *self, PyObject *iter)
2283{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002284 PyObject *obj = NULL;
2285 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002286 int i, n;
2287
2288 const char mark_op = MARK;
2289 const char setitem_op = SETITEM;
2290 const char setitems_op = SETITEMS;
2291
2292 assert(iter != NULL);
2293
2294 if (self->proto == 0) {
2295 /* SETITEMS isn't available; do one at a time. */
2296 for (;;) {
2297 obj = PyIter_Next(iter);
2298 if (obj == NULL) {
2299 if (PyErr_Occurred())
2300 return -1;
2301 break;
2302 }
2303 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2304 PyErr_SetString(PyExc_TypeError, "dict items "
2305 "iterator must return 2-tuples");
2306 return -1;
2307 }
2308 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2309 if (i >= 0)
2310 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2311 Py_DECREF(obj);
2312 if (i < 0)
2313 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002314 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002315 return -1;
2316 }
2317 return 0;
2318 }
2319
2320 /* proto > 0: write in batches of BATCHSIZE. */
2321 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002322 /* Get first item */
2323 firstitem = PyIter_Next(iter);
2324 if (firstitem == NULL) {
2325 if (PyErr_Occurred())
2326 goto error;
2327
2328 /* nothing more to add */
2329 break;
2330 }
2331 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2332 PyErr_SetString(PyExc_TypeError, "dict items "
2333 "iterator must return 2-tuples");
2334 goto error;
2335 }
2336
2337 /* Try to get a second item */
2338 obj = PyIter_Next(iter);
2339 if (obj == NULL) {
2340 if (PyErr_Occurred())
2341 goto error;
2342
2343 /* Only one item to write */
2344 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2345 goto error;
2346 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2347 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002348 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002349 goto error;
2350 Py_CLEAR(firstitem);
2351 break;
2352 }
2353
2354 /* More than one item to write */
2355
2356 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002357 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002358 goto error;
2359
2360 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2361 goto error;
2362 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2363 goto error;
2364 Py_CLEAR(firstitem);
2365 n = 1;
2366
2367 /* Fetch and save up to BATCHSIZE items */
2368 while (obj) {
2369 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2370 PyErr_SetString(PyExc_TypeError, "dict items "
2371 "iterator must return 2-tuples");
2372 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002373 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002374 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2375 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2376 goto error;
2377 Py_CLEAR(obj);
2378 n += 1;
2379
2380 if (n == BATCHSIZE)
2381 break;
2382
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002383 obj = PyIter_Next(iter);
2384 if (obj == NULL) {
2385 if (PyErr_Occurred())
2386 goto error;
2387 break;
2388 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002389 }
2390
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002391 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002392 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002393
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002394 } while (n == BATCHSIZE);
2395 return 0;
2396
2397 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002398 Py_XDECREF(firstitem);
2399 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002400 return -1;
2401}
2402
Collin Winter5c9b02d2009-05-25 05:43:30 +00002403/* This is a variant of batch_dict() above that specializes for dicts, with no
2404 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2405 * MARK key value ... key value SETITEMS
2406 * opcode sequences. Calling code should have arranged to first create an
2407 * empty dict, or dict-like object, for the SETITEMS to operate on.
2408 * Returns 0 on success, -1 on error.
2409 *
2410 * Note that this currently doesn't work for protocol 0.
2411 */
2412static int
2413batch_dict_exact(PicklerObject *self, PyObject *obj)
2414{
2415 PyObject *key = NULL, *value = NULL;
2416 int i;
2417 Py_ssize_t dict_size, ppos = 0;
2418
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002419 const char mark_op = MARK;
2420 const char setitem_op = SETITEM;
2421 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002422
2423 assert(obj != NULL);
2424 assert(self->proto > 0);
2425
2426 dict_size = PyDict_Size(obj);
2427
2428 /* Special-case len(d) == 1 to save space. */
2429 if (dict_size == 1) {
2430 PyDict_Next(obj, &ppos, &key, &value);
2431 if (save(self, key, 0) < 0)
2432 return -1;
2433 if (save(self, value, 0) < 0)
2434 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002435 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002436 return -1;
2437 return 0;
2438 }
2439
2440 /* Write in batches of BATCHSIZE. */
2441 do {
2442 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002443 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002444 return -1;
2445 while (PyDict_Next(obj, &ppos, &key, &value)) {
2446 if (save(self, key, 0) < 0)
2447 return -1;
2448 if (save(self, value, 0) < 0)
2449 return -1;
2450 if (++i == BATCHSIZE)
2451 break;
2452 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002453 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002454 return -1;
2455 if (PyDict_Size(obj) != dict_size) {
2456 PyErr_Format(
2457 PyExc_RuntimeError,
2458 "dictionary changed size during iteration");
2459 return -1;
2460 }
2461
2462 } while (i == BATCHSIZE);
2463 return 0;
2464}
2465
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002466static int
2467save_dict(PicklerObject *self, PyObject *obj)
2468{
2469 PyObject *items, *iter;
2470 char header[3];
2471 int len;
2472 int status = 0;
2473
2474 if (self->fast && !fast_save_enter(self, obj))
2475 goto error;
2476
2477 /* Create an empty dict. */
2478 if (self->bin) {
2479 header[0] = EMPTY_DICT;
2480 len = 1;
2481 }
2482 else {
2483 header[0] = MARK;
2484 header[1] = DICT;
2485 len = 2;
2486 }
2487
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002488 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002489 goto error;
2490
2491 /* Get dict size, and bow out early if empty. */
2492 if ((len = PyDict_Size(obj)) < 0)
2493 goto error;
2494
2495 if (memo_put(self, obj) < 0)
2496 goto error;
2497
2498 if (len != 0) {
2499 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002500 if (PyDict_CheckExact(obj) && self->proto > 0) {
2501 /* We can take certain shortcuts if we know this is a dict and
2502 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002503 if (Py_EnterRecursiveCall(" while pickling an object"))
2504 goto error;
2505 status = batch_dict_exact(self, obj);
2506 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002507 } else {
2508 items = PyObject_CallMethod(obj, "items", "()");
2509 if (items == NULL)
2510 goto error;
2511 iter = PyObject_GetIter(items);
2512 Py_DECREF(items);
2513 if (iter == NULL)
2514 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002515 if (Py_EnterRecursiveCall(" while pickling an object")) {
2516 Py_DECREF(iter);
2517 goto error;
2518 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002519 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002520 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002521 Py_DECREF(iter);
2522 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002523 }
2524
2525 if (0) {
2526 error:
2527 status = -1;
2528 }
2529
2530 if (self->fast && !fast_save_leave(self, obj))
2531 status = -1;
2532
2533 return status;
2534}
2535
2536static int
2537save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2538{
2539 static PyObject *name_str = NULL;
2540 PyObject *global_name = NULL;
2541 PyObject *module_name = NULL;
2542 PyObject *module = NULL;
2543 PyObject *cls;
2544 int status = 0;
2545
2546 const char global_op = GLOBAL;
2547
2548 if (name_str == NULL) {
2549 name_str = PyUnicode_InternFromString("__name__");
2550 if (name_str == NULL)
2551 goto error;
2552 }
2553
2554 if (name) {
2555 global_name = name;
2556 Py_INCREF(global_name);
2557 }
2558 else {
2559 global_name = PyObject_GetAttr(obj, name_str);
2560 if (global_name == NULL)
2561 goto error;
2562 }
2563
2564 module_name = whichmodule(obj, global_name);
2565 if (module_name == NULL)
2566 goto error;
2567
2568 /* XXX: Change to use the import C API directly with level=0 to disallow
2569 relative imports.
2570
2571 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2572 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2573 custom import functions (IMHO, this would be a nice security
2574 feature). The import C API would need to be extended to support the
2575 extra parameters of __import__ to fix that. */
2576 module = PyImport_Import(module_name);
2577 if (module == NULL) {
2578 PyErr_Format(PicklingError,
2579 "Can't pickle %R: import of module %R failed",
2580 obj, module_name);
2581 goto error;
2582 }
2583 cls = PyObject_GetAttr(module, global_name);
2584 if (cls == NULL) {
2585 PyErr_Format(PicklingError,
2586 "Can't pickle %R: attribute lookup %S.%S failed",
2587 obj, module_name, global_name);
2588 goto error;
2589 }
2590 if (cls != obj) {
2591 Py_DECREF(cls);
2592 PyErr_Format(PicklingError,
2593 "Can't pickle %R: it's not the same object as %S.%S",
2594 obj, module_name, global_name);
2595 goto error;
2596 }
2597 Py_DECREF(cls);
2598
2599 if (self->proto >= 2) {
2600 /* See whether this is in the extension registry, and if
2601 * so generate an EXT opcode.
2602 */
2603 PyObject *code_obj; /* extension code as Python object */
2604 long code; /* extension code as C value */
2605 char pdata[5];
2606 int n;
2607
2608 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2609 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2610 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2611 /* The object is not registered in the extension registry.
2612 This is the most likely code path. */
2613 if (code_obj == NULL)
2614 goto gen_global;
2615
2616 /* XXX: pickle.py doesn't check neither the type, nor the range
2617 of the value returned by the extension_registry. It should for
2618 consistency. */
2619
2620 /* Verify code_obj has the right type and value. */
2621 if (!PyLong_Check(code_obj)) {
2622 PyErr_Format(PicklingError,
2623 "Can't pickle %R: extension code %R isn't an integer",
2624 obj, code_obj);
2625 goto error;
2626 }
2627 code = PyLong_AS_LONG(code_obj);
2628 if (code <= 0 || code > 0x7fffffffL) {
2629 PyErr_Format(PicklingError,
2630 "Can't pickle %R: extension code %ld is out of range",
2631 obj, code);
2632 goto error;
2633 }
2634
2635 /* Generate an EXT opcode. */
2636 if (code <= 0xff) {
2637 pdata[0] = EXT1;
2638 pdata[1] = (unsigned char)code;
2639 n = 2;
2640 }
2641 else if (code <= 0xffff) {
2642 pdata[0] = EXT2;
2643 pdata[1] = (unsigned char)(code & 0xff);
2644 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2645 n = 3;
2646 }
2647 else {
2648 pdata[0] = EXT4;
2649 pdata[1] = (unsigned char)(code & 0xff);
2650 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2651 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2652 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2653 n = 5;
2654 }
2655
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002656 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002657 goto error;
2658 }
2659 else {
2660 /* Generate a normal global opcode if we are using a pickle
2661 protocol <= 2, or if the object is not registered in the
2662 extension registry. */
2663 PyObject *encoded;
2664 PyObject *(*unicode_encoder)(PyObject *);
2665
2666 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002667 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002668 goto error;
2669
2670 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2671 the module name and the global name using UTF-8. We do so only when
2672 we are using the pickle protocol newer than version 3. This is to
2673 ensure compatibility with older Unpickler running on Python 2.x. */
2674 if (self->proto >= 3) {
2675 unicode_encoder = PyUnicode_AsUTF8String;
2676 }
2677 else {
2678 unicode_encoder = PyUnicode_AsASCIIString;
2679 }
2680
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002681 /* For protocol < 3 and if the user didn't request against doing so,
2682 we convert module names to the old 2.x module names. */
2683 if (self->fix_imports) {
2684 PyObject *key;
2685 PyObject *item;
2686
2687 key = PyTuple_Pack(2, module_name, global_name);
2688 if (key == NULL)
2689 goto error;
2690 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2691 Py_DECREF(key);
2692 if (item) {
2693 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2694 PyErr_Format(PyExc_RuntimeError,
2695 "_compat_pickle.REVERSE_NAME_MAPPING values "
2696 "should be 2-tuples, not %.200s",
2697 Py_TYPE(item)->tp_name);
2698 goto error;
2699 }
2700 Py_CLEAR(module_name);
2701 Py_CLEAR(global_name);
2702 module_name = PyTuple_GET_ITEM(item, 0);
2703 global_name = PyTuple_GET_ITEM(item, 1);
2704 if (!PyUnicode_Check(module_name) ||
2705 !PyUnicode_Check(global_name)) {
2706 PyErr_Format(PyExc_RuntimeError,
2707 "_compat_pickle.REVERSE_NAME_MAPPING values "
2708 "should be pairs of str, not (%.200s, %.200s)",
2709 Py_TYPE(module_name)->tp_name,
2710 Py_TYPE(global_name)->tp_name);
2711 goto error;
2712 }
2713 Py_INCREF(module_name);
2714 Py_INCREF(global_name);
2715 }
2716 else if (PyErr_Occurred()) {
2717 goto error;
2718 }
2719
2720 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2721 if (item) {
2722 if (!PyUnicode_Check(item)) {
2723 PyErr_Format(PyExc_RuntimeError,
2724 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2725 "should be strings, not %.200s",
2726 Py_TYPE(item)->tp_name);
2727 goto error;
2728 }
2729 Py_CLEAR(module_name);
2730 module_name = item;
2731 Py_INCREF(module_name);
2732 }
2733 else if (PyErr_Occurred()) {
2734 goto error;
2735 }
2736 }
2737
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002738 /* Save the name of the module. */
2739 encoded = unicode_encoder(module_name);
2740 if (encoded == NULL) {
2741 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2742 PyErr_Format(PicklingError,
2743 "can't pickle module identifier '%S' using "
2744 "pickle protocol %i", module_name, self->proto);
2745 goto error;
2746 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002747 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002748 PyBytes_GET_SIZE(encoded)) < 0) {
2749 Py_DECREF(encoded);
2750 goto error;
2751 }
2752 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002753 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002754 goto error;
2755
2756 /* Save the name of the module. */
2757 encoded = unicode_encoder(global_name);
2758 if (encoded == NULL) {
2759 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2760 PyErr_Format(PicklingError,
2761 "can't pickle global identifier '%S' using "
2762 "pickle protocol %i", global_name, self->proto);
2763 goto error;
2764 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002765 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002766 PyBytes_GET_SIZE(encoded)) < 0) {
2767 Py_DECREF(encoded);
2768 goto error;
2769 }
2770 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002771 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002772 goto error;
2773
2774 /* Memoize the object. */
2775 if (memo_put(self, obj) < 0)
2776 goto error;
2777 }
2778
2779 if (0) {
2780 error:
2781 status = -1;
2782 }
2783 Py_XDECREF(module_name);
2784 Py_XDECREF(global_name);
2785 Py_XDECREF(module);
2786
2787 return status;
2788}
2789
2790static int
2791save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2792{
2793 PyObject *pid = NULL;
2794 int status = 0;
2795
2796 const char persid_op = PERSID;
2797 const char binpersid_op = BINPERSID;
2798
2799 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002800 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002801 if (pid == NULL)
2802 return -1;
2803
2804 if (pid != Py_None) {
2805 if (self->bin) {
2806 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002807 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002808 goto error;
2809 }
2810 else {
2811 PyObject *pid_str = NULL;
2812 char *pid_ascii_bytes;
2813 Py_ssize_t size;
2814
2815 pid_str = PyObject_Str(pid);
2816 if (pid_str == NULL)
2817 goto error;
2818
2819 /* XXX: Should it check whether the persistent id only contains
2820 ASCII characters? And what if the pid contains embedded
2821 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002822 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002823 Py_DECREF(pid_str);
2824 if (pid_ascii_bytes == NULL)
2825 goto error;
2826
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002827 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2828 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2829 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002830 goto error;
2831 }
2832 status = 1;
2833 }
2834
2835 if (0) {
2836 error:
2837 status = -1;
2838 }
2839 Py_XDECREF(pid);
2840
2841 return status;
2842}
2843
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002844static PyObject *
2845get_class(PyObject *obj)
2846{
2847 PyObject *cls;
2848 static PyObject *str_class;
2849
2850 if (str_class == NULL) {
2851 str_class = PyUnicode_InternFromString("__class__");
2852 if (str_class == NULL)
2853 return NULL;
2854 }
2855 cls = PyObject_GetAttr(obj, str_class);
2856 if (cls == NULL) {
2857 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2858 PyErr_Clear();
2859 cls = (PyObject *) Py_TYPE(obj);
2860 Py_INCREF(cls);
2861 }
2862 }
2863 return cls;
2864}
2865
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002866/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2867 * appropriate __reduce__ method for obj.
2868 */
2869static int
2870save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2871{
2872 PyObject *callable;
2873 PyObject *argtup;
2874 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002875 PyObject *listitems = Py_None;
2876 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002877 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002878
2879 int use_newobj = self->proto >= 2;
2880
2881 const char reduce_op = REDUCE;
2882 const char build_op = BUILD;
2883 const char newobj_op = NEWOBJ;
2884
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002885 size = PyTuple_Size(args);
2886 if (size < 2 || size > 5) {
2887 PyErr_SetString(PicklingError, "tuple returned by "
2888 "__reduce__ must contain 2 through 5 elements");
2889 return -1;
2890 }
2891
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002892 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2893 &callable, &argtup, &state, &listitems, &dictitems))
2894 return -1;
2895
2896 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002897 PyErr_SetString(PicklingError, "first item of the tuple "
2898 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002899 return -1;
2900 }
2901 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002902 PyErr_SetString(PicklingError, "second item of the tuple "
2903 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002904 return -1;
2905 }
2906
2907 if (state == Py_None)
2908 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002909
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002910 if (listitems == Py_None)
2911 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002912 else if (!PyIter_Check(listitems)) {
2913 PyErr_Format(PicklingError, "Fourth element of tuple"
2914 "returned by __reduce__ must be an iterator, not %s",
2915 Py_TYPE(listitems)->tp_name);
2916 return -1;
2917 }
2918
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002919 if (dictitems == Py_None)
2920 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002921 else if (!PyIter_Check(dictitems)) {
2922 PyErr_Format(PicklingError, "Fifth element of tuple"
2923 "returned by __reduce__ must be an iterator, not %s",
2924 Py_TYPE(dictitems)->tp_name);
2925 return -1;
2926 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002927
2928 /* Protocol 2 special case: if callable's name is __newobj__, use
2929 NEWOBJ. */
2930 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002931 static PyObject *newobj_str = NULL, *name_str = NULL;
2932 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002933
2934 if (newobj_str == NULL) {
2935 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002936 name_str = PyUnicode_InternFromString("__name__");
2937 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002938 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002939 }
2940
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002941 name = PyObject_GetAttr(callable, name_str);
2942 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002943 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2944 PyErr_Clear();
2945 else
2946 return -1;
2947 use_newobj = 0;
2948 }
2949 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002950 use_newobj = PyUnicode_Check(name) &&
2951 PyUnicode_Compare(name, newobj_str) == 0;
2952 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002953 }
2954 }
2955 if (use_newobj) {
2956 PyObject *cls;
2957 PyObject *newargtup;
2958 PyObject *obj_class;
2959 int p;
2960
2961 /* Sanity checks. */
2962 if (Py_SIZE(argtup) < 1) {
2963 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2964 return -1;
2965 }
2966
2967 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002968 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002969 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002970 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002971 return -1;
2972 }
2973
2974 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002975 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002976 p = obj_class != cls; /* true iff a problem */
2977 Py_DECREF(obj_class);
2978 if (p) {
2979 PyErr_SetString(PicklingError, "args[0] from "
2980 "__newobj__ args has the wrong class");
2981 return -1;
2982 }
2983 }
2984 /* XXX: These calls save() are prone to infinite recursion. Imagine
2985 what happen if the value returned by the __reduce__() method of
2986 some extension type contains another object of the same type. Ouch!
2987
2988 Here is a quick example, that I ran into, to illustrate what I
2989 mean:
2990
2991 >>> import pickle, copyreg
2992 >>> copyreg.dispatch_table.pop(complex)
2993 >>> pickle.dumps(1+2j)
2994 Traceback (most recent call last):
2995 ...
2996 RuntimeError: maximum recursion depth exceeded
2997
2998 Removing the complex class from copyreg.dispatch_table made the
2999 __reduce_ex__() method emit another complex object:
3000
3001 >>> (1+1j).__reduce_ex__(2)
3002 (<function __newobj__ at 0xb7b71c3c>,
3003 (<class 'complex'>, (1+1j)), None, None, None)
3004
3005 Thus when save() was called on newargstup (the 2nd item) recursion
3006 ensued. Of course, the bug was in the complex class which had a
3007 broken __getnewargs__() that emitted another complex object. But,
3008 the point, here, is it is quite easy to end up with a broken reduce
3009 function. */
3010
3011 /* Save the class and its __new__ arguments. */
3012 if (save(self, cls, 0) < 0)
3013 return -1;
3014
3015 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3016 if (newargtup == NULL)
3017 return -1;
3018
3019 p = save(self, newargtup, 0);
3020 Py_DECREF(newargtup);
3021 if (p < 0)
3022 return -1;
3023
3024 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003025 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003026 return -1;
3027 }
3028 else { /* Not using NEWOBJ. */
3029 if (save(self, callable, 0) < 0 ||
3030 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003031 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003032 return -1;
3033 }
3034
3035 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3036 the caller do not want to memoize the object. Not particularly useful,
3037 but that is to mimic the behavior save_reduce() in pickle.py when
3038 obj is None. */
3039 if (obj && memo_put(self, obj) < 0)
3040 return -1;
3041
3042 if (listitems && batch_list(self, listitems) < 0)
3043 return -1;
3044
3045 if (dictitems && batch_dict(self, dictitems) < 0)
3046 return -1;
3047
3048 if (state) {
3049 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003050 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003051 return -1;
3052 }
3053
3054 return 0;
3055}
3056
3057static int
3058save(PicklerObject *self, PyObject *obj, int pers_save)
3059{
3060 PyTypeObject *type;
3061 PyObject *reduce_func = NULL;
3062 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003063 int status = 0;
3064
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003065 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003066 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003067
3068 /* The extra pers_save argument is necessary to avoid calling save_pers()
3069 on its returned object. */
3070 if (!pers_save && self->pers_func) {
3071 /* save_pers() returns:
3072 -1 to signal an error;
3073 0 if it did nothing successfully;
3074 1 if a persistent id was saved.
3075 */
3076 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3077 goto done;
3078 }
3079
3080 type = Py_TYPE(obj);
3081
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003082 /* The old cPickle had an optimization that used switch-case statement
3083 dispatching on the first letter of the type name. This has was removed
3084 since benchmarks shown that this optimization was actually slowing
3085 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003086
3087 /* Atom types; these aren't memoized, so don't check the memo. */
3088
3089 if (obj == Py_None) {
3090 status = save_none(self, obj);
3091 goto done;
3092 }
3093 else if (obj == Py_False || obj == Py_True) {
3094 status = save_bool(self, obj);
3095 goto done;
3096 }
3097 else if (type == &PyLong_Type) {
3098 status = save_long(self, obj);
3099 goto done;
3100 }
3101 else if (type == &PyFloat_Type) {
3102 status = save_float(self, obj);
3103 goto done;
3104 }
3105
3106 /* Check the memo to see if it has the object. If so, generate
3107 a GET (or BINGET) opcode, instead of pickling the object
3108 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003109 if (PyMemoTable_Get(self->memo, obj)) {
3110 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003111 goto error;
3112 goto done;
3113 }
3114
3115 if (type == &PyBytes_Type) {
3116 status = save_bytes(self, obj);
3117 goto done;
3118 }
3119 else if (type == &PyUnicode_Type) {
3120 status = save_unicode(self, obj);
3121 goto done;
3122 }
3123 else if (type == &PyDict_Type) {
3124 status = save_dict(self, obj);
3125 goto done;
3126 }
3127 else if (type == &PyList_Type) {
3128 status = save_list(self, obj);
3129 goto done;
3130 }
3131 else if (type == &PyTuple_Type) {
3132 status = save_tuple(self, obj);
3133 goto done;
3134 }
3135 else if (type == &PyType_Type) {
3136 status = save_global(self, obj, NULL);
3137 goto done;
3138 }
3139 else if (type == &PyFunction_Type) {
3140 status = save_global(self, obj, NULL);
3141 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3142 /* fall back to reduce */
3143 PyErr_Clear();
3144 }
3145 else {
3146 goto done;
3147 }
3148 }
3149 else if (type == &PyCFunction_Type) {
3150 status = save_global(self, obj, NULL);
3151 goto done;
3152 }
3153 else if (PyType_IsSubtype(type, &PyType_Type)) {
3154 status = save_global(self, obj, NULL);
3155 goto done;
3156 }
3157
3158 /* XXX: This part needs some unit tests. */
3159
3160 /* Get a reduction callable, and call it. This may come from
3161 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3162 * or the object's __reduce__ method.
3163 */
3164 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3165 if (reduce_func != NULL) {
3166 /* Here, the reference count of the reduce_func object returned by
3167 PyDict_GetItem needs to be increased to be consistent with the one
3168 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3169 reduce_func at the end of the save() routine.
3170 */
3171 Py_INCREF(reduce_func);
3172 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003173 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003174 }
3175 else {
3176 static PyObject *reduce_str = NULL;
3177 static PyObject *reduce_ex_str = NULL;
3178
3179 /* Cache the name of the reduce methods. */
3180 if (reduce_str == NULL) {
3181 reduce_str = PyUnicode_InternFromString("__reduce__");
3182 if (reduce_str == NULL)
3183 goto error;
3184 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3185 if (reduce_ex_str == NULL)
3186 goto error;
3187 }
3188
3189 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3190 automatically defined as __reduce__. While this is convenient, this
3191 make it impossible to know which method was actually called. Of
3192 course, this is not a big deal. But still, it would be nice to let
3193 the user know which method was called when something go
3194 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3195 don't actually have to check for a __reduce__ method. */
3196
3197 /* Check for a __reduce_ex__ method. */
3198 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3199 if (reduce_func != NULL) {
3200 PyObject *proto;
3201 proto = PyLong_FromLong(self->proto);
3202 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003203 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003204 }
3205 }
3206 else {
3207 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3208 PyErr_Clear();
3209 else
3210 goto error;
3211 /* Check for a __reduce__ method. */
3212 reduce_func = PyObject_GetAttr(obj, reduce_str);
3213 if (reduce_func != NULL) {
3214 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3215 }
3216 else {
3217 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3218 type->tp_name, obj);
3219 goto error;
3220 }
3221 }
3222 }
3223
3224 if (reduce_value == NULL)
3225 goto error;
3226
3227 if (PyUnicode_Check(reduce_value)) {
3228 status = save_global(self, obj, reduce_value);
3229 goto done;
3230 }
3231
3232 if (!PyTuple_Check(reduce_value)) {
3233 PyErr_SetString(PicklingError,
3234 "__reduce__ must return a string or tuple");
3235 goto error;
3236 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003237
3238 status = save_reduce(self, reduce_value, obj);
3239
3240 if (0) {
3241 error:
3242 status = -1;
3243 }
3244 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003245 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003246 Py_XDECREF(reduce_func);
3247 Py_XDECREF(reduce_value);
3248
3249 return status;
3250}
3251
3252static int
3253dump(PicklerObject *self, PyObject *obj)
3254{
3255 const char stop_op = STOP;
3256
3257 if (self->proto >= 2) {
3258 char header[2];
3259
3260 header[0] = PROTO;
3261 assert(self->proto >= 0 && self->proto < 256);
3262 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003263 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003264 return -1;
3265 }
3266
3267 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003268 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003269 return -1;
3270
3271 return 0;
3272}
3273
3274PyDoc_STRVAR(Pickler_clear_memo_doc,
3275"clear_memo() -> None. Clears the pickler's \"memo\"."
3276"\n"
3277"The memo is the data structure that remembers which objects the\n"
3278"pickler has already seen, so that shared or recursive objects are\n"
3279"pickled by reference and not by value. This method is useful when\n"
3280"re-using picklers.");
3281
3282static PyObject *
3283Pickler_clear_memo(PicklerObject *self)
3284{
3285 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003286 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003287
3288 Py_RETURN_NONE;
3289}
3290
3291PyDoc_STRVAR(Pickler_dump_doc,
3292"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3293
3294static PyObject *
3295Pickler_dump(PicklerObject *self, PyObject *args)
3296{
3297 PyObject *obj;
3298
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003299 /* Check whether the Pickler was initialized correctly (issue3664).
3300 Developers often forget to call __init__() in their subclasses, which
3301 would trigger a segfault without this check. */
3302 if (self->write == NULL) {
3303 PyErr_Format(PicklingError,
3304 "Pickler.__init__() was not called by %s.__init__()",
3305 Py_TYPE(self)->tp_name);
3306 return NULL;
3307 }
3308
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003309 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3310 return NULL;
3311
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003312 if (_Pickler_ClearBuffer(self) < 0)
3313 return NULL;
3314
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003315 if (dump(self, obj) < 0)
3316 return NULL;
3317
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003318 if (_Pickler_FlushToFile(self) < 0)
3319 return NULL;
3320
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003321 Py_RETURN_NONE;
3322}
3323
3324static struct PyMethodDef Pickler_methods[] = {
3325 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3326 Pickler_dump_doc},
3327 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3328 Pickler_clear_memo_doc},
3329 {NULL, NULL} /* sentinel */
3330};
3331
3332static void
3333Pickler_dealloc(PicklerObject *self)
3334{
3335 PyObject_GC_UnTrack(self);
3336
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003337 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003338 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003339 Py_XDECREF(self->pers_func);
3340 Py_XDECREF(self->arg);
3341 Py_XDECREF(self->fast_memo);
3342
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003343 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003344
3345 Py_TYPE(self)->tp_free((PyObject *)self);
3346}
3347
3348static int
3349Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3350{
3351 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003352 Py_VISIT(self->pers_func);
3353 Py_VISIT(self->arg);
3354 Py_VISIT(self->fast_memo);
3355 return 0;
3356}
3357
3358static int
3359Pickler_clear(PicklerObject *self)
3360{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003361 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003362 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003363 Py_CLEAR(self->pers_func);
3364 Py_CLEAR(self->arg);
3365 Py_CLEAR(self->fast_memo);
3366
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003367 if (self->memo != NULL) {
3368 PyMemoTable *memo = self->memo;
3369 self->memo = NULL;
3370 PyMemoTable_Del(memo);
3371 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003372 return 0;
3373}
3374
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003375
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003376PyDoc_STRVAR(Pickler_doc,
3377"Pickler(file, protocol=None)"
3378"\n"
3379"This takes a binary file for writing a pickle data stream.\n"
3380"\n"
3381"The optional protocol argument tells the pickler to use the\n"
3382"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3383"protocol is 3; a backward-incompatible protocol designed for\n"
3384"Python 3.0.\n"
3385"\n"
3386"Specifying a negative protocol version selects the highest\n"
3387"protocol version supported. The higher the protocol used, the\n"
3388"more recent the version of Python needed to read the pickle\n"
3389"produced.\n"
3390"\n"
3391"The file argument must have a write() method that accepts a single\n"
3392"bytes argument. It can thus be a file object opened for binary\n"
3393"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003394"meets this interface.\n"
3395"\n"
3396"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3397"map the new Python 3.x names to the old module names used in Python\n"
3398"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003399
3400static int
3401Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3402{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003403 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003404 PyObject *file;
3405 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003406 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003407
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003408 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003409 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003410 return -1;
3411
3412 /* In case of multiple __init__() calls, clear previous content. */
3413 if (self->write != NULL)
3414 (void)Pickler_clear(self);
3415
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003416 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3417 return -1;
3418
3419 if (_Pickler_SetOutputStream(self, file) < 0)
3420 return -1;
3421
3422 /* memo and output_buffer may have already been created in _Pickler_New */
3423 if (self->memo == NULL) {
3424 self->memo = PyMemoTable_New();
3425 if (self->memo == NULL)
3426 return -1;
3427 }
3428 self->output_len = 0;
3429 if (self->output_buffer == NULL) {
3430 self->max_output_len = WRITE_BUF_SIZE;
3431 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3432 self->max_output_len);
3433 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003434 return -1;
3435 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003436
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003437 self->arg = NULL;
3438 self->fast = 0;
3439 self->fast_nesting = 0;
3440 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003441 self->pers_func = NULL;
3442 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3443 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3444 "persistent_id");
3445 if (self->pers_func == NULL)
3446 return -1;
3447 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003448 return 0;
3449}
3450
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003451/* Define a proxy object for the Pickler's internal memo object. This is to
3452 * avoid breaking code like:
3453 * pickler.memo.clear()
3454 * and
3455 * pickler.memo = saved_memo
3456 * Is this a good idea? Not really, but we don't want to break code that uses
3457 * it. Note that we don't implement the entire mapping API here. This is
3458 * intentional, as these should be treated as black-box implementation details.
3459 */
3460
3461typedef struct {
3462 PyObject_HEAD
3463 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3464} PicklerMemoProxyObject;
3465
3466PyDoc_STRVAR(pmp_clear_doc,
3467"memo.clear() -> None. Remove all items from memo.");
3468
3469static PyObject *
3470pmp_clear(PicklerMemoProxyObject *self)
3471{
3472 if (self->pickler->memo)
3473 PyMemoTable_Clear(self->pickler->memo);
3474 Py_RETURN_NONE;
3475}
3476
3477PyDoc_STRVAR(pmp_copy_doc,
3478"memo.copy() -> new_memo. Copy the memo to a new object.");
3479
3480static PyObject *
3481pmp_copy(PicklerMemoProxyObject *self)
3482{
3483 Py_ssize_t i;
3484 PyMemoTable *memo;
3485 PyObject *new_memo = PyDict_New();
3486 if (new_memo == NULL)
3487 return NULL;
3488
3489 memo = self->pickler->memo;
3490 for (i = 0; i < memo->mt_allocated; ++i) {
3491 PyMemoEntry entry = memo->mt_table[i];
3492 if (entry.me_key != NULL) {
3493 int status;
3494 PyObject *key, *value;
3495
3496 key = PyLong_FromVoidPtr(entry.me_key);
3497 value = Py_BuildValue("lO", entry.me_value, entry.me_key);
3498
3499 if (key == NULL || value == NULL) {
3500 Py_XDECREF(key);
3501 Py_XDECREF(value);
3502 goto error;
3503 }
3504 status = PyDict_SetItem(new_memo, key, value);
3505 Py_DECREF(key);
3506 Py_DECREF(value);
3507 if (status < 0)
3508 goto error;
3509 }
3510 }
3511 return new_memo;
3512
3513 error:
3514 Py_XDECREF(new_memo);
3515 return NULL;
3516}
3517
3518PyDoc_STRVAR(pmp_reduce_doc,
3519"memo.__reduce__(). Pickling support.");
3520
3521static PyObject *
3522pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3523{
3524 PyObject *reduce_value, *dict_args;
3525 PyObject *contents = pmp_copy(self);
3526 if (contents == NULL)
3527 return NULL;
3528
3529 reduce_value = PyTuple_New(2);
3530 if (reduce_value == NULL) {
3531 Py_DECREF(contents);
3532 return NULL;
3533 }
3534 dict_args = PyTuple_New(1);
3535 if (dict_args == NULL) {
3536 Py_DECREF(contents);
3537 Py_DECREF(reduce_value);
3538 return NULL;
3539 }
3540 PyTuple_SET_ITEM(dict_args, 0, contents);
3541 Py_INCREF((PyObject *)&PyDict_Type);
3542 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3543 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3544 return reduce_value;
3545}
3546
3547static PyMethodDef picklerproxy_methods[] = {
3548 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3549 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3550 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3551 {NULL, NULL} /* sentinel */
3552};
3553
3554static void
3555PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3556{
3557 PyObject_GC_UnTrack(self);
3558 Py_XDECREF(self->pickler);
3559 PyObject_GC_Del((PyObject *)self);
3560}
3561
3562static int
3563PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3564 visitproc visit, void *arg)
3565{
3566 Py_VISIT(self->pickler);
3567 return 0;
3568}
3569
3570static int
3571PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3572{
3573 Py_CLEAR(self->pickler);
3574 return 0;
3575}
3576
3577static PyTypeObject PicklerMemoProxyType = {
3578 PyVarObject_HEAD_INIT(NULL, 0)
3579 "_pickle.PicklerMemoProxy", /*tp_name*/
3580 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3581 0,
3582 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3583 0, /* tp_print */
3584 0, /* tp_getattr */
3585 0, /* tp_setattr */
3586 0, /* tp_compare */
3587 0, /* tp_repr */
3588 0, /* tp_as_number */
3589 0, /* tp_as_sequence */
3590 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003591 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003592 0, /* tp_call */
3593 0, /* tp_str */
3594 PyObject_GenericGetAttr, /* tp_getattro */
3595 PyObject_GenericSetAttr, /* tp_setattro */
3596 0, /* tp_as_buffer */
3597 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3598 0, /* tp_doc */
3599 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3600 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3601 0, /* tp_richcompare */
3602 0, /* tp_weaklistoffset */
3603 0, /* tp_iter */
3604 0, /* tp_iternext */
3605 picklerproxy_methods, /* tp_methods */
3606};
3607
3608static PyObject *
3609PicklerMemoProxy_New(PicklerObject *pickler)
3610{
3611 PicklerMemoProxyObject *self;
3612
3613 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3614 if (self == NULL)
3615 return NULL;
3616 Py_INCREF(pickler);
3617 self->pickler = pickler;
3618 PyObject_GC_Track(self);
3619 return (PyObject *)self;
3620}
3621
3622/*****************************************************************************/
3623
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003624static PyObject *
3625Pickler_get_memo(PicklerObject *self)
3626{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003627 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003628}
3629
3630static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003631Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003632{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003633 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003634
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003635 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003636 PyErr_SetString(PyExc_TypeError,
3637 "attribute deletion is not supported");
3638 return -1;
3639 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003640
3641 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3642 PicklerObject *pickler =
3643 ((PicklerMemoProxyObject *)obj)->pickler;
3644
3645 new_memo = PyMemoTable_Copy(pickler->memo);
3646 if (new_memo == NULL)
3647 return -1;
3648 }
3649 else if (PyDict_Check(obj)) {
3650 Py_ssize_t i = 0;
3651 PyObject *key, *value;
3652
3653 new_memo = PyMemoTable_New();
3654 if (new_memo == NULL)
3655 return -1;
3656
3657 while (PyDict_Next(obj, &i, &key, &value)) {
3658 long memo_id;
3659 PyObject *memo_obj;
3660
3661 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3662 PyErr_SetString(PyExc_TypeError,
3663 "'memo' values must be 2-item tuples");
3664 goto error;
3665 }
3666 memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
3667 if (memo_id == -1 && PyErr_Occurred())
3668 goto error;
3669 memo_obj = PyTuple_GET_ITEM(value, 1);
3670 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3671 goto error;
3672 }
3673 }
3674 else {
3675 PyErr_Format(PyExc_TypeError,
3676 "'memo' attribute must be an PicklerMemoProxy object"
3677 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003678 return -1;
3679 }
3680
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003681 PyMemoTable_Del(self->memo);
3682 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003683
3684 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003685
3686 error:
3687 if (new_memo)
3688 PyMemoTable_Del(new_memo);
3689 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003690}
3691
3692static PyObject *
3693Pickler_get_persid(PicklerObject *self)
3694{
3695 if (self->pers_func == NULL)
3696 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3697 else
3698 Py_INCREF(self->pers_func);
3699 return self->pers_func;
3700}
3701
3702static int
3703Pickler_set_persid(PicklerObject *self, PyObject *value)
3704{
3705 PyObject *tmp;
3706
3707 if (value == NULL) {
3708 PyErr_SetString(PyExc_TypeError,
3709 "attribute deletion is not supported");
3710 return -1;
3711 }
3712 if (!PyCallable_Check(value)) {
3713 PyErr_SetString(PyExc_TypeError,
3714 "persistent_id must be a callable taking one argument");
3715 return -1;
3716 }
3717
3718 tmp = self->pers_func;
3719 Py_INCREF(value);
3720 self->pers_func = value;
3721 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3722
3723 return 0;
3724}
3725
3726static PyMemberDef Pickler_members[] = {
3727 {"bin", T_INT, offsetof(PicklerObject, bin)},
3728 {"fast", T_INT, offsetof(PicklerObject, fast)},
3729 {NULL}
3730};
3731
3732static PyGetSetDef Pickler_getsets[] = {
3733 {"memo", (getter)Pickler_get_memo,
3734 (setter)Pickler_set_memo},
3735 {"persistent_id", (getter)Pickler_get_persid,
3736 (setter)Pickler_set_persid},
3737 {NULL}
3738};
3739
3740static PyTypeObject Pickler_Type = {
3741 PyVarObject_HEAD_INIT(NULL, 0)
3742 "_pickle.Pickler" , /*tp_name*/
3743 sizeof(PicklerObject), /*tp_basicsize*/
3744 0, /*tp_itemsize*/
3745 (destructor)Pickler_dealloc, /*tp_dealloc*/
3746 0, /*tp_print*/
3747 0, /*tp_getattr*/
3748 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003749 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003750 0, /*tp_repr*/
3751 0, /*tp_as_number*/
3752 0, /*tp_as_sequence*/
3753 0, /*tp_as_mapping*/
3754 0, /*tp_hash*/
3755 0, /*tp_call*/
3756 0, /*tp_str*/
3757 0, /*tp_getattro*/
3758 0, /*tp_setattro*/
3759 0, /*tp_as_buffer*/
3760 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3761 Pickler_doc, /*tp_doc*/
3762 (traverseproc)Pickler_traverse, /*tp_traverse*/
3763 (inquiry)Pickler_clear, /*tp_clear*/
3764 0, /*tp_richcompare*/
3765 0, /*tp_weaklistoffset*/
3766 0, /*tp_iter*/
3767 0, /*tp_iternext*/
3768 Pickler_methods, /*tp_methods*/
3769 Pickler_members, /*tp_members*/
3770 Pickler_getsets, /*tp_getset*/
3771 0, /*tp_base*/
3772 0, /*tp_dict*/
3773 0, /*tp_descr_get*/
3774 0, /*tp_descr_set*/
3775 0, /*tp_dictoffset*/
3776 (initproc)Pickler_init, /*tp_init*/
3777 PyType_GenericAlloc, /*tp_alloc*/
3778 PyType_GenericNew, /*tp_new*/
3779 PyObject_GC_Del, /*tp_free*/
3780 0, /*tp_is_gc*/
3781};
3782
3783/* Temporary helper for calling self.find_class().
3784
3785 XXX: It would be nice to able to avoid Python function call overhead, by
3786 using directly the C version of find_class(), when find_class() is not
3787 overridden by a subclass. Although, this could become rather hackish. A
3788 simpler optimization would be to call the C function when self is not a
3789 subclass instance. */
3790static PyObject *
3791find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3792{
3793 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3794 module_name, global_name);
3795}
3796
3797static int
3798marker(UnpicklerObject *self)
3799{
3800 if (self->num_marks < 1) {
3801 PyErr_SetString(UnpicklingError, "could not find MARK");
3802 return -1;
3803 }
3804
3805 return self->marks[--self->num_marks];
3806}
3807
3808static int
3809load_none(UnpicklerObject *self)
3810{
3811 PDATA_APPEND(self->stack, Py_None, -1);
3812 return 0;
3813}
3814
3815static int
3816bad_readline(void)
3817{
3818 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3819 return -1;
3820}
3821
3822static int
3823load_int(UnpicklerObject *self)
3824{
3825 PyObject *value;
3826 char *endptr, *s;
3827 Py_ssize_t len;
3828 long x;
3829
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003830 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003831 return -1;
3832 if (len < 2)
3833 return bad_readline();
3834
3835 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003836 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3837 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003838 x = strtol(s, &endptr, 0);
3839
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003840 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003841 /* Hm, maybe we've got something long. Let's try reading
3842 * it as a Python long object. */
3843 errno = 0;
3844 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003845 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003846 if (value == NULL) {
3847 PyErr_SetString(PyExc_ValueError,
3848 "could not convert string to int");
3849 return -1;
3850 }
3851 }
3852 else {
3853 if (len == 3 && (x == 0 || x == 1)) {
3854 if ((value = PyBool_FromLong(x)) == NULL)
3855 return -1;
3856 }
3857 else {
3858 if ((value = PyLong_FromLong(x)) == NULL)
3859 return -1;
3860 }
3861 }
3862
3863 PDATA_PUSH(self->stack, value, -1);
3864 return 0;
3865}
3866
3867static int
3868load_bool(UnpicklerObject *self, PyObject *boolean)
3869{
3870 assert(boolean == Py_True || boolean == Py_False);
3871 PDATA_APPEND(self->stack, boolean, -1);
3872 return 0;
3873}
3874
/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2 this is an unsigned
 * little-endian int, but when size is 4 it is a signed (two's complement)
 * one.  This is a historical source of cross-platform bugs.
 *
 * `size` must not exceed sizeof(long); the callers visible in this file
 * pass 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    /* Accumulate in an unsigned type: shifting a byte into the sign bit of
     * a signed long is undefined behaviour where long is 32 bits wide. */
    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed, so bit 31 is the sign bit.  Compute low31 - 2**31 in two
     * steps so that no intermediate value overflows a 32-bit long.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1;
    }

    return (long)x;
}
3901
3902static int
3903load_binintx(UnpicklerObject *self, char *s, int size)
3904{
3905 PyObject *value;
3906 long x;
3907
3908 x = calc_binint(s, size);
3909
3910 if ((value = PyLong_FromLong(x)) == NULL)
3911 return -1;
3912
3913 PDATA_PUSH(self->stack, value, -1);
3914 return 0;
3915}
3916
3917static int
3918load_binint(UnpicklerObject *self)
3919{
3920 char *s;
3921
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003922 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003923 return -1;
3924
3925 return load_binintx(self, s, 4);
3926}
3927
3928static int
3929load_binint1(UnpicklerObject *self)
3930{
3931 char *s;
3932
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003933 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003934 return -1;
3935
3936 return load_binintx(self, s, 1);
3937}
3938
3939static int
3940load_binint2(UnpicklerObject *self)
3941{
3942 char *s;
3943
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003944 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003945 return -1;
3946
3947 return load_binintx(self, s, 2);
3948}
3949
3950static int
3951load_long(UnpicklerObject *self)
3952{
3953 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003954 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003955 Py_ssize_t len;
3956
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003957 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003958 return -1;
3959 if (len < 2)
3960 return bad_readline();
3961
Mark Dickinson8dd05142009-01-20 20:43:58 +00003962 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3963 the 'L' before calling PyLong_FromString. In order to maintain
3964 compatibility with Python 3.0.0, we don't actually *require*
3965 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003966 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003967 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003968 /* XXX: Should the base argument explicitly set to 10? */
3969 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003970 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003971 return -1;
3972
3973 PDATA_PUSH(self->stack, value, -1);
3974 return 0;
3975}
3976
3977/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3978 * data following.
3979 */
3980static int
3981load_counted_long(UnpicklerObject *self, int size)
3982{
3983 PyObject *value;
3984 char *nbytes;
3985 char *pdata;
3986
3987 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003988 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003989 return -1;
3990
3991 size = calc_binint(nbytes, size);
3992 if (size < 0) {
3993 /* Corrupt or hostile pickle -- we never write one like this */
3994 PyErr_SetString(UnpicklingError,
3995 "LONG pickle has negative byte count");
3996 return -1;
3997 }
3998
3999 if (size == 0)
4000 value = PyLong_FromLong(0L);
4001 else {
4002 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004003 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004004 return -1;
4005 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4006 1 /* little endian */ , 1 /* signed */ );
4007 }
4008 if (value == NULL)
4009 return -1;
4010 PDATA_PUSH(self->stack, value, -1);
4011 return 0;
4012}
4013
4014static int
4015load_float(UnpicklerObject *self)
4016{
4017 PyObject *value;
4018 char *endptr, *s;
4019 Py_ssize_t len;
4020 double d;
4021
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004022 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004023 return -1;
4024 if (len < 2)
4025 return bad_readline();
4026
4027 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004028 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4029 if (d == -1.0 && PyErr_Occurred())
4030 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004031 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004032 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4033 return -1;
4034 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004035 value = PyFloat_FromDouble(d);
4036 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004037 return -1;
4038
4039 PDATA_PUSH(self->stack, value, -1);
4040 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004041}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004042
4043static int
4044load_binfloat(UnpicklerObject *self)
4045{
4046 PyObject *value;
4047 double x;
4048 char *s;
4049
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004050 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004051 return -1;
4052
4053 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4054 if (x == -1.0 && PyErr_Occurred())
4055 return -1;
4056
4057 if ((value = PyFloat_FromDouble(x)) == NULL)
4058 return -1;
4059
4060 PDATA_PUSH(self->stack, value, -1);
4061 return 0;
4062}
4063
4064static int
4065load_string(UnpicklerObject *self)
4066{
4067 PyObject *bytes;
4068 PyObject *str = NULL;
4069 Py_ssize_t len;
4070 char *s, *p;
4071
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004072 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004073 return -1;
4074 if (len < 3)
4075 return bad_readline();
4076 if ((s = strdup(s)) == NULL) {
4077 PyErr_NoMemory();
4078 return -1;
4079 }
4080
4081 /* Strip outermost quotes */
4082 while (s[len - 1] <= ' ')
4083 len--;
4084 if (s[0] == '"' && s[len - 1] == '"') {
4085 s[len - 1] = '\0';
4086 p = s + 1;
4087 len -= 2;
4088 }
4089 else if (s[0] == '\'' && s[len - 1] == '\'') {
4090 s[len - 1] = '\0';
4091 p = s + 1;
4092 len -= 2;
4093 }
4094 else {
4095 free(s);
4096 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4097 return -1;
4098 }
4099
4100 /* Use the PyBytes API to decode the string, since that is what is used
4101 to encode, and then coerce the result to Unicode. */
4102 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4103 free(s);
4104 if (bytes == NULL)
4105 return -1;
4106 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4107 Py_DECREF(bytes);
4108 if (str == NULL)
4109 return -1;
4110
4111 PDATA_PUSH(self->stack, str, -1);
4112 return 0;
4113}
4114
4115static int
4116load_binbytes(UnpicklerObject *self)
4117{
4118 PyObject *bytes;
4119 long x;
4120 char *s;
4121
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004122 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004123 return -1;
4124
4125 x = calc_binint(s, 4);
4126 if (x < 0) {
4127 PyErr_SetString(UnpicklingError,
4128 "BINBYTES pickle has negative byte count");
4129 return -1;
4130 }
4131
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004132 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004133 return -1;
4134 bytes = PyBytes_FromStringAndSize(s, x);
4135 if (bytes == NULL)
4136 return -1;
4137
4138 PDATA_PUSH(self->stack, bytes, -1);
4139 return 0;
4140}
4141
4142static int
4143load_short_binbytes(UnpicklerObject *self)
4144{
4145 PyObject *bytes;
4146 unsigned char x;
4147 char *s;
4148
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004149 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004150 return -1;
4151
4152 x = (unsigned char)s[0];
4153
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004154 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004155 return -1;
4156
4157 bytes = PyBytes_FromStringAndSize(s, x);
4158 if (bytes == NULL)
4159 return -1;
4160
4161 PDATA_PUSH(self->stack, bytes, -1);
4162 return 0;
4163}
4164
4165static int
4166load_binstring(UnpicklerObject *self)
4167{
4168 PyObject *str;
4169 long x;
4170 char *s;
4171
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004172 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004173 return -1;
4174
4175 x = calc_binint(s, 4);
4176 if (x < 0) {
4177 PyErr_SetString(UnpicklingError,
4178 "BINSTRING pickle has negative byte count");
4179 return -1;
4180 }
4181
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004182 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004183 return -1;
4184
4185 /* Convert Python 2.x strings to unicode. */
4186 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4187 if (str == NULL)
4188 return -1;
4189
4190 PDATA_PUSH(self->stack, str, -1);
4191 return 0;
4192}
4193
4194static int
4195load_short_binstring(UnpicklerObject *self)
4196{
4197 PyObject *str;
4198 unsigned char x;
4199 char *s;
4200
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004201 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004202 return -1;
4203
4204 x = (unsigned char)s[0];
4205
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004206 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004207 return -1;
4208
4209 /* Convert Python 2.x strings to unicode. */
4210 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4211 if (str == NULL)
4212 return -1;
4213
4214 PDATA_PUSH(self->stack, str, -1);
4215 return 0;
4216}
4217
4218static int
4219load_unicode(UnpicklerObject *self)
4220{
4221 PyObject *str;
4222 Py_ssize_t len;
4223 char *s;
4224
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004225 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004226 return -1;
4227 if (len < 1)
4228 return bad_readline();
4229
4230 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4231 if (str == NULL)
4232 return -1;
4233
4234 PDATA_PUSH(self->stack, str, -1);
4235 return 0;
4236}
4237
4238static int
4239load_binunicode(UnpicklerObject *self)
4240{
4241 PyObject *str;
4242 long size;
4243 char *s;
4244
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004245 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004246 return -1;
4247
4248 size = calc_binint(s, 4);
4249 if (size < 0) {
4250 PyErr_SetString(UnpicklingError,
4251 "BINUNICODE pickle has negative byte count");
4252 return -1;
4253 }
4254
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004255 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004256 return -1;
4257
Victor Stinner485fb562010-04-13 11:07:24 +00004258 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004259 if (str == NULL)
4260 return -1;
4261
4262 PDATA_PUSH(self->stack, str, -1);
4263 return 0;
4264}
4265
4266static int
4267load_tuple(UnpicklerObject *self)
4268{
4269 PyObject *tuple;
4270 int i;
4271
4272 if ((i = marker(self)) < 0)
4273 return -1;
4274
4275 tuple = Pdata_poptuple(self->stack, i);
4276 if (tuple == NULL)
4277 return -1;
4278 PDATA_PUSH(self->stack, tuple, -1);
4279 return 0;
4280}
4281
4282static int
4283load_counted_tuple(UnpicklerObject *self, int len)
4284{
4285 PyObject *tuple;
4286
4287 tuple = PyTuple_New(len);
4288 if (tuple == NULL)
4289 return -1;
4290
4291 while (--len >= 0) {
4292 PyObject *item;
4293
4294 PDATA_POP(self->stack, item);
4295 if (item == NULL)
4296 return -1;
4297 PyTuple_SET_ITEM(tuple, len, item);
4298 }
4299 PDATA_PUSH(self->stack, tuple, -1);
4300 return 0;
4301}
4302
4303static int
4304load_empty_list(UnpicklerObject *self)
4305{
4306 PyObject *list;
4307
4308 if ((list = PyList_New(0)) == NULL)
4309 return -1;
4310 PDATA_PUSH(self->stack, list, -1);
4311 return 0;
4312}
4313
4314static int
4315load_empty_dict(UnpicklerObject *self)
4316{
4317 PyObject *dict;
4318
4319 if ((dict = PyDict_New()) == NULL)
4320 return -1;
4321 PDATA_PUSH(self->stack, dict, -1);
4322 return 0;
4323}
4324
4325static int
4326load_list(UnpicklerObject *self)
4327{
4328 PyObject *list;
4329 int i;
4330
4331 if ((i = marker(self)) < 0)
4332 return -1;
4333
4334 list = Pdata_poplist(self->stack, i);
4335 if (list == NULL)
4336 return -1;
4337 PDATA_PUSH(self->stack, list, -1);
4338 return 0;
4339}
4340
4341static int
4342load_dict(UnpicklerObject *self)
4343{
4344 PyObject *dict, *key, *value;
4345 int i, j, k;
4346
4347 if ((i = marker(self)) < 0)
4348 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004349 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004350
4351 if ((dict = PyDict_New()) == NULL)
4352 return -1;
4353
4354 for (k = i + 1; k < j; k += 2) {
4355 key = self->stack->data[k - 1];
4356 value = self->stack->data[k];
4357 if (PyDict_SetItem(dict, key, value) < 0) {
4358 Py_DECREF(dict);
4359 return -1;
4360 }
4361 }
4362 Pdata_clear(self->stack, i);
4363 PDATA_PUSH(self->stack, dict, -1);
4364 return 0;
4365}
4366
4367static PyObject *
4368instantiate(PyObject *cls, PyObject *args)
4369{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004370 PyObject *result = NULL;
4371 /* Caller must assure args are a tuple. Normally, args come from
4372 Pdata_poptuple which packs objects from the top of the stack
4373 into a newly created tuple. */
4374 assert(PyTuple_Check(args));
4375 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4376 PyObject_HasAttrString(cls, "__getinitargs__")) {
4377 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004378 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004379 else {
4380 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4381 }
4382 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004383}
4384
4385static int
4386load_obj(UnpicklerObject *self)
4387{
4388 PyObject *cls, *args, *obj = NULL;
4389 int i;
4390
4391 if ((i = marker(self)) < 0)
4392 return -1;
4393
4394 args = Pdata_poptuple(self->stack, i + 1);
4395 if (args == NULL)
4396 return -1;
4397
4398 PDATA_POP(self->stack, cls);
4399 if (cls) {
4400 obj = instantiate(cls, args);
4401 Py_DECREF(cls);
4402 }
4403 Py_DECREF(args);
4404 if (obj == NULL)
4405 return -1;
4406
4407 PDATA_PUSH(self->stack, obj, -1);
4408 return 0;
4409}
4410
4411static int
4412load_inst(UnpicklerObject *self)
4413{
4414 PyObject *cls = NULL;
4415 PyObject *args = NULL;
4416 PyObject *obj = NULL;
4417 PyObject *module_name;
4418 PyObject *class_name;
4419 Py_ssize_t len;
4420 int i;
4421 char *s;
4422
4423 if ((i = marker(self)) < 0)
4424 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004425 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004426 return -1;
4427 if (len < 2)
4428 return bad_readline();
4429
4430 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4431 identifiers are permitted in Python 3.0, since the INST opcode is only
4432 supported by older protocols on Python 2.x. */
4433 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4434 if (module_name == NULL)
4435 return -1;
4436
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004437 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004438 if (len < 2)
4439 return bad_readline();
4440 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004441 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004442 cls = find_class(self, module_name, class_name);
4443 Py_DECREF(class_name);
4444 }
4445 }
4446 Py_DECREF(module_name);
4447
4448 if (cls == NULL)
4449 return -1;
4450
4451 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4452 obj = instantiate(cls, args);
4453 Py_DECREF(args);
4454 }
4455 Py_DECREF(cls);
4456
4457 if (obj == NULL)
4458 return -1;
4459
4460 PDATA_PUSH(self->stack, obj, -1);
4461 return 0;
4462}
4463
4464static int
4465load_newobj(UnpicklerObject *self)
4466{
4467 PyObject *args = NULL;
4468 PyObject *clsraw = NULL;
4469 PyTypeObject *cls; /* clsraw cast to its true type */
4470 PyObject *obj;
4471
4472 /* Stack is ... cls argtuple, and we want to call
4473 * cls.__new__(cls, *argtuple).
4474 */
4475 PDATA_POP(self->stack, args);
4476 if (args == NULL)
4477 goto error;
4478 if (!PyTuple_Check(args)) {
4479 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4480 goto error;
4481 }
4482
4483 PDATA_POP(self->stack, clsraw);
4484 cls = (PyTypeObject *)clsraw;
4485 if (cls == NULL)
4486 goto error;
4487 if (!PyType_Check(cls)) {
4488 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4489 "isn't a type object");
4490 goto error;
4491 }
4492 if (cls->tp_new == NULL) {
4493 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4494 "has NULL tp_new");
4495 goto error;
4496 }
4497
4498 /* Call __new__. */
4499 obj = cls->tp_new(cls, args, NULL);
4500 if (obj == NULL)
4501 goto error;
4502
4503 Py_DECREF(args);
4504 Py_DECREF(clsraw);
4505 PDATA_PUSH(self->stack, obj, -1);
4506 return 0;
4507
4508 error:
4509 Py_XDECREF(args);
4510 Py_XDECREF(clsraw);
4511 return -1;
4512}
4513
4514static int
4515load_global(UnpicklerObject *self)
4516{
4517 PyObject *global = NULL;
4518 PyObject *module_name;
4519 PyObject *global_name;
4520 Py_ssize_t len;
4521 char *s;
4522
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004523 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004524 return -1;
4525 if (len < 2)
4526 return bad_readline();
4527 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4528 if (!module_name)
4529 return -1;
4530
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004531 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004532 if (len < 2) {
4533 Py_DECREF(module_name);
4534 return bad_readline();
4535 }
4536 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4537 if (global_name) {
4538 global = find_class(self, module_name, global_name);
4539 Py_DECREF(global_name);
4540 }
4541 }
4542 Py_DECREF(module_name);
4543
4544 if (global == NULL)
4545 return -1;
4546 PDATA_PUSH(self->stack, global, -1);
4547 return 0;
4548}
4549
4550static int
4551load_persid(UnpicklerObject *self)
4552{
4553 PyObject *pid;
4554 Py_ssize_t len;
4555 char *s;
4556
4557 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004558 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004559 return -1;
4560 if (len < 2)
4561 return bad_readline();
4562
4563 pid = PyBytes_FromStringAndSize(s, len - 1);
4564 if (pid == NULL)
4565 return -1;
4566
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004567 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004568 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004569 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004570 if (pid == NULL)
4571 return -1;
4572
4573 PDATA_PUSH(self->stack, pid, -1);
4574 return 0;
4575 }
4576 else {
4577 PyErr_SetString(UnpicklingError,
4578 "A load persistent id instruction was encountered,\n"
4579 "but no persistent_load function was specified.");
4580 return -1;
4581 }
4582}
4583
4584static int
4585load_binpersid(UnpicklerObject *self)
4586{
4587 PyObject *pid;
4588
4589 if (self->pers_func) {
4590 PDATA_POP(self->stack, pid);
4591 if (pid == NULL)
4592 return -1;
4593
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004594 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004595 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004596 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004597 if (pid == NULL)
4598 return -1;
4599
4600 PDATA_PUSH(self->stack, pid, -1);
4601 return 0;
4602 }
4603 else {
4604 PyErr_SetString(UnpicklingError,
4605 "A load persistent id instruction was encountered,\n"
4606 "but no persistent_load function was specified.");
4607 return -1;
4608 }
4609}
4610
4611static int
4612load_pop(UnpicklerObject *self)
4613{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004614 int len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004615
4616 /* Note that we split the (pickle.py) stack into two stacks,
4617 * an object stack and a mark stack. We have to be clever and
4618 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004619 * mark stack first, and only signalling a stack underflow if
4620 * the object stack is empty and the mark stack doesn't match
4621 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004622 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004623 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004624 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004625 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004626 len--;
4627 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004628 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004629 } else {
4630 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004631 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004632 return 0;
4633}
4634
4635static int
4636load_pop_mark(UnpicklerObject *self)
4637{
4638 int i;
4639
4640 if ((i = marker(self)) < 0)
4641 return -1;
4642
4643 Pdata_clear(self->stack, i);
4644
4645 return 0;
4646}
4647
4648static int
4649load_dup(UnpicklerObject *self)
4650{
4651 PyObject *last;
4652 int len;
4653
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004654 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004655 return stack_underflow();
4656 last = self->stack->data[len - 1];
4657 PDATA_APPEND(self->stack, last, -1);
4658 return 0;
4659}
4660
4661static int
4662load_get(UnpicklerObject *self)
4663{
4664 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004665 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666 Py_ssize_t len;
4667 char *s;
4668
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004669 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004670 return -1;
4671 if (len < 2)
4672 return bad_readline();
4673
4674 key = PyLong_FromString(s, NULL, 10);
4675 if (key == NULL)
4676 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004677 idx = PyLong_AsSsize_t(key);
4678 if (idx == -1 && PyErr_Occurred()) {
4679 Py_DECREF(key);
4680 return -1;
4681 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004682
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004683 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004684 if (value == NULL) {
4685 if (!PyErr_Occurred())
4686 PyErr_SetObject(PyExc_KeyError, key);
4687 Py_DECREF(key);
4688 return -1;
4689 }
4690 Py_DECREF(key);
4691
4692 PDATA_APPEND(self->stack, value, -1);
4693 return 0;
4694}
4695
4696static int
4697load_binget(UnpicklerObject *self)
4698{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004699 PyObject *value;
4700 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004701 char *s;
4702
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004703 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004704 return -1;
4705
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004706 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004707
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004708 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004709 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004710 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004711 if (!PyErr_Occurred())
4712 PyErr_SetObject(PyExc_KeyError, key);
4713 Py_DECREF(key);
4714 return -1;
4715 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004716
4717 PDATA_APPEND(self->stack, value, -1);
4718 return 0;
4719}
4720
4721static int
4722load_long_binget(UnpicklerObject *self)
4723{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004724 PyObject *value;
4725 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004726 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004728 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729 return -1;
4730
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004731 idx = (long)Py_CHARMASK(s[0]);
4732 idx |= (long)Py_CHARMASK(s[1]) << 8;
4733 idx |= (long)Py_CHARMASK(s[2]) << 16;
4734 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004735
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004736 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004737 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004738 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004739 if (!PyErr_Occurred())
4740 PyErr_SetObject(PyExc_KeyError, key);
4741 Py_DECREF(key);
4742 return -1;
4743 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744
4745 PDATA_APPEND(self->stack, value, -1);
4746 return 0;
4747}
4748
4749/* Push an object from the extension registry (EXT[124]). nbytes is
4750 * the number of bytes following the opcode, holding the index (code) value.
4751 */
4752static int
4753load_extension(UnpicklerObject *self, int nbytes)
4754{
4755 char *codebytes; /* the nbytes bytes after the opcode */
4756 long code; /* calc_binint returns long */
4757 PyObject *py_code; /* code as a Python int */
4758 PyObject *obj; /* the object to push */
4759 PyObject *pair; /* (module_name, class_name) */
4760 PyObject *module_name, *class_name;
4761
4762 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004763 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764 return -1;
4765 code = calc_binint(codebytes, nbytes);
4766 if (code <= 0) { /* note that 0 is forbidden */
4767 /* Corrupt or hostile pickle. */
4768 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4769 return -1;
4770 }
4771
4772 /* Look for the code in the cache. */
4773 py_code = PyLong_FromLong(code);
4774 if (py_code == NULL)
4775 return -1;
4776 obj = PyDict_GetItem(extension_cache, py_code);
4777 if (obj != NULL) {
4778 /* Bingo. */
4779 Py_DECREF(py_code);
4780 PDATA_APPEND(self->stack, obj, -1);
4781 return 0;
4782 }
4783
4784 /* Look up the (module_name, class_name) pair. */
4785 pair = PyDict_GetItem(inverted_registry, py_code);
4786 if (pair == NULL) {
4787 Py_DECREF(py_code);
4788 PyErr_Format(PyExc_ValueError, "unregistered extension "
4789 "code %ld", code);
4790 return -1;
4791 }
4792 /* Since the extension registry is manipulable via Python code,
4793 * confirm that pair is really a 2-tuple of strings.
4794 */
4795 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4796 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4797 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4798 Py_DECREF(py_code);
4799 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4800 "isn't a 2-tuple of strings", code);
4801 return -1;
4802 }
4803 /* Load the object. */
4804 obj = find_class(self, module_name, class_name);
4805 if (obj == NULL) {
4806 Py_DECREF(py_code);
4807 return -1;
4808 }
4809 /* Cache code -> obj. */
4810 code = PyDict_SetItem(extension_cache, py_code, obj);
4811 Py_DECREF(py_code);
4812 if (code < 0) {
4813 Py_DECREF(obj);
4814 return -1;
4815 }
4816 PDATA_PUSH(self->stack, obj, -1);
4817 return 0;
4818}
4819
4820static int
4821load_put(UnpicklerObject *self)
4822{
4823 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004824 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004825 Py_ssize_t len;
4826 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004827
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004828 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004829 return -1;
4830 if (len < 2)
4831 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004832 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004833 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004834 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835
4836 key = PyLong_FromString(s, NULL, 10);
4837 if (key == NULL)
4838 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004839 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004840 Py_DECREF(key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004841 if (idx == -1 && PyErr_Occurred())
4842 return -1;
4843
4844 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004845}
4846
4847static int
4848load_binput(UnpicklerObject *self)
4849{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004850 PyObject *value;
4851 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004852 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004853
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004854 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004855 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004856
4857 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004858 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004860
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004861 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004863 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864}
4865
4866static int
4867load_long_binput(UnpicklerObject *self)
4868{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004869 PyObject *value;
4870 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004871 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004873 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004875
4876 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004877 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004879
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004880 idx = (long)Py_CHARMASK(s[0]);
4881 idx |= (long)Py_CHARMASK(s[1]) << 8;
4882 idx |= (long)Py_CHARMASK(s[2]) << 16;
4883 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004884
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004885 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004886}
4887
4888static int
4889do_append(UnpicklerObject *self, int x)
4890{
4891 PyObject *value;
4892 PyObject *list;
4893 int len, i;
4894
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004895 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004896 if (x > len || x <= 0)
4897 return stack_underflow();
4898 if (len == x) /* nothing to do */
4899 return 0;
4900
4901 list = self->stack->data[x - 1];
4902
4903 if (PyList_Check(list)) {
4904 PyObject *slice;
4905 Py_ssize_t list_len;
4906
4907 slice = Pdata_poplist(self->stack, x);
4908 if (!slice)
4909 return -1;
4910 list_len = PyList_GET_SIZE(list);
4911 i = PyList_SetSlice(list, list_len, list_len, slice);
4912 Py_DECREF(slice);
4913 return i;
4914 }
4915 else {
4916 PyObject *append_func;
4917
4918 append_func = PyObject_GetAttrString(list, "append");
4919 if (append_func == NULL)
4920 return -1;
4921 for (i = x; i < len; i++) {
4922 PyObject *result;
4923
4924 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004925 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004926 if (result == NULL) {
4927 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004928 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929 return -1;
4930 }
4931 Py_DECREF(result);
4932 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004933 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004934 }
4935
4936 return 0;
4937}
4938
4939static int
4940load_append(UnpicklerObject *self)
4941{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004942 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004943}
4944
4945static int
4946load_appends(UnpicklerObject *self)
4947{
4948 return do_append(self, marker(self));
4949}
4950
4951static int
4952do_setitems(UnpicklerObject *self, int x)
4953{
4954 PyObject *value, *key;
4955 PyObject *dict;
4956 int len, i;
4957 int status = 0;
4958
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004959 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960 if (x > len || x <= 0)
4961 return stack_underflow();
4962 if (len == x) /* nothing to do */
4963 return 0;
4964 if ((len - x) % 2 != 0) {
4965 /* Currupt or hostile pickle -- we never write one like this. */
4966 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4967 return -1;
4968 }
4969
4970 /* Here, dict does not actually need to be a PyDict; it could be anything
4971 that supports the __setitem__ attribute. */
4972 dict = self->stack->data[x - 1];
4973
4974 for (i = x + 1; i < len; i += 2) {
4975 key = self->stack->data[i - 1];
4976 value = self->stack->data[i];
4977 if (PyObject_SetItem(dict, key, value) < 0) {
4978 status = -1;
4979 break;
4980 }
4981 }
4982
4983 Pdata_clear(self->stack, x);
4984 return status;
4985}
4986
4987static int
4988load_setitem(UnpicklerObject *self)
4989{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004990 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004991}
4992
4993static int
4994load_setitems(UnpicklerObject *self)
4995{
4996 return do_setitems(self, marker(self));
4997}
4998
4999static int
5000load_build(UnpicklerObject *self)
5001{
5002 PyObject *state, *inst, *slotstate;
5003 PyObject *setstate;
5004 int status = 0;
5005
5006 /* Stack is ... instance, state. We want to leave instance at
5007 * the stack top, possibly mutated via instance.__setstate__(state).
5008 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005009 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010 return stack_underflow();
5011
5012 PDATA_POP(self->stack, state);
5013 if (state == NULL)
5014 return -1;
5015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005016 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005017
5018 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005019 if (setstate == NULL) {
5020 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5021 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005022 else {
5023 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005024 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005025 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005026 }
5027 else {
5028 PyObject *result;
5029
5030 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005031 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005032 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005033 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005034 Py_DECREF(setstate);
5035 if (result == NULL)
5036 return -1;
5037 Py_DECREF(result);
5038 return 0;
5039 }
5040
5041 /* A default __setstate__. First see whether state embeds a
5042 * slot state dict too (a proto 2 addition).
5043 */
5044 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5045 PyObject *tmp = state;
5046
5047 state = PyTuple_GET_ITEM(tmp, 0);
5048 slotstate = PyTuple_GET_ITEM(tmp, 1);
5049 Py_INCREF(state);
5050 Py_INCREF(slotstate);
5051 Py_DECREF(tmp);
5052 }
5053 else
5054 slotstate = NULL;
5055
5056 /* Set inst.__dict__ from the state dict (if any). */
5057 if (state != Py_None) {
5058 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005059 PyObject *d_key, *d_value;
5060 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005061
5062 if (!PyDict_Check(state)) {
5063 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5064 goto error;
5065 }
5066 dict = PyObject_GetAttrString(inst, "__dict__");
5067 if (dict == NULL)
5068 goto error;
5069
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005070 i = 0;
5071 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5072 /* normally the keys for instance attributes are
5073 interned. we should try to do that here. */
5074 Py_INCREF(d_key);
5075 if (PyUnicode_CheckExact(d_key))
5076 PyUnicode_InternInPlace(&d_key);
5077 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5078 Py_DECREF(d_key);
5079 goto error;
5080 }
5081 Py_DECREF(d_key);
5082 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005083 Py_DECREF(dict);
5084 }
5085
5086 /* Also set instance attributes from the slotstate dict (if any). */
5087 if (slotstate != NULL) {
5088 PyObject *d_key, *d_value;
5089 Py_ssize_t i;
5090
5091 if (!PyDict_Check(slotstate)) {
5092 PyErr_SetString(UnpicklingError,
5093 "slot state is not a dictionary");
5094 goto error;
5095 }
5096 i = 0;
5097 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5098 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5099 goto error;
5100 }
5101 }
5102
5103 if (0) {
5104 error:
5105 status = -1;
5106 }
5107
5108 Py_DECREF(state);
5109 Py_XDECREF(slotstate);
5110 return status;
5111}
5112
5113static int
5114load_mark(UnpicklerObject *self)
5115{
5116
5117 /* Note that we split the (pickle.py) stack into two stacks, an
5118 * object stack and a mark stack. Here we push a mark onto the
5119 * mark stack.
5120 */
5121
5122 if ((self->num_marks + 1) >= self->marks_size) {
5123 size_t alloc;
5124 int *marks;
5125
5126 /* Use the size_t type to check for overflow. */
5127 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005128 if (alloc > PY_SSIZE_T_MAX ||
5129 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005130 PyErr_NoMemory();
5131 return -1;
5132 }
5133
5134 if (self->marks == NULL)
5135 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
5136 else
5137 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
5138 if (marks == NULL) {
5139 PyErr_NoMemory();
5140 return -1;
5141 }
5142 self->marks = marks;
5143 self->marks_size = (Py_ssize_t)alloc;
5144 }
5145
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005146 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005147
5148 return 0;
5149}
5150
5151static int
5152load_reduce(UnpicklerObject *self)
5153{
5154 PyObject *callable = NULL;
5155 PyObject *argtup = NULL;
5156 PyObject *obj = NULL;
5157
5158 PDATA_POP(self->stack, argtup);
5159 if (argtup == NULL)
5160 return -1;
5161 PDATA_POP(self->stack, callable);
5162 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005163 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005164 Py_DECREF(callable);
5165 }
5166 Py_DECREF(argtup);
5167
5168 if (obj == NULL)
5169 return -1;
5170
5171 PDATA_PUSH(self->stack, obj, -1);
5172 return 0;
5173}
5174
5175/* Just raises an error if we don't know the protocol specified. PROTO
5176 * is the first opcode for protocols >= 2.
5177 */
5178static int
5179load_proto(UnpicklerObject *self)
5180{
5181 char *s;
5182 int i;
5183
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005184 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005185 return -1;
5186
5187 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005188 if (i <= HIGHEST_PROTOCOL) {
5189 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005190 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005191 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005192
5193 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5194 return -1;
5195}
5196
5197static PyObject *
5198load(UnpicklerObject *self)
5199{
5200 PyObject *err;
5201 PyObject *value = NULL;
5202 char *s;
5203
5204 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005205 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005206 Pdata_clear(self->stack, 0);
5207
5208 /* Convenient macros for the dispatch while-switch loop just below. */
5209#define OP(opcode, load_func) \
5210 case opcode: if (load_func(self) < 0) break; continue;
5211
5212#define OP_ARG(opcode, load_func, arg) \
5213 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5214
5215 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005216 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005217 break;
5218
5219 switch ((enum opcode)s[0]) {
5220 OP(NONE, load_none)
5221 OP(BININT, load_binint)
5222 OP(BININT1, load_binint1)
5223 OP(BININT2, load_binint2)
5224 OP(INT, load_int)
5225 OP(LONG, load_long)
5226 OP_ARG(LONG1, load_counted_long, 1)
5227 OP_ARG(LONG4, load_counted_long, 4)
5228 OP(FLOAT, load_float)
5229 OP(BINFLOAT, load_binfloat)
5230 OP(BINBYTES, load_binbytes)
5231 OP(SHORT_BINBYTES, load_short_binbytes)
5232 OP(BINSTRING, load_binstring)
5233 OP(SHORT_BINSTRING, load_short_binstring)
5234 OP(STRING, load_string)
5235 OP(UNICODE, load_unicode)
5236 OP(BINUNICODE, load_binunicode)
5237 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5238 OP_ARG(TUPLE1, load_counted_tuple, 1)
5239 OP_ARG(TUPLE2, load_counted_tuple, 2)
5240 OP_ARG(TUPLE3, load_counted_tuple, 3)
5241 OP(TUPLE, load_tuple)
5242 OP(EMPTY_LIST, load_empty_list)
5243 OP(LIST, load_list)
5244 OP(EMPTY_DICT, load_empty_dict)
5245 OP(DICT, load_dict)
5246 OP(OBJ, load_obj)
5247 OP(INST, load_inst)
5248 OP(NEWOBJ, load_newobj)
5249 OP(GLOBAL, load_global)
5250 OP(APPEND, load_append)
5251 OP(APPENDS, load_appends)
5252 OP(BUILD, load_build)
5253 OP(DUP, load_dup)
5254 OP(BINGET, load_binget)
5255 OP(LONG_BINGET, load_long_binget)
5256 OP(GET, load_get)
5257 OP(MARK, load_mark)
5258 OP(BINPUT, load_binput)
5259 OP(LONG_BINPUT, load_long_binput)
5260 OP(PUT, load_put)
5261 OP(POP, load_pop)
5262 OP(POP_MARK, load_pop_mark)
5263 OP(SETITEM, load_setitem)
5264 OP(SETITEMS, load_setitems)
5265 OP(PERSID, load_persid)
5266 OP(BINPERSID, load_binpersid)
5267 OP(REDUCE, load_reduce)
5268 OP(PROTO, load_proto)
5269 OP_ARG(EXT1, load_extension, 1)
5270 OP_ARG(EXT2, load_extension, 2)
5271 OP_ARG(EXT4, load_extension, 4)
5272 OP_ARG(NEWTRUE, load_bool, Py_True)
5273 OP_ARG(NEWFALSE, load_bool, Py_False)
5274
5275 case STOP:
5276 break;
5277
5278 case '\0':
5279 PyErr_SetNone(PyExc_EOFError);
5280 return NULL;
5281
5282 default:
5283 PyErr_Format(UnpicklingError,
5284 "invalid load key, '%c'.", s[0]);
5285 return NULL;
5286 }
5287
5288 break; /* and we are done! */
5289 }
5290
Antoine Pitrou04248a82010-10-12 20:51:21 +00005291 if (_Unpickler_SkipConsumed(self) < 0)
5292 return NULL;
5293
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005294 /* XXX: It is not clear what this is actually for. */
5295 if ((err = PyErr_Occurred())) {
5296 if (err == PyExc_EOFError) {
5297 PyErr_SetNone(PyExc_EOFError);
5298 }
5299 return NULL;
5300 }
5301
5302 PDATA_POP(self->stack, value);
5303 return value;
5304}
5305
5306PyDoc_STRVAR(Unpickler_load_doc,
5307"load() -> object. Load a pickle."
5308"\n"
5309"Read a pickled object representation from the open file object given in\n"
5310"the constructor, and return the reconstituted object hierarchy specified\n"
5311"therein.\n");
5312
5313static PyObject *
5314Unpickler_load(UnpicklerObject *self)
5315{
5316 /* Check whether the Unpickler was initialized correctly. This prevents
5317 segfaulting if a subclass overridden __init__ with a function that does
5318 not call Unpickler.__init__(). Here, we simply ensure that self->read
5319 is not NULL. */
5320 if (self->read == NULL) {
5321 PyErr_Format(UnpicklingError,
5322 "Unpickler.__init__() was not called by %s.__init__()",
5323 Py_TYPE(self)->tp_name);
5324 return NULL;
5325 }
5326
5327 return load(self);
5328}
5329
5330/* The name of find_class() is misleading. In newer pickle protocols, this
5331 function is used for loading any global (i.e., functions), not just
5332 classes. The name is kept only for backward compatibility. */
5333
5334PyDoc_STRVAR(Unpickler_find_class_doc,
5335"find_class(module_name, global_name) -> object.\n"
5336"\n"
5337"Return an object from a specified module, importing the module if\n"
5338"necessary. Subclasses may override this method (e.g. to restrict\n"
5339"unpickling of arbitrary classes and functions).\n"
5340"\n"
5341"This method is called whenever a class or a function object is\n"
5342"needed. Both arguments passed are str objects.\n");
5343
5344static PyObject *
5345Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5346{
5347 PyObject *global;
5348 PyObject *modules_dict;
5349 PyObject *module;
5350 PyObject *module_name, *global_name;
5351
5352 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5353 &module_name, &global_name))
5354 return NULL;
5355
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005356 /* Try to map the old names used in Python 2.x to the new ones used in
5357 Python 3.x. We do this only with old pickle protocols and when the
5358 user has not disabled the feature. */
5359 if (self->proto < 3 && self->fix_imports) {
5360 PyObject *key;
5361 PyObject *item;
5362
5363 /* Check if the global (i.e., a function or a class) was renamed
5364 or moved to another module. */
5365 key = PyTuple_Pack(2, module_name, global_name);
5366 if (key == NULL)
5367 return NULL;
5368 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5369 Py_DECREF(key);
5370 if (item) {
5371 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5372 PyErr_Format(PyExc_RuntimeError,
5373 "_compat_pickle.NAME_MAPPING values should be "
5374 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5375 return NULL;
5376 }
5377 module_name = PyTuple_GET_ITEM(item, 0);
5378 global_name = PyTuple_GET_ITEM(item, 1);
5379 if (!PyUnicode_Check(module_name) ||
5380 !PyUnicode_Check(global_name)) {
5381 PyErr_Format(PyExc_RuntimeError,
5382 "_compat_pickle.NAME_MAPPING values should be "
5383 "pairs of str, not (%.200s, %.200s)",
5384 Py_TYPE(module_name)->tp_name,
5385 Py_TYPE(global_name)->tp_name);
5386 return NULL;
5387 }
5388 }
5389 else if (PyErr_Occurred()) {
5390 return NULL;
5391 }
5392
5393 /* Check if the module was renamed. */
5394 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5395 if (item) {
5396 if (!PyUnicode_Check(item)) {
5397 PyErr_Format(PyExc_RuntimeError,
5398 "_compat_pickle.IMPORT_MAPPING values should be "
5399 "strings, not %.200s", Py_TYPE(item)->tp_name);
5400 return NULL;
5401 }
5402 module_name = item;
5403 }
5404 else if (PyErr_Occurred()) {
5405 return NULL;
5406 }
5407 }
5408
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005409 modules_dict = PySys_GetObject("modules");
5410 if (modules_dict == NULL)
5411 return NULL;
5412
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005413 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005414 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005415 if (PyErr_Occurred())
5416 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005417 module = PyImport_Import(module_name);
5418 if (module == NULL)
5419 return NULL;
5420 global = PyObject_GetAttr(module, global_name);
5421 Py_DECREF(module);
5422 }
5423 else {
5424 global = PyObject_GetAttr(module, global_name);
5425 }
5426 return global;
5427}
5428
5429static struct PyMethodDef Unpickler_methods[] = {
5430 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5431 Unpickler_load_doc},
5432 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5433 Unpickler_find_class_doc},
5434 {NULL, NULL} /* sentinel */
5435};
5436
5437static void
5438Unpickler_dealloc(UnpicklerObject *self)
5439{
5440 PyObject_GC_UnTrack((PyObject *)self);
5441 Py_XDECREF(self->readline);
5442 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005443 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005444 Py_XDECREF(self->stack);
5445 Py_XDECREF(self->pers_func);
5446 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005447 if (self->buffer.buf != NULL) {
5448 PyBuffer_Release(&self->buffer);
5449 self->buffer.buf = NULL;
5450 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005451
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005452 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005453 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005454 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005455 free(self->encoding);
5456 free(self->errors);
5457
5458 Py_TYPE(self)->tp_free((PyObject *)self);
5459}
5460
5461static int
5462Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5463{
5464 Py_VISIT(self->readline);
5465 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005466 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005467 Py_VISIT(self->stack);
5468 Py_VISIT(self->pers_func);
5469 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005470 return 0;
5471}
5472
5473static int
5474Unpickler_clear(UnpicklerObject *self)
5475{
5476 Py_CLEAR(self->readline);
5477 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005478 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005479 Py_CLEAR(self->stack);
5480 Py_CLEAR(self->pers_func);
5481 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005482 if (self->buffer.buf != NULL) {
5483 PyBuffer_Release(&self->buffer);
5484 self->buffer.buf = NULL;
5485 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005486
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005487 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005488 PyMem_Free(self->marks);
5489 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005490 PyMem_Free(self->input_line);
5491 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005492 free(self->encoding);
5493 self->encoding = NULL;
5494 free(self->errors);
5495 self->errors = NULL;
5496
5497 return 0;
5498}
5499
5500PyDoc_STRVAR(Unpickler_doc,
5501"Unpickler(file, *, encoding='ASCII', errors='strict')"
5502"\n"
5503"This takes a binary file for reading a pickle data stream.\n"
5504"\n"
5505"The protocol version of the pickle is detected automatically, so no\n"
5506"proto argument is needed.\n"
5507"\n"
5508"The file-like object must have two methods, a read() method\n"
5509"that takes an integer argument, and a readline() method that\n"
5510"requires no arguments. Both methods should return bytes.\n"
5511"Thus file-like object can be a binary file object opened for\n"
5512"reading, a BytesIO object, or any other custom object that\n"
5513"meets this interface.\n"
5514"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005515"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5516"which are used to control compatiblity support for pickle stream\n"
5517"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5518"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5519"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5520"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5521"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005522
5523static int
5524Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5525{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005526 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005527 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005528 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005529 char *encoding = NULL;
5530 char *errors = NULL;
5531
5532 /* XXX: That is an horrible error message. But, I don't know how to do
5533 better... */
5534 if (Py_SIZE(args) != 1) {
5535 PyErr_Format(PyExc_TypeError,
5536 "%s takes exactly one positional argument (%zd given)",
5537 Py_TYPE(self)->tp_name, Py_SIZE(args));
5538 return -1;
5539 }
5540
5541 /* Arguments parsing needs to be done in the __init__() method to allow
5542 subclasses to define their own __init__() method, which may (or may
5543 not) support Unpickler arguments. However, this means we need to be
5544 extra careful in the other Unpickler methods, since a subclass could
5545 forget to call Unpickler.__init__() thus breaking our internal
5546 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005547 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005548 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005549 return -1;
5550
5551 /* In case of multiple __init__() calls, clear previous content. */
5552 if (self->read != NULL)
5553 (void)Unpickler_clear(self);
5554
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005555 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005556 return -1;
5557
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005558 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005559 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005560
5561 self->fix_imports = PyObject_IsTrue(fix_imports);
5562 if (self->fix_imports == -1)
5563 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005564
5565 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5566 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5567 "persistent_load");
5568 if (self->pers_func == NULL)
5569 return -1;
5570 }
5571 else {
5572 self->pers_func = NULL;
5573 }
5574
5575 self->stack = (Pdata *)Pdata_New();
5576 if (self->stack == NULL)
5577 return -1;
5578
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005579 self->memo_size = 32;
5580 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005581 if (self->memo == NULL)
5582 return -1;
5583
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005584 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005585 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005586
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005587 return 0;
5588}
5589
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005590/* Define a proxy object for the Unpickler's internal memo object. This is to
5591 * avoid breaking code like:
5592 * unpickler.memo.clear()
5593 * and
5594 * unpickler.memo = saved_memo
5595 * Is this a good idea? Not really, but we don't want to break code that uses
5596 * it. Note that we don't implement the entire mapping API here. This is
5597 * intentional, as these should be treated as black-box implementation details.
5598 *
5599 * We do, however, have to implement pickling/unpickling support because of
5600 * real-world code like cvs2svn.
5601 */
5602
5603typedef struct {
5604 PyObject_HEAD
5605 UnpicklerObject *unpickler;
5606} UnpicklerMemoProxyObject;
5607
5608PyDoc_STRVAR(ump_clear_doc,
5609"memo.clear() -> None. Remove all items from memo.");
5610
5611static PyObject *
5612ump_clear(UnpicklerMemoProxyObject *self)
5613{
5614 _Unpickler_MemoCleanup(self->unpickler);
5615 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5616 if (self->unpickler->memo == NULL)
5617 return NULL;
5618 Py_RETURN_NONE;
5619}
5620
5621PyDoc_STRVAR(ump_copy_doc,
5622"memo.copy() -> new_memo. Copy the memo to a new object.");
5623
5624static PyObject *
5625ump_copy(UnpicklerMemoProxyObject *self)
5626{
5627 Py_ssize_t i;
5628 PyObject *new_memo = PyDict_New();
5629 if (new_memo == NULL)
5630 return NULL;
5631
5632 for (i = 0; i < self->unpickler->memo_size; i++) {
5633 int status;
5634 PyObject *key, *value;
5635
5636 value = self->unpickler->memo[i];
5637 if (value == NULL)
5638 continue;
5639
5640 key = PyLong_FromSsize_t(i);
5641 if (key == NULL)
5642 goto error;
5643 status = PyDict_SetItem(new_memo, key, value);
5644 Py_DECREF(key);
5645 if (status < 0)
5646 goto error;
5647 }
5648 return new_memo;
5649
5650error:
5651 Py_DECREF(new_memo);
5652 return NULL;
5653}
5654
5655PyDoc_STRVAR(ump_reduce_doc,
5656"memo.__reduce__(). Pickling support.");
5657
5658static PyObject *
5659ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5660{
5661 PyObject *reduce_value;
5662 PyObject *constructor_args;
5663 PyObject *contents = ump_copy(self);
5664 if (contents == NULL)
5665 return NULL;
5666
5667 reduce_value = PyTuple_New(2);
5668 if (reduce_value == NULL) {
5669 Py_DECREF(contents);
5670 return NULL;
5671 }
5672 constructor_args = PyTuple_New(1);
5673 if (constructor_args == NULL) {
5674 Py_DECREF(contents);
5675 Py_DECREF(reduce_value);
5676 return NULL;
5677 }
5678 PyTuple_SET_ITEM(constructor_args, 0, contents);
5679 Py_INCREF((PyObject *)&PyDict_Type);
5680 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5681 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5682 return reduce_value;
5683}
5684
5685static PyMethodDef unpicklerproxy_methods[] = {
5686 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5687 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5688 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5689 {NULL, NULL} /* sentinel */
5690};
5691
5692static void
5693UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5694{
5695 PyObject_GC_UnTrack(self);
5696 Py_XDECREF(self->unpickler);
5697 PyObject_GC_Del((PyObject *)self);
5698}
5699
5700static int
5701UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5702 visitproc visit, void *arg)
5703{
5704 Py_VISIT(self->unpickler);
5705 return 0;
5706}
5707
5708static int
5709UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5710{
5711 Py_CLEAR(self->unpickler);
5712 return 0;
5713}
5714
5715static PyTypeObject UnpicklerMemoProxyType = {
5716 PyVarObject_HEAD_INIT(NULL, 0)
5717 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5718 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5719 0,
5720 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5721 0, /* tp_print */
5722 0, /* tp_getattr */
5723 0, /* tp_setattr */
5724 0, /* tp_compare */
5725 0, /* tp_repr */
5726 0, /* tp_as_number */
5727 0, /* tp_as_sequence */
5728 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005729 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005730 0, /* tp_call */
5731 0, /* tp_str */
5732 PyObject_GenericGetAttr, /* tp_getattro */
5733 PyObject_GenericSetAttr, /* tp_setattro */
5734 0, /* tp_as_buffer */
5735 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5736 0, /* tp_doc */
5737 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5738 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5739 0, /* tp_richcompare */
5740 0, /* tp_weaklistoffset */
5741 0, /* tp_iter */
5742 0, /* tp_iternext */
5743 unpicklerproxy_methods, /* tp_methods */
5744};
5745
5746static PyObject *
5747UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5748{
5749 UnpicklerMemoProxyObject *self;
5750
5751 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5752 &UnpicklerMemoProxyType);
5753 if (self == NULL)
5754 return NULL;
5755 Py_INCREF(unpickler);
5756 self->unpickler = unpickler;
5757 PyObject_GC_Track(self);
5758 return (PyObject *)self;
5759}
5760
5761/*****************************************************************************/
5762
5763
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005764static PyObject *
5765Unpickler_get_memo(UnpicklerObject *self)
5766{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005767 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005768}
5769
5770static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005771Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005772{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005773 PyObject **new_memo;
5774 Py_ssize_t new_memo_size = 0;
5775 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005776
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005777 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005778 PyErr_SetString(PyExc_TypeError,
5779 "attribute deletion is not supported");
5780 return -1;
5781 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005782
5783 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5784 UnpicklerObject *unpickler =
5785 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5786
5787 new_memo_size = unpickler->memo_size;
5788 new_memo = _Unpickler_NewMemo(new_memo_size);
5789 if (new_memo == NULL)
5790 return -1;
5791
5792 for (i = 0; i < new_memo_size; i++) {
5793 Py_XINCREF(unpickler->memo[i]);
5794 new_memo[i] = unpickler->memo[i];
5795 }
5796 }
5797 else if (PyDict_Check(obj)) {
5798 Py_ssize_t i = 0;
5799 PyObject *key, *value;
5800
5801 new_memo_size = PyDict_Size(obj);
5802 new_memo = _Unpickler_NewMemo(new_memo_size);
5803 if (new_memo == NULL)
5804 return -1;
5805
5806 while (PyDict_Next(obj, &i, &key, &value)) {
5807 Py_ssize_t idx;
5808 if (!PyLong_Check(key)) {
5809 PyErr_SetString(PyExc_TypeError,
5810 "memo key must be integers");
5811 goto error;
5812 }
5813 idx = PyLong_AsSsize_t(key);
5814 if (idx == -1 && PyErr_Occurred())
5815 goto error;
5816 if (_Unpickler_MemoPut(self, idx, value) < 0)
5817 goto error;
5818 }
5819 }
5820 else {
5821 PyErr_Format(PyExc_TypeError,
5822 "'memo' attribute must be an UnpicklerMemoProxy object"
5823 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005824 return -1;
5825 }
5826
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005827 _Unpickler_MemoCleanup(self);
5828 self->memo_size = new_memo_size;
5829 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005830
5831 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005832
5833 error:
5834 if (new_memo_size) {
5835 i = new_memo_size;
5836 while (--i >= 0) {
5837 Py_XDECREF(new_memo[i]);
5838 }
5839 PyMem_FREE(new_memo);
5840 }
5841 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005842}
5843
5844static PyObject *
5845Unpickler_get_persload(UnpicklerObject *self)
5846{
5847 if (self->pers_func == NULL)
5848 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5849 else
5850 Py_INCREF(self->pers_func);
5851 return self->pers_func;
5852}
5853
5854static int
5855Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5856{
5857 PyObject *tmp;
5858
5859 if (value == NULL) {
5860 PyErr_SetString(PyExc_TypeError,
5861 "attribute deletion is not supported");
5862 return -1;
5863 }
5864 if (!PyCallable_Check(value)) {
5865 PyErr_SetString(PyExc_TypeError,
5866 "persistent_load must be a callable taking "
5867 "one argument");
5868 return -1;
5869 }
5870
5871 tmp = self->pers_func;
5872 Py_INCREF(value);
5873 self->pers_func = value;
5874 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5875
5876 return 0;
5877}
5878
5879static PyGetSetDef Unpickler_getsets[] = {
5880 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5881 {"persistent_load", (getter)Unpickler_get_persload,
5882 (setter)Unpickler_set_persload},
5883 {NULL}
5884};
5885
5886static PyTypeObject Unpickler_Type = {
5887 PyVarObject_HEAD_INIT(NULL, 0)
5888 "_pickle.Unpickler", /*tp_name*/
5889 sizeof(UnpicklerObject), /*tp_basicsize*/
5890 0, /*tp_itemsize*/
5891 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5892 0, /*tp_print*/
5893 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005894 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005895 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005896 0, /*tp_repr*/
5897 0, /*tp_as_number*/
5898 0, /*tp_as_sequence*/
5899 0, /*tp_as_mapping*/
5900 0, /*tp_hash*/
5901 0, /*tp_call*/
5902 0, /*tp_str*/
5903 0, /*tp_getattro*/
5904 0, /*tp_setattro*/
5905 0, /*tp_as_buffer*/
5906 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5907 Unpickler_doc, /*tp_doc*/
5908 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5909 (inquiry)Unpickler_clear, /*tp_clear*/
5910 0, /*tp_richcompare*/
5911 0, /*tp_weaklistoffset*/
5912 0, /*tp_iter*/
5913 0, /*tp_iternext*/
5914 Unpickler_methods, /*tp_methods*/
5915 0, /*tp_members*/
5916 Unpickler_getsets, /*tp_getset*/
5917 0, /*tp_base*/
5918 0, /*tp_dict*/
5919 0, /*tp_descr_get*/
5920 0, /*tp_descr_set*/
5921 0, /*tp_dictoffset*/
5922 (initproc)Unpickler_init, /*tp_init*/
5923 PyType_GenericAlloc, /*tp_alloc*/
5924 PyType_GenericNew, /*tp_new*/
5925 PyObject_GC_Del, /*tp_free*/
5926 0, /*tp_is_gc*/
5927};
5928
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005929PyDoc_STRVAR(pickle_dump_doc,
5930"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5931"\n"
5932"Write a pickled representation of obj to the open file object file. This\n"
5933"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5934"efficient.\n"
5935"\n"
5936"The optional protocol argument tells the pickler to use the given protocol;\n"
5937"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5938"backward-incompatible protocol designed for Python 3.0.\n"
5939"\n"
5940"Specifying a negative protocol version selects the highest protocol version\n"
5941"supported. The higher the protocol used, the more recent the version of\n"
5942"Python needed to read the pickle produced.\n"
5943"\n"
5944"The file argument must have a write() method that accepts a single bytes\n"
5945"argument. It can thus be a file object opened for binary writing, a\n"
5946"io.BytesIO instance, or any other custom object that meets this interface.\n"
5947"\n"
5948"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5949"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5950"so that the pickle data stream is readable with Python 2.x.\n");
5951
5952static PyObject *
5953pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5954{
5955 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5956 PyObject *obj;
5957 PyObject *file;
5958 PyObject *proto = NULL;
5959 PyObject *fix_imports = Py_True;
5960 PicklerObject *pickler;
5961
5962 /* fix_imports is a keyword-only argument. */
5963 if (Py_SIZE(args) > 3) {
5964 PyErr_Format(PyExc_TypeError,
5965 "pickle.dump() takes at most 3 positional "
5966 "argument (%zd given)", Py_SIZE(args));
5967 return NULL;
5968 }
5969
5970 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5971 &obj, &file, &proto, &fix_imports))
5972 return NULL;
5973
5974 pickler = _Pickler_New();
5975 if (pickler == NULL)
5976 return NULL;
5977
5978 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5979 goto error;
5980
5981 if (_Pickler_SetOutputStream(pickler, file) < 0)
5982 goto error;
5983
5984 if (dump(pickler, obj) < 0)
5985 goto error;
5986
5987 if (_Pickler_FlushToFile(pickler) < 0)
5988 goto error;
5989
5990 Py_DECREF(pickler);
5991 Py_RETURN_NONE;
5992
5993 error:
5994 Py_XDECREF(pickler);
5995 return NULL;
5996}
5997
5998PyDoc_STRVAR(pickle_dumps_doc,
5999"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6000"\n"
6001"Return the pickled representation of the object as a bytes\n"
6002"object, instead of writing it to a file.\n"
6003"\n"
6004"The optional protocol argument tells the pickler to use the given protocol;\n"
6005"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6006"backward-incompatible protocol designed for Python 3.0.\n"
6007"\n"
6008"Specifying a negative protocol version selects the highest protocol version\n"
6009"supported. The higher the protocol used, the more recent the version of\n"
6010"Python needed to read the pickle produced.\n"
6011"\n"
6012"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6013"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6014"so that the pickle data stream is readable with Python 2.x.\n");
6015
6016static PyObject *
6017pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6018{
6019 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6020 PyObject *obj;
6021 PyObject *proto = NULL;
6022 PyObject *result;
6023 PyObject *fix_imports = Py_True;
6024 PicklerObject *pickler;
6025
6026 /* fix_imports is a keyword-only argument. */
6027 if (Py_SIZE(args) > 2) {
6028 PyErr_Format(PyExc_TypeError,
6029 "pickle.dumps() takes at most 2 positional "
6030 "argument (%zd given)", Py_SIZE(args));
6031 return NULL;
6032 }
6033
6034 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6035 &obj, &proto, &fix_imports))
6036 return NULL;
6037
6038 pickler = _Pickler_New();
6039 if (pickler == NULL)
6040 return NULL;
6041
6042 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6043 goto error;
6044
6045 if (dump(pickler, obj) < 0)
6046 goto error;
6047
6048 result = _Pickler_GetString(pickler);
6049 Py_DECREF(pickler);
6050 return result;
6051
6052 error:
6053 Py_XDECREF(pickler);
6054 return NULL;
6055}
6056
6057PyDoc_STRVAR(pickle_load_doc,
6058"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6059"\n"
6060"Read a pickled object representation from the open file object file and\n"
6061"return the reconstituted object hierarchy specified therein. This is\n"
6062"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6063"\n"
6064"The protocol version of the pickle is detected automatically, so no protocol\n"
6065"argument is needed. Bytes past the pickled object's representation are\n"
6066"ignored.\n"
6067"\n"
6068"The argument file must have two methods, a read() method that takes an\n"
6069"integer argument, and a readline() method that requires no arguments. Both\n"
6070"methods should return bytes. Thus *file* can be a binary file object opened\n"
6071"for reading, a BytesIO object, or any other custom object that meets this\n"
6072"interface.\n"
6073"\n"
6074"Optional keyword arguments are fix_imports, encoding and errors,\n"
6075"which are used to control compatiblity support for pickle stream generated\n"
6076"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6077"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6078"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6079"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6080
6081static PyObject *
6082pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6083{
6084 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6085 PyObject *file;
6086 PyObject *fix_imports = Py_True;
6087 PyObject *result;
6088 char *encoding = NULL;
6089 char *errors = NULL;
6090 UnpicklerObject *unpickler;
6091
6092 /* fix_imports, encoding and errors are a keyword-only argument. */
6093 if (Py_SIZE(args) != 1) {
6094 PyErr_Format(PyExc_TypeError,
6095 "pickle.load() takes exactly one positional "
6096 "argument (%zd given)", Py_SIZE(args));
6097 return NULL;
6098 }
6099
6100 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6101 &file, &fix_imports, &encoding, &errors))
6102 return NULL;
6103
6104 unpickler = _Unpickler_New();
6105 if (unpickler == NULL)
6106 return NULL;
6107
6108 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6109 goto error;
6110
6111 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6112 goto error;
6113
6114 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6115 if (unpickler->fix_imports == -1)
6116 goto error;
6117
6118 result = load(unpickler);
6119 Py_DECREF(unpickler);
6120 return result;
6121
6122 error:
6123 Py_XDECREF(unpickler);
6124 return NULL;
6125}
6126
6127PyDoc_STRVAR(pickle_loads_doc,
6128"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6129"\n"
6130"Read a pickled object hierarchy from a bytes object and return the\n"
6131"reconstituted object hierarchy specified therein\n"
6132"\n"
6133"The protocol version of the pickle is detected automatically, so no protocol\n"
6134"argument is needed. Bytes past the pickled object's representation are\n"
6135"ignored.\n"
6136"\n"
6137"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6138"are used to control compatiblity support for pickle stream generated\n"
6139"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6140"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6141"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6142"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6143
6144static PyObject *
6145pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6146{
6147 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6148 PyObject *input;
6149 PyObject *fix_imports = Py_True;
6150 PyObject *result;
6151 char *encoding = NULL;
6152 char *errors = NULL;
6153 UnpicklerObject *unpickler;
6154
6155 /* fix_imports, encoding and errors are a keyword-only argument. */
6156 if (Py_SIZE(args) != 1) {
6157 PyErr_Format(PyExc_TypeError,
6158 "pickle.loads() takes exactly one positional "
6159 "argument (%zd given)", Py_SIZE(args));
6160 return NULL;
6161 }
6162
6163 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6164 &input, &fix_imports, &encoding, &errors))
6165 return NULL;
6166
6167 unpickler = _Unpickler_New();
6168 if (unpickler == NULL)
6169 return NULL;
6170
6171 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6172 goto error;
6173
6174 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6175 goto error;
6176
6177 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6178 if (unpickler->fix_imports == -1)
6179 goto error;
6180
6181 result = load(unpickler);
6182 Py_DECREF(unpickler);
6183 return result;
6184
6185 error:
6186 Py_XDECREF(unpickler);
6187 return NULL;
6188}
6189
6190
6191static struct PyMethodDef pickle_methods[] = {
6192 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6193 pickle_dump_doc},
6194 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6195 pickle_dumps_doc},
6196 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6197 pickle_load_doc},
6198 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6199 pickle_loads_doc},
6200 {NULL, NULL} /* sentinel */
6201};
6202
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006203static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006204initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006205{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006206 PyObject *copyreg = NULL;
6207 PyObject *compat_pickle = NULL;
6208
6209 /* XXX: We should ensure that the types of the dictionaries imported are
6210 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6211 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006212
6213 copyreg = PyImport_ImportModule("copyreg");
6214 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006215 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006216 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6217 if (!dispatch_table)
6218 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006219 extension_registry = \
6220 PyObject_GetAttrString(copyreg, "_extension_registry");
6221 if (!extension_registry)
6222 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006223 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6224 if (!inverted_registry)
6225 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006226 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6227 if (!extension_cache)
6228 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006229 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006230
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006231 /* Load the 2.x -> 3.x stdlib module mapping tables */
6232 compat_pickle = PyImport_ImportModule("_compat_pickle");
6233 if (!compat_pickle)
6234 goto error;
6235 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6236 if (!name_mapping_2to3)
6237 goto error;
6238 if (!PyDict_CheckExact(name_mapping_2to3)) {
6239 PyErr_Format(PyExc_RuntimeError,
6240 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6241 Py_TYPE(name_mapping_2to3)->tp_name);
6242 goto error;
6243 }
6244 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6245 "IMPORT_MAPPING");
6246 if (!import_mapping_2to3)
6247 goto error;
6248 if (!PyDict_CheckExact(import_mapping_2to3)) {
6249 PyErr_Format(PyExc_RuntimeError,
6250 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6251 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6252 goto error;
6253 }
6254 /* ... and the 3.x -> 2.x mapping tables */
6255 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6256 "REVERSE_NAME_MAPPING");
6257 if (!name_mapping_3to2)
6258 goto error;
6259 if (!PyDict_CheckExact(name_mapping_3to2)) {
6260 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006261 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006262 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6263 goto error;
6264 }
6265 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6266 "REVERSE_IMPORT_MAPPING");
6267 if (!import_mapping_3to2)
6268 goto error;
6269 if (!PyDict_CheckExact(import_mapping_3to2)) {
6270 PyErr_Format(PyExc_RuntimeError,
6271 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6272 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6273 goto error;
6274 }
6275 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006276
6277 empty_tuple = PyTuple_New(0);
6278 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006279 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006280 two_tuple = PyTuple_New(2);
6281 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006282 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006283 /* We use this temp container with no regard to refcounts, or to
6284 * keeping containees alive. Exempt from GC, because we don't
6285 * want anything looking at two_tuple() by magic.
6286 */
6287 PyObject_GC_UnTrack(two_tuple);
6288
6289 return 0;
6290
6291 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006292 Py_CLEAR(copyreg);
6293 Py_CLEAR(dispatch_table);
6294 Py_CLEAR(extension_registry);
6295 Py_CLEAR(inverted_registry);
6296 Py_CLEAR(extension_cache);
6297 Py_CLEAR(compat_pickle);
6298 Py_CLEAR(name_mapping_2to3);
6299 Py_CLEAR(import_mapping_2to3);
6300 Py_CLEAR(name_mapping_3to2);
6301 Py_CLEAR(import_mapping_3to2);
6302 Py_CLEAR(empty_tuple);
6303 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006304 return -1;
6305}
6306
6307static struct PyModuleDef _picklemodule = {
6308 PyModuleDef_HEAD_INIT,
6309 "_pickle",
6310 pickle_module_doc,
6311 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006312 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006313 NULL,
6314 NULL,
6315 NULL,
6316 NULL
6317};
6318
6319PyMODINIT_FUNC
6320PyInit__pickle(void)
6321{
6322 PyObject *m;
6323
6324 if (PyType_Ready(&Unpickler_Type) < 0)
6325 return NULL;
6326 if (PyType_Ready(&Pickler_Type) < 0)
6327 return NULL;
6328 if (PyType_Ready(&Pdata_Type) < 0)
6329 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006330 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6331 return NULL;
6332 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6333 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006334
6335 /* Create the module and add the functions. */
6336 m = PyModule_Create(&_picklemodule);
6337 if (m == NULL)
6338 return NULL;
6339
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006340 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006341 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6342 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006343 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006344 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6345 return NULL;
6346
6347 /* Initialize the exceptions. */
6348 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6349 if (PickleError == NULL)
6350 return NULL;
6351 PicklingError = \
6352 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6353 if (PicklingError == NULL)
6354 return NULL;
6355 UnpicklingError = \
6356 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6357 if (UnpicklingError == NULL)
6358 return NULL;
6359
6360 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6361 return NULL;
6362 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6363 return NULL;
6364 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6365 return NULL;
6366
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006367 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006368 return NULL;
6369
6370 return m;
6371}