blob: 287f0a3c15affa7eec03db24087406dc0c06b72e [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
/* Docstring text for the module, stored under the name pickle_module_doc. */
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
7/* Bump this when new opcodes are added to the pickle protocol. */
8enum {
/* Largest protocol number accepted by _Pickler_SetProtocol(); also the
   value substituted when a negative protocol is requested. */
9 HIGHEST_PROTOCOL = 3,
/* Protocol used when the caller passes no protocol (NULL or None). */
10 DEFAULT_PROTOCOL = 3
11};
12
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000013/* Pickle opcodes. These must be kept updated with pickle.py.
14 Extensive docs are in pickletools.py. */
/* One enumerator per pickle opcode; each value is the single byte that
   identifies the opcode in a pickle stream.
   NOTE(review): the protocol 2 opcodes are written as character constants
   such as '\x80'.  In C the value of such a constant depends on whether
   plain char is signed, so these enumerators may be negative on some
   platforms -- confirm that every comparison against them uses a
   consistently typed operand. */
15enum opcode {
16 MARK = '(',
17 STOP = '.',
18 POP = '0',
19 POP_MARK = '1',
20 DUP = '2',
21 FLOAT = 'F',
22 INT = 'I',
23 BININT = 'J',
24 BININT1 = 'K',
25 LONG = 'L',
26 BININT2 = 'M',
27 NONE = 'N',
28 PERSID = 'P',
29 BINPERSID = 'Q',
30 REDUCE = 'R',
31 STRING = 'S',
32 BINSTRING = 'T',
33 SHORT_BINSTRING = 'U',
34 UNICODE = 'V',
35 BINUNICODE = 'X',
36 APPEND = 'a',
37 BUILD = 'b',
38 GLOBAL = 'c',
39 DICT = 'd',
40 EMPTY_DICT = '}',
41 APPENDS = 'e',
42 GET = 'g',
43 BINGET = 'h',
44 INST = 'i',
45 LONG_BINGET = 'j',
46 LIST = 'l',
47 EMPTY_LIST = ']',
48 OBJ = 'o',
49 PUT = 'p',
50 BINPUT = 'q',
51 LONG_BINPUT = 'r',
52 SETITEM = 's',
53 TUPLE = 't',
54 EMPTY_TUPLE = ')',
55 SETITEMS = 'u',
56 BINFLOAT = 'G',
57
58 /* Protocol 2. */
59 PROTO = '\x80',
60 NEWOBJ = '\x81',
61 EXT1 = '\x82',
62 EXT2 = '\x83',
63 EXT4 = '\x84',
64 TUPLE1 = '\x85',
65 TUPLE2 = '\x86',
66 TUPLE3 = '\x87',
67 NEWTRUE = '\x88',
68 NEWFALSE = '\x89',
69 LONG1 = '\x8a',
70 LONG4 = '\x8b',
71
72 /* Protocol 3 (Python 3.x) */
73 BINBYTES = 'B',
Victor Stinner132ef6c2010-11-09 09:39:41 +000074 SHORT_BINBYTES = 'C'
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000075};
76
77/* These aren't opcodes -- they're ways to pickle bools before protocol 2
78 * so that unpicklers written before bools were introduced unpickle them
79 * as ints, but unpicklers after can recognize that bools were intended.
80 * Note that protocol 2 added direct ways to pickle bools.
81 */
/* Each string is the INT opcode byte 'I' followed by "01" or "00" and a
   newline.  Any earlier definitions of TRUE/FALSE are removed first. */
82#undef TRUE
83#define TRUE "I01\n"
84#undef FALSE
85#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
87enum {
88 /* Keep in sync with pickle.Pickler._BATCHSIZE. This is how many elements
89 batch_list/dict() pumps out before doing APPENDS/SETITEMS. Nothing will
90 break if this gets out of sync with pickle.py, but it's unclear that would
91 help anything either. */
92 BATCHSIZE = 1000,
93
94 /* Nesting limit until Pickler, when running in "fast mode", starts
95 checking for self-referential data-structures. */
96 FAST_NESTING_LIMIT = 50,
97
Antoine Pitrouea99c5c2010-09-09 18:33:21 +000098 /* Initial size of the write buffer of Pickler (used by _Pickler_New()). */
99 WRITE_BUF_SIZE = 4096,
100
101 /* Maximum size of the write buffer of Pickler when pickling to a
102 stream. This is ignored for in-memory pickling (i.e. when the
   Pickler has no write() method; see _Pickler_Write()). */
103 MAX_WRITE_BUF_SIZE = 64 * 1024,
Antoine Pitrou04248a82010-10-12 20:51:21 +0000104
105 /* Prefetch size when unpickling (disabled on unpeekable streams);
   this is the length passed to the stream's peek() method. */
Victor Stinner132ef6c2010-11-09 09:39:41 +0000106 PREFETCH = 8192 * 16
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000107};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
/* All of the module-level object pointers below start out as NULL. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really necessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
/* Growable array of object pointers.  The number of items currently on the
   stack is kept in the variable-size header and accessed via Py_SIZE(). */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data; /* heap array holding the stacked objects */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000156 int i = Py_SIZE(self);
157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
/* Type object for Pdata.  Only the name, the basic size, the item size and
   the destructor are filled in; every remaining slot is left
   zero-initialized. */
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
193Pdata_clear(Pdata *self, int clearto)
194{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000195 int i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
261/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER immediately.
   NOTE(review): on that failure path the reference to O is neither stored
   nor released, so it appears to leak -- confirm whether callers rely on
   this. */
262#define PDATA_PUSH(D, O, ER) do { \
263 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
264
265/* Push an object on stack, adding a new reference to the object.
   If the push fails, the enclosing function returns ER immediately.
   NOTE(review): the reference added by Py_INCREF is not dropped on that
   failure path. */
266#define PDATA_APPEND(D, O, ER) do { \
267 Py_INCREF((O)); \
268 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
/* One slot of the pickler memo hashtable.  A NULL me_key marks an unused
   slot; a used slot holds a reference to its key. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
306 long me_value;
307} PyMemoEntry;
308
/* Open-addressing hashtable used as the pickler memo. */
309typedef struct {
310 Py_ssize_t mt_mask; /* mt_allocated - 1; used to reduce a hash to a slot index */
311 Py_ssize_t mt_used; /* number of slots whose me_key is set */
312 Py_ssize_t mt_allocated; /* number of slots in mt_table (a power of two) */
313 PyMemoEntry *mt_table; /* heap-allocated array of slots */
314} PyMemoTable;
315
/* Per-instance state of a Pickler. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keeps track of the objects
 already seen, to support pickling of
 self-referential objects. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg; /* Cached 1-tuple reused by _Pickler_FastCall(),
 can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytes object before
 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000331 int buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
 Fast mode disables the use of the memo,
 thereby speeding up the pickling process by
 not generating superfluous PUT opcodes. It
 should not be used with self-referential
 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
/* Per-instance state of an Unpickler. */
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg; /* Cached 1-tuple reused by _Unpickler_FastCall(),
 can be NULL. */
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer; /* Buffer view acquired by _Unpickler_SetStringInput(). */
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
 decoding strings pickled using Python
 2.x. The default value is "ASCII" */
369 char *errors; /* Name of the error handling scheme to use when
 decoding strings. The default value is
 "strict". */
372 int *marks; /* Mark stack, used for unpickling container
 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
/* These functions and type objects are referenced before the point in the
   file where they are defined. */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000388/*************************************************************************
389 A custom hashtable mapping void* to longs. This is used by the pickler for
390 memoization. Using a custom hashtable rather than PyDict allows us to skip
391 a bunch of unnecessary object creation. This makes a huge performance
392 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
/* MT_MINSIZE: number of slots in a freshly created memo table, and the
   smallest size a resize will choose.
   PERTURB_SHIFT: number of bits the perturbation value is shifted right on
   each probe in _PyMemoTable_Lookup(). */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000394#define MT_MINSIZE 8
395#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
559static long *
560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
570PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
607/* Helpers for creating the argument tuple passed to functions. This has the
608 performance advantage of calling PyTuple_New() only once.
609
610 XXX(avassalotti): Inline directly in _Pickler_FastCall() and
611 _Unpickler_FastCall(). */
/* ARG_TUP(self, obj): make sure (self)->arg is a 1-tuple (creating it on
   first use) and store obj as its only item, releasing whatever item was
   there before.  The reference to obj is consumed either way: it is stored
   in the tuple, or released if the tuple could not be created -- in which
   case (self)->arg is left NULL. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000612#define ARG_TUP(self, obj) do { \
613 if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \
614 Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \
615 PyTuple_SET_ITEM((self)->arg, 0, (obj)); \
616 } \
617 else { \
618 Py_DECREF((obj)); \
619 } \
620 } while (0)
621
/* FREE_ARG_TUP(self): if anything besides `self` holds a reference to the
   cached tuple (reference count above 1), drop the cached tuple so that it
   is not reused.  (self)->arg must not be NULL here. */
622#define FREE_ARG_TUP(self) do { \
623 if ((self)->arg->ob_refcnt > 1) \
624 Py_CLEAR((self)->arg); \
625 } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provide any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either way, the loading time of a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
703static int
704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
738 self->max_output_len = (self->output_len + n) * 2;
739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
/* Sentinel for the `n` argument of _Unpickler_ReadFromFile(): read one
   whole line with readline() instead of a fixed number of bytes. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
1037 *result = self->input_buffer;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->next_read_idx = num_read;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 return num_read;
1040 }
1041
1042 /* If we get here, we've run off the end of the input string. Return the
1043 remaining string and let the caller figure it out. */
1044 *result = self->input_buffer + self->next_read_idx;
1045 num_read = i - self->next_read_idx;
1046 self->next_read_idx = i;
1047 return num_read;
1048}
1049
1050/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1051 will be modified in place. */
1052static int
1053_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1054{
1055 Py_ssize_t i;
1056 PyObject **memo;
1057
1058 assert(new_size > self->memo_size);
1059
1060 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1061 if (memo == NULL) {
1062 PyErr_NoMemory();
1063 return -1;
1064 }
1065 self->memo = memo;
1066 for (i = self->memo_size; i < new_size; i++)
1067 self->memo[i] = NULL;
1068 self->memo_size = new_size;
1069 return 0;
1070}
1071
1072/* Returns NULL if idx is out of bounds. */
1073static PyObject *
1074_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1075{
1076 if (idx < 0 || idx >= self->memo_size)
1077 return NULL;
1078
1079 return self->memo[idx];
1080}
1081
1082/* Returns -1 (with an exception set) on failure, 0 on success.
1083 This takes its own reference to `value`. */
1084static int
1085_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1086{
1087 PyObject *old_item;
1088
1089 if (idx >= self->memo_size) {
1090 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1091 return -1;
1092 assert(idx < self->memo_size);
1093 }
1094 Py_INCREF(value);
1095 old_item = self->memo[idx];
1096 self->memo[idx] = value;
1097 Py_XDECREF(old_item);
1098 return 0;
1099}
1100
1101static PyObject **
1102_Unpickler_NewMemo(Py_ssize_t new_size)
1103{
1104 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1105 if (memo == NULL)
1106 return NULL;
1107 memset(memo, 0, new_size * sizeof(PyObject *));
1108 return memo;
1109}
1110
1111/* Free the unpickler's memo, taking care to decref any items left in it. */
1112static void
1113_Unpickler_MemoCleanup(UnpicklerObject *self)
1114{
1115 Py_ssize_t i;
1116 PyObject **memo = self->memo;
1117
1118 if (self->memo == NULL)
1119 return;
1120 self->memo = NULL;
1121 i = self->memo_size;
1122 while (--i >= 0) {
1123 Py_XDECREF(memo[i]);
1124 }
1125 PyMem_FREE(memo);
1126}
1127
1128static UnpicklerObject *
1129_Unpickler_New(void)
1130{
1131 UnpicklerObject *self;
1132
1133 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1134 if (self == NULL)
1135 return NULL;
1136
1137 self->stack = (Pdata *)Pdata_New();
1138 if (self->stack == NULL) {
1139 Py_DECREF(self);
1140 return NULL;
1141 }
1142 memset(&self->buffer, 0, sizeof(Py_buffer));
1143
1144 self->memo_size = 32;
1145 self->memo = _Unpickler_NewMemo(self->memo_size);
1146 if (self->memo == NULL) {
1147 Py_DECREF(self);
1148 return NULL;
1149 }
1150
1151 self->arg = NULL;
1152 self->pers_func = NULL;
1153 self->input_buffer = NULL;
1154 self->input_line = NULL;
1155 self->input_len = 0;
1156 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001157 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158 self->read = NULL;
1159 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001160 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001161 self->encoding = NULL;
1162 self->errors = NULL;
1163 self->marks = NULL;
1164 self->num_marks = 0;
1165 self->marks_size = 0;
1166 self->proto = 0;
1167 self->fix_imports = 0;
1168
1169 return self;
1170}
1171
1172/* Returns -1 (with an exception set) on failure, 0 on success. This may
1173 be called once on a freshly created Pickler. */
1174static int
1175_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1176{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001177 self->peek = PyObject_GetAttrString(file, "peek");
1178 if (self->peek == NULL) {
1179 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1180 PyErr_Clear();
1181 else
1182 return -1;
1183 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001184 self->read = PyObject_GetAttrString(file, "read");
1185 self->readline = PyObject_GetAttrString(file, "readline");
1186 if (self->readline == NULL || self->read == NULL) {
1187 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1188 PyErr_SetString(PyExc_TypeError,
1189 "file must have 'read' and 'readline' attributes");
1190 Py_CLEAR(self->read);
1191 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001192 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001193 return -1;
1194 }
1195 return 0;
1196}
1197
1198/* Returns -1 (with an exception set) on failure, 0 on success. This may
1199 be called once on a freshly created Pickler. */
1200static int
1201_Unpickler_SetInputEncoding(UnpicklerObject *self,
1202 const char *encoding,
1203 const char *errors)
1204{
1205 if (encoding == NULL)
1206 encoding = "ASCII";
1207 if (errors == NULL)
1208 errors = "strict";
1209
1210 self->encoding = strdup(encoding);
1211 self->errors = strdup(errors);
1212 if (self->encoding == NULL || self->errors == NULL) {
1213 PyErr_NoMemory();
1214 return -1;
1215 }
1216 return 0;
1217}
1218
1219/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001220static int
1221memo_get(PicklerObject *self, PyObject *key)
1222{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001223 long *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001224 char pdata[30];
1225 int len;
1226
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001227 value = PyMemoTable_Get(self->memo, key);
1228 if (value == NULL) {
1229 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230 return -1;
1231 }
1232
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 if (!self->bin) {
1234 pdata[0] = GET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001235 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001236 len = (int)strlen(pdata);
1237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001270 long x;
1271 char pdata[30];
1272 int len;
1273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
1284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
1285 len = strlen(pdata);
1286 }
1287 else {
1288 if (x < 256) {
1289 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001290 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001291 len = 2;
1292 }
1293 else if (x <= 0xffffffffL) {
1294 pdata[0] = LONG_BINPUT;
1295 pdata[1] = (unsigned char)(x & 0xff);
1296 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1297 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1298 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1299 len = 5;
1300 }
1301 else { /* unlikely */
1302 PyErr_SetString(PicklingError,
1303 "memo id too large for LONG_BINPUT");
1304 return -1;
1305 }
1306 }
1307
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001308 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001309 goto error;
1310
1311 if (0) {
1312 error:
1313 status = -1;
1314 }
1315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001316 return status;
1317}
1318
1319static PyObject *
1320whichmodule(PyObject *global, PyObject *global_name)
1321{
1322 Py_ssize_t i, j;
1323 static PyObject *module_str = NULL;
1324 static PyObject *main_str = NULL;
1325 PyObject *module_name;
1326 PyObject *modules_dict;
1327 PyObject *module;
1328 PyObject *obj;
1329
1330 if (module_str == NULL) {
1331 module_str = PyUnicode_InternFromString("__module__");
1332 if (module_str == NULL)
1333 return NULL;
1334 main_str = PyUnicode_InternFromString("__main__");
1335 if (main_str == NULL)
1336 return NULL;
1337 }
1338
1339 module_name = PyObject_GetAttr(global, module_str);
1340
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001341 /* In some rare cases (e.g., bound methods of extension types),
1342 __module__ can be None. If it is so, then search sys.modules
1343 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001344 if (module_name == Py_None) {
1345 Py_DECREF(module_name);
1346 goto search;
1347 }
1348
1349 if (module_name) {
1350 return module_name;
1351 }
1352 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1353 PyErr_Clear();
1354 else
1355 return NULL;
1356
1357 search:
1358 modules_dict = PySys_GetObject("modules");
1359 if (modules_dict == NULL)
1360 return NULL;
1361
1362 i = 0;
1363 module_name = NULL;
1364 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001365 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001366 continue;
1367
1368 obj = PyObject_GetAttr(module, global_name);
1369 if (obj == NULL) {
1370 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1371 PyErr_Clear();
1372 else
1373 return NULL;
1374 continue;
1375 }
1376
1377 if (obj != global) {
1378 Py_DECREF(obj);
1379 continue;
1380 }
1381
1382 Py_DECREF(obj);
1383 break;
1384 }
1385
1386 /* If no module is found, use __main__. */
1387 if (!j) {
1388 module_name = main_str;
1389 }
1390
1391 Py_INCREF(module_name);
1392 return module_name;
1393}
1394
1395/* fast_save_enter() and fast_save_leave() are guards against recursive
1396 objects when Pickler is used with the "fast mode" (i.e., with object
1397 memoization disabled). If the nesting of a list or dict object exceed
1398 FAST_NESTING_LIMIT, these guards will start keeping an internal
1399 reference to the seen list or dict objects and check whether these objects
1400 are recursive. These are not strictly necessary, since save() has a
1401 hard-coded recursion limit, but they give a nicer error message than the
1402 typical RuntimeError. */
1403static int
1404fast_save_enter(PicklerObject *self, PyObject *obj)
1405{
1406 /* if fast_nesting < 0, we're doing an error exit. */
1407 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1408 PyObject *key = NULL;
1409 if (self->fast_memo == NULL) {
1410 self->fast_memo = PyDict_New();
1411 if (self->fast_memo == NULL) {
1412 self->fast_nesting = -1;
1413 return 0;
1414 }
1415 }
1416 key = PyLong_FromVoidPtr(obj);
1417 if (key == NULL)
1418 return 0;
1419 if (PyDict_GetItem(self->fast_memo, key)) {
1420 Py_DECREF(key);
1421 PyErr_Format(PyExc_ValueError,
1422 "fast mode: can't pickle cyclic objects "
1423 "including object type %.200s at %p",
1424 obj->ob_type->tp_name, obj);
1425 self->fast_nesting = -1;
1426 return 0;
1427 }
1428 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1429 Py_DECREF(key);
1430 self->fast_nesting = -1;
1431 return 0;
1432 }
1433 Py_DECREF(key);
1434 }
1435 return 1;
1436}
1437
1438static int
1439fast_save_leave(PicklerObject *self, PyObject *obj)
1440{
1441 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1442 PyObject *key = PyLong_FromVoidPtr(obj);
1443 if (key == NULL)
1444 return 0;
1445 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1446 Py_DECREF(key);
1447 return 0;
1448 }
1449 Py_DECREF(key);
1450 }
1451 return 1;
1452}
1453
1454static int
1455save_none(PicklerObject *self, PyObject *obj)
1456{
1457 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001458 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001459 return -1;
1460
1461 return 0;
1462}
1463
1464static int
1465save_bool(PicklerObject *self, PyObject *obj)
1466{
1467 static const char *buf[2] = { FALSE, TRUE };
1468 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1469 int p = (obj == Py_True);
1470
1471 if (self->proto >= 2) {
1472 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001473 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001474 return -1;
1475 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001476 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001477 return -1;
1478
1479 return 0;
1480}
1481
1482static int
1483save_int(PicklerObject *self, long x)
1484{
1485 char pdata[32];
1486 int len = 0;
1487
1488 if (!self->bin
1489#if SIZEOF_LONG > 4
1490 || x > 0x7fffffffL || x < -0x80000000L
1491#endif
1492 ) {
1493 /* Text-mode pickle, or long too big to fit in the 4-byte
1494 * signed BININT format: store as a string.
1495 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001496 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1497 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001498 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499 return -1;
1500 }
1501 else {
1502 /* Binary pickle and x fits in a signed 4-byte int. */
1503 pdata[1] = (unsigned char)(x & 0xff);
1504 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1505 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1506 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1507
1508 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1509 if (pdata[2] == 0) {
1510 pdata[0] = BININT1;
1511 len = 2;
1512 }
1513 else {
1514 pdata[0] = BININT2;
1515 len = 3;
1516 }
1517 }
1518 else {
1519 pdata[0] = BININT;
1520 len = 5;
1521 }
1522
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001523 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001524 return -1;
1525 }
1526
1527 return 0;
1528}
1529
1530static int
1531save_long(PicklerObject *self, PyObject *obj)
1532{
1533 PyObject *repr = NULL;
1534 Py_ssize_t size;
1535 long val = PyLong_AsLong(obj);
1536 int status = 0;
1537
1538 const char long_op = LONG;
1539
1540 if (val == -1 && PyErr_Occurred()) {
1541 /* out of range for int pickling */
1542 PyErr_Clear();
1543 }
1544 else
1545 return save_int(self, val);
1546
1547 if (self->proto >= 2) {
1548 /* Linear-time pickling. */
1549 size_t nbits;
1550 size_t nbytes;
1551 unsigned char *pdata;
1552 char header[5];
1553 int i;
1554 int sign = _PyLong_Sign(obj);
1555
1556 if (sign == 0) {
1557 header[0] = LONG1;
1558 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001559 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001560 goto error;
1561 return 0;
1562 }
1563 nbits = _PyLong_NumBits(obj);
1564 if (nbits == (size_t)-1 && PyErr_Occurred())
1565 goto error;
1566 /* How many bytes do we need? There are nbits >> 3 full
1567 * bytes of data, and nbits & 7 leftover bits. If there
1568 * are any leftover bits, then we clearly need another
1569 * byte. Wnat's not so obvious is that we *probably*
1570 * need another byte even if there aren't any leftovers:
1571 * the most-significant bit of the most-significant byte
1572 * acts like a sign bit, and it's usually got a sense
1573 * opposite of the one we need. The exception is longs
1574 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1575 * its own 256's-complement, so has the right sign bit
1576 * even without the extra byte. That's a pain to check
1577 * for in advance, though, so we always grab an extra
1578 * byte at the start, and cut it back later if possible.
1579 */
1580 nbytes = (nbits >> 3) + 1;
1581 if (nbytes > INT_MAX) {
1582 PyErr_SetString(PyExc_OverflowError,
1583 "long too large to pickle");
1584 goto error;
1585 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001586 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001587 if (repr == NULL)
1588 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001589 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001590 i = _PyLong_AsByteArray((PyLongObject *)obj,
1591 pdata, nbytes,
1592 1 /* little endian */ , 1 /* signed */ );
1593 if (i < 0)
1594 goto error;
1595 /* If the long is negative, this may be a byte more than
1596 * needed. This is so iff the MSB is all redundant sign
1597 * bits.
1598 */
1599 if (sign < 0 &&
1600 nbytes > 1 &&
1601 pdata[nbytes - 1] == 0xff &&
1602 (pdata[nbytes - 2] & 0x80) != 0) {
1603 nbytes--;
1604 }
1605
1606 if (nbytes < 256) {
1607 header[0] = LONG1;
1608 header[1] = (unsigned char)nbytes;
1609 size = 2;
1610 }
1611 else {
1612 header[0] = LONG4;
1613 size = (int)nbytes;
1614 for (i = 1; i < 5; i++) {
1615 header[i] = (unsigned char)(size & 0xff);
1616 size >>= 8;
1617 }
1618 size = 5;
1619 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001620 if (_Pickler_Write(self, header, size) < 0 ||
1621 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001622 goto error;
1623 }
1624 else {
1625 char *string;
1626
Mark Dickinson8dd05142009-01-20 20:43:58 +00001627 /* proto < 2: write the repr and newline. This is quadratic-time (in
1628 the number of digits), in both directions. We add a trailing 'L'
1629 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001630
1631 repr = PyObject_Repr(obj);
1632 if (repr == NULL)
1633 goto error;
1634
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001635 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001636 if (string == NULL)
1637 goto error;
1638
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001639 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1640 _Pickler_Write(self, string, size) < 0 ||
1641 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001642 goto error;
1643 }
1644
1645 if (0) {
1646 error:
1647 status = -1;
1648 }
1649 Py_XDECREF(repr);
1650
1651 return status;
1652}
1653
1654static int
1655save_float(PicklerObject *self, PyObject *obj)
1656{
1657 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1658
1659 if (self->bin) {
1660 char pdata[9];
1661 pdata[0] = BINFLOAT;
1662 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1663 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001664 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001665 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001666 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001668 int result = -1;
1669 char *buf = NULL;
1670 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001672 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001673 goto done;
1674
Mark Dickinson3e09f432009-04-17 08:41:23 +00001675 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001676 if (!buf) {
1677 PyErr_NoMemory();
1678 goto done;
1679 }
1680
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001681 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001682 goto done;
1683
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001684 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001685 goto done;
1686
1687 result = 0;
1688done:
1689 PyMem_Free(buf);
1690 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001691 }
1692
1693 return 0;
1694}
1695
1696static int
1697save_bytes(PicklerObject *self, PyObject *obj)
1698{
1699 if (self->proto < 3) {
1700 /* Older pickle protocols do not have an opcode for pickling bytes
1701 objects. Therefore, we need to fake the copy protocol (i.e.,
1702 the __reduce__ method) to permit bytes object unpickling. */
1703 PyObject *reduce_value = NULL;
1704 PyObject *bytelist = NULL;
1705 int status;
1706
1707 bytelist = PySequence_List(obj);
1708 if (bytelist == NULL)
1709 return -1;
1710
1711 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1712 bytelist);
1713 if (reduce_value == NULL) {
1714 Py_DECREF(bytelist);
1715 return -1;
1716 }
1717
1718 /* save_reduce() will memoize the object automatically. */
1719 status = save_reduce(self, reduce_value, obj);
1720 Py_DECREF(reduce_value);
1721 Py_DECREF(bytelist);
1722 return status;
1723 }
1724 else {
1725 Py_ssize_t size;
1726 char header[5];
1727 int len;
1728
1729 size = PyBytes_Size(obj);
1730 if (size < 0)
1731 return -1;
1732
1733 if (size < 256) {
1734 header[0] = SHORT_BINBYTES;
1735 header[1] = (unsigned char)size;
1736 len = 2;
1737 }
1738 else if (size <= 0xffffffffL) {
1739 header[0] = BINBYTES;
1740 header[1] = (unsigned char)(size & 0xff);
1741 header[2] = (unsigned char)((size >> 8) & 0xff);
1742 header[3] = (unsigned char)((size >> 16) & 0xff);
1743 header[4] = (unsigned char)((size >> 24) & 0xff);
1744 len = 5;
1745 }
1746 else {
1747 return -1; /* string too large */
1748 }
1749
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001750 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001751 return -1;
1752
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001753 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001754 return -1;
1755
1756 if (memo_put(self, obj) < 0)
1757 return -1;
1758
1759 return 0;
1760 }
1761}
1762
1763/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1764 backslash and newline characters to \uXXXX escapes. */
1765static PyObject *
1766raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1767{
1768 PyObject *repr, *result;
1769 char *p;
1770 char *q;
1771
1772 static const char *hexdigits = "0123456789abcdef";
1773
1774#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001775 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001776#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001777 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001779
1780 if (size > PY_SSIZE_T_MAX / expandsize)
1781 return PyErr_NoMemory();
1782
1783 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001784 if (repr == NULL)
1785 return NULL;
1786 if (size == 0)
1787 goto done;
1788
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001789 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001790 while (size-- > 0) {
1791 Py_UNICODE ch = *s++;
1792#ifdef Py_UNICODE_WIDE
1793 /* Map 32-bit characters to '\Uxxxxxxxx' */
1794 if (ch >= 0x10000) {
1795 *p++ = '\\';
1796 *p++ = 'U';
1797 *p++ = hexdigits[(ch >> 28) & 0xf];
1798 *p++ = hexdigits[(ch >> 24) & 0xf];
1799 *p++ = hexdigits[(ch >> 20) & 0xf];
1800 *p++ = hexdigits[(ch >> 16) & 0xf];
1801 *p++ = hexdigits[(ch >> 12) & 0xf];
1802 *p++ = hexdigits[(ch >> 8) & 0xf];
1803 *p++ = hexdigits[(ch >> 4) & 0xf];
1804 *p++ = hexdigits[ch & 15];
1805 }
1806 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001807#else
1808 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1809 if (ch >= 0xD800 && ch < 0xDC00) {
1810 Py_UNICODE ch2;
1811 Py_UCS4 ucs;
1812
1813 ch2 = *s++;
1814 size--;
1815 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1816 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1817 *p++ = '\\';
1818 *p++ = 'U';
1819 *p++ = hexdigits[(ucs >> 28) & 0xf];
1820 *p++ = hexdigits[(ucs >> 24) & 0xf];
1821 *p++ = hexdigits[(ucs >> 20) & 0xf];
1822 *p++ = hexdigits[(ucs >> 16) & 0xf];
1823 *p++ = hexdigits[(ucs >> 12) & 0xf];
1824 *p++ = hexdigits[(ucs >> 8) & 0xf];
1825 *p++ = hexdigits[(ucs >> 4) & 0xf];
1826 *p++ = hexdigits[ucs & 0xf];
1827 continue;
1828 }
1829 /* Fall through: isolated surrogates are copied as-is */
1830 s--;
1831 size++;
1832 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001833#endif
1834 /* Map 16-bit characters to '\uxxxx' */
1835 if (ch >= 256 || ch == '\\' || ch == '\n') {
1836 *p++ = '\\';
1837 *p++ = 'u';
1838 *p++ = hexdigits[(ch >> 12) & 0xf];
1839 *p++ = hexdigits[(ch >> 8) & 0xf];
1840 *p++ = hexdigits[(ch >> 4) & 0xf];
1841 *p++ = hexdigits[ch & 15];
1842 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001843 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001844 else
1845 *p++ = (char) ch;
1846 }
1847 size = p - q;
1848
1849 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001850 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001851 Py_DECREF(repr);
1852 return result;
1853}
1854
1855static int
1856save_unicode(PicklerObject *self, PyObject *obj)
1857{
1858 Py_ssize_t size;
1859 PyObject *encoded = NULL;
1860
1861 if (self->bin) {
1862 char pdata[5];
1863
Victor Stinner485fb562010-04-13 11:07:24 +00001864 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1865 PyUnicode_GET_SIZE(obj),
1866 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001867 if (encoded == NULL)
1868 goto error;
1869
1870 size = PyBytes_GET_SIZE(encoded);
1871 if (size < 0 || size > 0xffffffffL)
1872 goto error; /* string too large */
1873
1874 pdata[0] = BINUNICODE;
1875 pdata[1] = (unsigned char)(size & 0xff);
1876 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1877 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1878 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1879
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001880 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001881 goto error;
1882
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001883 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001884 goto error;
1885 }
1886 else {
1887 const char unicode_op = UNICODE;
1888
1889 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1890 PyUnicode_GET_SIZE(obj));
1891 if (encoded == NULL)
1892 goto error;
1893
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001894 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001895 goto error;
1896
1897 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001898 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001899 goto error;
1900
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001901 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902 goto error;
1903 }
1904 if (memo_put(self, obj) < 0)
1905 goto error;
1906
1907 Py_DECREF(encoded);
1908 return 0;
1909
1910 error:
1911 Py_XDECREF(encoded);
1912 return -1;
1913}
1914
1915/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1916static int
1917store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1918{
1919 int i;
1920
1921 assert(PyTuple_Size(t) == len);
1922
1923 for (i = 0; i < len; i++) {
1924 PyObject *element = PyTuple_GET_ITEM(t, i);
1925
1926 if (element == NULL)
1927 return -1;
1928 if (save(self, element, 0) < 0)
1929 return -1;
1930 }
1931
1932 return 0;
1933}
1934
1935/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1936 * used across protocols to minimize the space needed to pickle them.
1937 * Tuples are also the only builtin immutable type that can be recursive
1938 * (a tuple can be reached from itself), and that requires some subtle
1939 * magic so that it works in all cases. IOW, this is a long routine.
1940 */
1941static int
1942save_tuple(PicklerObject *self, PyObject *obj)
1943{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001944 int len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001945
1946 const char mark_op = MARK;
1947 const char tuple_op = TUPLE;
1948 const char pop_op = POP;
1949 const char pop_mark_op = POP_MARK;
1950 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1951
1952 if ((len = PyTuple_Size(obj)) < 0)
1953 return -1;
1954
1955 if (len == 0) {
1956 char pdata[2];
1957
1958 if (self->proto) {
1959 pdata[0] = EMPTY_TUPLE;
1960 len = 1;
1961 }
1962 else {
1963 pdata[0] = MARK;
1964 pdata[1] = TUPLE;
1965 len = 2;
1966 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001967 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001968 return -1;
1969 return 0;
1970 }
1971
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001972 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 * saving the tuple elements, the tuple must be recursive, in
1974 * which case we'll pop everything we put on the stack, and fetch
1975 * its value from the memo.
1976 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001977 if (len <= 3 && self->proto >= 2) {
1978 /* Use TUPLE{1,2,3} opcodes. */
1979 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001980 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001981
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001982 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001983 /* pop the len elements */
1984 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001985 if (_Pickler_Write(self, &pop_op, 1) < 0)
1986 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001987 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001988 if (memo_get(self, obj) < 0)
1989 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001991 return 0;
1992 }
1993 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1995 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996 }
1997 goto memoize;
1998 }
1999
2000 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2001 * Generate MARK e1 e2 ... TUPLE
2002 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002003 if (_Pickler_Write(self, &mark_op, 1) < 0)
2004 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005
2006 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002007 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002009 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002010 /* pop the stack stuff we pushed */
2011 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002012 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 }
2015 else {
2016 /* Note that we pop one more than len, to remove
2017 * the MARK too.
2018 */
2019 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002020 if (_Pickler_Write(self, &pop_op, 1) < 0)
2021 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002022 }
2023 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002024 if (memo_get(self, obj) < 0)
2025 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return 0;
2028 }
2029 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002030 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2031 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 }
2033
2034 memoize:
2035 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002036 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002037
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002038 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002039}
2040
2041/* iter is an iterator giving items, and we batch up chunks of
2042 * MARK item item ... item APPENDS
2043 * opcode sequences. Calling code should have arranged to first create an
2044 * empty list, or list-like object, for the APPENDS to operate on.
2045 * Returns 0 on success, <0 on error.
2046 */
2047static int
2048batch_list(PicklerObject *self, PyObject *iter)
2049{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002050 PyObject *obj = NULL;
2051 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002052 int i, n;
2053
2054 const char mark_op = MARK;
2055 const char append_op = APPEND;
2056 const char appends_op = APPENDS;
2057
2058 assert(iter != NULL);
2059
2060 /* XXX: I think this function could be made faster by avoiding the
2061 iterator interface and fetching objects directly from list using
2062 PyList_GET_ITEM.
2063 */
2064
2065 if (self->proto == 0) {
2066 /* APPENDS isn't available; do one at a time. */
2067 for (;;) {
2068 obj = PyIter_Next(iter);
2069 if (obj == NULL) {
2070 if (PyErr_Occurred())
2071 return -1;
2072 break;
2073 }
2074 i = save(self, obj, 0);
2075 Py_DECREF(obj);
2076 if (i < 0)
2077 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002078 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002079 return -1;
2080 }
2081 return 0;
2082 }
2083
2084 /* proto > 0: write in batches of BATCHSIZE. */
2085 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002086 /* Get first item */
2087 firstitem = PyIter_Next(iter);
2088 if (firstitem == NULL) {
2089 if (PyErr_Occurred())
2090 goto error;
2091
2092 /* nothing more to add */
2093 break;
2094 }
2095
2096 /* Try to get a second item */
2097 obj = PyIter_Next(iter);
2098 if (obj == NULL) {
2099 if (PyErr_Occurred())
2100 goto error;
2101
2102 /* Only one item to write */
2103 if (save(self, firstitem, 0) < 0)
2104 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002105 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002106 goto error;
2107 Py_CLEAR(firstitem);
2108 break;
2109 }
2110
2111 /* More than one item to write */
2112
2113 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002114 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002115 goto error;
2116
2117 if (save(self, firstitem, 0) < 0)
2118 goto error;
2119 Py_CLEAR(firstitem);
2120 n = 1;
2121
2122 /* Fetch and save up to BATCHSIZE items */
2123 while (obj) {
2124 if (save(self, obj, 0) < 0)
2125 goto error;
2126 Py_CLEAR(obj);
2127 n += 1;
2128
2129 if (n == BATCHSIZE)
2130 break;
2131
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002132 obj = PyIter_Next(iter);
2133 if (obj == NULL) {
2134 if (PyErr_Occurred())
2135 goto error;
2136 break;
2137 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002138 }
2139
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002140 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002141 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002142
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002143 } while (n == BATCHSIZE);
2144 return 0;
2145
2146 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002147 Py_XDECREF(firstitem);
2148 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002149 return -1;
2150}
2151
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002152/* This is a variant of batch_list() above, specialized for lists (with no
2153 * support for list subclasses). Like batch_list(), we batch up chunks of
2154 * MARK item item ... item APPENDS
2155 * opcode sequences. Calling code should have arranged to first create an
2156 * empty list, or list-like object, for the APPENDS to operate on.
2157 * Returns 0 on success, -1 on error.
2158 *
2159 * This version is considerably faster than batch_list(), if less general.
2160 *
2161 * Note that this only works for protocols > 0.
2162 */
2163static int
2164batch_list_exact(PicklerObject *self, PyObject *obj)
2165{
2166 PyObject *item = NULL;
2167 int this_batch, total;
2168
2169 const char append_op = APPEND;
2170 const char appends_op = APPENDS;
2171 const char mark_op = MARK;
2172
2173 assert(obj != NULL);
2174 assert(self->proto > 0);
2175 assert(PyList_CheckExact(obj));
2176
2177 if (PyList_GET_SIZE(obj) == 1) {
2178 item = PyList_GET_ITEM(obj, 0);
2179 if (save(self, item, 0) < 0)
2180 return -1;
2181 if (_Pickler_Write(self, &append_op, 1) < 0)
2182 return -1;
2183 return 0;
2184 }
2185
2186 /* Write in batches of BATCHSIZE. */
2187 total = 0;
2188 do {
2189 this_batch = 0;
2190 if (_Pickler_Write(self, &mark_op, 1) < 0)
2191 return -1;
2192 while (total < PyList_GET_SIZE(obj)) {
2193 item = PyList_GET_ITEM(obj, total);
2194 if (save(self, item, 0) < 0)
2195 return -1;
2196 total++;
2197 if (++this_batch == BATCHSIZE)
2198 break;
2199 }
2200 if (_Pickler_Write(self, &appends_op, 1) < 0)
2201 return -1;
2202
2203 } while (total < PyList_GET_SIZE(obj));
2204
2205 return 0;
2206}
2207
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002208static int
2209save_list(PicklerObject *self, PyObject *obj)
2210{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002211 char header[3];
2212 int len;
2213 int status = 0;
2214
2215 if (self->fast && !fast_save_enter(self, obj))
2216 goto error;
2217
2218 /* Create an empty list. */
2219 if (self->bin) {
2220 header[0] = EMPTY_LIST;
2221 len = 1;
2222 }
2223 else {
2224 header[0] = MARK;
2225 header[1] = LIST;
2226 len = 2;
2227 }
2228
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002229 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002230 goto error;
2231
2232 /* Get list length, and bow out early if empty. */
2233 if ((len = PyList_Size(obj)) < 0)
2234 goto error;
2235
2236 if (memo_put(self, obj) < 0)
2237 goto error;
2238
2239 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002240 /* Materialize the list elements. */
2241 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002242 if (Py_EnterRecursiveCall(" while pickling an object"))
2243 goto error;
2244 status = batch_list_exact(self, obj);
2245 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002246 } else {
2247 PyObject *iter = PyObject_GetIter(obj);
2248 if (iter == NULL)
2249 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002250
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002251 if (Py_EnterRecursiveCall(" while pickling an object")) {
2252 Py_DECREF(iter);
2253 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002254 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002255 status = batch_list(self, iter);
2256 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002257 Py_DECREF(iter);
2258 }
2259 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002260 if (0) {
2261 error:
2262 status = -1;
2263 }
2264
2265 if (self->fast && !fast_save_leave(self, obj))
2266 status = -1;
2267
2268 return status;
2269}
2270
2271/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2272 * MARK key value ... key value SETITEMS
2273 * opcode sequences. Calling code should have arranged to first create an
2274 * empty dict, or dict-like object, for the SETITEMS to operate on.
2275 * Returns 0 on success, <0 on error.
2276 *
2277 * This is very much like batch_list(). The difference between saving
2278 * elements directly, and picking apart two-tuples, is so long-winded at
2279 * the C level, though, that attempts to combine these routines were too
2280 * ugly to bear.
2281 */
2282static int
2283batch_dict(PicklerObject *self, PyObject *iter)
2284{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002285 PyObject *obj = NULL;
2286 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002287 int i, n;
2288
2289 const char mark_op = MARK;
2290 const char setitem_op = SETITEM;
2291 const char setitems_op = SETITEMS;
2292
2293 assert(iter != NULL);
2294
2295 if (self->proto == 0) {
2296 /* SETITEMS isn't available; do one at a time. */
2297 for (;;) {
2298 obj = PyIter_Next(iter);
2299 if (obj == NULL) {
2300 if (PyErr_Occurred())
2301 return -1;
2302 break;
2303 }
2304 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2305 PyErr_SetString(PyExc_TypeError, "dict items "
2306 "iterator must return 2-tuples");
2307 return -1;
2308 }
2309 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2310 if (i >= 0)
2311 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2312 Py_DECREF(obj);
2313 if (i < 0)
2314 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002315 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002316 return -1;
2317 }
2318 return 0;
2319 }
2320
2321 /* proto > 0: write in batches of BATCHSIZE. */
2322 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002323 /* Get first item */
2324 firstitem = PyIter_Next(iter);
2325 if (firstitem == NULL) {
2326 if (PyErr_Occurred())
2327 goto error;
2328
2329 /* nothing more to add */
2330 break;
2331 }
2332 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2333 PyErr_SetString(PyExc_TypeError, "dict items "
2334 "iterator must return 2-tuples");
2335 goto error;
2336 }
2337
2338 /* Try to get a second item */
2339 obj = PyIter_Next(iter);
2340 if (obj == NULL) {
2341 if (PyErr_Occurred())
2342 goto error;
2343
2344 /* Only one item to write */
2345 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2346 goto error;
2347 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2348 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002349 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002350 goto error;
2351 Py_CLEAR(firstitem);
2352 break;
2353 }
2354
2355 /* More than one item to write */
2356
2357 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002358 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002359 goto error;
2360
2361 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2362 goto error;
2363 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2364 goto error;
2365 Py_CLEAR(firstitem);
2366 n = 1;
2367
2368 /* Fetch and save up to BATCHSIZE items */
2369 while (obj) {
2370 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2371 PyErr_SetString(PyExc_TypeError, "dict items "
2372 "iterator must return 2-tuples");
2373 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002374 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002375 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2376 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2377 goto error;
2378 Py_CLEAR(obj);
2379 n += 1;
2380
2381 if (n == BATCHSIZE)
2382 break;
2383
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002384 obj = PyIter_Next(iter);
2385 if (obj == NULL) {
2386 if (PyErr_Occurred())
2387 goto error;
2388 break;
2389 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002390 }
2391
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002392 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002393 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002394
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002395 } while (n == BATCHSIZE);
2396 return 0;
2397
2398 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002399 Py_XDECREF(firstitem);
2400 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002401 return -1;
2402}
2403
Collin Winter5c9b02d2009-05-25 05:43:30 +00002404/* This is a variant of batch_dict() above that specializes for dicts, with no
2405 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2406 * MARK key value ... key value SETITEMS
2407 * opcode sequences. Calling code should have arranged to first create an
2408 * empty dict, or dict-like object, for the SETITEMS to operate on.
2409 * Returns 0 on success, -1 on error.
2410 *
2411 * Note that this currently doesn't work for protocol 0.
2412 */
2413static int
2414batch_dict_exact(PicklerObject *self, PyObject *obj)
2415{
2416 PyObject *key = NULL, *value = NULL;
2417 int i;
2418 Py_ssize_t dict_size, ppos = 0;
2419
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002420 const char mark_op = MARK;
2421 const char setitem_op = SETITEM;
2422 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002423
2424 assert(obj != NULL);
2425 assert(self->proto > 0);
2426
2427 dict_size = PyDict_Size(obj);
2428
2429 /* Special-case len(d) == 1 to save space. */
2430 if (dict_size == 1) {
2431 PyDict_Next(obj, &ppos, &key, &value);
2432 if (save(self, key, 0) < 0)
2433 return -1;
2434 if (save(self, value, 0) < 0)
2435 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002436 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002437 return -1;
2438 return 0;
2439 }
2440
2441 /* Write in batches of BATCHSIZE. */
2442 do {
2443 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002444 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002445 return -1;
2446 while (PyDict_Next(obj, &ppos, &key, &value)) {
2447 if (save(self, key, 0) < 0)
2448 return -1;
2449 if (save(self, value, 0) < 0)
2450 return -1;
2451 if (++i == BATCHSIZE)
2452 break;
2453 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002454 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002455 return -1;
2456 if (PyDict_Size(obj) != dict_size) {
2457 PyErr_Format(
2458 PyExc_RuntimeError,
2459 "dictionary changed size during iteration");
2460 return -1;
2461 }
2462
2463 } while (i == BATCHSIZE);
2464 return 0;
2465}
2466
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002467static int
2468save_dict(PicklerObject *self, PyObject *obj)
2469{
2470 PyObject *items, *iter;
2471 char header[3];
2472 int len;
2473 int status = 0;
2474
2475 if (self->fast && !fast_save_enter(self, obj))
2476 goto error;
2477
2478 /* Create an empty dict. */
2479 if (self->bin) {
2480 header[0] = EMPTY_DICT;
2481 len = 1;
2482 }
2483 else {
2484 header[0] = MARK;
2485 header[1] = DICT;
2486 len = 2;
2487 }
2488
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002489 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002490 goto error;
2491
2492 /* Get dict size, and bow out early if empty. */
2493 if ((len = PyDict_Size(obj)) < 0)
2494 goto error;
2495
2496 if (memo_put(self, obj) < 0)
2497 goto error;
2498
2499 if (len != 0) {
2500 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002501 if (PyDict_CheckExact(obj) && self->proto > 0) {
2502 /* We can take certain shortcuts if we know this is a dict and
2503 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002504 if (Py_EnterRecursiveCall(" while pickling an object"))
2505 goto error;
2506 status = batch_dict_exact(self, obj);
2507 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002508 } else {
2509 items = PyObject_CallMethod(obj, "items", "()");
2510 if (items == NULL)
2511 goto error;
2512 iter = PyObject_GetIter(items);
2513 Py_DECREF(items);
2514 if (iter == NULL)
2515 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002516 if (Py_EnterRecursiveCall(" while pickling an object")) {
2517 Py_DECREF(iter);
2518 goto error;
2519 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002520 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002521 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002522 Py_DECREF(iter);
2523 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002524 }
2525
2526 if (0) {
2527 error:
2528 status = -1;
2529 }
2530
2531 if (self->fast && !fast_save_leave(self, obj))
2532 status = -1;
2533
2534 return status;
2535}
2536
2537static int
2538save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2539{
2540 static PyObject *name_str = NULL;
2541 PyObject *global_name = NULL;
2542 PyObject *module_name = NULL;
2543 PyObject *module = NULL;
2544 PyObject *cls;
2545 int status = 0;
2546
2547 const char global_op = GLOBAL;
2548
2549 if (name_str == NULL) {
2550 name_str = PyUnicode_InternFromString("__name__");
2551 if (name_str == NULL)
2552 goto error;
2553 }
2554
2555 if (name) {
2556 global_name = name;
2557 Py_INCREF(global_name);
2558 }
2559 else {
2560 global_name = PyObject_GetAttr(obj, name_str);
2561 if (global_name == NULL)
2562 goto error;
2563 }
2564
2565 module_name = whichmodule(obj, global_name);
2566 if (module_name == NULL)
2567 goto error;
2568
2569 /* XXX: Change to use the import C API directly with level=0 to disallow
2570 relative imports.
2571
2572 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2573 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2574 custom import functions (IMHO, this would be a nice security
2575 feature). The import C API would need to be extended to support the
2576 extra parameters of __import__ to fix that. */
2577 module = PyImport_Import(module_name);
2578 if (module == NULL) {
2579 PyErr_Format(PicklingError,
2580 "Can't pickle %R: import of module %R failed",
2581 obj, module_name);
2582 goto error;
2583 }
2584 cls = PyObject_GetAttr(module, global_name);
2585 if (cls == NULL) {
2586 PyErr_Format(PicklingError,
2587 "Can't pickle %R: attribute lookup %S.%S failed",
2588 obj, module_name, global_name);
2589 goto error;
2590 }
2591 if (cls != obj) {
2592 Py_DECREF(cls);
2593 PyErr_Format(PicklingError,
2594 "Can't pickle %R: it's not the same object as %S.%S",
2595 obj, module_name, global_name);
2596 goto error;
2597 }
2598 Py_DECREF(cls);
2599
2600 if (self->proto >= 2) {
2601 /* See whether this is in the extension registry, and if
2602 * so generate an EXT opcode.
2603 */
2604 PyObject *code_obj; /* extension code as Python object */
2605 long code; /* extension code as C value */
2606 char pdata[5];
2607 int n;
2608
2609 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2610 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2611 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2612 /* The object is not registered in the extension registry.
2613 This is the most likely code path. */
2614 if (code_obj == NULL)
2615 goto gen_global;
2616
2617 /* XXX: pickle.py doesn't check neither the type, nor the range
2618 of the value returned by the extension_registry. It should for
2619 consistency. */
2620
2621 /* Verify code_obj has the right type and value. */
2622 if (!PyLong_Check(code_obj)) {
2623 PyErr_Format(PicklingError,
2624 "Can't pickle %R: extension code %R isn't an integer",
2625 obj, code_obj);
2626 goto error;
2627 }
2628 code = PyLong_AS_LONG(code_obj);
2629 if (code <= 0 || code > 0x7fffffffL) {
2630 PyErr_Format(PicklingError,
2631 "Can't pickle %R: extension code %ld is out of range",
2632 obj, code);
2633 goto error;
2634 }
2635
2636 /* Generate an EXT opcode. */
2637 if (code <= 0xff) {
2638 pdata[0] = EXT1;
2639 pdata[1] = (unsigned char)code;
2640 n = 2;
2641 }
2642 else if (code <= 0xffff) {
2643 pdata[0] = EXT2;
2644 pdata[1] = (unsigned char)(code & 0xff);
2645 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2646 n = 3;
2647 }
2648 else {
2649 pdata[0] = EXT4;
2650 pdata[1] = (unsigned char)(code & 0xff);
2651 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2652 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2653 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2654 n = 5;
2655 }
2656
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002657 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002658 goto error;
2659 }
2660 else {
2661 /* Generate a normal global opcode if we are using a pickle
2662 protocol <= 2, or if the object is not registered in the
2663 extension registry. */
2664 PyObject *encoded;
2665 PyObject *(*unicode_encoder)(PyObject *);
2666
2667 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002668 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002669 goto error;
2670
2671 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2672 the module name and the global name using UTF-8. We do so only when
2673 we are using the pickle protocol newer than version 3. This is to
2674 ensure compatibility with older Unpickler running on Python 2.x. */
2675 if (self->proto >= 3) {
2676 unicode_encoder = PyUnicode_AsUTF8String;
2677 }
2678 else {
2679 unicode_encoder = PyUnicode_AsASCIIString;
2680 }
2681
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002682 /* For protocol < 3 and if the user didn't request against doing so,
2683 we convert module names to the old 2.x module names. */
2684 if (self->fix_imports) {
2685 PyObject *key;
2686 PyObject *item;
2687
2688 key = PyTuple_Pack(2, module_name, global_name);
2689 if (key == NULL)
2690 goto error;
2691 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2692 Py_DECREF(key);
2693 if (item) {
2694 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2695 PyErr_Format(PyExc_RuntimeError,
2696 "_compat_pickle.REVERSE_NAME_MAPPING values "
2697 "should be 2-tuples, not %.200s",
2698 Py_TYPE(item)->tp_name);
2699 goto error;
2700 }
2701 Py_CLEAR(module_name);
2702 Py_CLEAR(global_name);
2703 module_name = PyTuple_GET_ITEM(item, 0);
2704 global_name = PyTuple_GET_ITEM(item, 1);
2705 if (!PyUnicode_Check(module_name) ||
2706 !PyUnicode_Check(global_name)) {
2707 PyErr_Format(PyExc_RuntimeError,
2708 "_compat_pickle.REVERSE_NAME_MAPPING values "
2709 "should be pairs of str, not (%.200s, %.200s)",
2710 Py_TYPE(module_name)->tp_name,
2711 Py_TYPE(global_name)->tp_name);
2712 goto error;
2713 }
2714 Py_INCREF(module_name);
2715 Py_INCREF(global_name);
2716 }
2717 else if (PyErr_Occurred()) {
2718 goto error;
2719 }
2720
2721 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2722 if (item) {
2723 if (!PyUnicode_Check(item)) {
2724 PyErr_Format(PyExc_RuntimeError,
2725 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2726 "should be strings, not %.200s",
2727 Py_TYPE(item)->tp_name);
2728 goto error;
2729 }
2730 Py_CLEAR(module_name);
2731 module_name = item;
2732 Py_INCREF(module_name);
2733 }
2734 else if (PyErr_Occurred()) {
2735 goto error;
2736 }
2737 }
2738
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002739 /* Save the name of the module. */
2740 encoded = unicode_encoder(module_name);
2741 if (encoded == NULL) {
2742 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2743 PyErr_Format(PicklingError,
2744 "can't pickle module identifier '%S' using "
2745 "pickle protocol %i", module_name, self->proto);
2746 goto error;
2747 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002748 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002749 PyBytes_GET_SIZE(encoded)) < 0) {
2750 Py_DECREF(encoded);
2751 goto error;
2752 }
2753 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002754 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002755 goto error;
2756
2757 /* Save the name of the module. */
2758 encoded = unicode_encoder(global_name);
2759 if (encoded == NULL) {
2760 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2761 PyErr_Format(PicklingError,
2762 "can't pickle global identifier '%S' using "
2763 "pickle protocol %i", global_name, self->proto);
2764 goto error;
2765 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002766 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002767 PyBytes_GET_SIZE(encoded)) < 0) {
2768 Py_DECREF(encoded);
2769 goto error;
2770 }
2771 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002772 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002773 goto error;
2774
2775 /* Memoize the object. */
2776 if (memo_put(self, obj) < 0)
2777 goto error;
2778 }
2779
2780 if (0) {
2781 error:
2782 status = -1;
2783 }
2784 Py_XDECREF(module_name);
2785 Py_XDECREF(global_name);
2786 Py_XDECREF(module);
2787
2788 return status;
2789}
2790
2791static int
2792save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2793{
2794 PyObject *pid = NULL;
2795 int status = 0;
2796
2797 const char persid_op = PERSID;
2798 const char binpersid_op = BINPERSID;
2799
2800 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002801 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002802 if (pid == NULL)
2803 return -1;
2804
2805 if (pid != Py_None) {
2806 if (self->bin) {
2807 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002808 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002809 goto error;
2810 }
2811 else {
2812 PyObject *pid_str = NULL;
2813 char *pid_ascii_bytes;
2814 Py_ssize_t size;
2815
2816 pid_str = PyObject_Str(pid);
2817 if (pid_str == NULL)
2818 goto error;
2819
2820 /* XXX: Should it check whether the persistent id only contains
2821 ASCII characters? And what if the pid contains embedded
2822 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002823 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002824 Py_DECREF(pid_str);
2825 if (pid_ascii_bytes == NULL)
2826 goto error;
2827
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002828 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2829 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2830 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002831 goto error;
2832 }
2833 status = 1;
2834 }
2835
2836 if (0) {
2837 error:
2838 status = -1;
2839 }
2840 Py_XDECREF(pid);
2841
2842 return status;
2843}
2844
2845/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2846 * appropriate __reduce__ method for obj.
2847 */
2848static int
2849save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2850{
2851 PyObject *callable;
2852 PyObject *argtup;
2853 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002854 PyObject *listitems = Py_None;
2855 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002856 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002857
2858 int use_newobj = self->proto >= 2;
2859
2860 const char reduce_op = REDUCE;
2861 const char build_op = BUILD;
2862 const char newobj_op = NEWOBJ;
2863
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002864 size = PyTuple_Size(args);
2865 if (size < 2 || size > 5) {
2866 PyErr_SetString(PicklingError, "tuple returned by "
2867 "__reduce__ must contain 2 through 5 elements");
2868 return -1;
2869 }
2870
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002871 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2872 &callable, &argtup, &state, &listitems, &dictitems))
2873 return -1;
2874
2875 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002876 PyErr_SetString(PicklingError, "first item of the tuple "
2877 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002878 return -1;
2879 }
2880 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002881 PyErr_SetString(PicklingError, "second item of the tuple "
2882 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002883 return -1;
2884 }
2885
2886 if (state == Py_None)
2887 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002888
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002889 if (listitems == Py_None)
2890 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002891 else if (!PyIter_Check(listitems)) {
2892 PyErr_Format(PicklingError, "Fourth element of tuple"
2893 "returned by __reduce__ must be an iterator, not %s",
2894 Py_TYPE(listitems)->tp_name);
2895 return -1;
2896 }
2897
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002898 if (dictitems == Py_None)
2899 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002900 else if (!PyIter_Check(dictitems)) {
2901 PyErr_Format(PicklingError, "Fifth element of tuple"
2902 "returned by __reduce__ must be an iterator, not %s",
2903 Py_TYPE(dictitems)->tp_name);
2904 return -1;
2905 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002906
2907 /* Protocol 2 special case: if callable's name is __newobj__, use
2908 NEWOBJ. */
2909 if (use_newobj) {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002910 static PyObject *newobj_str = NULL;
2911 PyObject *name_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002912
2913 if (newobj_str == NULL) {
2914 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrouff150f22010-10-22 21:41:05 +00002915 if (newobj_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002916 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002917 }
2918
Antoine Pitrouff150f22010-10-22 21:41:05 +00002919 name_str = PyObject_GetAttrString(callable, "__name__");
2920 if (name_str == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002921 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2922 PyErr_Clear();
2923 else
2924 return -1;
2925 use_newobj = 0;
2926 }
2927 else {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002928 use_newobj = PyUnicode_Check(name_str) &&
2929 PyUnicode_Compare(name_str, newobj_str) == 0;
2930 Py_DECREF(name_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002931 }
2932 }
2933 if (use_newobj) {
2934 PyObject *cls;
2935 PyObject *newargtup;
2936 PyObject *obj_class;
2937 int p;
2938
2939 /* Sanity checks. */
2940 if (Py_SIZE(argtup) < 1) {
2941 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2942 return -1;
2943 }
2944
2945 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrouff150f22010-10-22 21:41:05 +00002946 if (!PyObject_HasAttrString(cls, "__new__")) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002947 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrouff150f22010-10-22 21:41:05 +00002948 "__newobj__ args has no __new__");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002949 return -1;
2950 }
2951
2952 if (obj != NULL) {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002953 obj_class = PyObject_GetAttrString(obj, "__class__");
2954 if (obj_class == NULL) {
2955 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2956 PyErr_Clear();
2957 else
2958 return -1;
2959 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960 p = obj_class != cls; /* true iff a problem */
2961 Py_DECREF(obj_class);
2962 if (p) {
2963 PyErr_SetString(PicklingError, "args[0] from "
2964 "__newobj__ args has the wrong class");
2965 return -1;
2966 }
2967 }
2968 /* XXX: These calls save() are prone to infinite recursion. Imagine
2969 what happen if the value returned by the __reduce__() method of
2970 some extension type contains another object of the same type. Ouch!
2971
2972 Here is a quick example, that I ran into, to illustrate what I
2973 mean:
2974
2975 >>> import pickle, copyreg
2976 >>> copyreg.dispatch_table.pop(complex)
2977 >>> pickle.dumps(1+2j)
2978 Traceback (most recent call last):
2979 ...
2980 RuntimeError: maximum recursion depth exceeded
2981
2982 Removing the complex class from copyreg.dispatch_table made the
2983 __reduce_ex__() method emit another complex object:
2984
2985 >>> (1+1j).__reduce_ex__(2)
2986 (<function __newobj__ at 0xb7b71c3c>,
2987 (<class 'complex'>, (1+1j)), None, None, None)
2988
2989 Thus when save() was called on newargstup (the 2nd item) recursion
2990 ensued. Of course, the bug was in the complex class which had a
2991 broken __getnewargs__() that emitted another complex object. But,
2992 the point, here, is it is quite easy to end up with a broken reduce
2993 function. */
2994
2995 /* Save the class and its __new__ arguments. */
2996 if (save(self, cls, 0) < 0)
2997 return -1;
2998
2999 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3000 if (newargtup == NULL)
3001 return -1;
3002
3003 p = save(self, newargtup, 0);
3004 Py_DECREF(newargtup);
3005 if (p < 0)
3006 return -1;
3007
3008 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003009 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003010 return -1;
3011 }
3012 else { /* Not using NEWOBJ. */
3013 if (save(self, callable, 0) < 0 ||
3014 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003015 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003016 return -1;
3017 }
3018
3019 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3020 the caller do not want to memoize the object. Not particularly useful,
3021 but that is to mimic the behavior save_reduce() in pickle.py when
3022 obj is None. */
3023 if (obj && memo_put(self, obj) < 0)
3024 return -1;
3025
3026 if (listitems && batch_list(self, listitems) < 0)
3027 return -1;
3028
3029 if (dictitems && batch_dict(self, dictitems) < 0)
3030 return -1;
3031
3032 if (state) {
3033 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003034 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003035 return -1;
3036 }
3037
3038 return 0;
3039}
3040
3041static int
3042save(PicklerObject *self, PyObject *obj, int pers_save)
3043{
3044 PyTypeObject *type;
3045 PyObject *reduce_func = NULL;
3046 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003047 int status = 0;
3048
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003049 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003050 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003051
3052 /* The extra pers_save argument is necessary to avoid calling save_pers()
3053 on its returned object. */
3054 if (!pers_save && self->pers_func) {
3055 /* save_pers() returns:
3056 -1 to signal an error;
3057 0 if it did nothing successfully;
3058 1 if a persistent id was saved.
3059 */
3060 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3061 goto done;
3062 }
3063
3064 type = Py_TYPE(obj);
3065
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003066 /* The old cPickle had an optimization that used switch-case statement
3067 dispatching on the first letter of the type name. This has was removed
3068 since benchmarks shown that this optimization was actually slowing
3069 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003070
3071 /* Atom types; these aren't memoized, so don't check the memo. */
3072
3073 if (obj == Py_None) {
3074 status = save_none(self, obj);
3075 goto done;
3076 }
3077 else if (obj == Py_False || obj == Py_True) {
3078 status = save_bool(self, obj);
3079 goto done;
3080 }
3081 else if (type == &PyLong_Type) {
3082 status = save_long(self, obj);
3083 goto done;
3084 }
3085 else if (type == &PyFloat_Type) {
3086 status = save_float(self, obj);
3087 goto done;
3088 }
3089
3090 /* Check the memo to see if it has the object. If so, generate
3091 a GET (or BINGET) opcode, instead of pickling the object
3092 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003093 if (PyMemoTable_Get(self->memo, obj)) {
3094 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003095 goto error;
3096 goto done;
3097 }
3098
3099 if (type == &PyBytes_Type) {
3100 status = save_bytes(self, obj);
3101 goto done;
3102 }
3103 else if (type == &PyUnicode_Type) {
3104 status = save_unicode(self, obj);
3105 goto done;
3106 }
3107 else if (type == &PyDict_Type) {
3108 status = save_dict(self, obj);
3109 goto done;
3110 }
3111 else if (type == &PyList_Type) {
3112 status = save_list(self, obj);
3113 goto done;
3114 }
3115 else if (type == &PyTuple_Type) {
3116 status = save_tuple(self, obj);
3117 goto done;
3118 }
3119 else if (type == &PyType_Type) {
3120 status = save_global(self, obj, NULL);
3121 goto done;
3122 }
3123 else if (type == &PyFunction_Type) {
3124 status = save_global(self, obj, NULL);
3125 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3126 /* fall back to reduce */
3127 PyErr_Clear();
3128 }
3129 else {
3130 goto done;
3131 }
3132 }
3133 else if (type == &PyCFunction_Type) {
3134 status = save_global(self, obj, NULL);
3135 goto done;
3136 }
3137 else if (PyType_IsSubtype(type, &PyType_Type)) {
3138 status = save_global(self, obj, NULL);
3139 goto done;
3140 }
3141
3142 /* XXX: This part needs some unit tests. */
3143
3144 /* Get a reduction callable, and call it. This may come from
3145 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3146 * or the object's __reduce__ method.
3147 */
3148 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3149 if (reduce_func != NULL) {
3150 /* Here, the reference count of the reduce_func object returned by
3151 PyDict_GetItem needs to be increased to be consistent with the one
3152 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3153 reduce_func at the end of the save() routine.
3154 */
3155 Py_INCREF(reduce_func);
3156 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003157 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003158 }
3159 else {
3160 static PyObject *reduce_str = NULL;
3161 static PyObject *reduce_ex_str = NULL;
3162
3163 /* Cache the name of the reduce methods. */
3164 if (reduce_str == NULL) {
3165 reduce_str = PyUnicode_InternFromString("__reduce__");
3166 if (reduce_str == NULL)
3167 goto error;
3168 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3169 if (reduce_ex_str == NULL)
3170 goto error;
3171 }
3172
3173 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3174 automatically defined as __reduce__. While this is convenient, this
3175 make it impossible to know which method was actually called. Of
3176 course, this is not a big deal. But still, it would be nice to let
3177 the user know which method was called when something go
3178 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3179 don't actually have to check for a __reduce__ method. */
3180
3181 /* Check for a __reduce_ex__ method. */
3182 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3183 if (reduce_func != NULL) {
3184 PyObject *proto;
3185 proto = PyLong_FromLong(self->proto);
3186 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003187 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003188 }
3189 }
3190 else {
3191 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3192 PyErr_Clear();
3193 else
3194 goto error;
3195 /* Check for a __reduce__ method. */
3196 reduce_func = PyObject_GetAttr(obj, reduce_str);
3197 if (reduce_func != NULL) {
3198 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3199 }
3200 else {
3201 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3202 type->tp_name, obj);
3203 goto error;
3204 }
3205 }
3206 }
3207
3208 if (reduce_value == NULL)
3209 goto error;
3210
3211 if (PyUnicode_Check(reduce_value)) {
3212 status = save_global(self, obj, reduce_value);
3213 goto done;
3214 }
3215
3216 if (!PyTuple_Check(reduce_value)) {
3217 PyErr_SetString(PicklingError,
3218 "__reduce__ must return a string or tuple");
3219 goto error;
3220 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003221
3222 status = save_reduce(self, reduce_value, obj);
3223
3224 if (0) {
3225 error:
3226 status = -1;
3227 }
3228 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003229 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003230 Py_XDECREF(reduce_func);
3231 Py_XDECREF(reduce_value);
3232
3233 return status;
3234}
3235
3236static int
3237dump(PicklerObject *self, PyObject *obj)
3238{
3239 const char stop_op = STOP;
3240
3241 if (self->proto >= 2) {
3242 char header[2];
3243
3244 header[0] = PROTO;
3245 assert(self->proto >= 0 && self->proto < 256);
3246 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003247 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003248 return -1;
3249 }
3250
3251 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003252 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003253 return -1;
3254
3255 return 0;
3256}
3257
3258PyDoc_STRVAR(Pickler_clear_memo_doc,
3259"clear_memo() -> None. Clears the pickler's \"memo\"."
3260"\n"
3261"The memo is the data structure that remembers which objects the\n"
3262"pickler has already seen, so that shared or recursive objects are\n"
3263"pickled by reference and not by value. This method is useful when\n"
3264"re-using picklers.");
3265
3266static PyObject *
3267Pickler_clear_memo(PicklerObject *self)
3268{
3269 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003270 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003271
3272 Py_RETURN_NONE;
3273}
3274
3275PyDoc_STRVAR(Pickler_dump_doc,
3276"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3277
3278static PyObject *
3279Pickler_dump(PicklerObject *self, PyObject *args)
3280{
3281 PyObject *obj;
3282
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003283 /* Check whether the Pickler was initialized correctly (issue3664).
3284 Developers often forget to call __init__() in their subclasses, which
3285 would trigger a segfault without this check. */
3286 if (self->write == NULL) {
3287 PyErr_Format(PicklingError,
3288 "Pickler.__init__() was not called by %s.__init__()",
3289 Py_TYPE(self)->tp_name);
3290 return NULL;
3291 }
3292
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003293 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3294 return NULL;
3295
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003296 if (_Pickler_ClearBuffer(self) < 0)
3297 return NULL;
3298
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003299 if (dump(self, obj) < 0)
3300 return NULL;
3301
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003302 if (_Pickler_FlushToFile(self) < 0)
3303 return NULL;
3304
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003305 Py_RETURN_NONE;
3306}
3307
3308static struct PyMethodDef Pickler_methods[] = {
3309 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3310 Pickler_dump_doc},
3311 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3312 Pickler_clear_memo_doc},
3313 {NULL, NULL} /* sentinel */
3314};
3315
3316static void
3317Pickler_dealloc(PicklerObject *self)
3318{
3319 PyObject_GC_UnTrack(self);
3320
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003321 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003322 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003323 Py_XDECREF(self->pers_func);
3324 Py_XDECREF(self->arg);
3325 Py_XDECREF(self->fast_memo);
3326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003327 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003328
3329 Py_TYPE(self)->tp_free((PyObject *)self);
3330}
3331
3332static int
3333Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3334{
3335 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003336 Py_VISIT(self->pers_func);
3337 Py_VISIT(self->arg);
3338 Py_VISIT(self->fast_memo);
3339 return 0;
3340}
3341
3342static int
3343Pickler_clear(PicklerObject *self)
3344{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003345 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003346 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003347 Py_CLEAR(self->pers_func);
3348 Py_CLEAR(self->arg);
3349 Py_CLEAR(self->fast_memo);
3350
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003351 if (self->memo != NULL) {
3352 PyMemoTable *memo = self->memo;
3353 self->memo = NULL;
3354 PyMemoTable_Del(memo);
3355 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003356 return 0;
3357}
3358
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003359
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003360PyDoc_STRVAR(Pickler_doc,
3361"Pickler(file, protocol=None)"
3362"\n"
3363"This takes a binary file for writing a pickle data stream.\n"
3364"\n"
3365"The optional protocol argument tells the pickler to use the\n"
3366"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3367"protocol is 3; a backward-incompatible protocol designed for\n"
3368"Python 3.0.\n"
3369"\n"
3370"Specifying a negative protocol version selects the highest\n"
3371"protocol version supported. The higher the protocol used, the\n"
3372"more recent the version of Python needed to read the pickle\n"
3373"produced.\n"
3374"\n"
3375"The file argument must have a write() method that accepts a single\n"
3376"bytes argument. It can thus be a file object opened for binary\n"
3377"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003378"meets this interface.\n"
3379"\n"
3380"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3381"map the new Python 3.x names to the old module names used in Python\n"
3382"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003383
3384static int
3385Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3386{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003387 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003388 PyObject *file;
3389 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003390 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003391
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003392 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003393 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003394 return -1;
3395
3396 /* In case of multiple __init__() calls, clear previous content. */
3397 if (self->write != NULL)
3398 (void)Pickler_clear(self);
3399
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003400 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3401 return -1;
3402
3403 if (_Pickler_SetOutputStream(self, file) < 0)
3404 return -1;
3405
3406 /* memo and output_buffer may have already been created in _Pickler_New */
3407 if (self->memo == NULL) {
3408 self->memo = PyMemoTable_New();
3409 if (self->memo == NULL)
3410 return -1;
3411 }
3412 self->output_len = 0;
3413 if (self->output_buffer == NULL) {
3414 self->max_output_len = WRITE_BUF_SIZE;
3415 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3416 self->max_output_len);
3417 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003418 return -1;
3419 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003420
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003421 self->arg = NULL;
3422 self->fast = 0;
3423 self->fast_nesting = 0;
3424 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003425 self->pers_func = NULL;
3426 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3427 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3428 "persistent_id");
3429 if (self->pers_func == NULL)
3430 return -1;
3431 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003432 return 0;
3433}
3434
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003435/* Define a proxy object for the Pickler's internal memo object. This is to
3436 * avoid breaking code like:
3437 * pickler.memo.clear()
3438 * and
3439 * pickler.memo = saved_memo
3440 * Is this a good idea? Not really, but we don't want to break code that uses
3441 * it. Note that we don't implement the entire mapping API here. This is
3442 * intentional, as these should be treated as black-box implementation details.
3443 */
3444
3445typedef struct {
3446 PyObject_HEAD
3447 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3448} PicklerMemoProxyObject;
3449
3450PyDoc_STRVAR(pmp_clear_doc,
3451"memo.clear() -> None. Remove all items from memo.");
3452
3453static PyObject *
3454pmp_clear(PicklerMemoProxyObject *self)
3455{
3456 if (self->pickler->memo)
3457 PyMemoTable_Clear(self->pickler->memo);
3458 Py_RETURN_NONE;
3459}
3460
3461PyDoc_STRVAR(pmp_copy_doc,
3462"memo.copy() -> new_memo. Copy the memo to a new object.");
3463
3464static PyObject *
3465pmp_copy(PicklerMemoProxyObject *self)
3466{
3467 Py_ssize_t i;
3468 PyMemoTable *memo;
3469 PyObject *new_memo = PyDict_New();
3470 if (new_memo == NULL)
3471 return NULL;
3472
3473 memo = self->pickler->memo;
3474 for (i = 0; i < memo->mt_allocated; ++i) {
3475 PyMemoEntry entry = memo->mt_table[i];
3476 if (entry.me_key != NULL) {
3477 int status;
3478 PyObject *key, *value;
3479
3480 key = PyLong_FromVoidPtr(entry.me_key);
3481 value = Py_BuildValue("lO", entry.me_value, entry.me_key);
3482
3483 if (key == NULL || value == NULL) {
3484 Py_XDECREF(key);
3485 Py_XDECREF(value);
3486 goto error;
3487 }
3488 status = PyDict_SetItem(new_memo, key, value);
3489 Py_DECREF(key);
3490 Py_DECREF(value);
3491 if (status < 0)
3492 goto error;
3493 }
3494 }
3495 return new_memo;
3496
3497 error:
3498 Py_XDECREF(new_memo);
3499 return NULL;
3500}
3501
3502PyDoc_STRVAR(pmp_reduce_doc,
3503"memo.__reduce__(). Pickling support.");
3504
3505static PyObject *
3506pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3507{
3508 PyObject *reduce_value, *dict_args;
3509 PyObject *contents = pmp_copy(self);
3510 if (contents == NULL)
3511 return NULL;
3512
3513 reduce_value = PyTuple_New(2);
3514 if (reduce_value == NULL) {
3515 Py_DECREF(contents);
3516 return NULL;
3517 }
3518 dict_args = PyTuple_New(1);
3519 if (dict_args == NULL) {
3520 Py_DECREF(contents);
3521 Py_DECREF(reduce_value);
3522 return NULL;
3523 }
3524 PyTuple_SET_ITEM(dict_args, 0, contents);
3525 Py_INCREF((PyObject *)&PyDict_Type);
3526 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3527 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3528 return reduce_value;
3529}
3530
3531static PyMethodDef picklerproxy_methods[] = {
3532 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3533 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3534 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3535 {NULL, NULL} /* sentinel */
3536};
3537
3538static void
3539PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3540{
3541 PyObject_GC_UnTrack(self);
3542 Py_XDECREF(self->pickler);
3543 PyObject_GC_Del((PyObject *)self);
3544}
3545
3546static int
3547PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3548 visitproc visit, void *arg)
3549{
3550 Py_VISIT(self->pickler);
3551 return 0;
3552}
3553
3554static int
3555PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3556{
3557 Py_CLEAR(self->pickler);
3558 return 0;
3559}
3560
3561static PyTypeObject PicklerMemoProxyType = {
3562 PyVarObject_HEAD_INIT(NULL, 0)
3563 "_pickle.PicklerMemoProxy", /*tp_name*/
3564 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3565 0,
3566 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3567 0, /* tp_print */
3568 0, /* tp_getattr */
3569 0, /* tp_setattr */
3570 0, /* tp_compare */
3571 0, /* tp_repr */
3572 0, /* tp_as_number */
3573 0, /* tp_as_sequence */
3574 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003575 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003576 0, /* tp_call */
3577 0, /* tp_str */
3578 PyObject_GenericGetAttr, /* tp_getattro */
3579 PyObject_GenericSetAttr, /* tp_setattro */
3580 0, /* tp_as_buffer */
3581 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3582 0, /* tp_doc */
3583 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3584 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3585 0, /* tp_richcompare */
3586 0, /* tp_weaklistoffset */
3587 0, /* tp_iter */
3588 0, /* tp_iternext */
3589 picklerproxy_methods, /* tp_methods */
3590};
3591
3592static PyObject *
3593PicklerMemoProxy_New(PicklerObject *pickler)
3594{
3595 PicklerMemoProxyObject *self;
3596
3597 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3598 if (self == NULL)
3599 return NULL;
3600 Py_INCREF(pickler);
3601 self->pickler = pickler;
3602 PyObject_GC_Track(self);
3603 return (PyObject *)self;
3604}
3605
3606/*****************************************************************************/
3607
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003608static PyObject *
3609Pickler_get_memo(PicklerObject *self)
3610{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003611 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003612}
3613
3614static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003615Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003616{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003617 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003618
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003619 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003620 PyErr_SetString(PyExc_TypeError,
3621 "attribute deletion is not supported");
3622 return -1;
3623 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003624
3625 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3626 PicklerObject *pickler =
3627 ((PicklerMemoProxyObject *)obj)->pickler;
3628
3629 new_memo = PyMemoTable_Copy(pickler->memo);
3630 if (new_memo == NULL)
3631 return -1;
3632 }
3633 else if (PyDict_Check(obj)) {
3634 Py_ssize_t i = 0;
3635 PyObject *key, *value;
3636
3637 new_memo = PyMemoTable_New();
3638 if (new_memo == NULL)
3639 return -1;
3640
3641 while (PyDict_Next(obj, &i, &key, &value)) {
3642 long memo_id;
3643 PyObject *memo_obj;
3644
3645 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3646 PyErr_SetString(PyExc_TypeError,
3647 "'memo' values must be 2-item tuples");
3648 goto error;
3649 }
3650 memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
3651 if (memo_id == -1 && PyErr_Occurred())
3652 goto error;
3653 memo_obj = PyTuple_GET_ITEM(value, 1);
3654 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3655 goto error;
3656 }
3657 }
3658 else {
3659 PyErr_Format(PyExc_TypeError,
3660 "'memo' attribute must be an PicklerMemoProxy object"
3661 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003662 return -1;
3663 }
3664
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003665 PyMemoTable_Del(self->memo);
3666 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003667
3668 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003669
3670 error:
3671 if (new_memo)
3672 PyMemoTable_Del(new_memo);
3673 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003674}
3675
3676static PyObject *
3677Pickler_get_persid(PicklerObject *self)
3678{
3679 if (self->pers_func == NULL)
3680 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3681 else
3682 Py_INCREF(self->pers_func);
3683 return self->pers_func;
3684}
3685
3686static int
3687Pickler_set_persid(PicklerObject *self, PyObject *value)
3688{
3689 PyObject *tmp;
3690
3691 if (value == NULL) {
3692 PyErr_SetString(PyExc_TypeError,
3693 "attribute deletion is not supported");
3694 return -1;
3695 }
3696 if (!PyCallable_Check(value)) {
3697 PyErr_SetString(PyExc_TypeError,
3698 "persistent_id must be a callable taking one argument");
3699 return -1;
3700 }
3701
3702 tmp = self->pers_func;
3703 Py_INCREF(value);
3704 self->pers_func = value;
3705 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3706
3707 return 0;
3708}
3709
3710static PyMemberDef Pickler_members[] = {
3711 {"bin", T_INT, offsetof(PicklerObject, bin)},
3712 {"fast", T_INT, offsetof(PicklerObject, fast)},
3713 {NULL}
3714};
3715
3716static PyGetSetDef Pickler_getsets[] = {
3717 {"memo", (getter)Pickler_get_memo,
3718 (setter)Pickler_set_memo},
3719 {"persistent_id", (getter)Pickler_get_persid,
3720 (setter)Pickler_set_persid},
3721 {NULL}
3722};
3723
3724static PyTypeObject Pickler_Type = {
3725 PyVarObject_HEAD_INIT(NULL, 0)
3726 "_pickle.Pickler" , /*tp_name*/
3727 sizeof(PicklerObject), /*tp_basicsize*/
3728 0, /*tp_itemsize*/
3729 (destructor)Pickler_dealloc, /*tp_dealloc*/
3730 0, /*tp_print*/
3731 0, /*tp_getattr*/
3732 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003733 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003734 0, /*tp_repr*/
3735 0, /*tp_as_number*/
3736 0, /*tp_as_sequence*/
3737 0, /*tp_as_mapping*/
3738 0, /*tp_hash*/
3739 0, /*tp_call*/
3740 0, /*tp_str*/
3741 0, /*tp_getattro*/
3742 0, /*tp_setattro*/
3743 0, /*tp_as_buffer*/
3744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3745 Pickler_doc, /*tp_doc*/
3746 (traverseproc)Pickler_traverse, /*tp_traverse*/
3747 (inquiry)Pickler_clear, /*tp_clear*/
3748 0, /*tp_richcompare*/
3749 0, /*tp_weaklistoffset*/
3750 0, /*tp_iter*/
3751 0, /*tp_iternext*/
3752 Pickler_methods, /*tp_methods*/
3753 Pickler_members, /*tp_members*/
3754 Pickler_getsets, /*tp_getset*/
3755 0, /*tp_base*/
3756 0, /*tp_dict*/
3757 0, /*tp_descr_get*/
3758 0, /*tp_descr_set*/
3759 0, /*tp_dictoffset*/
3760 (initproc)Pickler_init, /*tp_init*/
3761 PyType_GenericAlloc, /*tp_alloc*/
3762 PyType_GenericNew, /*tp_new*/
3763 PyObject_GC_Del, /*tp_free*/
3764 0, /*tp_is_gc*/
3765};
3766
3767/* Temporary helper for calling self.find_class().
3768
3769 XXX: It would be nice to able to avoid Python function call overhead, by
3770 using directly the C version of find_class(), when find_class() is not
3771 overridden by a subclass. Although, this could become rather hackish. A
3772 simpler optimization would be to call the C function when self is not a
3773 subclass instance. */
3774static PyObject *
3775find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3776{
3777 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3778 module_name, global_name);
3779}
3780
3781static int
3782marker(UnpicklerObject *self)
3783{
3784 if (self->num_marks < 1) {
3785 PyErr_SetString(UnpicklingError, "could not find MARK");
3786 return -1;
3787 }
3788
3789 return self->marks[--self->num_marks];
3790}
3791
3792static int
3793load_none(UnpicklerObject *self)
3794{
3795 PDATA_APPEND(self->stack, Py_None, -1);
3796 return 0;
3797}
3798
3799static int
3800bad_readline(void)
3801{
3802 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3803 return -1;
3804}
3805
3806static int
3807load_int(UnpicklerObject *self)
3808{
3809 PyObject *value;
3810 char *endptr, *s;
3811 Py_ssize_t len;
3812 long x;
3813
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003814 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003815 return -1;
3816 if (len < 2)
3817 return bad_readline();
3818
3819 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003820 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3821 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003822 x = strtol(s, &endptr, 0);
3823
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003824 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003825 /* Hm, maybe we've got something long. Let's try reading
3826 * it as a Python long object. */
3827 errno = 0;
3828 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003829 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003830 if (value == NULL) {
3831 PyErr_SetString(PyExc_ValueError,
3832 "could not convert string to int");
3833 return -1;
3834 }
3835 }
3836 else {
3837 if (len == 3 && (x == 0 || x == 1)) {
3838 if ((value = PyBool_FromLong(x)) == NULL)
3839 return -1;
3840 }
3841 else {
3842 if ((value = PyLong_FromLong(x)) == NULL)
3843 return -1;
3844 }
3845 }
3846
3847 PDATA_PUSH(self->stack, value, -1);
3848 return 0;
3849}
3850
3851static int
3852load_bool(UnpicklerObject *self, PyObject *boolean)
3853{
3854 assert(boolean == Py_True || boolean == Py_False);
3855 PDATA_APPEND(self->stack, boolean, -1);
3856 return 0;
3857}
3858
/* Decode `size` bytes (1, 2 or 4 in practice) at `bytes` as a little-endian
 * integer and return the value as a C long.
 *
 * Obscure: when size is 1 or 2 the value is unsigned, but when size is 4 it
 * is a signed 32-bit two's-complement quantity (BININT is really "BININT4").
 * This has been an historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no shift ever moves
 * a bit into the sign position of a signed type, which would be undefined
 * behavior where long is 32 bits wide.  The sign is applied arithmetically
 * afterwards, so the result is the same whatever the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;
    int i;

    for (i = 0; i < size; i++) {
        acc |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, a 4-byte value is signed: when bit 31 is
     * set, the value is acc - 2**32, computed here without overflowing.
     */
    if (size == 4 && (acc & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - acc) - 1;
    }

    return (long)acc;
}
3885
3886static int
3887load_binintx(UnpicklerObject *self, char *s, int size)
3888{
3889 PyObject *value;
3890 long x;
3891
3892 x = calc_binint(s, size);
3893
3894 if ((value = PyLong_FromLong(x)) == NULL)
3895 return -1;
3896
3897 PDATA_PUSH(self->stack, value, -1);
3898 return 0;
3899}
3900
3901static int
3902load_binint(UnpicklerObject *self)
3903{
3904 char *s;
3905
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003906 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003907 return -1;
3908
3909 return load_binintx(self, s, 4);
3910}
3911
3912static int
3913load_binint1(UnpicklerObject *self)
3914{
3915 char *s;
3916
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003917 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003918 return -1;
3919
3920 return load_binintx(self, s, 1);
3921}
3922
3923static int
3924load_binint2(UnpicklerObject *self)
3925{
3926 char *s;
3927
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003928 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003929 return -1;
3930
3931 return load_binintx(self, s, 2);
3932}
3933
3934static int
3935load_long(UnpicklerObject *self)
3936{
3937 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003938 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003939 Py_ssize_t len;
3940
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003941 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003942 return -1;
3943 if (len < 2)
3944 return bad_readline();
3945
Mark Dickinson8dd05142009-01-20 20:43:58 +00003946 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3947 the 'L' before calling PyLong_FromString. In order to maintain
3948 compatibility with Python 3.0.0, we don't actually *require*
3949 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003950 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003951 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003952 /* XXX: Should the base argument explicitly set to 10? */
3953 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003954 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003955 return -1;
3956
3957 PDATA_PUSH(self->stack, value, -1);
3958 return 0;
3959}
3960
3961/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3962 * data following.
3963 */
3964static int
3965load_counted_long(UnpicklerObject *self, int size)
3966{
3967 PyObject *value;
3968 char *nbytes;
3969 char *pdata;
3970
3971 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003972 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003973 return -1;
3974
3975 size = calc_binint(nbytes, size);
3976 if (size < 0) {
3977 /* Corrupt or hostile pickle -- we never write one like this */
3978 PyErr_SetString(UnpicklingError,
3979 "LONG pickle has negative byte count");
3980 return -1;
3981 }
3982
3983 if (size == 0)
3984 value = PyLong_FromLong(0L);
3985 else {
3986 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003987 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003988 return -1;
3989 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
3990 1 /* little endian */ , 1 /* signed */ );
3991 }
3992 if (value == NULL)
3993 return -1;
3994 PDATA_PUSH(self->stack, value, -1);
3995 return 0;
3996}
3997
3998static int
3999load_float(UnpicklerObject *self)
4000{
4001 PyObject *value;
4002 char *endptr, *s;
4003 Py_ssize_t len;
4004 double d;
4005
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004006 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004007 return -1;
4008 if (len < 2)
4009 return bad_readline();
4010
4011 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004012 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4013 if (d == -1.0 && PyErr_Occurred())
4014 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004015 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004016 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4017 return -1;
4018 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004019 value = PyFloat_FromDouble(d);
4020 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004021 return -1;
4022
4023 PDATA_PUSH(self->stack, value, -1);
4024 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004025}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004026
4027static int
4028load_binfloat(UnpicklerObject *self)
4029{
4030 PyObject *value;
4031 double x;
4032 char *s;
4033
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004034 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004035 return -1;
4036
4037 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4038 if (x == -1.0 && PyErr_Occurred())
4039 return -1;
4040
4041 if ((value = PyFloat_FromDouble(x)) == NULL)
4042 return -1;
4043
4044 PDATA_PUSH(self->stack, value, -1);
4045 return 0;
4046}
4047
4048static int
4049load_string(UnpicklerObject *self)
4050{
4051 PyObject *bytes;
4052 PyObject *str = NULL;
4053 Py_ssize_t len;
4054 char *s, *p;
4055
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004056 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004057 return -1;
4058 if (len < 3)
4059 return bad_readline();
4060 if ((s = strdup(s)) == NULL) {
4061 PyErr_NoMemory();
4062 return -1;
4063 }
4064
4065 /* Strip outermost quotes */
4066 while (s[len - 1] <= ' ')
4067 len--;
4068 if (s[0] == '"' && s[len - 1] == '"') {
4069 s[len - 1] = '\0';
4070 p = s + 1;
4071 len -= 2;
4072 }
4073 else if (s[0] == '\'' && s[len - 1] == '\'') {
4074 s[len - 1] = '\0';
4075 p = s + 1;
4076 len -= 2;
4077 }
4078 else {
4079 free(s);
4080 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4081 return -1;
4082 }
4083
4084 /* Use the PyBytes API to decode the string, since that is what is used
4085 to encode, and then coerce the result to Unicode. */
4086 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4087 free(s);
4088 if (bytes == NULL)
4089 return -1;
4090 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4091 Py_DECREF(bytes);
4092 if (str == NULL)
4093 return -1;
4094
4095 PDATA_PUSH(self->stack, str, -1);
4096 return 0;
4097}
4098
4099static int
4100load_binbytes(UnpicklerObject *self)
4101{
4102 PyObject *bytes;
4103 long x;
4104 char *s;
4105
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004106 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004107 return -1;
4108
4109 x = calc_binint(s, 4);
4110 if (x < 0) {
4111 PyErr_SetString(UnpicklingError,
4112 "BINBYTES pickle has negative byte count");
4113 return -1;
4114 }
4115
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004116 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004117 return -1;
4118 bytes = PyBytes_FromStringAndSize(s, x);
4119 if (bytes == NULL)
4120 return -1;
4121
4122 PDATA_PUSH(self->stack, bytes, -1);
4123 return 0;
4124}
4125
4126static int
4127load_short_binbytes(UnpicklerObject *self)
4128{
4129 PyObject *bytes;
4130 unsigned char x;
4131 char *s;
4132
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004133 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004134 return -1;
4135
4136 x = (unsigned char)s[0];
4137
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004138 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004139 return -1;
4140
4141 bytes = PyBytes_FromStringAndSize(s, x);
4142 if (bytes == NULL)
4143 return -1;
4144
4145 PDATA_PUSH(self->stack, bytes, -1);
4146 return 0;
4147}
4148
4149static int
4150load_binstring(UnpicklerObject *self)
4151{
4152 PyObject *str;
4153 long x;
4154 char *s;
4155
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004156 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004157 return -1;
4158
4159 x = calc_binint(s, 4);
4160 if (x < 0) {
4161 PyErr_SetString(UnpicklingError,
4162 "BINSTRING pickle has negative byte count");
4163 return -1;
4164 }
4165
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004166 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004167 return -1;
4168
4169 /* Convert Python 2.x strings to unicode. */
4170 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4171 if (str == NULL)
4172 return -1;
4173
4174 PDATA_PUSH(self->stack, str, -1);
4175 return 0;
4176}
4177
4178static int
4179load_short_binstring(UnpicklerObject *self)
4180{
4181 PyObject *str;
4182 unsigned char x;
4183 char *s;
4184
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004185 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004186 return -1;
4187
4188 x = (unsigned char)s[0];
4189
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004190 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004191 return -1;
4192
4193 /* Convert Python 2.x strings to unicode. */
4194 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4195 if (str == NULL)
4196 return -1;
4197
4198 PDATA_PUSH(self->stack, str, -1);
4199 return 0;
4200}
4201
4202static int
4203load_unicode(UnpicklerObject *self)
4204{
4205 PyObject *str;
4206 Py_ssize_t len;
4207 char *s;
4208
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004209 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004210 return -1;
4211 if (len < 1)
4212 return bad_readline();
4213
4214 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4215 if (str == NULL)
4216 return -1;
4217
4218 PDATA_PUSH(self->stack, str, -1);
4219 return 0;
4220}
4221
4222static int
4223load_binunicode(UnpicklerObject *self)
4224{
4225 PyObject *str;
4226 long size;
4227 char *s;
4228
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004229 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004230 return -1;
4231
4232 size = calc_binint(s, 4);
4233 if (size < 0) {
4234 PyErr_SetString(UnpicklingError,
4235 "BINUNICODE pickle has negative byte count");
4236 return -1;
4237 }
4238
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004239 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004240 return -1;
4241
Victor Stinner485fb562010-04-13 11:07:24 +00004242 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004243 if (str == NULL)
4244 return -1;
4245
4246 PDATA_PUSH(self->stack, str, -1);
4247 return 0;
4248}
4249
4250static int
4251load_tuple(UnpicklerObject *self)
4252{
4253 PyObject *tuple;
4254 int i;
4255
4256 if ((i = marker(self)) < 0)
4257 return -1;
4258
4259 tuple = Pdata_poptuple(self->stack, i);
4260 if (tuple == NULL)
4261 return -1;
4262 PDATA_PUSH(self->stack, tuple, -1);
4263 return 0;
4264}
4265
4266static int
4267load_counted_tuple(UnpicklerObject *self, int len)
4268{
4269 PyObject *tuple;
4270
4271 tuple = PyTuple_New(len);
4272 if (tuple == NULL)
4273 return -1;
4274
4275 while (--len >= 0) {
4276 PyObject *item;
4277
4278 PDATA_POP(self->stack, item);
4279 if (item == NULL)
4280 return -1;
4281 PyTuple_SET_ITEM(tuple, len, item);
4282 }
4283 PDATA_PUSH(self->stack, tuple, -1);
4284 return 0;
4285}
4286
4287static int
4288load_empty_list(UnpicklerObject *self)
4289{
4290 PyObject *list;
4291
4292 if ((list = PyList_New(0)) == NULL)
4293 return -1;
4294 PDATA_PUSH(self->stack, list, -1);
4295 return 0;
4296}
4297
4298static int
4299load_empty_dict(UnpicklerObject *self)
4300{
4301 PyObject *dict;
4302
4303 if ((dict = PyDict_New()) == NULL)
4304 return -1;
4305 PDATA_PUSH(self->stack, dict, -1);
4306 return 0;
4307}
4308
4309static int
4310load_list(UnpicklerObject *self)
4311{
4312 PyObject *list;
4313 int i;
4314
4315 if ((i = marker(self)) < 0)
4316 return -1;
4317
4318 list = Pdata_poplist(self->stack, i);
4319 if (list == NULL)
4320 return -1;
4321 PDATA_PUSH(self->stack, list, -1);
4322 return 0;
4323}
4324
4325static int
4326load_dict(UnpicklerObject *self)
4327{
4328 PyObject *dict, *key, *value;
4329 int i, j, k;
4330
4331 if ((i = marker(self)) < 0)
4332 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004333 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004334
4335 if ((dict = PyDict_New()) == NULL)
4336 return -1;
4337
4338 for (k = i + 1; k < j; k += 2) {
4339 key = self->stack->data[k - 1];
4340 value = self->stack->data[k];
4341 if (PyDict_SetItem(dict, key, value) < 0) {
4342 Py_DECREF(dict);
4343 return -1;
4344 }
4345 }
4346 Pdata_clear(self->stack, i);
4347 PDATA_PUSH(self->stack, dict, -1);
4348 return 0;
4349}
4350
4351static PyObject *
4352instantiate(PyObject *cls, PyObject *args)
4353{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004354 PyObject *result = NULL;
4355 /* Caller must assure args are a tuple. Normally, args come from
4356 Pdata_poptuple which packs objects from the top of the stack
4357 into a newly created tuple. */
4358 assert(PyTuple_Check(args));
4359 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4360 PyObject_HasAttrString(cls, "__getinitargs__")) {
4361 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004362 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004363 else {
4364 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4365 }
4366 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004367}
4368
4369static int
4370load_obj(UnpicklerObject *self)
4371{
4372 PyObject *cls, *args, *obj = NULL;
4373 int i;
4374
4375 if ((i = marker(self)) < 0)
4376 return -1;
4377
4378 args = Pdata_poptuple(self->stack, i + 1);
4379 if (args == NULL)
4380 return -1;
4381
4382 PDATA_POP(self->stack, cls);
4383 if (cls) {
4384 obj = instantiate(cls, args);
4385 Py_DECREF(cls);
4386 }
4387 Py_DECREF(args);
4388 if (obj == NULL)
4389 return -1;
4390
4391 PDATA_PUSH(self->stack, obj, -1);
4392 return 0;
4393}
4394
4395static int
4396load_inst(UnpicklerObject *self)
4397{
4398 PyObject *cls = NULL;
4399 PyObject *args = NULL;
4400 PyObject *obj = NULL;
4401 PyObject *module_name;
4402 PyObject *class_name;
4403 Py_ssize_t len;
4404 int i;
4405 char *s;
4406
4407 if ((i = marker(self)) < 0)
4408 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004409 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004410 return -1;
4411 if (len < 2)
4412 return bad_readline();
4413
4414 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4415 identifiers are permitted in Python 3.0, since the INST opcode is only
4416 supported by older protocols on Python 2.x. */
4417 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4418 if (module_name == NULL)
4419 return -1;
4420
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004421 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004422 if (len < 2)
4423 return bad_readline();
4424 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004425 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004426 cls = find_class(self, module_name, class_name);
4427 Py_DECREF(class_name);
4428 }
4429 }
4430 Py_DECREF(module_name);
4431
4432 if (cls == NULL)
4433 return -1;
4434
4435 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4436 obj = instantiate(cls, args);
4437 Py_DECREF(args);
4438 }
4439 Py_DECREF(cls);
4440
4441 if (obj == NULL)
4442 return -1;
4443
4444 PDATA_PUSH(self->stack, obj, -1);
4445 return 0;
4446}
4447
4448static int
4449load_newobj(UnpicklerObject *self)
4450{
4451 PyObject *args = NULL;
4452 PyObject *clsraw = NULL;
4453 PyTypeObject *cls; /* clsraw cast to its true type */
4454 PyObject *obj;
4455
4456 /* Stack is ... cls argtuple, and we want to call
4457 * cls.__new__(cls, *argtuple).
4458 */
4459 PDATA_POP(self->stack, args);
4460 if (args == NULL)
4461 goto error;
4462 if (!PyTuple_Check(args)) {
4463 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4464 goto error;
4465 }
4466
4467 PDATA_POP(self->stack, clsraw);
4468 cls = (PyTypeObject *)clsraw;
4469 if (cls == NULL)
4470 goto error;
4471 if (!PyType_Check(cls)) {
4472 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4473 "isn't a type object");
4474 goto error;
4475 }
4476 if (cls->tp_new == NULL) {
4477 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4478 "has NULL tp_new");
4479 goto error;
4480 }
4481
4482 /* Call __new__. */
4483 obj = cls->tp_new(cls, args, NULL);
4484 if (obj == NULL)
4485 goto error;
4486
4487 Py_DECREF(args);
4488 Py_DECREF(clsraw);
4489 PDATA_PUSH(self->stack, obj, -1);
4490 return 0;
4491
4492 error:
4493 Py_XDECREF(args);
4494 Py_XDECREF(clsraw);
4495 return -1;
4496}
4497
4498static int
4499load_global(UnpicklerObject *self)
4500{
4501 PyObject *global = NULL;
4502 PyObject *module_name;
4503 PyObject *global_name;
4504 Py_ssize_t len;
4505 char *s;
4506
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004507 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004508 return -1;
4509 if (len < 2)
4510 return bad_readline();
4511 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4512 if (!module_name)
4513 return -1;
4514
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004515 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004516 if (len < 2) {
4517 Py_DECREF(module_name);
4518 return bad_readline();
4519 }
4520 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4521 if (global_name) {
4522 global = find_class(self, module_name, global_name);
4523 Py_DECREF(global_name);
4524 }
4525 }
4526 Py_DECREF(module_name);
4527
4528 if (global == NULL)
4529 return -1;
4530 PDATA_PUSH(self->stack, global, -1);
4531 return 0;
4532}
4533
4534static int
4535load_persid(UnpicklerObject *self)
4536{
4537 PyObject *pid;
4538 Py_ssize_t len;
4539 char *s;
4540
4541 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004542 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004543 return -1;
4544 if (len < 2)
4545 return bad_readline();
4546
4547 pid = PyBytes_FromStringAndSize(s, len - 1);
4548 if (pid == NULL)
4549 return -1;
4550
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004551 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004552 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004553 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004554 if (pid == NULL)
4555 return -1;
4556
4557 PDATA_PUSH(self->stack, pid, -1);
4558 return 0;
4559 }
4560 else {
4561 PyErr_SetString(UnpicklingError,
4562 "A load persistent id instruction was encountered,\n"
4563 "but no persistent_load function was specified.");
4564 return -1;
4565 }
4566}
4567
4568static int
4569load_binpersid(UnpicklerObject *self)
4570{
4571 PyObject *pid;
4572
4573 if (self->pers_func) {
4574 PDATA_POP(self->stack, pid);
4575 if (pid == NULL)
4576 return -1;
4577
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004578 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004579 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004580 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004581 if (pid == NULL)
4582 return -1;
4583
4584 PDATA_PUSH(self->stack, pid, -1);
4585 return 0;
4586 }
4587 else {
4588 PyErr_SetString(UnpicklingError,
4589 "A load persistent id instruction was encountered,\n"
4590 "but no persistent_load function was specified.");
4591 return -1;
4592 }
4593}
4594
4595static int
4596load_pop(UnpicklerObject *self)
4597{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004598 int len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004599
4600 /* Note that we split the (pickle.py) stack into two stacks,
4601 * an object stack and a mark stack. We have to be clever and
4602 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004603 * mark stack first, and only signalling a stack underflow if
4604 * the object stack is empty and the mark stack doesn't match
4605 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004606 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004607 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004608 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004609 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004610 len--;
4611 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004612 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004613 } else {
4614 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004615 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004616 return 0;
4617}
4618
4619static int
4620load_pop_mark(UnpicklerObject *self)
4621{
4622 int i;
4623
4624 if ((i = marker(self)) < 0)
4625 return -1;
4626
4627 Pdata_clear(self->stack, i);
4628
4629 return 0;
4630}
4631
4632static int
4633load_dup(UnpicklerObject *self)
4634{
4635 PyObject *last;
4636 int len;
4637
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004638 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004639 return stack_underflow();
4640 last = self->stack->data[len - 1];
4641 PDATA_APPEND(self->stack, last, -1);
4642 return 0;
4643}
4644
4645static int
4646load_get(UnpicklerObject *self)
4647{
4648 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004649 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650 Py_ssize_t len;
4651 char *s;
4652
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004653 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004654 return -1;
4655 if (len < 2)
4656 return bad_readline();
4657
4658 key = PyLong_FromString(s, NULL, 10);
4659 if (key == NULL)
4660 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004661 idx = PyLong_AsSsize_t(key);
4662 if (idx == -1 && PyErr_Occurred()) {
4663 Py_DECREF(key);
4664 return -1;
4665 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004667 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004668 if (value == NULL) {
4669 if (!PyErr_Occurred())
4670 PyErr_SetObject(PyExc_KeyError, key);
4671 Py_DECREF(key);
4672 return -1;
4673 }
4674 Py_DECREF(key);
4675
4676 PDATA_APPEND(self->stack, value, -1);
4677 return 0;
4678}
4679
4680static int
4681load_binget(UnpicklerObject *self)
4682{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004683 PyObject *value;
4684 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004685 char *s;
4686
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004687 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004688 return -1;
4689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004690 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004691
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004692 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004693 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004694 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004695 if (!PyErr_Occurred())
4696 PyErr_SetObject(PyExc_KeyError, key);
4697 Py_DECREF(key);
4698 return -1;
4699 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004700
4701 PDATA_APPEND(self->stack, value, -1);
4702 return 0;
4703}
4704
4705static int
4706load_long_binget(UnpicklerObject *self)
4707{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004708 PyObject *value;
4709 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004710 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004711
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004712 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004713 return -1;
4714
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004715 idx = (long)Py_CHARMASK(s[0]);
4716 idx |= (long)Py_CHARMASK(s[1]) << 8;
4717 idx |= (long)Py_CHARMASK(s[2]) << 16;
4718 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004719
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004720 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004721 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004722 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004723 if (!PyErr_Occurred())
4724 PyErr_SetObject(PyExc_KeyError, key);
4725 Py_DECREF(key);
4726 return -1;
4727 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728
4729 PDATA_APPEND(self->stack, value, -1);
4730 return 0;
4731}
4732
4733/* Push an object from the extension registry (EXT[124]). nbytes is
4734 * the number of bytes following the opcode, holding the index (code) value.
4735 */
4736static int
4737load_extension(UnpicklerObject *self, int nbytes)
4738{
4739 char *codebytes; /* the nbytes bytes after the opcode */
4740 long code; /* calc_binint returns long */
4741 PyObject *py_code; /* code as a Python int */
4742 PyObject *obj; /* the object to push */
4743 PyObject *pair; /* (module_name, class_name) */
4744 PyObject *module_name, *class_name;
4745
4746 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004747 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004748 return -1;
4749 code = calc_binint(codebytes, nbytes);
4750 if (code <= 0) { /* note that 0 is forbidden */
4751 /* Corrupt or hostile pickle. */
4752 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4753 return -1;
4754 }
4755
4756 /* Look for the code in the cache. */
4757 py_code = PyLong_FromLong(code);
4758 if (py_code == NULL)
4759 return -1;
4760 obj = PyDict_GetItem(extension_cache, py_code);
4761 if (obj != NULL) {
4762 /* Bingo. */
4763 Py_DECREF(py_code);
4764 PDATA_APPEND(self->stack, obj, -1);
4765 return 0;
4766 }
4767
4768 /* Look up the (module_name, class_name) pair. */
4769 pair = PyDict_GetItem(inverted_registry, py_code);
4770 if (pair == NULL) {
4771 Py_DECREF(py_code);
4772 PyErr_Format(PyExc_ValueError, "unregistered extension "
4773 "code %ld", code);
4774 return -1;
4775 }
4776 /* Since the extension registry is manipulable via Python code,
4777 * confirm that pair is really a 2-tuple of strings.
4778 */
4779 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4780 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4781 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4782 Py_DECREF(py_code);
4783 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4784 "isn't a 2-tuple of strings", code);
4785 return -1;
4786 }
4787 /* Load the object. */
4788 obj = find_class(self, module_name, class_name);
4789 if (obj == NULL) {
4790 Py_DECREF(py_code);
4791 return -1;
4792 }
4793 /* Cache code -> obj. */
4794 code = PyDict_SetItem(extension_cache, py_code, obj);
4795 Py_DECREF(py_code);
4796 if (code < 0) {
4797 Py_DECREF(obj);
4798 return -1;
4799 }
4800 PDATA_PUSH(self->stack, obj, -1);
4801 return 0;
4802}
4803
4804static int
4805load_put(UnpicklerObject *self)
4806{
4807 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004808 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004809 Py_ssize_t len;
4810 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004811
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004812 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004813 return -1;
4814 if (len < 2)
4815 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004816 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004817 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004818 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004819
4820 key = PyLong_FromString(s, NULL, 10);
4821 if (key == NULL)
4822 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004823 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004824 Py_DECREF(key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004825 if (idx == -1 && PyErr_Occurred())
4826 return -1;
4827
4828 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004829}
4830
4831static int
4832load_binput(UnpicklerObject *self)
4833{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004834 PyObject *value;
4835 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004836 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004837
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004838 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004839 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004840
4841 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004842 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004843 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004844
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004845 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004847 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848}
4849
4850static int
4851load_long_binput(UnpicklerObject *self)
4852{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004853 PyObject *value;
4854 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004855 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004856
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004857 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004858 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859
4860 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004863
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004864 idx = (long)Py_CHARMASK(s[0]);
4865 idx |= (long)Py_CHARMASK(s[1]) << 8;
4866 idx |= (long)Py_CHARMASK(s[2]) << 16;
4867 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004868
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004869 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004870}
4871
4872static int
4873do_append(UnpicklerObject *self, int x)
4874{
4875 PyObject *value;
4876 PyObject *list;
4877 int len, i;
4878
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004879 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004880 if (x > len || x <= 0)
4881 return stack_underflow();
4882 if (len == x) /* nothing to do */
4883 return 0;
4884
4885 list = self->stack->data[x - 1];
4886
4887 if (PyList_Check(list)) {
4888 PyObject *slice;
4889 Py_ssize_t list_len;
4890
4891 slice = Pdata_poplist(self->stack, x);
4892 if (!slice)
4893 return -1;
4894 list_len = PyList_GET_SIZE(list);
4895 i = PyList_SetSlice(list, list_len, list_len, slice);
4896 Py_DECREF(slice);
4897 return i;
4898 }
4899 else {
4900 PyObject *append_func;
4901
4902 append_func = PyObject_GetAttrString(list, "append");
4903 if (append_func == NULL)
4904 return -1;
4905 for (i = x; i < len; i++) {
4906 PyObject *result;
4907
4908 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004909 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910 if (result == NULL) {
4911 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004912 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004913 return -1;
4914 }
4915 Py_DECREF(result);
4916 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004917 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004918 }
4919
4920 return 0;
4921}
4922
4923static int
4924load_append(UnpicklerObject *self)
4925{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004926 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927}
4928
4929static int
4930load_appends(UnpicklerObject *self)
4931{
4932 return do_append(self, marker(self));
4933}
4934
4935static int
4936do_setitems(UnpicklerObject *self, int x)
4937{
4938 PyObject *value, *key;
4939 PyObject *dict;
4940 int len, i;
4941 int status = 0;
4942
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004943 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944 if (x > len || x <= 0)
4945 return stack_underflow();
4946 if (len == x) /* nothing to do */
4947 return 0;
4948 if ((len - x) % 2 != 0) {
4949 /* Currupt or hostile pickle -- we never write one like this. */
4950 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4951 return -1;
4952 }
4953
4954 /* Here, dict does not actually need to be a PyDict; it could be anything
4955 that supports the __setitem__ attribute. */
4956 dict = self->stack->data[x - 1];
4957
4958 for (i = x + 1; i < len; i += 2) {
4959 key = self->stack->data[i - 1];
4960 value = self->stack->data[i];
4961 if (PyObject_SetItem(dict, key, value) < 0) {
4962 status = -1;
4963 break;
4964 }
4965 }
4966
4967 Pdata_clear(self->stack, x);
4968 return status;
4969}
4970
4971static int
4972load_setitem(UnpicklerObject *self)
4973{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004974 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004975}
4976
4977static int
4978load_setitems(UnpicklerObject *self)
4979{
4980 return do_setitems(self, marker(self));
4981}
4982
4983static int
4984load_build(UnpicklerObject *self)
4985{
4986 PyObject *state, *inst, *slotstate;
4987 PyObject *setstate;
4988 int status = 0;
4989
4990 /* Stack is ... instance, state. We want to leave instance at
4991 * the stack top, possibly mutated via instance.__setstate__(state).
4992 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004994 return stack_underflow();
4995
4996 PDATA_POP(self->stack, state);
4997 if (state == NULL)
4998 return -1;
4999
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005000 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005001
5002 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005003 if (setstate == NULL) {
5004 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5005 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005006 else {
5007 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005008 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005009 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010 }
5011 else {
5012 PyObject *result;
5013
5014 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005015 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005016 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005017 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005018 Py_DECREF(setstate);
5019 if (result == NULL)
5020 return -1;
5021 Py_DECREF(result);
5022 return 0;
5023 }
5024
5025 /* A default __setstate__. First see whether state embeds a
5026 * slot state dict too (a proto 2 addition).
5027 */
5028 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5029 PyObject *tmp = state;
5030
5031 state = PyTuple_GET_ITEM(tmp, 0);
5032 slotstate = PyTuple_GET_ITEM(tmp, 1);
5033 Py_INCREF(state);
5034 Py_INCREF(slotstate);
5035 Py_DECREF(tmp);
5036 }
5037 else
5038 slotstate = NULL;
5039
5040 /* Set inst.__dict__ from the state dict (if any). */
5041 if (state != Py_None) {
5042 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005043 PyObject *d_key, *d_value;
5044 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005045
5046 if (!PyDict_Check(state)) {
5047 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5048 goto error;
5049 }
5050 dict = PyObject_GetAttrString(inst, "__dict__");
5051 if (dict == NULL)
5052 goto error;
5053
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005054 i = 0;
5055 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5056 /* normally the keys for instance attributes are
5057 interned. we should try to do that here. */
5058 Py_INCREF(d_key);
5059 if (PyUnicode_CheckExact(d_key))
5060 PyUnicode_InternInPlace(&d_key);
5061 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5062 Py_DECREF(d_key);
5063 goto error;
5064 }
5065 Py_DECREF(d_key);
5066 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005067 Py_DECREF(dict);
5068 }
5069
5070 /* Also set instance attributes from the slotstate dict (if any). */
5071 if (slotstate != NULL) {
5072 PyObject *d_key, *d_value;
5073 Py_ssize_t i;
5074
5075 if (!PyDict_Check(slotstate)) {
5076 PyErr_SetString(UnpicklingError,
5077 "slot state is not a dictionary");
5078 goto error;
5079 }
5080 i = 0;
5081 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5082 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5083 goto error;
5084 }
5085 }
5086
5087 if (0) {
5088 error:
5089 status = -1;
5090 }
5091
5092 Py_DECREF(state);
5093 Py_XDECREF(slotstate);
5094 return status;
5095}
5096
5097static int
5098load_mark(UnpicklerObject *self)
5099{
5100
5101 /* Note that we split the (pickle.py) stack into two stacks, an
5102 * object stack and a mark stack. Here we push a mark onto the
5103 * mark stack.
5104 */
5105
5106 if ((self->num_marks + 1) >= self->marks_size) {
5107 size_t alloc;
5108 int *marks;
5109
5110 /* Use the size_t type to check for overflow. */
5111 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005112 if (alloc > PY_SSIZE_T_MAX ||
5113 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005114 PyErr_NoMemory();
5115 return -1;
5116 }
5117
5118 if (self->marks == NULL)
5119 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
5120 else
5121 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
5122 if (marks == NULL) {
5123 PyErr_NoMemory();
5124 return -1;
5125 }
5126 self->marks = marks;
5127 self->marks_size = (Py_ssize_t)alloc;
5128 }
5129
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005130 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005131
5132 return 0;
5133}
5134
5135static int
5136load_reduce(UnpicklerObject *self)
5137{
5138 PyObject *callable = NULL;
5139 PyObject *argtup = NULL;
5140 PyObject *obj = NULL;
5141
5142 PDATA_POP(self->stack, argtup);
5143 if (argtup == NULL)
5144 return -1;
5145 PDATA_POP(self->stack, callable);
5146 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005147 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005148 Py_DECREF(callable);
5149 }
5150 Py_DECREF(argtup);
5151
5152 if (obj == NULL)
5153 return -1;
5154
5155 PDATA_PUSH(self->stack, obj, -1);
5156 return 0;
5157}
5158
5159/* Just raises an error if we don't know the protocol specified. PROTO
5160 * is the first opcode for protocols >= 2.
5161 */
5162static int
5163load_proto(UnpicklerObject *self)
5164{
5165 char *s;
5166 int i;
5167
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005168 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005169 return -1;
5170
5171 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005172 if (i <= HIGHEST_PROTOCOL) {
5173 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005174 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005175 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005176
5177 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5178 return -1;
5179}
5180
5181static PyObject *
5182load(UnpicklerObject *self)
5183{
5184 PyObject *err;
5185 PyObject *value = NULL;
5186 char *s;
5187
5188 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005189 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005190 Pdata_clear(self->stack, 0);
5191
5192 /* Convenient macros for the dispatch while-switch loop just below. */
5193#define OP(opcode, load_func) \
5194 case opcode: if (load_func(self) < 0) break; continue;
5195
5196#define OP_ARG(opcode, load_func, arg) \
5197 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5198
5199 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005200 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005201 break;
5202
5203 switch ((enum opcode)s[0]) {
5204 OP(NONE, load_none)
5205 OP(BININT, load_binint)
5206 OP(BININT1, load_binint1)
5207 OP(BININT2, load_binint2)
5208 OP(INT, load_int)
5209 OP(LONG, load_long)
5210 OP_ARG(LONG1, load_counted_long, 1)
5211 OP_ARG(LONG4, load_counted_long, 4)
5212 OP(FLOAT, load_float)
5213 OP(BINFLOAT, load_binfloat)
5214 OP(BINBYTES, load_binbytes)
5215 OP(SHORT_BINBYTES, load_short_binbytes)
5216 OP(BINSTRING, load_binstring)
5217 OP(SHORT_BINSTRING, load_short_binstring)
5218 OP(STRING, load_string)
5219 OP(UNICODE, load_unicode)
5220 OP(BINUNICODE, load_binunicode)
5221 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5222 OP_ARG(TUPLE1, load_counted_tuple, 1)
5223 OP_ARG(TUPLE2, load_counted_tuple, 2)
5224 OP_ARG(TUPLE3, load_counted_tuple, 3)
5225 OP(TUPLE, load_tuple)
5226 OP(EMPTY_LIST, load_empty_list)
5227 OP(LIST, load_list)
5228 OP(EMPTY_DICT, load_empty_dict)
5229 OP(DICT, load_dict)
5230 OP(OBJ, load_obj)
5231 OP(INST, load_inst)
5232 OP(NEWOBJ, load_newobj)
5233 OP(GLOBAL, load_global)
5234 OP(APPEND, load_append)
5235 OP(APPENDS, load_appends)
5236 OP(BUILD, load_build)
5237 OP(DUP, load_dup)
5238 OP(BINGET, load_binget)
5239 OP(LONG_BINGET, load_long_binget)
5240 OP(GET, load_get)
5241 OP(MARK, load_mark)
5242 OP(BINPUT, load_binput)
5243 OP(LONG_BINPUT, load_long_binput)
5244 OP(PUT, load_put)
5245 OP(POP, load_pop)
5246 OP(POP_MARK, load_pop_mark)
5247 OP(SETITEM, load_setitem)
5248 OP(SETITEMS, load_setitems)
5249 OP(PERSID, load_persid)
5250 OP(BINPERSID, load_binpersid)
5251 OP(REDUCE, load_reduce)
5252 OP(PROTO, load_proto)
5253 OP_ARG(EXT1, load_extension, 1)
5254 OP_ARG(EXT2, load_extension, 2)
5255 OP_ARG(EXT4, load_extension, 4)
5256 OP_ARG(NEWTRUE, load_bool, Py_True)
5257 OP_ARG(NEWFALSE, load_bool, Py_False)
5258
5259 case STOP:
5260 break;
5261
5262 case '\0':
5263 PyErr_SetNone(PyExc_EOFError);
5264 return NULL;
5265
5266 default:
5267 PyErr_Format(UnpicklingError,
5268 "invalid load key, '%c'.", s[0]);
5269 return NULL;
5270 }
5271
5272 break; /* and we are done! */
5273 }
5274
Antoine Pitrou04248a82010-10-12 20:51:21 +00005275 if (_Unpickler_SkipConsumed(self) < 0)
5276 return NULL;
5277
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005278 /* XXX: It is not clear what this is actually for. */
5279 if ((err = PyErr_Occurred())) {
5280 if (err == PyExc_EOFError) {
5281 PyErr_SetNone(PyExc_EOFError);
5282 }
5283 return NULL;
5284 }
5285
5286 PDATA_POP(self->stack, value);
5287 return value;
5288}
5289
5290PyDoc_STRVAR(Unpickler_load_doc,
5291"load() -> object. Load a pickle."
5292"\n"
5293"Read a pickled object representation from the open file object given in\n"
5294"the constructor, and return the reconstituted object hierarchy specified\n"
5295"therein.\n");
5296
5297static PyObject *
5298Unpickler_load(UnpicklerObject *self)
5299{
5300 /* Check whether the Unpickler was initialized correctly. This prevents
5301 segfaulting if a subclass overridden __init__ with a function that does
5302 not call Unpickler.__init__(). Here, we simply ensure that self->read
5303 is not NULL. */
5304 if (self->read == NULL) {
5305 PyErr_Format(UnpicklingError,
5306 "Unpickler.__init__() was not called by %s.__init__()",
5307 Py_TYPE(self)->tp_name);
5308 return NULL;
5309 }
5310
5311 return load(self);
5312}
5313
5314/* The name of find_class() is misleading. In newer pickle protocols, this
5315 function is used for loading any global (i.e., functions), not just
5316 classes. The name is kept only for backward compatibility. */
5317
5318PyDoc_STRVAR(Unpickler_find_class_doc,
5319"find_class(module_name, global_name) -> object.\n"
5320"\n"
5321"Return an object from a specified module, importing the module if\n"
5322"necessary. Subclasses may override this method (e.g. to restrict\n"
5323"unpickling of arbitrary classes and functions).\n"
5324"\n"
5325"This method is called whenever a class or a function object is\n"
5326"needed. Both arguments passed are str objects.\n");
5327
5328static PyObject *
5329Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5330{
5331 PyObject *global;
5332 PyObject *modules_dict;
5333 PyObject *module;
5334 PyObject *module_name, *global_name;
5335
5336 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5337 &module_name, &global_name))
5338 return NULL;
5339
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005340 /* Try to map the old names used in Python 2.x to the new ones used in
5341 Python 3.x. We do this only with old pickle protocols and when the
5342 user has not disabled the feature. */
5343 if (self->proto < 3 && self->fix_imports) {
5344 PyObject *key;
5345 PyObject *item;
5346
5347 /* Check if the global (i.e., a function or a class) was renamed
5348 or moved to another module. */
5349 key = PyTuple_Pack(2, module_name, global_name);
5350 if (key == NULL)
5351 return NULL;
5352 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5353 Py_DECREF(key);
5354 if (item) {
5355 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5356 PyErr_Format(PyExc_RuntimeError,
5357 "_compat_pickle.NAME_MAPPING values should be "
5358 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5359 return NULL;
5360 }
5361 module_name = PyTuple_GET_ITEM(item, 0);
5362 global_name = PyTuple_GET_ITEM(item, 1);
5363 if (!PyUnicode_Check(module_name) ||
5364 !PyUnicode_Check(global_name)) {
5365 PyErr_Format(PyExc_RuntimeError,
5366 "_compat_pickle.NAME_MAPPING values should be "
5367 "pairs of str, not (%.200s, %.200s)",
5368 Py_TYPE(module_name)->tp_name,
5369 Py_TYPE(global_name)->tp_name);
5370 return NULL;
5371 }
5372 }
5373 else if (PyErr_Occurred()) {
5374 return NULL;
5375 }
5376
5377 /* Check if the module was renamed. */
5378 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5379 if (item) {
5380 if (!PyUnicode_Check(item)) {
5381 PyErr_Format(PyExc_RuntimeError,
5382 "_compat_pickle.IMPORT_MAPPING values should be "
5383 "strings, not %.200s", Py_TYPE(item)->tp_name);
5384 return NULL;
5385 }
5386 module_name = item;
5387 }
5388 else if (PyErr_Occurred()) {
5389 return NULL;
5390 }
5391 }
5392
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005393 modules_dict = PySys_GetObject("modules");
5394 if (modules_dict == NULL)
5395 return NULL;
5396
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005397 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005398 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005399 if (PyErr_Occurred())
5400 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005401 module = PyImport_Import(module_name);
5402 if (module == NULL)
5403 return NULL;
5404 global = PyObject_GetAttr(module, global_name);
5405 Py_DECREF(module);
5406 }
5407 else {
5408 global = PyObject_GetAttr(module, global_name);
5409 }
5410 return global;
5411}
5412
5413static struct PyMethodDef Unpickler_methods[] = {
5414 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5415 Unpickler_load_doc},
5416 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5417 Unpickler_find_class_doc},
5418 {NULL, NULL} /* sentinel */
5419};
5420
5421static void
5422Unpickler_dealloc(UnpicklerObject *self)
5423{
5424 PyObject_GC_UnTrack((PyObject *)self);
5425 Py_XDECREF(self->readline);
5426 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005427 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005428 Py_XDECREF(self->stack);
5429 Py_XDECREF(self->pers_func);
5430 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005431 if (self->buffer.buf != NULL) {
5432 PyBuffer_Release(&self->buffer);
5433 self->buffer.buf = NULL;
5434 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005435
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005436 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005437 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005438 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005439 free(self->encoding);
5440 free(self->errors);
5441
5442 Py_TYPE(self)->tp_free((PyObject *)self);
5443}
5444
5445static int
5446Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5447{
5448 Py_VISIT(self->readline);
5449 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005450 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005451 Py_VISIT(self->stack);
5452 Py_VISIT(self->pers_func);
5453 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454 return 0;
5455}
5456
5457static int
5458Unpickler_clear(UnpicklerObject *self)
5459{
5460 Py_CLEAR(self->readline);
5461 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005462 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005463 Py_CLEAR(self->stack);
5464 Py_CLEAR(self->pers_func);
5465 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005466 if (self->buffer.buf != NULL) {
5467 PyBuffer_Release(&self->buffer);
5468 self->buffer.buf = NULL;
5469 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005470
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005471 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005472 PyMem_Free(self->marks);
5473 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005474 PyMem_Free(self->input_line);
5475 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005476 free(self->encoding);
5477 self->encoding = NULL;
5478 free(self->errors);
5479 self->errors = NULL;
5480
5481 return 0;
5482}
5483
5484PyDoc_STRVAR(Unpickler_doc,
5485"Unpickler(file, *, encoding='ASCII', errors='strict')"
5486"\n"
5487"This takes a binary file for reading a pickle data stream.\n"
5488"\n"
5489"The protocol version of the pickle is detected automatically, so no\n"
5490"proto argument is needed.\n"
5491"\n"
5492"The file-like object must have two methods, a read() method\n"
5493"that takes an integer argument, and a readline() method that\n"
5494"requires no arguments. Both methods should return bytes.\n"
5495"Thus file-like object can be a binary file object opened for\n"
5496"reading, a BytesIO object, or any other custom object that\n"
5497"meets this interface.\n"
5498"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005499"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5500"which are used to control compatiblity support for pickle stream\n"
5501"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5502"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5503"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5504"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5505"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005506
5507static int
5508Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5509{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005510 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005511 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005512 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005513 char *encoding = NULL;
5514 char *errors = NULL;
5515
5516 /* XXX: That is an horrible error message. But, I don't know how to do
5517 better... */
5518 if (Py_SIZE(args) != 1) {
5519 PyErr_Format(PyExc_TypeError,
5520 "%s takes exactly one positional argument (%zd given)",
5521 Py_TYPE(self)->tp_name, Py_SIZE(args));
5522 return -1;
5523 }
5524
5525 /* Arguments parsing needs to be done in the __init__() method to allow
5526 subclasses to define their own __init__() method, which may (or may
5527 not) support Unpickler arguments. However, this means we need to be
5528 extra careful in the other Unpickler methods, since a subclass could
5529 forget to call Unpickler.__init__() thus breaking our internal
5530 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005531 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005532 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005533 return -1;
5534
5535 /* In case of multiple __init__() calls, clear previous content. */
5536 if (self->read != NULL)
5537 (void)Unpickler_clear(self);
5538
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005539 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005540 return -1;
5541
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005542 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005543 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005544
5545 self->fix_imports = PyObject_IsTrue(fix_imports);
5546 if (self->fix_imports == -1)
5547 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005548
5549 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5550 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5551 "persistent_load");
5552 if (self->pers_func == NULL)
5553 return -1;
5554 }
5555 else {
5556 self->pers_func = NULL;
5557 }
5558
5559 self->stack = (Pdata *)Pdata_New();
5560 if (self->stack == NULL)
5561 return -1;
5562
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005563 self->memo_size = 32;
5564 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005565 if (self->memo == NULL)
5566 return -1;
5567
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005568 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005569 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005570
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005571 return 0;
5572}
5573
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005574/* Define a proxy object for the Unpickler's internal memo object. This is to
5575 * avoid breaking code like:
5576 * unpickler.memo.clear()
5577 * and
5578 * unpickler.memo = saved_memo
5579 * Is this a good idea? Not really, but we don't want to break code that uses
5580 * it. Note that we don't implement the entire mapping API here. This is
5581 * intentional, as these should be treated as black-box implementation details.
5582 *
5583 * We do, however, have to implement pickling/unpickling support because of
5584 * real-world code like cvs2svn.
5585 */
5586
5587typedef struct {
5588 PyObject_HEAD
5589 UnpicklerObject *unpickler;
5590} UnpicklerMemoProxyObject;
5591
5592PyDoc_STRVAR(ump_clear_doc,
5593"memo.clear() -> None. Remove all items from memo.");
5594
5595static PyObject *
5596ump_clear(UnpicklerMemoProxyObject *self)
5597{
5598 _Unpickler_MemoCleanup(self->unpickler);
5599 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5600 if (self->unpickler->memo == NULL)
5601 return NULL;
5602 Py_RETURN_NONE;
5603}
5604
5605PyDoc_STRVAR(ump_copy_doc,
5606"memo.copy() -> new_memo. Copy the memo to a new object.");
5607
5608static PyObject *
5609ump_copy(UnpicklerMemoProxyObject *self)
5610{
5611 Py_ssize_t i;
5612 PyObject *new_memo = PyDict_New();
5613 if (new_memo == NULL)
5614 return NULL;
5615
5616 for (i = 0; i < self->unpickler->memo_size; i++) {
5617 int status;
5618 PyObject *key, *value;
5619
5620 value = self->unpickler->memo[i];
5621 if (value == NULL)
5622 continue;
5623
5624 key = PyLong_FromSsize_t(i);
5625 if (key == NULL)
5626 goto error;
5627 status = PyDict_SetItem(new_memo, key, value);
5628 Py_DECREF(key);
5629 if (status < 0)
5630 goto error;
5631 }
5632 return new_memo;
5633
5634error:
5635 Py_DECREF(new_memo);
5636 return NULL;
5637}
5638
5639PyDoc_STRVAR(ump_reduce_doc,
5640"memo.__reduce__(). Pickling support.");
5641
5642static PyObject *
5643ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5644{
5645 PyObject *reduce_value;
5646 PyObject *constructor_args;
5647 PyObject *contents = ump_copy(self);
5648 if (contents == NULL)
5649 return NULL;
5650
5651 reduce_value = PyTuple_New(2);
5652 if (reduce_value == NULL) {
5653 Py_DECREF(contents);
5654 return NULL;
5655 }
5656 constructor_args = PyTuple_New(1);
5657 if (constructor_args == NULL) {
5658 Py_DECREF(contents);
5659 Py_DECREF(reduce_value);
5660 return NULL;
5661 }
5662 PyTuple_SET_ITEM(constructor_args, 0, contents);
5663 Py_INCREF((PyObject *)&PyDict_Type);
5664 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5665 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5666 return reduce_value;
5667}
5668
5669static PyMethodDef unpicklerproxy_methods[] = {
5670 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5671 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5672 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5673 {NULL, NULL} /* sentinel */
5674};
5675
5676static void
5677UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5678{
5679 PyObject_GC_UnTrack(self);
5680 Py_XDECREF(self->unpickler);
5681 PyObject_GC_Del((PyObject *)self);
5682}
5683
5684static int
5685UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5686 visitproc visit, void *arg)
5687{
5688 Py_VISIT(self->unpickler);
5689 return 0;
5690}
5691
5692static int
5693UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5694{
5695 Py_CLEAR(self->unpickler);
5696 return 0;
5697}
5698
/* Type object for the proxy created by UnpicklerMemoProxy_New().
 *
 * The initializer is positional: each value lands in the PyTypeObject slot
 * named in the comment beside it, so entries must not be reordered.  Slots
 * after tp_methods are not listed and are therefore zero-initialized.
 */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /* tp_name */
    sizeof(UnpicklerMemoProxyObject),           /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags: HAVE_GC goes together with the traverse/clear slots below. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
5729
5730static PyObject *
5731UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5732{
5733 UnpicklerMemoProxyObject *self;
5734
5735 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5736 &UnpicklerMemoProxyType);
5737 if (self == NULL)
5738 return NULL;
5739 Py_INCREF(unpickler);
5740 self->unpickler = unpickler;
5741 PyObject_GC_Track(self);
5742 return (PyObject *)self;
5743}
5744
5745/*****************************************************************************/
5746
5747
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005748static PyObject *
5749Unpickler_get_memo(UnpicklerObject *self)
5750{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005751 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005752}
5753
5754static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005755Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005756{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005757 PyObject **new_memo;
5758 Py_ssize_t new_memo_size = 0;
5759 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005761 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005762 PyErr_SetString(PyExc_TypeError,
5763 "attribute deletion is not supported");
5764 return -1;
5765 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005766
5767 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5768 UnpicklerObject *unpickler =
5769 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5770
5771 new_memo_size = unpickler->memo_size;
5772 new_memo = _Unpickler_NewMemo(new_memo_size);
5773 if (new_memo == NULL)
5774 return -1;
5775
5776 for (i = 0; i < new_memo_size; i++) {
5777 Py_XINCREF(unpickler->memo[i]);
5778 new_memo[i] = unpickler->memo[i];
5779 }
5780 }
5781 else if (PyDict_Check(obj)) {
5782 Py_ssize_t i = 0;
5783 PyObject *key, *value;
5784
5785 new_memo_size = PyDict_Size(obj);
5786 new_memo = _Unpickler_NewMemo(new_memo_size);
5787 if (new_memo == NULL)
5788 return -1;
5789
5790 while (PyDict_Next(obj, &i, &key, &value)) {
5791 Py_ssize_t idx;
5792 if (!PyLong_Check(key)) {
5793 PyErr_SetString(PyExc_TypeError,
5794 "memo key must be integers");
5795 goto error;
5796 }
5797 idx = PyLong_AsSsize_t(key);
5798 if (idx == -1 && PyErr_Occurred())
5799 goto error;
5800 if (_Unpickler_MemoPut(self, idx, value) < 0)
5801 goto error;
5802 }
5803 }
5804 else {
5805 PyErr_Format(PyExc_TypeError,
5806 "'memo' attribute must be an UnpicklerMemoProxy object"
5807 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005808 return -1;
5809 }
5810
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005811 _Unpickler_MemoCleanup(self);
5812 self->memo_size = new_memo_size;
5813 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005814
5815 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005816
5817 error:
5818 if (new_memo_size) {
5819 i = new_memo_size;
5820 while (--i >= 0) {
5821 Py_XDECREF(new_memo[i]);
5822 }
5823 PyMem_FREE(new_memo);
5824 }
5825 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005826}
5827
5828static PyObject *
5829Unpickler_get_persload(UnpicklerObject *self)
5830{
5831 if (self->pers_func == NULL)
5832 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5833 else
5834 Py_INCREF(self->pers_func);
5835 return self->pers_func;
5836}
5837
5838static int
5839Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5840{
5841 PyObject *tmp;
5842
5843 if (value == NULL) {
5844 PyErr_SetString(PyExc_TypeError,
5845 "attribute deletion is not supported");
5846 return -1;
5847 }
5848 if (!PyCallable_Check(value)) {
5849 PyErr_SetString(PyExc_TypeError,
5850 "persistent_load must be a callable taking "
5851 "one argument");
5852 return -1;
5853 }
5854
5855 tmp = self->pers_func;
5856 Py_INCREF(value);
5857 self->pers_func = value;
5858 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5859
5860 return 0;
5861}
5862
5863static PyGetSetDef Unpickler_getsets[] = {
5864 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5865 {"persistent_load", (getter)Unpickler_get_persload,
5866 (setter)Unpickler_set_persload},
5867 {NULL}
5868};
5869
/* Type object for _pickle.Unpickler.
 *
 * The initializer is positional: each value lands in the PyTypeObject slot
 * named in the comment beside it, so entries must not be reordered.
 */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags: HAVE_GC goes together with the traverse/clear slots below.*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5912
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005913PyDoc_STRVAR(pickle_dump_doc,
5914"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5915"\n"
5916"Write a pickled representation of obj to the open file object file. This\n"
5917"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5918"efficient.\n"
5919"\n"
5920"The optional protocol argument tells the pickler to use the given protocol;\n"
5921"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5922"backward-incompatible protocol designed for Python 3.0.\n"
5923"\n"
5924"Specifying a negative protocol version selects the highest protocol version\n"
5925"supported. The higher the protocol used, the more recent the version of\n"
5926"Python needed to read the pickle produced.\n"
5927"\n"
5928"The file argument must have a write() method that accepts a single bytes\n"
5929"argument. It can thus be a file object opened for binary writing, a\n"
5930"io.BytesIO instance, or any other custom object that meets this interface.\n"
5931"\n"
5932"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5933"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5934"so that the pickle data stream is readable with Python 2.x.\n");
5935
5936static PyObject *
5937pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5938{
5939 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5940 PyObject *obj;
5941 PyObject *file;
5942 PyObject *proto = NULL;
5943 PyObject *fix_imports = Py_True;
5944 PicklerObject *pickler;
5945
5946 /* fix_imports is a keyword-only argument. */
5947 if (Py_SIZE(args) > 3) {
5948 PyErr_Format(PyExc_TypeError,
5949 "pickle.dump() takes at most 3 positional "
5950 "argument (%zd given)", Py_SIZE(args));
5951 return NULL;
5952 }
5953
5954 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5955 &obj, &file, &proto, &fix_imports))
5956 return NULL;
5957
5958 pickler = _Pickler_New();
5959 if (pickler == NULL)
5960 return NULL;
5961
5962 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5963 goto error;
5964
5965 if (_Pickler_SetOutputStream(pickler, file) < 0)
5966 goto error;
5967
5968 if (dump(pickler, obj) < 0)
5969 goto error;
5970
5971 if (_Pickler_FlushToFile(pickler) < 0)
5972 goto error;
5973
5974 Py_DECREF(pickler);
5975 Py_RETURN_NONE;
5976
5977 error:
5978 Py_XDECREF(pickler);
5979 return NULL;
5980}
5981
5982PyDoc_STRVAR(pickle_dumps_doc,
5983"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
5984"\n"
5985"Return the pickled representation of the object as a bytes\n"
5986"object, instead of writing it to a file.\n"
5987"\n"
5988"The optional protocol argument tells the pickler to use the given protocol;\n"
5989"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5990"backward-incompatible protocol designed for Python 3.0.\n"
5991"\n"
5992"Specifying a negative protocol version selects the highest protocol version\n"
5993"supported. The higher the protocol used, the more recent the version of\n"
5994"Python needed to read the pickle produced.\n"
5995"\n"
5996"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
5997"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5998"so that the pickle data stream is readable with Python 2.x.\n");
5999
6000static PyObject *
6001pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6002{
6003 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6004 PyObject *obj;
6005 PyObject *proto = NULL;
6006 PyObject *result;
6007 PyObject *fix_imports = Py_True;
6008 PicklerObject *pickler;
6009
6010 /* fix_imports is a keyword-only argument. */
6011 if (Py_SIZE(args) > 2) {
6012 PyErr_Format(PyExc_TypeError,
6013 "pickle.dumps() takes at most 2 positional "
6014 "argument (%zd given)", Py_SIZE(args));
6015 return NULL;
6016 }
6017
6018 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6019 &obj, &proto, &fix_imports))
6020 return NULL;
6021
6022 pickler = _Pickler_New();
6023 if (pickler == NULL)
6024 return NULL;
6025
6026 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6027 goto error;
6028
6029 if (dump(pickler, obj) < 0)
6030 goto error;
6031
6032 result = _Pickler_GetString(pickler);
6033 Py_DECREF(pickler);
6034 return result;
6035
6036 error:
6037 Py_XDECREF(pickler);
6038 return NULL;
6039}
6040
6041PyDoc_STRVAR(pickle_load_doc,
6042"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6043"\n"
6044"Read a pickled object representation from the open file object file and\n"
6045"return the reconstituted object hierarchy specified therein. This is\n"
6046"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6047"\n"
6048"The protocol version of the pickle is detected automatically, so no protocol\n"
6049"argument is needed. Bytes past the pickled object's representation are\n"
6050"ignored.\n"
6051"\n"
6052"The argument file must have two methods, a read() method that takes an\n"
6053"integer argument, and a readline() method that requires no arguments. Both\n"
6054"methods should return bytes. Thus *file* can be a binary file object opened\n"
6055"for reading, a BytesIO object, or any other custom object that meets this\n"
6056"interface.\n"
6057"\n"
6058"Optional keyword arguments are fix_imports, encoding and errors,\n"
6059"which are used to control compatiblity support for pickle stream generated\n"
6060"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6061"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6062"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6063"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6064
6065static PyObject *
6066pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6067{
6068 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6069 PyObject *file;
6070 PyObject *fix_imports = Py_True;
6071 PyObject *result;
6072 char *encoding = NULL;
6073 char *errors = NULL;
6074 UnpicklerObject *unpickler;
6075
6076 /* fix_imports, encoding and errors are a keyword-only argument. */
6077 if (Py_SIZE(args) != 1) {
6078 PyErr_Format(PyExc_TypeError,
6079 "pickle.load() takes exactly one positional "
6080 "argument (%zd given)", Py_SIZE(args));
6081 return NULL;
6082 }
6083
6084 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6085 &file, &fix_imports, &encoding, &errors))
6086 return NULL;
6087
6088 unpickler = _Unpickler_New();
6089 if (unpickler == NULL)
6090 return NULL;
6091
6092 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6093 goto error;
6094
6095 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6096 goto error;
6097
6098 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6099 if (unpickler->fix_imports == -1)
6100 goto error;
6101
6102 result = load(unpickler);
6103 Py_DECREF(unpickler);
6104 return result;
6105
6106 error:
6107 Py_XDECREF(unpickler);
6108 return NULL;
6109}
6110
6111PyDoc_STRVAR(pickle_loads_doc,
6112"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6113"\n"
6114"Read a pickled object hierarchy from a bytes object and return the\n"
6115"reconstituted object hierarchy specified therein\n"
6116"\n"
6117"The protocol version of the pickle is detected automatically, so no protocol\n"
6118"argument is needed. Bytes past the pickled object's representation are\n"
6119"ignored.\n"
6120"\n"
6121"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6122"are used to control compatiblity support for pickle stream generated\n"
6123"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6124"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6125"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6126"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6127
6128static PyObject *
6129pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6130{
6131 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6132 PyObject *input;
6133 PyObject *fix_imports = Py_True;
6134 PyObject *result;
6135 char *encoding = NULL;
6136 char *errors = NULL;
6137 UnpicklerObject *unpickler;
6138
6139 /* fix_imports, encoding and errors are a keyword-only argument. */
6140 if (Py_SIZE(args) != 1) {
6141 PyErr_Format(PyExc_TypeError,
6142 "pickle.loads() takes exactly one positional "
6143 "argument (%zd given)", Py_SIZE(args));
6144 return NULL;
6145 }
6146
6147 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6148 &input, &fix_imports, &encoding, &errors))
6149 return NULL;
6150
6151 unpickler = _Unpickler_New();
6152 if (unpickler == NULL)
6153 return NULL;
6154
6155 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6156 goto error;
6157
6158 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6159 goto error;
6160
6161 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6162 if (unpickler->fix_imports == -1)
6163 goto error;
6164
6165 result = load(unpickler);
6166 Py_DECREF(unpickler);
6167 return result;
6168
6169 error:
6170 Py_XDECREF(unpickler);
6171 return NULL;
6172}
6173
6174
6175static struct PyMethodDef pickle_methods[] = {
6176 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6177 pickle_dump_doc},
6178 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6179 pickle_dumps_doc},
6180 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6181 pickle_load_doc},
6182 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6183 pickle_loads_doc},
6184 {NULL, NULL} /* sentinel */
6185};
6186
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006187static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006188initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006189{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006190 PyObject *copyreg = NULL;
6191 PyObject *compat_pickle = NULL;
6192
6193 /* XXX: We should ensure that the types of the dictionaries imported are
6194 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6195 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006196
6197 copyreg = PyImport_ImportModule("copyreg");
6198 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006199 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006200 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6201 if (!dispatch_table)
6202 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006203 extension_registry = \
6204 PyObject_GetAttrString(copyreg, "_extension_registry");
6205 if (!extension_registry)
6206 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006207 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6208 if (!inverted_registry)
6209 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006210 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6211 if (!extension_cache)
6212 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006213 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006214
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006215 /* Load the 2.x -> 3.x stdlib module mapping tables */
6216 compat_pickle = PyImport_ImportModule("_compat_pickle");
6217 if (!compat_pickle)
6218 goto error;
6219 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6220 if (!name_mapping_2to3)
6221 goto error;
6222 if (!PyDict_CheckExact(name_mapping_2to3)) {
6223 PyErr_Format(PyExc_RuntimeError,
6224 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6225 Py_TYPE(name_mapping_2to3)->tp_name);
6226 goto error;
6227 }
6228 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6229 "IMPORT_MAPPING");
6230 if (!import_mapping_2to3)
6231 goto error;
6232 if (!PyDict_CheckExact(import_mapping_2to3)) {
6233 PyErr_Format(PyExc_RuntimeError,
6234 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6235 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6236 goto error;
6237 }
6238 /* ... and the 3.x -> 2.x mapping tables */
6239 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6240 "REVERSE_NAME_MAPPING");
6241 if (!name_mapping_3to2)
6242 goto error;
6243 if (!PyDict_CheckExact(name_mapping_3to2)) {
6244 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006245 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006246 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6247 goto error;
6248 }
6249 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6250 "REVERSE_IMPORT_MAPPING");
6251 if (!import_mapping_3to2)
6252 goto error;
6253 if (!PyDict_CheckExact(import_mapping_3to2)) {
6254 PyErr_Format(PyExc_RuntimeError,
6255 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6256 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6257 goto error;
6258 }
6259 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006260
6261 empty_tuple = PyTuple_New(0);
6262 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006263 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006264 two_tuple = PyTuple_New(2);
6265 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006266 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006267 /* We use this temp container with no regard to refcounts, or to
6268 * keeping containees alive. Exempt from GC, because we don't
6269 * want anything looking at two_tuple() by magic.
6270 */
6271 PyObject_GC_UnTrack(two_tuple);
6272
6273 return 0;
6274
6275 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006276 Py_CLEAR(copyreg);
6277 Py_CLEAR(dispatch_table);
6278 Py_CLEAR(extension_registry);
6279 Py_CLEAR(inverted_registry);
6280 Py_CLEAR(extension_cache);
6281 Py_CLEAR(compat_pickle);
6282 Py_CLEAR(name_mapping_2to3);
6283 Py_CLEAR(import_mapping_2to3);
6284 Py_CLEAR(name_mapping_3to2);
6285 Py_CLEAR(import_mapping_3to2);
6286 Py_CLEAR(empty_tuple);
6287 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006288 return -1;
6289}
6290
/* Definition record handed to PyModule_Create() in PyInit__pickle().
 * The initializer is positional; the comments name the PyModuleDef field
 * each value fills. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    -1,                   /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    NULL,                 /* m_traverse */
    NULL,                 /* m_clear */
    NULL                  /* m_free */
};
6302
6303PyMODINIT_FUNC
6304PyInit__pickle(void)
6305{
6306 PyObject *m;
6307
6308 if (PyType_Ready(&Unpickler_Type) < 0)
6309 return NULL;
6310 if (PyType_Ready(&Pickler_Type) < 0)
6311 return NULL;
6312 if (PyType_Ready(&Pdata_Type) < 0)
6313 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006314 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6315 return NULL;
6316 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6317 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006318
6319 /* Create the module and add the functions. */
6320 m = PyModule_Create(&_picklemodule);
6321 if (m == NULL)
6322 return NULL;
6323
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006324 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006325 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6326 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006327 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006328 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6329 return NULL;
6330
6331 /* Initialize the exceptions. */
6332 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6333 if (PickleError == NULL)
6334 return NULL;
6335 PicklingError = \
6336 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6337 if (PicklingError == NULL)
6338 return NULL;
6339 UnpicklingError = \
6340 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6341 if (UnpicklingError == NULL)
6342 return NULL;
6343
6344 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6345 return NULL;
6346 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6347 return NULL;
6348 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6349 return NULL;
6350
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006351 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006352 return NULL;
6353
6354 return m;
6355}