blob: 2c29b20950a524a3a8760a69bb011fc10844b5d3 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers used by this module.  Bump these when new
   opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 3
};
12
/* Pickle opcodes, one byte each.  This list must be kept in step with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the INT-opcode spellings used to pickle bools
 * before protocol 2.  Unpicklers written before bools existed read them as
 * ints, while later unpicklers can recognize that bools were intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants for the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing will break if the two drift apart, but it is unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth after which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler's write buffer when pickling to a stream.
       Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference added here is released again (the object
   was never stored, so nothing else would ever release it) and the
   enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values.  The
 pickler uses it for memoization.  Using a custom hashtable rather than
 PyDict allows us to skip a bunch of unnecessary object creation.  This
 makes a huge performance difference. */

/* Number of slots in a freshly created table, and the smallest size a
   resize will ever pick. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value after every probe. */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
607/* Helpers for creating the argument tuple passed to functions. This has the
Victor Stinner121aab42011-09-29 23:40:53 +0200608 performance advantage of calling PyTuple_New() only once.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000609
610 XXX(avassalotti): Inline directly in _Pickler_FastCall() and
611 _Unpickler_FastCall(). */
/* Store `obj` as the single item of the cached tuple (self)->arg, creating
   the tuple first if there is none.  The reference to `obj` is consumed in
   every case: it is handed to the tuple, or released if the tuple cannot be
   created (in which case (self)->arg stays NULL). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple if anything else still holds a reference to it,
   so that the next ARG_TUP() starts from a fresh tuple. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1) {                       \
            Py_CLEAR((self)->arg);                              \
        }                                                       \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200829 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000830 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200831 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 if (self->write == NULL) {
833 if (PyErr_ExceptionMatches(PyExc_AttributeError))
834 PyErr_SetString(PyExc_TypeError,
835 "file must have a 'write' attribute");
836 return -1;
837 }
838
839 return 0;
840}
841
842/* See documentation for _Pickler_FastCall(). */
843static PyObject *
844_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
845{
846 PyObject *result = NULL;
847
848 ARG_TUP(self, arg);
849 if (self->arg) {
850 result = PyObject_Call(func, self->arg, NULL);
851 FREE_ARG_TUP(self);
852 }
853 return result;
854}
855
856/* Returns the size of the input on success, -1 on failure. This takes its
857 own reference to `input`. */
858static Py_ssize_t
859_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
860{
861 if (self->buffer.buf != NULL)
862 PyBuffer_Release(&self->buffer);
863 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
864 return -1;
865 self->input_buffer = self->buffer.buf;
866 self->input_len = self->buffer.len;
867 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000868 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869 return self->input_len;
870}
871
Antoine Pitrou04248a82010-10-12 20:51:21 +0000872static int
873_Unpickler_SkipConsumed(UnpicklerObject *self)
874{
875 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
876
877 if (consumed > 0) {
878 PyObject *r;
879 assert(self->peek); /* otherwise we did something wrong */
880 /* This makes an useless copy... */
881 r = PyObject_CallFunction(self->read, "n", consumed);
882 if (r == NULL)
883 return -1;
884 Py_DECREF(r);
885 self->prefetched_idx = self->next_read_idx;
886 }
887 return 0;
888}
889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890static const Py_ssize_t READ_WHOLE_LINE = -1;
891
892/* If reading from a file, we need to only pull the bytes we need, since there
893 may be multiple pickle objects arranged contiguously in the same input
894 buffer.
895
896 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
897 bytes from the input stream/buffer.
898
899 Update the unpickler's input buffer with the newly-read data. Returns -1 on
900 failure; on success, returns the number of bytes read from the file.
901
902 On success, self->input_len will be 0; this is intentional so that when
903 unpickling from a file, the "we've run out of data" code paths will trigger,
904 causing the Unpickler to go back to the file for more data. Use the returned
905 size to tell you how much data you can process. */
906static Py_ssize_t
907_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
908{
909 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000910 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000911
912 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200913
Antoine Pitrou04248a82010-10-12 20:51:21 +0000914 if (_Unpickler_SkipConsumed(self) < 0)
915 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000916
917 if (n == READ_WHOLE_LINE)
918 data = PyObject_Call(self->readline, empty_tuple, NULL);
919 else {
920 PyObject *len = PyLong_FromSsize_t(n);
921 if (len == NULL)
922 return -1;
923 data = _Unpickler_FastCall(self, self->read, len);
924 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000925 if (data == NULL)
926 return -1;
927
Antoine Pitrou04248a82010-10-12 20:51:21 +0000928 /* Prefetch some data without advancing the file pointer, if possible */
929 if (self->peek) {
930 PyObject *len, *prefetched;
931 len = PyLong_FromSsize_t(PREFETCH);
932 if (len == NULL) {
933 Py_DECREF(data);
934 return -1;
935 }
936 prefetched = _Unpickler_FastCall(self, self->peek, len);
937 if (prefetched == NULL) {
938 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
939 /* peek() is probably not supported by the given file object */
940 PyErr_Clear();
941 Py_CLEAR(self->peek);
942 }
943 else {
944 Py_DECREF(data);
945 return -1;
946 }
947 }
948 else {
949 assert(PyBytes_Check(prefetched));
950 prefetched_size = PyBytes_GET_SIZE(prefetched);
951 PyBytes_ConcatAndDel(&data, prefetched);
952 if (data == NULL)
953 return -1;
954 }
955 }
956
957 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000958 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000959 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000960 return read_size;
961}
962
963/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
964
965 This should be used for all data reads, rather than accessing the unpickler's
966 input buffer directly. This method deals correctly with reading from input
967 streams, which the input buffer doesn't deal with.
968
969 Note that when reading from a file-like object, self->next_read_idx won't
970 be updated (it should remain at 0 for the entire unpickling process). You
971 should use this function's return value to know how many bytes you can
972 consume.
973
974 Returns -1 (with an exception set) on failure. On success, return the
975 number of chars read. */
976static Py_ssize_t
977_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
978{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000979 Py_ssize_t num_read;
980
Antoine Pitrou04248a82010-10-12 20:51:21 +0000981 if (self->next_read_idx + n <= self->input_len) {
982 *s = self->input_buffer + self->next_read_idx;
983 self->next_read_idx += n;
984 return n;
985 }
986 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000987 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000988 return -1;
989 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000990 num_read = _Unpickler_ReadFromFile(self, n);
991 if (num_read < 0)
992 return -1;
993 if (num_read < n) {
994 PyErr_Format(PyExc_EOFError, "Ran out of input");
995 return -1;
996 }
997 *s = self->input_buffer;
998 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000999 return n;
1000}
1001
1002static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001003_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1004 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001005{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1007 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008 return -1;
1009
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001010 memcpy(input_line, line, len);
1011 input_line[len] = '\0';
1012 self->input_line = input_line;
1013 *result = self->input_line;
1014 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001015}
1016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001017/* Read a line from the input stream/buffer. If we run off the end of the input
1018 before hitting \n, return the data we found.
1019
1020 Returns the number of chars read, or -1 on failure. */
1021static Py_ssize_t
1022_Unpickler_Readline(UnpicklerObject *self, char **result)
1023{
1024 Py_ssize_t i, num_read;
1025
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001027 if (self->input_buffer[i] == '\n') {
1028 char *line_start = self->input_buffer + self->next_read_idx;
1029 num_read = i - self->next_read_idx + 1;
1030 self->next_read_idx = i + 1;
1031 return _Unpickler_CopyLine(self, line_start, num_read, result);
1032 }
1033 }
1034 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001035 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1036 if (num_read < 0)
1037 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001039 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001040 }
Victor Stinner121aab42011-09-29 23:40:53 +02001041
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001042 /* If we get here, we've run off the end of the input string. Return the
1043 remaining string and let the caller figure it out. */
1044 *result = self->input_buffer + self->next_read_idx;
1045 num_read = i - self->next_read_idx;
1046 self->next_read_idx = i;
1047 return num_read;
1048}
1049
1050/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1051 will be modified in place. */
1052static int
1053_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1054{
1055 Py_ssize_t i;
1056 PyObject **memo;
1057
1058 assert(new_size > self->memo_size);
1059
1060 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1061 if (memo == NULL) {
1062 PyErr_NoMemory();
1063 return -1;
1064 }
1065 self->memo = memo;
1066 for (i = self->memo_size; i < new_size; i++)
1067 self->memo[i] = NULL;
1068 self->memo_size = new_size;
1069 return 0;
1070}
1071
1072/* Returns NULL if idx is out of bounds. */
1073static PyObject *
1074_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1075{
1076 if (idx < 0 || idx >= self->memo_size)
1077 return NULL;
1078
1079 return self->memo[idx];
1080}
1081
1082/* Returns -1 (with an exception set) on failure, 0 on success.
1083 This takes its own reference to `value`. */
1084static int
1085_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1086{
1087 PyObject *old_item;
1088
1089 if (idx >= self->memo_size) {
1090 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1091 return -1;
1092 assert(idx < self->memo_size);
1093 }
1094 Py_INCREF(value);
1095 old_item = self->memo[idx];
1096 self->memo[idx] = value;
1097 Py_XDECREF(old_item);
1098 return 0;
1099}
1100
1101static PyObject **
1102_Unpickler_NewMemo(Py_ssize_t new_size)
1103{
1104 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1105 if (memo == NULL)
1106 return NULL;
1107 memset(memo, 0, new_size * sizeof(PyObject *));
1108 return memo;
1109}
1110
1111/* Free the unpickler's memo, taking care to decref any items left in it. */
1112static void
1113_Unpickler_MemoCleanup(UnpicklerObject *self)
1114{
1115 Py_ssize_t i;
1116 PyObject **memo = self->memo;
1117
1118 if (self->memo == NULL)
1119 return;
1120 self->memo = NULL;
1121 i = self->memo_size;
1122 while (--i >= 0) {
1123 Py_XDECREF(memo[i]);
1124 }
1125 PyMem_FREE(memo);
1126}
1127
1128static UnpicklerObject *
1129_Unpickler_New(void)
1130{
1131 UnpicklerObject *self;
1132
1133 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1134 if (self == NULL)
1135 return NULL;
1136
1137 self->stack = (Pdata *)Pdata_New();
1138 if (self->stack == NULL) {
1139 Py_DECREF(self);
1140 return NULL;
1141 }
1142 memset(&self->buffer, 0, sizeof(Py_buffer));
1143
1144 self->memo_size = 32;
1145 self->memo = _Unpickler_NewMemo(self->memo_size);
1146 if (self->memo == NULL) {
1147 Py_DECREF(self);
1148 return NULL;
1149 }
1150
1151 self->arg = NULL;
1152 self->pers_func = NULL;
1153 self->input_buffer = NULL;
1154 self->input_line = NULL;
1155 self->input_len = 0;
1156 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001157 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001158 self->read = NULL;
1159 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001160 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001161 self->encoding = NULL;
1162 self->errors = NULL;
1163 self->marks = NULL;
1164 self->num_marks = 0;
1165 self->marks_size = 0;
1166 self->proto = 0;
1167 self->fix_imports = 0;
1168
1169 return self;
1170}
1171
1172/* Returns -1 (with an exception set) on failure, 0 on success. This may
1173 be called once on a freshly created Pickler. */
1174static int
1175_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1176{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001177 _Py_IDENTIFIER(peek);
1178 _Py_IDENTIFIER(read);
1179 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001180
1181 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001182 if (self->peek == NULL) {
1183 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1184 PyErr_Clear();
1185 else
1186 return -1;
1187 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001188 self->read = _PyObject_GetAttrId(file, &PyId_read);
1189 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001190 if (self->readline == NULL || self->read == NULL) {
1191 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1192 PyErr_SetString(PyExc_TypeError,
1193 "file must have 'read' and 'readline' attributes");
1194 Py_CLEAR(self->read);
1195 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001196 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001197 return -1;
1198 }
1199 return 0;
1200}
1201
1202/* Returns -1 (with an exception set) on failure, 0 on success. This may
1203 be called once on a freshly created Pickler. */
1204static int
1205_Unpickler_SetInputEncoding(UnpicklerObject *self,
1206 const char *encoding,
1207 const char *errors)
1208{
1209 if (encoding == NULL)
1210 encoding = "ASCII";
1211 if (errors == NULL)
1212 errors = "strict";
1213
1214 self->encoding = strdup(encoding);
1215 self->errors = strdup(errors);
1216 if (self->encoding == NULL || self->errors == NULL) {
1217 PyErr_NoMemory();
1218 return -1;
1219 }
1220 return 0;
1221}
1222
1223/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001224static int
1225memo_get(PicklerObject *self, PyObject *key)
1226{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001227 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001228 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001229 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001231 value = PyMemoTable_Get(self->memo, key);
1232 if (value == NULL) {
1233 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001234 return -1;
1235 }
1236
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 if (!self->bin) {
1238 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001239 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1240 "%" PY_FORMAT_SIZE_T "d\n", *value);
1241 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 }
1243 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001247 len = 2;
1248 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001249 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251 pdata[1] = (unsigned char)(*value & 0xff);
1252 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1253 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1254 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001255 len = 5;
1256 }
1257 else { /* unlikely */
1258 PyErr_SetString(PicklingError,
1259 "memo id too large for LONG_BINGET");
1260 return -1;
1261 }
1262 }
1263
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001264 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001265 return -1;
1266
1267 return 0;
1268}
1269
1270/* Store an object in the memo, assign it a new unique ID based on the number
1271 of objects currently stored in the memo and generate a PUT opcode. */
1272static int
1273memo_put(PicklerObject *self, PyObject *obj)
1274{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001275 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001276 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001277 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001278 int status = 0;
1279
1280 if (self->fast)
1281 return 0;
1282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001283 x = PyMemoTable_Size(self->memo);
1284 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001285 goto error;
1286
1287 if (!self->bin) {
1288 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001289 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1290 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001291 len = strlen(pdata);
1292 }
1293 else {
1294 if (x < 256) {
1295 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001296 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001297 len = 2;
1298 }
1299 else if (x <= 0xffffffffL) {
1300 pdata[0] = LONG_BINPUT;
1301 pdata[1] = (unsigned char)(x & 0xff);
1302 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1303 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1304 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1305 len = 5;
1306 }
1307 else { /* unlikely */
1308 PyErr_SetString(PicklingError,
1309 "memo id too large for LONG_BINPUT");
1310 return -1;
1311 }
1312 }
1313
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001314 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001315 goto error;
1316
1317 if (0) {
1318 error:
1319 status = -1;
1320 }
1321
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001322 return status;
1323}
1324
1325static PyObject *
1326whichmodule(PyObject *global, PyObject *global_name)
1327{
1328 Py_ssize_t i, j;
1329 static PyObject *module_str = NULL;
1330 static PyObject *main_str = NULL;
1331 PyObject *module_name;
1332 PyObject *modules_dict;
1333 PyObject *module;
1334 PyObject *obj;
1335
1336 if (module_str == NULL) {
1337 module_str = PyUnicode_InternFromString("__module__");
1338 if (module_str == NULL)
1339 return NULL;
1340 main_str = PyUnicode_InternFromString("__main__");
1341 if (main_str == NULL)
1342 return NULL;
1343 }
1344
1345 module_name = PyObject_GetAttr(global, module_str);
1346
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001347 /* In some rare cases (e.g., bound methods of extension types),
1348 __module__ can be None. If it is so, then search sys.modules
1349 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001350 if (module_name == Py_None) {
1351 Py_DECREF(module_name);
1352 goto search;
1353 }
1354
1355 if (module_name) {
1356 return module_name;
1357 }
1358 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1359 PyErr_Clear();
1360 else
1361 return NULL;
1362
1363 search:
1364 modules_dict = PySys_GetObject("modules");
1365 if (modules_dict == NULL)
1366 return NULL;
1367
1368 i = 0;
1369 module_name = NULL;
1370 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001371 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001372 continue;
1373
1374 obj = PyObject_GetAttr(module, global_name);
1375 if (obj == NULL) {
1376 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1377 PyErr_Clear();
1378 else
1379 return NULL;
1380 continue;
1381 }
1382
1383 if (obj != global) {
1384 Py_DECREF(obj);
1385 continue;
1386 }
1387
1388 Py_DECREF(obj);
1389 break;
1390 }
1391
1392 /* If no module is found, use __main__. */
1393 if (!j) {
1394 module_name = main_str;
1395 }
1396
1397 Py_INCREF(module_name);
1398 return module_name;
1399}
1400
1401/* fast_save_enter() and fast_save_leave() are guards against recursive
1402 objects when Pickler is used with the "fast mode" (i.e., with object
1403 memoization disabled). If the nesting of a list or dict object exceed
1404 FAST_NESTING_LIMIT, these guards will start keeping an internal
1405 reference to the seen list or dict objects and check whether these objects
1406 are recursive. These are not strictly necessary, since save() has a
1407 hard-coded recursion limit, but they give a nicer error message than the
1408 typical RuntimeError. */
1409static int
1410fast_save_enter(PicklerObject *self, PyObject *obj)
1411{
1412 /* if fast_nesting < 0, we're doing an error exit. */
1413 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1414 PyObject *key = NULL;
1415 if (self->fast_memo == NULL) {
1416 self->fast_memo = PyDict_New();
1417 if (self->fast_memo == NULL) {
1418 self->fast_nesting = -1;
1419 return 0;
1420 }
1421 }
1422 key = PyLong_FromVoidPtr(obj);
1423 if (key == NULL)
1424 return 0;
1425 if (PyDict_GetItem(self->fast_memo, key)) {
1426 Py_DECREF(key);
1427 PyErr_Format(PyExc_ValueError,
1428 "fast mode: can't pickle cyclic objects "
1429 "including object type %.200s at %p",
1430 obj->ob_type->tp_name, obj);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1435 Py_DECREF(key);
1436 self->fast_nesting = -1;
1437 return 0;
1438 }
1439 Py_DECREF(key);
1440 }
1441 return 1;
1442}
1443
1444static int
1445fast_save_leave(PicklerObject *self, PyObject *obj)
1446{
1447 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1448 PyObject *key = PyLong_FromVoidPtr(obj);
1449 if (key == NULL)
1450 return 0;
1451 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1452 Py_DECREF(key);
1453 return 0;
1454 }
1455 Py_DECREF(key);
1456 }
1457 return 1;
1458}
1459
1460static int
1461save_none(PicklerObject *self, PyObject *obj)
1462{
1463 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001464 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001465 return -1;
1466
1467 return 0;
1468}
1469
1470static int
1471save_bool(PicklerObject *self, PyObject *obj)
1472{
1473 static const char *buf[2] = { FALSE, TRUE };
1474 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1475 int p = (obj == Py_True);
1476
1477 if (self->proto >= 2) {
1478 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001479 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001480 return -1;
1481 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001482 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001483 return -1;
1484
1485 return 0;
1486}
1487
1488static int
1489save_int(PicklerObject *self, long x)
1490{
1491 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001492 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493
1494 if (!self->bin
1495#if SIZEOF_LONG > 4
1496 || x > 0x7fffffffL || x < -0x80000000L
1497#endif
1498 ) {
1499 /* Text-mode pickle, or long too big to fit in the 4-byte
1500 * signed BININT format: store as a string.
1501 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001502 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1503 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001504 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001505 return -1;
1506 }
1507 else {
1508 /* Binary pickle and x fits in a signed 4-byte int. */
1509 pdata[1] = (unsigned char)(x & 0xff);
1510 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1511 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1512 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1513
1514 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1515 if (pdata[2] == 0) {
1516 pdata[0] = BININT1;
1517 len = 2;
1518 }
1519 else {
1520 pdata[0] = BININT2;
1521 len = 3;
1522 }
1523 }
1524 else {
1525 pdata[0] = BININT;
1526 len = 5;
1527 }
1528
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001529 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001530 return -1;
1531 }
1532
1533 return 0;
1534}
1535
1536static int
1537save_long(PicklerObject *self, PyObject *obj)
1538{
1539 PyObject *repr = NULL;
1540 Py_ssize_t size;
1541 long val = PyLong_AsLong(obj);
1542 int status = 0;
1543
1544 const char long_op = LONG;
1545
1546 if (val == -1 && PyErr_Occurred()) {
1547 /* out of range for int pickling */
1548 PyErr_Clear();
1549 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001550 else
1551#if SIZEOF_LONG > 4
1552 if (val <= 0x7fffffffL && val >= -0x80000000L)
1553#endif
1554 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001555
1556 if (self->proto >= 2) {
1557 /* Linear-time pickling. */
1558 size_t nbits;
1559 size_t nbytes;
1560 unsigned char *pdata;
1561 char header[5];
1562 int i;
1563 int sign = _PyLong_Sign(obj);
1564
1565 if (sign == 0) {
1566 header[0] = LONG1;
1567 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001568 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001569 goto error;
1570 return 0;
1571 }
1572 nbits = _PyLong_NumBits(obj);
1573 if (nbits == (size_t)-1 && PyErr_Occurred())
1574 goto error;
1575 /* How many bytes do we need? There are nbits >> 3 full
1576 * bytes of data, and nbits & 7 leftover bits. If there
1577 * are any leftover bits, then we clearly need another
1578 * byte. Wnat's not so obvious is that we *probably*
1579 * need another byte even if there aren't any leftovers:
1580 * the most-significant bit of the most-significant byte
1581 * acts like a sign bit, and it's usually got a sense
1582 * opposite of the one we need. The exception is longs
1583 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1584 * its own 256's-complement, so has the right sign bit
1585 * even without the extra byte. That's a pain to check
1586 * for in advance, though, so we always grab an extra
1587 * byte at the start, and cut it back later if possible.
1588 */
1589 nbytes = (nbits >> 3) + 1;
1590 if (nbytes > INT_MAX) {
1591 PyErr_SetString(PyExc_OverflowError,
1592 "long too large to pickle");
1593 goto error;
1594 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001595 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001596 if (repr == NULL)
1597 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001598 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001599 i = _PyLong_AsByteArray((PyLongObject *)obj,
1600 pdata, nbytes,
1601 1 /* little endian */ , 1 /* signed */ );
1602 if (i < 0)
1603 goto error;
1604 /* If the long is negative, this may be a byte more than
1605 * needed. This is so iff the MSB is all redundant sign
1606 * bits.
1607 */
1608 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001609 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001610 pdata[nbytes - 1] == 0xff &&
1611 (pdata[nbytes - 2] & 0x80) != 0) {
1612 nbytes--;
1613 }
1614
1615 if (nbytes < 256) {
1616 header[0] = LONG1;
1617 header[1] = (unsigned char)nbytes;
1618 size = 2;
1619 }
1620 else {
1621 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001622 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 for (i = 1; i < 5; i++) {
1624 header[i] = (unsigned char)(size & 0xff);
1625 size >>= 8;
1626 }
1627 size = 5;
1628 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001629 if (_Pickler_Write(self, header, size) < 0 ||
1630 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001631 goto error;
1632 }
1633 else {
1634 char *string;
1635
Mark Dickinson8dd05142009-01-20 20:43:58 +00001636 /* proto < 2: write the repr and newline. This is quadratic-time (in
1637 the number of digits), in both directions. We add a trailing 'L'
1638 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001639
1640 repr = PyObject_Repr(obj);
1641 if (repr == NULL)
1642 goto error;
1643
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001644 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645 if (string == NULL)
1646 goto error;
1647
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001648 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1649 _Pickler_Write(self, string, size) < 0 ||
1650 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001651 goto error;
1652 }
1653
1654 if (0) {
1655 error:
1656 status = -1;
1657 }
1658 Py_XDECREF(repr);
1659
1660 return status;
1661}
1662
1663static int
1664save_float(PicklerObject *self, PyObject *obj)
1665{
1666 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1667
1668 if (self->bin) {
1669 char pdata[9];
1670 pdata[0] = BINFLOAT;
1671 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1672 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001673 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001674 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001675 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001676 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 int result = -1;
1678 char *buf = NULL;
1679 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001680
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001681 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001682 goto done;
1683
Mark Dickinson3e09f432009-04-17 08:41:23 +00001684 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001685 if (!buf) {
1686 PyErr_NoMemory();
1687 goto done;
1688 }
1689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001690 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001691 goto done;
1692
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001693 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001694 goto done;
1695
1696 result = 0;
1697done:
1698 PyMem_Free(buf);
1699 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001700 }
1701
1702 return 0;
1703}
1704
1705static int
1706save_bytes(PicklerObject *self, PyObject *obj)
1707{
1708 if (self->proto < 3) {
1709 /* Older pickle protocols do not have an opcode for pickling bytes
1710 objects. Therefore, we need to fake the copy protocol (i.e.,
1711 the __reduce__ method) to permit bytes object unpickling. */
1712 PyObject *reduce_value = NULL;
1713 PyObject *bytelist = NULL;
1714 int status;
1715
1716 bytelist = PySequence_List(obj);
1717 if (bytelist == NULL)
1718 return -1;
1719
1720 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1721 bytelist);
1722 if (reduce_value == NULL) {
1723 Py_DECREF(bytelist);
1724 return -1;
1725 }
1726
1727 /* save_reduce() will memoize the object automatically. */
1728 status = save_reduce(self, reduce_value, obj);
1729 Py_DECREF(reduce_value);
1730 Py_DECREF(bytelist);
1731 return status;
1732 }
1733 else {
1734 Py_ssize_t size;
1735 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001736 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001737
1738 size = PyBytes_Size(obj);
1739 if (size < 0)
1740 return -1;
1741
1742 if (size < 256) {
1743 header[0] = SHORT_BINBYTES;
1744 header[1] = (unsigned char)size;
1745 len = 2;
1746 }
1747 else if (size <= 0xffffffffL) {
1748 header[0] = BINBYTES;
1749 header[1] = (unsigned char)(size & 0xff);
1750 header[2] = (unsigned char)((size >> 8) & 0xff);
1751 header[3] = (unsigned char)((size >> 16) & 0xff);
1752 header[4] = (unsigned char)((size >> 24) & 0xff);
1753 len = 5;
1754 }
1755 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001756 PyErr_SetString(PyExc_OverflowError,
1757 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001758 return -1; /* string too large */
1759 }
1760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001761 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001762 return -1;
1763
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001764 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001765 return -1;
1766
1767 if (memo_put(self, obj) < 0)
1768 return -1;
1769
1770 return 0;
1771 }
1772}
1773
1774/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1775 backslash and newline characters to \uXXXX escapes. */
1776static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001777raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001778{
1779 PyObject *repr, *result;
1780 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001781 Py_ssize_t i, size, expandsize;
1782 void *data;
1783 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001784
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001785 if (PyUnicode_READY(obj))
1786 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001787
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001788 size = PyUnicode_GET_LENGTH(obj);
1789 data = PyUnicode_DATA(obj);
1790 kind = PyUnicode_KIND(obj);
1791 if (kind == PyUnicode_4BYTE_KIND)
1792 expandsize = 10;
1793 else
1794 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001795
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001796 if (size > PY_SSIZE_T_MAX / expandsize)
1797 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001798 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001799 if (repr == NULL)
1800 return NULL;
1801 if (size == 0)
1802 goto done;
1803
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001804 p = PyByteArray_AS_STRING(repr);
1805 for (i=0; i < size; i++) {
1806 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001807 /* Map 32-bit characters to '\Uxxxxxxxx' */
1808 if (ch >= 0x10000) {
1809 *p++ = '\\';
1810 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001811 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1812 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1813 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1814 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1815 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1816 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1817 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1818 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001819 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001820 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001821 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001822 *p++ = '\\';
1823 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001824 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1825 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1826 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1827 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001828 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001829 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001830 else
1831 *p++ = (char) ch;
1832 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001833 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001834
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001835done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001836 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001837 Py_DECREF(repr);
1838 return result;
1839}
1840
1841static int
1842save_unicode(PicklerObject *self, PyObject *obj)
1843{
1844 Py_ssize_t size;
1845 PyObject *encoded = NULL;
1846
1847 if (self->bin) {
1848 char pdata[5];
1849
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001850 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001851 if (encoded == NULL)
1852 goto error;
1853
1854 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001855 if (size > 0xffffffffL) {
1856 PyErr_SetString(PyExc_OverflowError,
1857 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001858 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001859 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001860
1861 pdata[0] = BINUNICODE;
1862 pdata[1] = (unsigned char)(size & 0xff);
1863 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1864 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1865 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001867 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001868 goto error;
1869
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001870 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001871 goto error;
1872 }
1873 else {
1874 const char unicode_op = UNICODE;
1875
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001876 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001877 if (encoded == NULL)
1878 goto error;
1879
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001880 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001881 goto error;
1882
1883 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001884 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001885 goto error;
1886
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001887 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001888 goto error;
1889 }
1890 if (memo_put(self, obj) < 0)
1891 goto error;
1892
1893 Py_DECREF(encoded);
1894 return 0;
1895
1896 error:
1897 Py_XDECREF(encoded);
1898 return -1;
1899}
1900
1901/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1902static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001903store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001904{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001905 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001906
1907 assert(PyTuple_Size(t) == len);
1908
1909 for (i = 0; i < len; i++) {
1910 PyObject *element = PyTuple_GET_ITEM(t, i);
1911
1912 if (element == NULL)
1913 return -1;
1914 if (save(self, element, 0) < 0)
1915 return -1;
1916 }
1917
1918 return 0;
1919}
1920
1921/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1922 * used across protocols to minimize the space needed to pickle them.
1923 * Tuples are also the only builtin immutable type that can be recursive
1924 * (a tuple can be reached from itself), and that requires some subtle
1925 * magic so that it works in all cases. IOW, this is a long routine.
1926 */
1927static int
1928save_tuple(PicklerObject *self, PyObject *obj)
1929{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001930 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001931
1932 const char mark_op = MARK;
1933 const char tuple_op = TUPLE;
1934 const char pop_op = POP;
1935 const char pop_mark_op = POP_MARK;
1936 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1937
1938 if ((len = PyTuple_Size(obj)) < 0)
1939 return -1;
1940
1941 if (len == 0) {
1942 char pdata[2];
1943
1944 if (self->proto) {
1945 pdata[0] = EMPTY_TUPLE;
1946 len = 1;
1947 }
1948 else {
1949 pdata[0] = MARK;
1950 pdata[1] = TUPLE;
1951 len = 2;
1952 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001953 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001954 return -1;
1955 return 0;
1956 }
1957
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001958 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001959 * saving the tuple elements, the tuple must be recursive, in
1960 * which case we'll pop everything we put on the stack, and fetch
1961 * its value from the memo.
1962 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001963 if (len <= 3 && self->proto >= 2) {
1964 /* Use TUPLE{1,2,3} opcodes. */
1965 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001966 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001967
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001968 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969 /* pop the len elements */
1970 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001971 if (_Pickler_Write(self, &pop_op, 1) < 0)
1972 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001974 if (memo_get(self, obj) < 0)
1975 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001976
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001977 return 0;
1978 }
1979 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001980 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1981 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001982 }
1983 goto memoize;
1984 }
1985
1986 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1987 * Generate MARK e1 e2 ... TUPLE
1988 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001989 if (_Pickler_Write(self, &mark_op, 1) < 0)
1990 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001991
1992 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001993 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001994
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001995 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996 /* pop the stack stuff we pushed */
1997 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001998 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
1999 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002000 }
2001 else {
2002 /* Note that we pop one more than len, to remove
2003 * the MARK too.
2004 */
2005 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002006 if (_Pickler_Write(self, &pop_op, 1) < 0)
2007 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008 }
2009 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002010 if (memo_get(self, obj) < 0)
2011 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002012
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002013 return 0;
2014 }
2015 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002016 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2017 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002018 }
2019
2020 memoize:
2021 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002022 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002023
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002024 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002025}
2026
2027/* iter is an iterator giving items, and we batch up chunks of
2028 * MARK item item ... item APPENDS
2029 * opcode sequences. Calling code should have arranged to first create an
2030 * empty list, or list-like object, for the APPENDS to operate on.
2031 * Returns 0 on success, <0 on error.
2032 */
2033static int
2034batch_list(PicklerObject *self, PyObject *iter)
2035{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002036 PyObject *obj = NULL;
2037 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 int i, n;
2039
2040 const char mark_op = MARK;
2041 const char append_op = APPEND;
2042 const char appends_op = APPENDS;
2043
2044 assert(iter != NULL);
2045
2046 /* XXX: I think this function could be made faster by avoiding the
2047 iterator interface and fetching objects directly from list using
2048 PyList_GET_ITEM.
2049 */
2050
2051 if (self->proto == 0) {
2052 /* APPENDS isn't available; do one at a time. */
2053 for (;;) {
2054 obj = PyIter_Next(iter);
2055 if (obj == NULL) {
2056 if (PyErr_Occurred())
2057 return -1;
2058 break;
2059 }
2060 i = save(self, obj, 0);
2061 Py_DECREF(obj);
2062 if (i < 0)
2063 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002064 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002065 return -1;
2066 }
2067 return 0;
2068 }
2069
2070 /* proto > 0: write in batches of BATCHSIZE. */
2071 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002072 /* Get first item */
2073 firstitem = PyIter_Next(iter);
2074 if (firstitem == NULL) {
2075 if (PyErr_Occurred())
2076 goto error;
2077
2078 /* nothing more to add */
2079 break;
2080 }
2081
2082 /* Try to get a second item */
2083 obj = PyIter_Next(iter);
2084 if (obj == NULL) {
2085 if (PyErr_Occurred())
2086 goto error;
2087
2088 /* Only one item to write */
2089 if (save(self, firstitem, 0) < 0)
2090 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002091 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002092 goto error;
2093 Py_CLEAR(firstitem);
2094 break;
2095 }
2096
2097 /* More than one item to write */
2098
2099 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002100 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002101 goto error;
2102
2103 if (save(self, firstitem, 0) < 0)
2104 goto error;
2105 Py_CLEAR(firstitem);
2106 n = 1;
2107
2108 /* Fetch and save up to BATCHSIZE items */
2109 while (obj) {
2110 if (save(self, obj, 0) < 0)
2111 goto error;
2112 Py_CLEAR(obj);
2113 n += 1;
2114
2115 if (n == BATCHSIZE)
2116 break;
2117
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002118 obj = PyIter_Next(iter);
2119 if (obj == NULL) {
2120 if (PyErr_Occurred())
2121 goto error;
2122 break;
2123 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002124 }
2125
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002126 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002127 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002128
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002129 } while (n == BATCHSIZE);
2130 return 0;
2131
2132 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002133 Py_XDECREF(firstitem);
2134 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002135 return -1;
2136}
2137
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002138/* This is a variant of batch_list() above, specialized for lists (with no
2139 * support for list subclasses). Like batch_list(), we batch up chunks of
2140 * MARK item item ... item APPENDS
2141 * opcode sequences. Calling code should have arranged to first create an
2142 * empty list, or list-like object, for the APPENDS to operate on.
2143 * Returns 0 on success, -1 on error.
2144 *
2145 * This version is considerably faster than batch_list(), if less general.
2146 *
2147 * Note that this only works for protocols > 0.
2148 */
2149static int
2150batch_list_exact(PicklerObject *self, PyObject *obj)
2151{
2152 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002153 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002154
2155 const char append_op = APPEND;
2156 const char appends_op = APPENDS;
2157 const char mark_op = MARK;
2158
2159 assert(obj != NULL);
2160 assert(self->proto > 0);
2161 assert(PyList_CheckExact(obj));
2162
2163 if (PyList_GET_SIZE(obj) == 1) {
2164 item = PyList_GET_ITEM(obj, 0);
2165 if (save(self, item, 0) < 0)
2166 return -1;
2167 if (_Pickler_Write(self, &append_op, 1) < 0)
2168 return -1;
2169 return 0;
2170 }
2171
2172 /* Write in batches of BATCHSIZE. */
2173 total = 0;
2174 do {
2175 this_batch = 0;
2176 if (_Pickler_Write(self, &mark_op, 1) < 0)
2177 return -1;
2178 while (total < PyList_GET_SIZE(obj)) {
2179 item = PyList_GET_ITEM(obj, total);
2180 if (save(self, item, 0) < 0)
2181 return -1;
2182 total++;
2183 if (++this_batch == BATCHSIZE)
2184 break;
2185 }
2186 if (_Pickler_Write(self, &appends_op, 1) < 0)
2187 return -1;
2188
2189 } while (total < PyList_GET_SIZE(obj));
2190
2191 return 0;
2192}
2193
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002194static int
2195save_list(PicklerObject *self, PyObject *obj)
2196{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002197 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002198 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199 int status = 0;
2200
2201 if (self->fast && !fast_save_enter(self, obj))
2202 goto error;
2203
2204 /* Create an empty list. */
2205 if (self->bin) {
2206 header[0] = EMPTY_LIST;
2207 len = 1;
2208 }
2209 else {
2210 header[0] = MARK;
2211 header[1] = LIST;
2212 len = 2;
2213 }
2214
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002215 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002216 goto error;
2217
2218 /* Get list length, and bow out early if empty. */
2219 if ((len = PyList_Size(obj)) < 0)
2220 goto error;
2221
2222 if (memo_put(self, obj) < 0)
2223 goto error;
2224
2225 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002226 /* Materialize the list elements. */
2227 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002228 if (Py_EnterRecursiveCall(" while pickling an object"))
2229 goto error;
2230 status = batch_list_exact(self, obj);
2231 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002232 } else {
2233 PyObject *iter = PyObject_GetIter(obj);
2234 if (iter == NULL)
2235 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002236
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002237 if (Py_EnterRecursiveCall(" while pickling an object")) {
2238 Py_DECREF(iter);
2239 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002240 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002241 status = batch_list(self, iter);
2242 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002243 Py_DECREF(iter);
2244 }
2245 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002246 if (0) {
2247 error:
2248 status = -1;
2249 }
2250
2251 if (self->fast && !fast_save_leave(self, obj))
2252 status = -1;
2253
2254 return status;
2255}
2256
2257/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2258 * MARK key value ... key value SETITEMS
2259 * opcode sequences. Calling code should have arranged to first create an
2260 * empty dict, or dict-like object, for the SETITEMS to operate on.
2261 * Returns 0 on success, <0 on error.
2262 *
2263 * This is very much like batch_list(). The difference between saving
2264 * elements directly, and picking apart two-tuples, is so long-winded at
2265 * the C level, though, that attempts to combine these routines were too
2266 * ugly to bear.
2267 */
2268static int
2269batch_dict(PicklerObject *self, PyObject *iter)
2270{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002271 PyObject *obj = NULL;
2272 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002273 int i, n;
2274
2275 const char mark_op = MARK;
2276 const char setitem_op = SETITEM;
2277 const char setitems_op = SETITEMS;
2278
2279 assert(iter != NULL);
2280
2281 if (self->proto == 0) {
2282 /* SETITEMS isn't available; do one at a time. */
2283 for (;;) {
2284 obj = PyIter_Next(iter);
2285 if (obj == NULL) {
2286 if (PyErr_Occurred())
2287 return -1;
2288 break;
2289 }
2290 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2291 PyErr_SetString(PyExc_TypeError, "dict items "
2292 "iterator must return 2-tuples");
2293 return -1;
2294 }
2295 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2296 if (i >= 0)
2297 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2298 Py_DECREF(obj);
2299 if (i < 0)
2300 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002301 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002302 return -1;
2303 }
2304 return 0;
2305 }
2306
2307 /* proto > 0: write in batches of BATCHSIZE. */
2308 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002309 /* Get first item */
2310 firstitem = PyIter_Next(iter);
2311 if (firstitem == NULL) {
2312 if (PyErr_Occurred())
2313 goto error;
2314
2315 /* nothing more to add */
2316 break;
2317 }
2318 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2319 PyErr_SetString(PyExc_TypeError, "dict items "
2320 "iterator must return 2-tuples");
2321 goto error;
2322 }
2323
2324 /* Try to get a second item */
2325 obj = PyIter_Next(iter);
2326 if (obj == NULL) {
2327 if (PyErr_Occurred())
2328 goto error;
2329
2330 /* Only one item to write */
2331 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2332 goto error;
2333 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2334 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002335 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002336 goto error;
2337 Py_CLEAR(firstitem);
2338 break;
2339 }
2340
2341 /* More than one item to write */
2342
2343 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002344 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002345 goto error;
2346
2347 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2348 goto error;
2349 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2350 goto error;
2351 Py_CLEAR(firstitem);
2352 n = 1;
2353
2354 /* Fetch and save up to BATCHSIZE items */
2355 while (obj) {
2356 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2357 PyErr_SetString(PyExc_TypeError, "dict items "
2358 "iterator must return 2-tuples");
2359 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002360 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002361 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2362 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2363 goto error;
2364 Py_CLEAR(obj);
2365 n += 1;
2366
2367 if (n == BATCHSIZE)
2368 break;
2369
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002370 obj = PyIter_Next(iter);
2371 if (obj == NULL) {
2372 if (PyErr_Occurred())
2373 goto error;
2374 break;
2375 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002376 }
2377
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002378 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002379 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002380
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002381 } while (n == BATCHSIZE);
2382 return 0;
2383
2384 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002385 Py_XDECREF(firstitem);
2386 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002387 return -1;
2388}
2389
Collin Winter5c9b02d2009-05-25 05:43:30 +00002390/* This is a variant of batch_dict() above that specializes for dicts, with no
2391 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2392 * MARK key value ... key value SETITEMS
2393 * opcode sequences. Calling code should have arranged to first create an
2394 * empty dict, or dict-like object, for the SETITEMS to operate on.
2395 * Returns 0 on success, -1 on error.
2396 *
2397 * Note that this currently doesn't work for protocol 0.
2398 */
2399static int
2400batch_dict_exact(PicklerObject *self, PyObject *obj)
2401{
2402 PyObject *key = NULL, *value = NULL;
2403 int i;
2404 Py_ssize_t dict_size, ppos = 0;
2405
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002406 const char mark_op = MARK;
2407 const char setitem_op = SETITEM;
2408 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002409
2410 assert(obj != NULL);
2411 assert(self->proto > 0);
2412
2413 dict_size = PyDict_Size(obj);
2414
2415 /* Special-case len(d) == 1 to save space. */
2416 if (dict_size == 1) {
2417 PyDict_Next(obj, &ppos, &key, &value);
2418 if (save(self, key, 0) < 0)
2419 return -1;
2420 if (save(self, value, 0) < 0)
2421 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002422 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002423 return -1;
2424 return 0;
2425 }
2426
2427 /* Write in batches of BATCHSIZE. */
2428 do {
2429 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002430 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002431 return -1;
2432 while (PyDict_Next(obj, &ppos, &key, &value)) {
2433 if (save(self, key, 0) < 0)
2434 return -1;
2435 if (save(self, value, 0) < 0)
2436 return -1;
2437 if (++i == BATCHSIZE)
2438 break;
2439 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002440 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002441 return -1;
2442 if (PyDict_Size(obj) != dict_size) {
2443 PyErr_Format(
2444 PyExc_RuntimeError,
2445 "dictionary changed size during iteration");
2446 return -1;
2447 }
2448
2449 } while (i == BATCHSIZE);
2450 return 0;
2451}
2452
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002453static int
2454save_dict(PicklerObject *self, PyObject *obj)
2455{
2456 PyObject *items, *iter;
2457 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002458 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002459 int status = 0;
2460
2461 if (self->fast && !fast_save_enter(self, obj))
2462 goto error;
2463
2464 /* Create an empty dict. */
2465 if (self->bin) {
2466 header[0] = EMPTY_DICT;
2467 len = 1;
2468 }
2469 else {
2470 header[0] = MARK;
2471 header[1] = DICT;
2472 len = 2;
2473 }
2474
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002475 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002476 goto error;
2477
2478 /* Get dict size, and bow out early if empty. */
2479 if ((len = PyDict_Size(obj)) < 0)
2480 goto error;
2481
2482 if (memo_put(self, obj) < 0)
2483 goto error;
2484
2485 if (len != 0) {
2486 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002487 if (PyDict_CheckExact(obj) && self->proto > 0) {
2488 /* We can take certain shortcuts if we know this is a dict and
2489 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002490 if (Py_EnterRecursiveCall(" while pickling an object"))
2491 goto error;
2492 status = batch_dict_exact(self, obj);
2493 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002494 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002495 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002496
2497 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002498 if (items == NULL)
2499 goto error;
2500 iter = PyObject_GetIter(items);
2501 Py_DECREF(items);
2502 if (iter == NULL)
2503 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002504 if (Py_EnterRecursiveCall(" while pickling an object")) {
2505 Py_DECREF(iter);
2506 goto error;
2507 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002508 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002509 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002510 Py_DECREF(iter);
2511 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002512 }
2513
2514 if (0) {
2515 error:
2516 status = -1;
2517 }
2518
2519 if (self->fast && !fast_save_leave(self, obj))
2520 status = -1;
2521
2522 return status;
2523}
2524
2525static int
2526save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2527{
2528 static PyObject *name_str = NULL;
2529 PyObject *global_name = NULL;
2530 PyObject *module_name = NULL;
2531 PyObject *module = NULL;
2532 PyObject *cls;
2533 int status = 0;
2534
2535 const char global_op = GLOBAL;
2536
2537 if (name_str == NULL) {
2538 name_str = PyUnicode_InternFromString("__name__");
2539 if (name_str == NULL)
2540 goto error;
2541 }
2542
2543 if (name) {
2544 global_name = name;
2545 Py_INCREF(global_name);
2546 }
2547 else {
2548 global_name = PyObject_GetAttr(obj, name_str);
2549 if (global_name == NULL)
2550 goto error;
2551 }
2552
2553 module_name = whichmodule(obj, global_name);
2554 if (module_name == NULL)
2555 goto error;
2556
2557 /* XXX: Change to use the import C API directly with level=0 to disallow
2558 relative imports.
2559
2560 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2561 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2562 custom import functions (IMHO, this would be a nice security
2563 feature). The import C API would need to be extended to support the
2564 extra parameters of __import__ to fix that. */
2565 module = PyImport_Import(module_name);
2566 if (module == NULL) {
2567 PyErr_Format(PicklingError,
2568 "Can't pickle %R: import of module %R failed",
2569 obj, module_name);
2570 goto error;
2571 }
2572 cls = PyObject_GetAttr(module, global_name);
2573 if (cls == NULL) {
2574 PyErr_Format(PicklingError,
2575 "Can't pickle %R: attribute lookup %S.%S failed",
2576 obj, module_name, global_name);
2577 goto error;
2578 }
2579 if (cls != obj) {
2580 Py_DECREF(cls);
2581 PyErr_Format(PicklingError,
2582 "Can't pickle %R: it's not the same object as %S.%S",
2583 obj, module_name, global_name);
2584 goto error;
2585 }
2586 Py_DECREF(cls);
2587
2588 if (self->proto >= 2) {
2589 /* See whether this is in the extension registry, and if
2590 * so generate an EXT opcode.
2591 */
2592 PyObject *code_obj; /* extension code as Python object */
2593 long code; /* extension code as C value */
2594 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002595 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002596
2597 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2598 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2599 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2600 /* The object is not registered in the extension registry.
2601 This is the most likely code path. */
2602 if (code_obj == NULL)
2603 goto gen_global;
2604
2605 /* XXX: pickle.py doesn't check neither the type, nor the range
2606 of the value returned by the extension_registry. It should for
2607 consistency. */
2608
2609 /* Verify code_obj has the right type and value. */
2610 if (!PyLong_Check(code_obj)) {
2611 PyErr_Format(PicklingError,
2612 "Can't pickle %R: extension code %R isn't an integer",
2613 obj, code_obj);
2614 goto error;
2615 }
2616 code = PyLong_AS_LONG(code_obj);
2617 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002618 if (!PyErr_Occurred())
2619 PyErr_Format(PicklingError,
2620 "Can't pickle %R: extension code %ld is out of range",
2621 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002622 goto error;
2623 }
2624
2625 /* Generate an EXT opcode. */
2626 if (code <= 0xff) {
2627 pdata[0] = EXT1;
2628 pdata[1] = (unsigned char)code;
2629 n = 2;
2630 }
2631 else if (code <= 0xffff) {
2632 pdata[0] = EXT2;
2633 pdata[1] = (unsigned char)(code & 0xff);
2634 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2635 n = 3;
2636 }
2637 else {
2638 pdata[0] = EXT4;
2639 pdata[1] = (unsigned char)(code & 0xff);
2640 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2641 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2642 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2643 n = 5;
2644 }
2645
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002646 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002647 goto error;
2648 }
2649 else {
2650 /* Generate a normal global opcode if we are using a pickle
2651 protocol <= 2, or if the object is not registered in the
2652 extension registry. */
2653 PyObject *encoded;
2654 PyObject *(*unicode_encoder)(PyObject *);
2655
2656 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002657 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002658 goto error;
2659
2660 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2661 the module name and the global name using UTF-8. We do so only when
2662 we are using the pickle protocol newer than version 3. This is to
2663 ensure compatibility with older Unpickler running on Python 2.x. */
2664 if (self->proto >= 3) {
2665 unicode_encoder = PyUnicode_AsUTF8String;
2666 }
2667 else {
2668 unicode_encoder = PyUnicode_AsASCIIString;
2669 }
2670
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002671 /* For protocol < 3 and if the user didn't request against doing so,
2672 we convert module names to the old 2.x module names. */
2673 if (self->fix_imports) {
2674 PyObject *key;
2675 PyObject *item;
2676
2677 key = PyTuple_Pack(2, module_name, global_name);
2678 if (key == NULL)
2679 goto error;
2680 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2681 Py_DECREF(key);
2682 if (item) {
2683 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2684 PyErr_Format(PyExc_RuntimeError,
2685 "_compat_pickle.REVERSE_NAME_MAPPING values "
2686 "should be 2-tuples, not %.200s",
2687 Py_TYPE(item)->tp_name);
2688 goto error;
2689 }
2690 Py_CLEAR(module_name);
2691 Py_CLEAR(global_name);
2692 module_name = PyTuple_GET_ITEM(item, 0);
2693 global_name = PyTuple_GET_ITEM(item, 1);
2694 if (!PyUnicode_Check(module_name) ||
2695 !PyUnicode_Check(global_name)) {
2696 PyErr_Format(PyExc_RuntimeError,
2697 "_compat_pickle.REVERSE_NAME_MAPPING values "
2698 "should be pairs of str, not (%.200s, %.200s)",
2699 Py_TYPE(module_name)->tp_name,
2700 Py_TYPE(global_name)->tp_name);
2701 goto error;
2702 }
2703 Py_INCREF(module_name);
2704 Py_INCREF(global_name);
2705 }
2706 else if (PyErr_Occurred()) {
2707 goto error;
2708 }
2709
2710 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2711 if (item) {
2712 if (!PyUnicode_Check(item)) {
2713 PyErr_Format(PyExc_RuntimeError,
2714 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2715 "should be strings, not %.200s",
2716 Py_TYPE(item)->tp_name);
2717 goto error;
2718 }
2719 Py_CLEAR(module_name);
2720 module_name = item;
2721 Py_INCREF(module_name);
2722 }
2723 else if (PyErr_Occurred()) {
2724 goto error;
2725 }
2726 }
2727
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002728 /* Save the name of the module. */
2729 encoded = unicode_encoder(module_name);
2730 if (encoded == NULL) {
2731 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2732 PyErr_Format(PicklingError,
2733 "can't pickle module identifier '%S' using "
2734 "pickle protocol %i", module_name, self->proto);
2735 goto error;
2736 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002737 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002738 PyBytes_GET_SIZE(encoded)) < 0) {
2739 Py_DECREF(encoded);
2740 goto error;
2741 }
2742 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002743 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002744 goto error;
2745
2746 /* Save the name of the module. */
2747 encoded = unicode_encoder(global_name);
2748 if (encoded == NULL) {
2749 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2750 PyErr_Format(PicklingError,
2751 "can't pickle global identifier '%S' using "
2752 "pickle protocol %i", global_name, self->proto);
2753 goto error;
2754 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002755 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002756 PyBytes_GET_SIZE(encoded)) < 0) {
2757 Py_DECREF(encoded);
2758 goto error;
2759 }
2760 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002761 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002762 goto error;
2763
2764 /* Memoize the object. */
2765 if (memo_put(self, obj) < 0)
2766 goto error;
2767 }
2768
2769 if (0) {
2770 error:
2771 status = -1;
2772 }
2773 Py_XDECREF(module_name);
2774 Py_XDECREF(global_name);
2775 Py_XDECREF(module);
2776
2777 return status;
2778}
2779
2780static int
2781save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2782{
2783 PyObject *pid = NULL;
2784 int status = 0;
2785
2786 const char persid_op = PERSID;
2787 const char binpersid_op = BINPERSID;
2788
2789 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002790 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002791 if (pid == NULL)
2792 return -1;
2793
2794 if (pid != Py_None) {
2795 if (self->bin) {
2796 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002797 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002798 goto error;
2799 }
2800 else {
2801 PyObject *pid_str = NULL;
2802 char *pid_ascii_bytes;
2803 Py_ssize_t size;
2804
2805 pid_str = PyObject_Str(pid);
2806 if (pid_str == NULL)
2807 goto error;
2808
2809 /* XXX: Should it check whether the persistent id only contains
2810 ASCII characters? And what if the pid contains embedded
2811 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002812 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002813 Py_DECREF(pid_str);
2814 if (pid_ascii_bytes == NULL)
2815 goto error;
2816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002817 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2818 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2819 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002820 goto error;
2821 }
2822 status = 1;
2823 }
2824
2825 if (0) {
2826 error:
2827 status = -1;
2828 }
2829 Py_XDECREF(pid);
2830
2831 return status;
2832}
2833
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002834static PyObject *
2835get_class(PyObject *obj)
2836{
2837 PyObject *cls;
2838 static PyObject *str_class;
2839
2840 if (str_class == NULL) {
2841 str_class = PyUnicode_InternFromString("__class__");
2842 if (str_class == NULL)
2843 return NULL;
2844 }
2845 cls = PyObject_GetAttr(obj, str_class);
2846 if (cls == NULL) {
2847 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2848 PyErr_Clear();
2849 cls = (PyObject *) Py_TYPE(obj);
2850 Py_INCREF(cls);
2851 }
2852 }
2853 return cls;
2854}
2855
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002856/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2857 * appropriate __reduce__ method for obj.
2858 */
2859static int
2860save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2861{
2862 PyObject *callable;
2863 PyObject *argtup;
2864 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002865 PyObject *listitems = Py_None;
2866 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002867 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002868
2869 int use_newobj = self->proto >= 2;
2870
2871 const char reduce_op = REDUCE;
2872 const char build_op = BUILD;
2873 const char newobj_op = NEWOBJ;
2874
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002875 size = PyTuple_Size(args);
2876 if (size < 2 || size > 5) {
2877 PyErr_SetString(PicklingError, "tuple returned by "
2878 "__reduce__ must contain 2 through 5 elements");
2879 return -1;
2880 }
2881
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002882 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2883 &callable, &argtup, &state, &listitems, &dictitems))
2884 return -1;
2885
2886 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002887 PyErr_SetString(PicklingError, "first item of the tuple "
2888 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002889 return -1;
2890 }
2891 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002892 PyErr_SetString(PicklingError, "second item of the tuple "
2893 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002894 return -1;
2895 }
2896
2897 if (state == Py_None)
2898 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002899
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002900 if (listitems == Py_None)
2901 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002902 else if (!PyIter_Check(listitems)) {
2903 PyErr_Format(PicklingError, "Fourth element of tuple"
2904 "returned by __reduce__ must be an iterator, not %s",
2905 Py_TYPE(listitems)->tp_name);
2906 return -1;
2907 }
2908
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002909 if (dictitems == Py_None)
2910 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002911 else if (!PyIter_Check(dictitems)) {
2912 PyErr_Format(PicklingError, "Fifth element of tuple"
2913 "returned by __reduce__ must be an iterator, not %s",
2914 Py_TYPE(dictitems)->tp_name);
2915 return -1;
2916 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002917
2918 /* Protocol 2 special case: if callable's name is __newobj__, use
2919 NEWOBJ. */
2920 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002921 static PyObject *newobj_str = NULL, *name_str = NULL;
2922 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002923
2924 if (newobj_str == NULL) {
2925 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002926 name_str = PyUnicode_InternFromString("__name__");
2927 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002928 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002929 }
2930
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002931 name = PyObject_GetAttr(callable, name_str);
2932 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002933 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2934 PyErr_Clear();
2935 else
2936 return -1;
2937 use_newobj = 0;
2938 }
2939 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002940 use_newobj = PyUnicode_Check(name) &&
2941 PyUnicode_Compare(name, newobj_str) == 0;
2942 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002943 }
2944 }
2945 if (use_newobj) {
2946 PyObject *cls;
2947 PyObject *newargtup;
2948 PyObject *obj_class;
2949 int p;
2950
2951 /* Sanity checks. */
2952 if (Py_SIZE(argtup) < 1) {
2953 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2954 return -1;
2955 }
2956
2957 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002958 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002959 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002960 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002961 return -1;
2962 }
2963
2964 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002965 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002966 p = obj_class != cls; /* true iff a problem */
2967 Py_DECREF(obj_class);
2968 if (p) {
2969 PyErr_SetString(PicklingError, "args[0] from "
2970 "__newobj__ args has the wrong class");
2971 return -1;
2972 }
2973 }
2974 /* XXX: These calls save() are prone to infinite recursion. Imagine
2975 what happen if the value returned by the __reduce__() method of
2976 some extension type contains another object of the same type. Ouch!
2977
2978 Here is a quick example, that I ran into, to illustrate what I
2979 mean:
2980
2981 >>> import pickle, copyreg
2982 >>> copyreg.dispatch_table.pop(complex)
2983 >>> pickle.dumps(1+2j)
2984 Traceback (most recent call last):
2985 ...
2986 RuntimeError: maximum recursion depth exceeded
2987
2988 Removing the complex class from copyreg.dispatch_table made the
2989 __reduce_ex__() method emit another complex object:
2990
2991 >>> (1+1j).__reduce_ex__(2)
2992 (<function __newobj__ at 0xb7b71c3c>,
2993 (<class 'complex'>, (1+1j)), None, None, None)
2994
2995 Thus when save() was called on newargstup (the 2nd item) recursion
2996 ensued. Of course, the bug was in the complex class which had a
2997 broken __getnewargs__() that emitted another complex object. But,
2998 the point, here, is it is quite easy to end up with a broken reduce
2999 function. */
3000
3001 /* Save the class and its __new__ arguments. */
3002 if (save(self, cls, 0) < 0)
3003 return -1;
3004
3005 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3006 if (newargtup == NULL)
3007 return -1;
3008
3009 p = save(self, newargtup, 0);
3010 Py_DECREF(newargtup);
3011 if (p < 0)
3012 return -1;
3013
3014 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003015 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003016 return -1;
3017 }
3018 else { /* Not using NEWOBJ. */
3019 if (save(self, callable, 0) < 0 ||
3020 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003021 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003022 return -1;
3023 }
3024
3025 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3026 the caller do not want to memoize the object. Not particularly useful,
3027 but that is to mimic the behavior save_reduce() in pickle.py when
3028 obj is None. */
3029 if (obj && memo_put(self, obj) < 0)
3030 return -1;
3031
3032 if (listitems && batch_list(self, listitems) < 0)
3033 return -1;
3034
3035 if (dictitems && batch_dict(self, dictitems) < 0)
3036 return -1;
3037
3038 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003039 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003040 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003041 return -1;
3042 }
3043
3044 return 0;
3045}
3046
3047static int
3048save(PicklerObject *self, PyObject *obj, int pers_save)
3049{
3050 PyTypeObject *type;
3051 PyObject *reduce_func = NULL;
3052 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003053 int status = 0;
3054
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003055 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003056 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003057
3058 /* The extra pers_save argument is necessary to avoid calling save_pers()
3059 on its returned object. */
3060 if (!pers_save && self->pers_func) {
3061 /* save_pers() returns:
3062 -1 to signal an error;
3063 0 if it did nothing successfully;
3064 1 if a persistent id was saved.
3065 */
3066 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3067 goto done;
3068 }
3069
3070 type = Py_TYPE(obj);
3071
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003072 /* The old cPickle had an optimization that used switch-case statement
3073 dispatching on the first letter of the type name. This has was removed
3074 since benchmarks shown that this optimization was actually slowing
3075 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003076
3077 /* Atom types; these aren't memoized, so don't check the memo. */
3078
3079 if (obj == Py_None) {
3080 status = save_none(self, obj);
3081 goto done;
3082 }
3083 else if (obj == Py_False || obj == Py_True) {
3084 status = save_bool(self, obj);
3085 goto done;
3086 }
3087 else if (type == &PyLong_Type) {
3088 status = save_long(self, obj);
3089 goto done;
3090 }
3091 else if (type == &PyFloat_Type) {
3092 status = save_float(self, obj);
3093 goto done;
3094 }
3095
3096 /* Check the memo to see if it has the object. If so, generate
3097 a GET (or BINGET) opcode, instead of pickling the object
3098 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003099 if (PyMemoTable_Get(self->memo, obj)) {
3100 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003101 goto error;
3102 goto done;
3103 }
3104
3105 if (type == &PyBytes_Type) {
3106 status = save_bytes(self, obj);
3107 goto done;
3108 }
3109 else if (type == &PyUnicode_Type) {
3110 status = save_unicode(self, obj);
3111 goto done;
3112 }
3113 else if (type == &PyDict_Type) {
3114 status = save_dict(self, obj);
3115 goto done;
3116 }
3117 else if (type == &PyList_Type) {
3118 status = save_list(self, obj);
3119 goto done;
3120 }
3121 else if (type == &PyTuple_Type) {
3122 status = save_tuple(self, obj);
3123 goto done;
3124 }
3125 else if (type == &PyType_Type) {
3126 status = save_global(self, obj, NULL);
3127 goto done;
3128 }
3129 else if (type == &PyFunction_Type) {
3130 status = save_global(self, obj, NULL);
3131 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3132 /* fall back to reduce */
3133 PyErr_Clear();
3134 }
3135 else {
3136 goto done;
3137 }
3138 }
3139 else if (type == &PyCFunction_Type) {
3140 status = save_global(self, obj, NULL);
3141 goto done;
3142 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003143
3144 /* XXX: This part needs some unit tests. */
3145
3146 /* Get a reduction callable, and call it. This may come from
3147 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3148 * or the object's __reduce__ method.
3149 */
3150 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3151 if (reduce_func != NULL) {
3152 /* Here, the reference count of the reduce_func object returned by
3153 PyDict_GetItem needs to be increased to be consistent with the one
3154 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3155 reduce_func at the end of the save() routine.
3156 */
3157 Py_INCREF(reduce_func);
3158 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003159 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003160 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003161 else if (PyType_IsSubtype(type, &PyType_Type)) {
3162 status = save_global(self, obj, NULL);
3163 goto done;
3164 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003165 else {
3166 static PyObject *reduce_str = NULL;
3167 static PyObject *reduce_ex_str = NULL;
3168
3169 /* Cache the name of the reduce methods. */
3170 if (reduce_str == NULL) {
3171 reduce_str = PyUnicode_InternFromString("__reduce__");
3172 if (reduce_str == NULL)
3173 goto error;
3174 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3175 if (reduce_ex_str == NULL)
3176 goto error;
3177 }
3178
3179 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3180 automatically defined as __reduce__. While this is convenient, this
3181 make it impossible to know which method was actually called. Of
3182 course, this is not a big deal. But still, it would be nice to let
3183 the user know which method was called when something go
3184 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3185 don't actually have to check for a __reduce__ method. */
3186
3187 /* Check for a __reduce_ex__ method. */
3188 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3189 if (reduce_func != NULL) {
3190 PyObject *proto;
3191 proto = PyLong_FromLong(self->proto);
3192 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003193 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003194 }
3195 }
3196 else {
3197 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3198 PyErr_Clear();
3199 else
3200 goto error;
3201 /* Check for a __reduce__ method. */
3202 reduce_func = PyObject_GetAttr(obj, reduce_str);
3203 if (reduce_func != NULL) {
3204 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3205 }
3206 else {
3207 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3208 type->tp_name, obj);
3209 goto error;
3210 }
3211 }
3212 }
3213
3214 if (reduce_value == NULL)
3215 goto error;
3216
3217 if (PyUnicode_Check(reduce_value)) {
3218 status = save_global(self, obj, reduce_value);
3219 goto done;
3220 }
3221
3222 if (!PyTuple_Check(reduce_value)) {
3223 PyErr_SetString(PicklingError,
3224 "__reduce__ must return a string or tuple");
3225 goto error;
3226 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003227
3228 status = save_reduce(self, reduce_value, obj);
3229
3230 if (0) {
3231 error:
3232 status = -1;
3233 }
3234 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003235 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003236 Py_XDECREF(reduce_func);
3237 Py_XDECREF(reduce_value);
3238
3239 return status;
3240}
3241
3242static int
3243dump(PicklerObject *self, PyObject *obj)
3244{
3245 const char stop_op = STOP;
3246
3247 if (self->proto >= 2) {
3248 char header[2];
3249
3250 header[0] = PROTO;
3251 assert(self->proto >= 0 && self->proto < 256);
3252 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003253 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003254 return -1;
3255 }
3256
3257 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003258 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003259 return -1;
3260
3261 return 0;
3262}
3263
3264PyDoc_STRVAR(Pickler_clear_memo_doc,
3265"clear_memo() -> None. Clears the pickler's \"memo\"."
3266"\n"
3267"The memo is the data structure that remembers which objects the\n"
3268"pickler has already seen, so that shared or recursive objects are\n"
3269"pickled by reference and not by value. This method is useful when\n"
3270"re-using picklers.");
3271
3272static PyObject *
3273Pickler_clear_memo(PicklerObject *self)
3274{
3275 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003276 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003277
3278 Py_RETURN_NONE;
3279}
3280
3281PyDoc_STRVAR(Pickler_dump_doc,
3282"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3283
3284static PyObject *
3285Pickler_dump(PicklerObject *self, PyObject *args)
3286{
3287 PyObject *obj;
3288
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003289 /* Check whether the Pickler was initialized correctly (issue3664).
3290 Developers often forget to call __init__() in their subclasses, which
3291 would trigger a segfault without this check. */
3292 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003293 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003294 "Pickler.__init__() was not called by %s.__init__()",
3295 Py_TYPE(self)->tp_name);
3296 return NULL;
3297 }
3298
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003299 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3300 return NULL;
3301
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003302 if (_Pickler_ClearBuffer(self) < 0)
3303 return NULL;
3304
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003305 if (dump(self, obj) < 0)
3306 return NULL;
3307
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003308 if (_Pickler_FlushToFile(self) < 0)
3309 return NULL;
3310
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003311 Py_RETURN_NONE;
3312}
3313
3314static struct PyMethodDef Pickler_methods[] = {
3315 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3316 Pickler_dump_doc},
3317 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3318 Pickler_clear_memo_doc},
3319 {NULL, NULL} /* sentinel */
3320};
3321
3322static void
3323Pickler_dealloc(PicklerObject *self)
3324{
3325 PyObject_GC_UnTrack(self);
3326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003327 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003328 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003329 Py_XDECREF(self->pers_func);
3330 Py_XDECREF(self->arg);
3331 Py_XDECREF(self->fast_memo);
3332
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003333 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003334
3335 Py_TYPE(self)->tp_free((PyObject *)self);
3336}
3337
3338static int
3339Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3340{
3341 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003342 Py_VISIT(self->pers_func);
3343 Py_VISIT(self->arg);
3344 Py_VISIT(self->fast_memo);
3345 return 0;
3346}
3347
3348static int
3349Pickler_clear(PicklerObject *self)
3350{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003351 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003352 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003353 Py_CLEAR(self->pers_func);
3354 Py_CLEAR(self->arg);
3355 Py_CLEAR(self->fast_memo);
3356
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003357 if (self->memo != NULL) {
3358 PyMemoTable *memo = self->memo;
3359 self->memo = NULL;
3360 PyMemoTable_Del(memo);
3361 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003362 return 0;
3363}
3364
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003366PyDoc_STRVAR(Pickler_doc,
3367"Pickler(file, protocol=None)"
3368"\n"
3369"This takes a binary file for writing a pickle data stream.\n"
3370"\n"
3371"The optional protocol argument tells the pickler to use the\n"
3372"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3373"protocol is 3; a backward-incompatible protocol designed for\n"
3374"Python 3.0.\n"
3375"\n"
3376"Specifying a negative protocol version selects the highest\n"
3377"protocol version supported. The higher the protocol used, the\n"
3378"more recent the version of Python needed to read the pickle\n"
3379"produced.\n"
3380"\n"
3381"The file argument must have a write() method that accepts a single\n"
3382"bytes argument. It can thus be a file object opened for binary\n"
3383"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003384"meets this interface.\n"
3385"\n"
3386"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3387"map the new Python 3.x names to the old module names used in Python\n"
3388"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003389
3390static int
3391Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3392{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003393 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003394 PyObject *file;
3395 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003396 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003397 _Py_IDENTIFIER(persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003398
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003399 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003400 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003401 return -1;
3402
3403 /* In case of multiple __init__() calls, clear previous content. */
3404 if (self->write != NULL)
3405 (void)Pickler_clear(self);
3406
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003407 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3408 return -1;
3409
3410 if (_Pickler_SetOutputStream(self, file) < 0)
3411 return -1;
3412
3413 /* memo and output_buffer may have already been created in _Pickler_New */
3414 if (self->memo == NULL) {
3415 self->memo = PyMemoTable_New();
3416 if (self->memo == NULL)
3417 return -1;
3418 }
3419 self->output_len = 0;
3420 if (self->output_buffer == NULL) {
3421 self->max_output_len = WRITE_BUF_SIZE;
3422 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3423 self->max_output_len);
3424 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003425 return -1;
3426 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003427
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003428 self->arg = NULL;
3429 self->fast = 0;
3430 self->fast_nesting = 0;
3431 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003432 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003433 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3434 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3435 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003436 if (self->pers_func == NULL)
3437 return -1;
3438 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003439 return 0;
3440}
3441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003442/* Define a proxy object for the Pickler's internal memo object. This is to
3443 * avoid breaking code like:
3444 * pickler.memo.clear()
3445 * and
3446 * pickler.memo = saved_memo
3447 * Is this a good idea? Not really, but we don't want to break code that uses
3448 * it. Note that we don't implement the entire mapping API here. This is
3449 * intentional, as these should be treated as black-box implementation details.
3450 */
3451
3452typedef struct {
3453 PyObject_HEAD
3454 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3455} PicklerMemoProxyObject;
3456
3457PyDoc_STRVAR(pmp_clear_doc,
3458"memo.clear() -> None. Remove all items from memo.");
3459
3460static PyObject *
3461pmp_clear(PicklerMemoProxyObject *self)
3462{
3463 if (self->pickler->memo)
3464 PyMemoTable_Clear(self->pickler->memo);
3465 Py_RETURN_NONE;
3466}
3467
3468PyDoc_STRVAR(pmp_copy_doc,
3469"memo.copy() -> new_memo. Copy the memo to a new object.");
3470
3471static PyObject *
3472pmp_copy(PicklerMemoProxyObject *self)
3473{
3474 Py_ssize_t i;
3475 PyMemoTable *memo;
3476 PyObject *new_memo = PyDict_New();
3477 if (new_memo == NULL)
3478 return NULL;
3479
3480 memo = self->pickler->memo;
3481 for (i = 0; i < memo->mt_allocated; ++i) {
3482 PyMemoEntry entry = memo->mt_table[i];
3483 if (entry.me_key != NULL) {
3484 int status;
3485 PyObject *key, *value;
3486
3487 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003488 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003489
3490 if (key == NULL || value == NULL) {
3491 Py_XDECREF(key);
3492 Py_XDECREF(value);
3493 goto error;
3494 }
3495 status = PyDict_SetItem(new_memo, key, value);
3496 Py_DECREF(key);
3497 Py_DECREF(value);
3498 if (status < 0)
3499 goto error;
3500 }
3501 }
3502 return new_memo;
3503
3504 error:
3505 Py_XDECREF(new_memo);
3506 return NULL;
3507}
3508
3509PyDoc_STRVAR(pmp_reduce_doc,
3510"memo.__reduce__(). Pickling support.");
3511
3512static PyObject *
3513pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3514{
3515 PyObject *reduce_value, *dict_args;
3516 PyObject *contents = pmp_copy(self);
3517 if (contents == NULL)
3518 return NULL;
3519
3520 reduce_value = PyTuple_New(2);
3521 if (reduce_value == NULL) {
3522 Py_DECREF(contents);
3523 return NULL;
3524 }
3525 dict_args = PyTuple_New(1);
3526 if (dict_args == NULL) {
3527 Py_DECREF(contents);
3528 Py_DECREF(reduce_value);
3529 return NULL;
3530 }
3531 PyTuple_SET_ITEM(dict_args, 0, contents);
3532 Py_INCREF((PyObject *)&PyDict_Type);
3533 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3534 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3535 return reduce_value;
3536}
3537
3538static PyMethodDef picklerproxy_methods[] = {
3539 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3540 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3541 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3542 {NULL, NULL} /* sentinel */
3543};
3544
3545static void
3546PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3547{
3548 PyObject_GC_UnTrack(self);
3549 Py_XDECREF(self->pickler);
3550 PyObject_GC_Del((PyObject *)self);
3551}
3552
3553static int
3554PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3555 visitproc visit, void *arg)
3556{
3557 Py_VISIT(self->pickler);
3558 return 0;
3559}
3560
3561static int
3562PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3563{
3564 Py_CLEAR(self->pickler);
3565 return 0;
3566}
3567
3568static PyTypeObject PicklerMemoProxyType = {
3569 PyVarObject_HEAD_INIT(NULL, 0)
3570 "_pickle.PicklerMemoProxy", /*tp_name*/
3571 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3572 0,
3573 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3574 0, /* tp_print */
3575 0, /* tp_getattr */
3576 0, /* tp_setattr */
3577 0, /* tp_compare */
3578 0, /* tp_repr */
3579 0, /* tp_as_number */
3580 0, /* tp_as_sequence */
3581 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003582 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003583 0, /* tp_call */
3584 0, /* tp_str */
3585 PyObject_GenericGetAttr, /* tp_getattro */
3586 PyObject_GenericSetAttr, /* tp_setattro */
3587 0, /* tp_as_buffer */
3588 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3589 0, /* tp_doc */
3590 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3591 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3592 0, /* tp_richcompare */
3593 0, /* tp_weaklistoffset */
3594 0, /* tp_iter */
3595 0, /* tp_iternext */
3596 picklerproxy_methods, /* tp_methods */
3597};
3598
3599static PyObject *
3600PicklerMemoProxy_New(PicklerObject *pickler)
3601{
3602 PicklerMemoProxyObject *self;
3603
3604 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3605 if (self == NULL)
3606 return NULL;
3607 Py_INCREF(pickler);
3608 self->pickler = pickler;
3609 PyObject_GC_Track(self);
3610 return (PyObject *)self;
3611}
3612
3613/*****************************************************************************/
3614
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003615static PyObject *
3616Pickler_get_memo(PicklerObject *self)
3617{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003618 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003619}
3620
3621static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003622Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003623{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003624 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003625
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003626 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003627 PyErr_SetString(PyExc_TypeError,
3628 "attribute deletion is not supported");
3629 return -1;
3630 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003631
3632 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3633 PicklerObject *pickler =
3634 ((PicklerMemoProxyObject *)obj)->pickler;
3635
3636 new_memo = PyMemoTable_Copy(pickler->memo);
3637 if (new_memo == NULL)
3638 return -1;
3639 }
3640 else if (PyDict_Check(obj)) {
3641 Py_ssize_t i = 0;
3642 PyObject *key, *value;
3643
3644 new_memo = PyMemoTable_New();
3645 if (new_memo == NULL)
3646 return -1;
3647
3648 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003649 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003650 PyObject *memo_obj;
3651
3652 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3653 PyErr_SetString(PyExc_TypeError,
3654 "'memo' values must be 2-item tuples");
3655 goto error;
3656 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003657 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003658 if (memo_id == -1 && PyErr_Occurred())
3659 goto error;
3660 memo_obj = PyTuple_GET_ITEM(value, 1);
3661 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3662 goto error;
3663 }
3664 }
3665 else {
3666 PyErr_Format(PyExc_TypeError,
3667 "'memo' attribute must be an PicklerMemoProxy object"
3668 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003669 return -1;
3670 }
3671
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003672 PyMemoTable_Del(self->memo);
3673 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003674
3675 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003676
3677 error:
3678 if (new_memo)
3679 PyMemoTable_Del(new_memo);
3680 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003681}
3682
3683static PyObject *
3684Pickler_get_persid(PicklerObject *self)
3685{
3686 if (self->pers_func == NULL)
3687 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3688 else
3689 Py_INCREF(self->pers_func);
3690 return self->pers_func;
3691}
3692
3693static int
3694Pickler_set_persid(PicklerObject *self, PyObject *value)
3695{
3696 PyObject *tmp;
3697
3698 if (value == NULL) {
3699 PyErr_SetString(PyExc_TypeError,
3700 "attribute deletion is not supported");
3701 return -1;
3702 }
3703 if (!PyCallable_Check(value)) {
3704 PyErr_SetString(PyExc_TypeError,
3705 "persistent_id must be a callable taking one argument");
3706 return -1;
3707 }
3708
3709 tmp = self->pers_func;
3710 Py_INCREF(value);
3711 self->pers_func = value;
3712 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3713
3714 return 0;
3715}
3716
3717static PyMemberDef Pickler_members[] = {
3718 {"bin", T_INT, offsetof(PicklerObject, bin)},
3719 {"fast", T_INT, offsetof(PicklerObject, fast)},
3720 {NULL}
3721};
3722
3723static PyGetSetDef Pickler_getsets[] = {
3724 {"memo", (getter)Pickler_get_memo,
3725 (setter)Pickler_set_memo},
3726 {"persistent_id", (getter)Pickler_get_persid,
3727 (setter)Pickler_set_persid},
3728 {NULL}
3729};
3730
3731static PyTypeObject Pickler_Type = {
3732 PyVarObject_HEAD_INIT(NULL, 0)
3733 "_pickle.Pickler" , /*tp_name*/
3734 sizeof(PicklerObject), /*tp_basicsize*/
3735 0, /*tp_itemsize*/
3736 (destructor)Pickler_dealloc, /*tp_dealloc*/
3737 0, /*tp_print*/
3738 0, /*tp_getattr*/
3739 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003740 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003741 0, /*tp_repr*/
3742 0, /*tp_as_number*/
3743 0, /*tp_as_sequence*/
3744 0, /*tp_as_mapping*/
3745 0, /*tp_hash*/
3746 0, /*tp_call*/
3747 0, /*tp_str*/
3748 0, /*tp_getattro*/
3749 0, /*tp_setattro*/
3750 0, /*tp_as_buffer*/
3751 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3752 Pickler_doc, /*tp_doc*/
3753 (traverseproc)Pickler_traverse, /*tp_traverse*/
3754 (inquiry)Pickler_clear, /*tp_clear*/
3755 0, /*tp_richcompare*/
3756 0, /*tp_weaklistoffset*/
3757 0, /*tp_iter*/
3758 0, /*tp_iternext*/
3759 Pickler_methods, /*tp_methods*/
3760 Pickler_members, /*tp_members*/
3761 Pickler_getsets, /*tp_getset*/
3762 0, /*tp_base*/
3763 0, /*tp_dict*/
3764 0, /*tp_descr_get*/
3765 0, /*tp_descr_set*/
3766 0, /*tp_dictoffset*/
3767 (initproc)Pickler_init, /*tp_init*/
3768 PyType_GenericAlloc, /*tp_alloc*/
3769 PyType_GenericNew, /*tp_new*/
3770 PyObject_GC_Del, /*tp_free*/
3771 0, /*tp_is_gc*/
3772};
3773
Victor Stinner121aab42011-09-29 23:40:53 +02003774/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003775
3776 XXX: It would be nice to able to avoid Python function call overhead, by
3777 using directly the C version of find_class(), when find_class() is not
3778 overridden by a subclass. Although, this could become rather hackish. A
3779 simpler optimization would be to call the C function when self is not a
3780 subclass instance. */
3781static PyObject *
3782find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3783{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003784 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003785
3786 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3787 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003788}
3789
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003790static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003791marker(UnpicklerObject *self)
3792{
3793 if (self->num_marks < 1) {
3794 PyErr_SetString(UnpicklingError, "could not find MARK");
3795 return -1;
3796 }
3797
3798 return self->marks[--self->num_marks];
3799}
3800
3801static int
3802load_none(UnpicklerObject *self)
3803{
3804 PDATA_APPEND(self->stack, Py_None, -1);
3805 return 0;
3806}
3807
3808static int
3809bad_readline(void)
3810{
3811 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3812 return -1;
3813}
3814
3815static int
3816load_int(UnpicklerObject *self)
3817{
3818 PyObject *value;
3819 char *endptr, *s;
3820 Py_ssize_t len;
3821 long x;
3822
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003823 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003824 return -1;
3825 if (len < 2)
3826 return bad_readline();
3827
3828 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003829 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003830 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003831 x = strtol(s, &endptr, 0);
3832
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003833 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003834 /* Hm, maybe we've got something long. Let's try reading
3835 * it as a Python long object. */
3836 errno = 0;
3837 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003838 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003839 if (value == NULL) {
3840 PyErr_SetString(PyExc_ValueError,
3841 "could not convert string to int");
3842 return -1;
3843 }
3844 }
3845 else {
3846 if (len == 3 && (x == 0 || x == 1)) {
3847 if ((value = PyBool_FromLong(x)) == NULL)
3848 return -1;
3849 }
3850 else {
3851 if ((value = PyLong_FromLong(x)) == NULL)
3852 return -1;
3853 }
3854 }
3855
3856 PDATA_PUSH(self->stack, value, -1);
3857 return 0;
3858}
3859
3860static int
3861load_bool(UnpicklerObject *self, PyObject *boolean)
3862{
3863 assert(boolean == Py_True || boolean == Py_False);
3864 PDATA_APPEND(self->stack, boolean, -1);
3865 return 0;
3866}
3867
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003868/* s contains x bytes of an unsigned little-endian integer. Return its value
3869 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3870 */
3871static Py_ssize_t
3872calc_binsize(char *bytes, int size)
3873{
3874 unsigned char *s = (unsigned char *)bytes;
3875 size_t x = 0;
3876
3877 assert(size == 4);
3878
3879 x = (size_t) s[0];
3880 x |= (size_t) s[1] << 8;
3881 x |= (size_t) s[2] << 16;
3882 x |= (size_t) s[3] << 24;
3883
3884 if (x > PY_SSIZE_T_MAX)
3885 return -1;
3886 else
3887 return (Py_ssize_t) x;
3888}
3889
/* `bytes` points at `size` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is accumulated in an unsigned long so that no shift ever
 * moves a bit into (or past) the sign bit of a signed type, which would
 * be undefined behaviour where long is 32 bits wide.  `size` must not
 * exceed sizeof(long).
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;
    int i;

    for (i = 0; i < size; i++) {
        acc |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: when bit 31 is set the result is acc - 2**32.  It is
     * computed as (low 31 bits) - 2**31 in two steps so that every
     * intermediate value fits in a 32-bit long.
     */
    if (size == 4 && (acc & 0x80000000UL) != 0) {
        return (long)(acc & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)acc;
}
3916
3917static int
3918load_binintx(UnpicklerObject *self, char *s, int size)
3919{
3920 PyObject *value;
3921 long x;
3922
3923 x = calc_binint(s, size);
3924
3925 if ((value = PyLong_FromLong(x)) == NULL)
3926 return -1;
3927
3928 PDATA_PUSH(self->stack, value, -1);
3929 return 0;
3930}
3931
3932static int
3933load_binint(UnpicklerObject *self)
3934{
3935 char *s;
3936
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003937 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003938 return -1;
3939
3940 return load_binintx(self, s, 4);
3941}
3942
3943static int
3944load_binint1(UnpicklerObject *self)
3945{
3946 char *s;
3947
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003948 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003949 return -1;
3950
3951 return load_binintx(self, s, 1);
3952}
3953
3954static int
3955load_binint2(UnpicklerObject *self)
3956{
3957 char *s;
3958
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003959 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003960 return -1;
3961
3962 return load_binintx(self, s, 2);
3963}
3964
3965static int
3966load_long(UnpicklerObject *self)
3967{
3968 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003969 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003970 Py_ssize_t len;
3971
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003972 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003973 return -1;
3974 if (len < 2)
3975 return bad_readline();
3976
Mark Dickinson8dd05142009-01-20 20:43:58 +00003977 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3978 the 'L' before calling PyLong_FromString. In order to maintain
3979 compatibility with Python 3.0.0, we don't actually *require*
3980 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003981 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003982 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003983 /* XXX: Should the base argument explicitly set to 10? */
3984 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003985 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003986 return -1;
3987
3988 PDATA_PUSH(self->stack, value, -1);
3989 return 0;
3990}
3991
3992/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3993 * data following.
3994 */
3995static int
3996load_counted_long(UnpicklerObject *self, int size)
3997{
3998 PyObject *value;
3999 char *nbytes;
4000 char *pdata;
4001
4002 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004003 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004004 return -1;
4005
4006 size = calc_binint(nbytes, size);
4007 if (size < 0) {
4008 /* Corrupt or hostile pickle -- we never write one like this */
4009 PyErr_SetString(UnpicklingError,
4010 "LONG pickle has negative byte count");
4011 return -1;
4012 }
4013
4014 if (size == 0)
4015 value = PyLong_FromLong(0L);
4016 else {
4017 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004018 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004019 return -1;
4020 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4021 1 /* little endian */ , 1 /* signed */ );
4022 }
4023 if (value == NULL)
4024 return -1;
4025 PDATA_PUSH(self->stack, value, -1);
4026 return 0;
4027}
4028
4029static int
4030load_float(UnpicklerObject *self)
4031{
4032 PyObject *value;
4033 char *endptr, *s;
4034 Py_ssize_t len;
4035 double d;
4036
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004037 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004038 return -1;
4039 if (len < 2)
4040 return bad_readline();
4041
4042 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004043 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4044 if (d == -1.0 && PyErr_Occurred())
4045 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004046 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004047 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4048 return -1;
4049 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004050 value = PyFloat_FromDouble(d);
4051 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004052 return -1;
4053
4054 PDATA_PUSH(self->stack, value, -1);
4055 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004056}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004057
4058static int
4059load_binfloat(UnpicklerObject *self)
4060{
4061 PyObject *value;
4062 double x;
4063 char *s;
4064
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004065 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004066 return -1;
4067
4068 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4069 if (x == -1.0 && PyErr_Occurred())
4070 return -1;
4071
4072 if ((value = PyFloat_FromDouble(x)) == NULL)
4073 return -1;
4074
4075 PDATA_PUSH(self->stack, value, -1);
4076 return 0;
4077}
4078
4079static int
4080load_string(UnpicklerObject *self)
4081{
4082 PyObject *bytes;
4083 PyObject *str = NULL;
4084 Py_ssize_t len;
4085 char *s, *p;
4086
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004087 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004088 return -1;
4089 if (len < 3)
4090 return bad_readline();
4091 if ((s = strdup(s)) == NULL) {
4092 PyErr_NoMemory();
4093 return -1;
4094 }
4095
4096 /* Strip outermost quotes */
4097 while (s[len - 1] <= ' ')
4098 len--;
4099 if (s[0] == '"' && s[len - 1] == '"') {
4100 s[len - 1] = '\0';
4101 p = s + 1;
4102 len -= 2;
4103 }
4104 else if (s[0] == '\'' && s[len - 1] == '\'') {
4105 s[len - 1] = '\0';
4106 p = s + 1;
4107 len -= 2;
4108 }
4109 else {
4110 free(s);
4111 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4112 return -1;
4113 }
4114
4115 /* Use the PyBytes API to decode the string, since that is what is used
4116 to encode, and then coerce the result to Unicode. */
4117 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4118 free(s);
4119 if (bytes == NULL)
4120 return -1;
4121 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4122 Py_DECREF(bytes);
4123 if (str == NULL)
4124 return -1;
4125
4126 PDATA_PUSH(self->stack, str, -1);
4127 return 0;
4128}
4129
4130static int
4131load_binbytes(UnpicklerObject *self)
4132{
4133 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004134 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004135 char *s;
4136
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004137 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004138 return -1;
4139
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004140 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004141 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004142 PyErr_Format(PyExc_OverflowError,
4143 "BINBYTES exceeds system's maximum size of %zd bytes",
4144 PY_SSIZE_T_MAX
4145 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004146 return -1;
4147 }
4148
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004149 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004150 return -1;
4151 bytes = PyBytes_FromStringAndSize(s, x);
4152 if (bytes == NULL)
4153 return -1;
4154
4155 PDATA_PUSH(self->stack, bytes, -1);
4156 return 0;
4157}
4158
4159static int
4160load_short_binbytes(UnpicklerObject *self)
4161{
4162 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004163 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004164 char *s;
4165
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004166 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004167 return -1;
4168
4169 x = (unsigned char)s[0];
4170
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004171 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004172 return -1;
4173
4174 bytes = PyBytes_FromStringAndSize(s, x);
4175 if (bytes == NULL)
4176 return -1;
4177
4178 PDATA_PUSH(self->stack, bytes, -1);
4179 return 0;
4180}
4181
4182static int
4183load_binstring(UnpicklerObject *self)
4184{
4185 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004186 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004187 char *s;
4188
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004189 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004190 return -1;
4191
4192 x = calc_binint(s, 4);
4193 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004194 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004195 "BINSTRING pickle has negative byte count");
4196 return -1;
4197 }
4198
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004199 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004200 return -1;
4201
4202 /* Convert Python 2.x strings to unicode. */
4203 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4204 if (str == NULL)
4205 return -1;
4206
4207 PDATA_PUSH(self->stack, str, -1);
4208 return 0;
4209}
4210
4211static int
4212load_short_binstring(UnpicklerObject *self)
4213{
4214 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004215 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004216 char *s;
4217
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004218 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004219 return -1;
4220
4221 x = (unsigned char)s[0];
4222
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004223 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004224 return -1;
4225
4226 /* Convert Python 2.x strings to unicode. */
4227 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4228 if (str == NULL)
4229 return -1;
4230
4231 PDATA_PUSH(self->stack, str, -1);
4232 return 0;
4233}
4234
4235static int
4236load_unicode(UnpicklerObject *self)
4237{
4238 PyObject *str;
4239 Py_ssize_t len;
4240 char *s;
4241
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004242 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004243 return -1;
4244 if (len < 1)
4245 return bad_readline();
4246
4247 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4248 if (str == NULL)
4249 return -1;
4250
4251 PDATA_PUSH(self->stack, str, -1);
4252 return 0;
4253}
4254
4255static int
4256load_binunicode(UnpicklerObject *self)
4257{
4258 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004259 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004260 char *s;
4261
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004262 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004263 return -1;
4264
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004265 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004266 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004267 PyErr_Format(PyExc_OverflowError,
4268 "BINUNICODE exceeds system's maximum size of %zd bytes",
4269 PY_SSIZE_T_MAX
4270 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004271 return -1;
4272 }
4273
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004274
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004275 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004276 return -1;
4277
Victor Stinner485fb562010-04-13 11:07:24 +00004278 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004279 if (str == NULL)
4280 return -1;
4281
4282 PDATA_PUSH(self->stack, str, -1);
4283 return 0;
4284}
4285
4286static int
4287load_tuple(UnpicklerObject *self)
4288{
4289 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004290 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004291
4292 if ((i = marker(self)) < 0)
4293 return -1;
4294
4295 tuple = Pdata_poptuple(self->stack, i);
4296 if (tuple == NULL)
4297 return -1;
4298 PDATA_PUSH(self->stack, tuple, -1);
4299 return 0;
4300}
4301
4302static int
4303load_counted_tuple(UnpicklerObject *self, int len)
4304{
4305 PyObject *tuple;
4306
4307 tuple = PyTuple_New(len);
4308 if (tuple == NULL)
4309 return -1;
4310
4311 while (--len >= 0) {
4312 PyObject *item;
4313
4314 PDATA_POP(self->stack, item);
4315 if (item == NULL)
4316 return -1;
4317 PyTuple_SET_ITEM(tuple, len, item);
4318 }
4319 PDATA_PUSH(self->stack, tuple, -1);
4320 return 0;
4321}
4322
4323static int
4324load_empty_list(UnpicklerObject *self)
4325{
4326 PyObject *list;
4327
4328 if ((list = PyList_New(0)) == NULL)
4329 return -1;
4330 PDATA_PUSH(self->stack, list, -1);
4331 return 0;
4332}
4333
4334static int
4335load_empty_dict(UnpicklerObject *self)
4336{
4337 PyObject *dict;
4338
4339 if ((dict = PyDict_New()) == NULL)
4340 return -1;
4341 PDATA_PUSH(self->stack, dict, -1);
4342 return 0;
4343}
4344
4345static int
4346load_list(UnpicklerObject *self)
4347{
4348 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004349 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004350
4351 if ((i = marker(self)) < 0)
4352 return -1;
4353
4354 list = Pdata_poplist(self->stack, i);
4355 if (list == NULL)
4356 return -1;
4357 PDATA_PUSH(self->stack, list, -1);
4358 return 0;
4359}
4360
4361static int
4362load_dict(UnpicklerObject *self)
4363{
4364 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004365 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004366
4367 if ((i = marker(self)) < 0)
4368 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004369 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004370
4371 if ((dict = PyDict_New()) == NULL)
4372 return -1;
4373
4374 for (k = i + 1; k < j; k += 2) {
4375 key = self->stack->data[k - 1];
4376 value = self->stack->data[k];
4377 if (PyDict_SetItem(dict, key, value) < 0) {
4378 Py_DECREF(dict);
4379 return -1;
4380 }
4381 }
4382 Pdata_clear(self->stack, i);
4383 PDATA_PUSH(self->stack, dict, -1);
4384 return 0;
4385}
4386
4387static PyObject *
4388instantiate(PyObject *cls, PyObject *args)
4389{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004390 PyObject *result = NULL;
4391 /* Caller must assure args are a tuple. Normally, args come from
4392 Pdata_poptuple which packs objects from the top of the stack
4393 into a newly created tuple. */
4394 assert(PyTuple_Check(args));
4395 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4396 PyObject_HasAttrString(cls, "__getinitargs__")) {
4397 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004398 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004399 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004400 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004401
4402 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004403 }
4404 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004405}
4406
4407static int
4408load_obj(UnpicklerObject *self)
4409{
4410 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004411 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004412
4413 if ((i = marker(self)) < 0)
4414 return -1;
4415
4416 args = Pdata_poptuple(self->stack, i + 1);
4417 if (args == NULL)
4418 return -1;
4419
4420 PDATA_POP(self->stack, cls);
4421 if (cls) {
4422 obj = instantiate(cls, args);
4423 Py_DECREF(cls);
4424 }
4425 Py_DECREF(args);
4426 if (obj == NULL)
4427 return -1;
4428
4429 PDATA_PUSH(self->stack, obj, -1);
4430 return 0;
4431}
4432
4433static int
4434load_inst(UnpicklerObject *self)
4435{
4436 PyObject *cls = NULL;
4437 PyObject *args = NULL;
4438 PyObject *obj = NULL;
4439 PyObject *module_name;
4440 PyObject *class_name;
4441 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004442 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004443 char *s;
4444
4445 if ((i = marker(self)) < 0)
4446 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004447 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004448 return -1;
4449 if (len < 2)
4450 return bad_readline();
4451
4452 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4453 identifiers are permitted in Python 3.0, since the INST opcode is only
4454 supported by older protocols on Python 2.x. */
4455 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4456 if (module_name == NULL)
4457 return -1;
4458
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004459 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004460 if (len < 2)
4461 return bad_readline();
4462 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004463 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004464 cls = find_class(self, module_name, class_name);
4465 Py_DECREF(class_name);
4466 }
4467 }
4468 Py_DECREF(module_name);
4469
4470 if (cls == NULL)
4471 return -1;
4472
4473 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4474 obj = instantiate(cls, args);
4475 Py_DECREF(args);
4476 }
4477 Py_DECREF(cls);
4478
4479 if (obj == NULL)
4480 return -1;
4481
4482 PDATA_PUSH(self->stack, obj, -1);
4483 return 0;
4484}
4485
4486static int
4487load_newobj(UnpicklerObject *self)
4488{
4489 PyObject *args = NULL;
4490 PyObject *clsraw = NULL;
4491 PyTypeObject *cls; /* clsraw cast to its true type */
4492 PyObject *obj;
4493
4494 /* Stack is ... cls argtuple, and we want to call
4495 * cls.__new__(cls, *argtuple).
4496 */
4497 PDATA_POP(self->stack, args);
4498 if (args == NULL)
4499 goto error;
4500 if (!PyTuple_Check(args)) {
4501 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4502 goto error;
4503 }
4504
4505 PDATA_POP(self->stack, clsraw);
4506 cls = (PyTypeObject *)clsraw;
4507 if (cls == NULL)
4508 goto error;
4509 if (!PyType_Check(cls)) {
4510 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4511 "isn't a type object");
4512 goto error;
4513 }
4514 if (cls->tp_new == NULL) {
4515 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4516 "has NULL tp_new");
4517 goto error;
4518 }
4519
4520 /* Call __new__. */
4521 obj = cls->tp_new(cls, args, NULL);
4522 if (obj == NULL)
4523 goto error;
4524
4525 Py_DECREF(args);
4526 Py_DECREF(clsraw);
4527 PDATA_PUSH(self->stack, obj, -1);
4528 return 0;
4529
4530 error:
4531 Py_XDECREF(args);
4532 Py_XDECREF(clsraw);
4533 return -1;
4534}
4535
4536static int
4537load_global(UnpicklerObject *self)
4538{
4539 PyObject *global = NULL;
4540 PyObject *module_name;
4541 PyObject *global_name;
4542 Py_ssize_t len;
4543 char *s;
4544
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004545 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004546 return -1;
4547 if (len < 2)
4548 return bad_readline();
4549 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4550 if (!module_name)
4551 return -1;
4552
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004553 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004554 if (len < 2) {
4555 Py_DECREF(module_name);
4556 return bad_readline();
4557 }
4558 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4559 if (global_name) {
4560 global = find_class(self, module_name, global_name);
4561 Py_DECREF(global_name);
4562 }
4563 }
4564 Py_DECREF(module_name);
4565
4566 if (global == NULL)
4567 return -1;
4568 PDATA_PUSH(self->stack, global, -1);
4569 return 0;
4570}
4571
4572static int
4573load_persid(UnpicklerObject *self)
4574{
4575 PyObject *pid;
4576 Py_ssize_t len;
4577 char *s;
4578
4579 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004580 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004581 return -1;
4582 if (len < 2)
4583 return bad_readline();
4584
4585 pid = PyBytes_FromStringAndSize(s, len - 1);
4586 if (pid == NULL)
4587 return -1;
4588
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004589 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004590 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004591 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004592 if (pid == NULL)
4593 return -1;
4594
4595 PDATA_PUSH(self->stack, pid, -1);
4596 return 0;
4597 }
4598 else {
4599 PyErr_SetString(UnpicklingError,
4600 "A load persistent id instruction was encountered,\n"
4601 "but no persistent_load function was specified.");
4602 return -1;
4603 }
4604}
4605
4606static int
4607load_binpersid(UnpicklerObject *self)
4608{
4609 PyObject *pid;
4610
4611 if (self->pers_func) {
4612 PDATA_POP(self->stack, pid);
4613 if (pid == NULL)
4614 return -1;
4615
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004616 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004617 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004618 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004619 if (pid == NULL)
4620 return -1;
4621
4622 PDATA_PUSH(self->stack, pid, -1);
4623 return 0;
4624 }
4625 else {
4626 PyErr_SetString(UnpicklingError,
4627 "A load persistent id instruction was encountered,\n"
4628 "but no persistent_load function was specified.");
4629 return -1;
4630 }
4631}
4632
4633static int
4634load_pop(UnpicklerObject *self)
4635{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004636 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004637
4638 /* Note that we split the (pickle.py) stack into two stacks,
4639 * an object stack and a mark stack. We have to be clever and
4640 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004641 * mark stack first, and only signalling a stack underflow if
4642 * the object stack is empty and the mark stack doesn't match
4643 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004644 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004645 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004646 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004647 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004648 len--;
4649 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004650 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004651 } else {
4652 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004653 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004654 return 0;
4655}
4656
4657static int
4658load_pop_mark(UnpicklerObject *self)
4659{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004660 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004661
4662 if ((i = marker(self)) < 0)
4663 return -1;
4664
4665 Pdata_clear(self->stack, i);
4666
4667 return 0;
4668}
4669
4670static int
4671load_dup(UnpicklerObject *self)
4672{
4673 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004674 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004676 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004677 return stack_underflow();
4678 last = self->stack->data[len - 1];
4679 PDATA_APPEND(self->stack, last, -1);
4680 return 0;
4681}
4682
4683static int
4684load_get(UnpicklerObject *self)
4685{
4686 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004687 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004688 Py_ssize_t len;
4689 char *s;
4690
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004691 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004692 return -1;
4693 if (len < 2)
4694 return bad_readline();
4695
4696 key = PyLong_FromString(s, NULL, 10);
4697 if (key == NULL)
4698 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004699 idx = PyLong_AsSsize_t(key);
4700 if (idx == -1 && PyErr_Occurred()) {
4701 Py_DECREF(key);
4702 return -1;
4703 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004704
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004705 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004706 if (value == NULL) {
4707 if (!PyErr_Occurred())
4708 PyErr_SetObject(PyExc_KeyError, key);
4709 Py_DECREF(key);
4710 return -1;
4711 }
4712 Py_DECREF(key);
4713
4714 PDATA_APPEND(self->stack, value, -1);
4715 return 0;
4716}
4717
4718static int
4719load_binget(UnpicklerObject *self)
4720{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004721 PyObject *value;
4722 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004723 char *s;
4724
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004725 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004726 return -1;
4727
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004728 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004730 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004731 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004732 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004733 if (!PyErr_Occurred())
4734 PyErr_SetObject(PyExc_KeyError, key);
4735 Py_DECREF(key);
4736 return -1;
4737 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004738
4739 PDATA_APPEND(self->stack, value, -1);
4740 return 0;
4741}
4742
4743static int
4744load_long_binget(UnpicklerObject *self)
4745{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004746 PyObject *value;
4747 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004748 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004749
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004750 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004751 return -1;
4752
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004753 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004754
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004755 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004756 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004757 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004758 if (!PyErr_Occurred())
4759 PyErr_SetObject(PyExc_KeyError, key);
4760 Py_DECREF(key);
4761 return -1;
4762 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004763
4764 PDATA_APPEND(self->stack, value, -1);
4765 return 0;
4766}
4767
4768/* Push an object from the extension registry (EXT[124]). nbytes is
4769 * the number of bytes following the opcode, holding the index (code) value.
4770 */
4771static int
4772load_extension(UnpicklerObject *self, int nbytes)
4773{
4774 char *codebytes; /* the nbytes bytes after the opcode */
4775 long code; /* calc_binint returns long */
4776 PyObject *py_code; /* code as a Python int */
4777 PyObject *obj; /* the object to push */
4778 PyObject *pair; /* (module_name, class_name) */
4779 PyObject *module_name, *class_name;
4780
4781 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004782 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004783 return -1;
4784 code = calc_binint(codebytes, nbytes);
4785 if (code <= 0) { /* note that 0 is forbidden */
4786 /* Corrupt or hostile pickle. */
4787 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4788 return -1;
4789 }
4790
4791 /* Look for the code in the cache. */
4792 py_code = PyLong_FromLong(code);
4793 if (py_code == NULL)
4794 return -1;
4795 obj = PyDict_GetItem(extension_cache, py_code);
4796 if (obj != NULL) {
4797 /* Bingo. */
4798 Py_DECREF(py_code);
4799 PDATA_APPEND(self->stack, obj, -1);
4800 return 0;
4801 }
4802
4803 /* Look up the (module_name, class_name) pair. */
4804 pair = PyDict_GetItem(inverted_registry, py_code);
4805 if (pair == NULL) {
4806 Py_DECREF(py_code);
4807 PyErr_Format(PyExc_ValueError, "unregistered extension "
4808 "code %ld", code);
4809 return -1;
4810 }
4811 /* Since the extension registry is manipulable via Python code,
4812 * confirm that pair is really a 2-tuple of strings.
4813 */
4814 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4815 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4816 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4817 Py_DECREF(py_code);
4818 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4819 "isn't a 2-tuple of strings", code);
4820 return -1;
4821 }
4822 /* Load the object. */
4823 obj = find_class(self, module_name, class_name);
4824 if (obj == NULL) {
4825 Py_DECREF(py_code);
4826 return -1;
4827 }
4828 /* Cache code -> obj. */
4829 code = PyDict_SetItem(extension_cache, py_code, obj);
4830 Py_DECREF(py_code);
4831 if (code < 0) {
4832 Py_DECREF(obj);
4833 return -1;
4834 }
4835 PDATA_PUSH(self->stack, obj, -1);
4836 return 0;
4837}
4838
4839static int
4840load_put(UnpicklerObject *self)
4841{
4842 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004843 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004844 Py_ssize_t len;
4845 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004847 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848 return -1;
4849 if (len < 2)
4850 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004851 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004852 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004853 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004854
4855 key = PyLong_FromString(s, NULL, 10);
4856 if (key == NULL)
4857 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004858 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004859 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004860 if (idx < 0) {
4861 if (!PyErr_Occurred())
4862 PyErr_SetString(PyExc_ValueError,
4863 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004864 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004865 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004866
4867 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004868}
4869
4870static int
4871load_binput(UnpicklerObject *self)
4872{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004873 PyObject *value;
4874 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004875 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004876
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004877 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004878 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004879
4880 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004881 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004882 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004883
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004884 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004885
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004886 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004887}
4888
4889static int
4890load_long_binput(UnpicklerObject *self)
4891{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004892 PyObject *value;
4893 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004894 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004895
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004896 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004898
4899 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004900 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004901 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004902
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004903 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004904 if (idx < 0) {
4905 PyErr_SetString(PyExc_ValueError,
4906 "negative LONG_BINPUT argument");
4907 return -1;
4908 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004909
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004910 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004911}
4912
4913static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004914do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004915{
4916 PyObject *value;
4917 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004918 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004919
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004920 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004921 if (x > len || x <= 0)
4922 return stack_underflow();
4923 if (len == x) /* nothing to do */
4924 return 0;
4925
4926 list = self->stack->data[x - 1];
4927
4928 if (PyList_Check(list)) {
4929 PyObject *slice;
4930 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004931 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004932
4933 slice = Pdata_poplist(self->stack, x);
4934 if (!slice)
4935 return -1;
4936 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004937 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004938 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004939 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004940 }
4941 else {
4942 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004943 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02004945 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004946 if (append_func == NULL)
4947 return -1;
4948 for (i = x; i < len; i++) {
4949 PyObject *result;
4950
4951 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004952 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004953 if (result == NULL) {
4954 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004955 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004956 return -1;
4957 }
4958 Py_DECREF(result);
4959 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004960 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004961 }
4962
4963 return 0;
4964}
4965
4966static int
4967load_append(UnpicklerObject *self)
4968{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004969 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004970}
4971
4972static int
4973load_appends(UnpicklerObject *self)
4974{
4975 return do_append(self, marker(self));
4976}
4977
4978static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004979do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004980{
4981 PyObject *value, *key;
4982 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004983 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004984 int status = 0;
4985
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004986 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004987 if (x > len || x <= 0)
4988 return stack_underflow();
4989 if (len == x) /* nothing to do */
4990 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004991 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004992 /* Currupt or hostile pickle -- we never write one like this. */
4993 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4994 return -1;
4995 }
4996
4997 /* Here, dict does not actually need to be a PyDict; it could be anything
4998 that supports the __setitem__ attribute. */
4999 dict = self->stack->data[x - 1];
5000
5001 for (i = x + 1; i < len; i += 2) {
5002 key = self->stack->data[i - 1];
5003 value = self->stack->data[i];
5004 if (PyObject_SetItem(dict, key, value) < 0) {
5005 status = -1;
5006 break;
5007 }
5008 }
5009
5010 Pdata_clear(self->stack, x);
5011 return status;
5012}
5013
5014static int
5015load_setitem(UnpicklerObject *self)
5016{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005017 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005018}
5019
5020static int
5021load_setitems(UnpicklerObject *self)
5022{
5023 return do_setitems(self, marker(self));
5024}
5025
5026static int
5027load_build(UnpicklerObject *self)
5028{
5029 PyObject *state, *inst, *slotstate;
5030 PyObject *setstate;
5031 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005032 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005033
5034 /* Stack is ... instance, state. We want to leave instance at
5035 * the stack top, possibly mutated via instance.__setstate__(state).
5036 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005037 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005038 return stack_underflow();
5039
5040 PDATA_POP(self->stack, state);
5041 if (state == NULL)
5042 return -1;
5043
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005044 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005045
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005046 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005047 if (setstate == NULL) {
5048 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5049 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005050 else {
5051 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005052 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005053 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005054 }
5055 else {
5056 PyObject *result;
5057
5058 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005059 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005060 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005061 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005062 Py_DECREF(setstate);
5063 if (result == NULL)
5064 return -1;
5065 Py_DECREF(result);
5066 return 0;
5067 }
5068
5069 /* A default __setstate__. First see whether state embeds a
5070 * slot state dict too (a proto 2 addition).
5071 */
5072 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5073 PyObject *tmp = state;
5074
5075 state = PyTuple_GET_ITEM(tmp, 0);
5076 slotstate = PyTuple_GET_ITEM(tmp, 1);
5077 Py_INCREF(state);
5078 Py_INCREF(slotstate);
5079 Py_DECREF(tmp);
5080 }
5081 else
5082 slotstate = NULL;
5083
5084 /* Set inst.__dict__ from the state dict (if any). */
5085 if (state != Py_None) {
5086 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005087 PyObject *d_key, *d_value;
5088 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005089 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005090
5091 if (!PyDict_Check(state)) {
5092 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5093 goto error;
5094 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005095 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005096 if (dict == NULL)
5097 goto error;
5098
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005099 i = 0;
5100 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5101 /* normally the keys for instance attributes are
5102 interned. we should try to do that here. */
5103 Py_INCREF(d_key);
5104 if (PyUnicode_CheckExact(d_key))
5105 PyUnicode_InternInPlace(&d_key);
5106 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5107 Py_DECREF(d_key);
5108 goto error;
5109 }
5110 Py_DECREF(d_key);
5111 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005112 Py_DECREF(dict);
5113 }
5114
5115 /* Also set instance attributes from the slotstate dict (if any). */
5116 if (slotstate != NULL) {
5117 PyObject *d_key, *d_value;
5118 Py_ssize_t i;
5119
5120 if (!PyDict_Check(slotstate)) {
5121 PyErr_SetString(UnpicklingError,
5122 "slot state is not a dictionary");
5123 goto error;
5124 }
5125 i = 0;
5126 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5127 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5128 goto error;
5129 }
5130 }
5131
5132 if (0) {
5133 error:
5134 status = -1;
5135 }
5136
5137 Py_DECREF(state);
5138 Py_XDECREF(slotstate);
5139 return status;
5140}
5141
5142static int
5143load_mark(UnpicklerObject *self)
5144{
5145
5146 /* Note that we split the (pickle.py) stack into two stacks, an
5147 * object stack and a mark stack. Here we push a mark onto the
5148 * mark stack.
5149 */
5150
5151 if ((self->num_marks + 1) >= self->marks_size) {
5152 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005153 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005154
5155 /* Use the size_t type to check for overflow. */
5156 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005157 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005158 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005159 PyErr_NoMemory();
5160 return -1;
5161 }
5162
5163 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005164 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005165 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005166 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5167 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005168 if (marks == NULL) {
5169 PyErr_NoMemory();
5170 return -1;
5171 }
5172 self->marks = marks;
5173 self->marks_size = (Py_ssize_t)alloc;
5174 }
5175
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005176 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005177
5178 return 0;
5179}
5180
5181static int
5182load_reduce(UnpicklerObject *self)
5183{
5184 PyObject *callable = NULL;
5185 PyObject *argtup = NULL;
5186 PyObject *obj = NULL;
5187
5188 PDATA_POP(self->stack, argtup);
5189 if (argtup == NULL)
5190 return -1;
5191 PDATA_POP(self->stack, callable);
5192 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005193 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005194 Py_DECREF(callable);
5195 }
5196 Py_DECREF(argtup);
5197
5198 if (obj == NULL)
5199 return -1;
5200
5201 PDATA_PUSH(self->stack, obj, -1);
5202 return 0;
5203}
5204
5205/* Just raises an error if we don't know the protocol specified. PROTO
5206 * is the first opcode for protocols >= 2.
5207 */
5208static int
5209load_proto(UnpicklerObject *self)
5210{
5211 char *s;
5212 int i;
5213
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005214 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005215 return -1;
5216
5217 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005218 if (i <= HIGHEST_PROTOCOL) {
5219 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005220 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005221 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005222
5223 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5224 return -1;
5225}
5226
5227static PyObject *
5228load(UnpicklerObject *self)
5229{
5230 PyObject *err;
5231 PyObject *value = NULL;
5232 char *s;
5233
5234 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005235 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005236 Pdata_clear(self->stack, 0);
5237
5238 /* Convenient macros for the dispatch while-switch loop just below. */
5239#define OP(opcode, load_func) \
5240 case opcode: if (load_func(self) < 0) break; continue;
5241
5242#define OP_ARG(opcode, load_func, arg) \
5243 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5244
5245 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005246 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005247 break;
5248
5249 switch ((enum opcode)s[0]) {
5250 OP(NONE, load_none)
5251 OP(BININT, load_binint)
5252 OP(BININT1, load_binint1)
5253 OP(BININT2, load_binint2)
5254 OP(INT, load_int)
5255 OP(LONG, load_long)
5256 OP_ARG(LONG1, load_counted_long, 1)
5257 OP_ARG(LONG4, load_counted_long, 4)
5258 OP(FLOAT, load_float)
5259 OP(BINFLOAT, load_binfloat)
5260 OP(BINBYTES, load_binbytes)
5261 OP(SHORT_BINBYTES, load_short_binbytes)
5262 OP(BINSTRING, load_binstring)
5263 OP(SHORT_BINSTRING, load_short_binstring)
5264 OP(STRING, load_string)
5265 OP(UNICODE, load_unicode)
5266 OP(BINUNICODE, load_binunicode)
5267 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5268 OP_ARG(TUPLE1, load_counted_tuple, 1)
5269 OP_ARG(TUPLE2, load_counted_tuple, 2)
5270 OP_ARG(TUPLE3, load_counted_tuple, 3)
5271 OP(TUPLE, load_tuple)
5272 OP(EMPTY_LIST, load_empty_list)
5273 OP(LIST, load_list)
5274 OP(EMPTY_DICT, load_empty_dict)
5275 OP(DICT, load_dict)
5276 OP(OBJ, load_obj)
5277 OP(INST, load_inst)
5278 OP(NEWOBJ, load_newobj)
5279 OP(GLOBAL, load_global)
5280 OP(APPEND, load_append)
5281 OP(APPENDS, load_appends)
5282 OP(BUILD, load_build)
5283 OP(DUP, load_dup)
5284 OP(BINGET, load_binget)
5285 OP(LONG_BINGET, load_long_binget)
5286 OP(GET, load_get)
5287 OP(MARK, load_mark)
5288 OP(BINPUT, load_binput)
5289 OP(LONG_BINPUT, load_long_binput)
5290 OP(PUT, load_put)
5291 OP(POP, load_pop)
5292 OP(POP_MARK, load_pop_mark)
5293 OP(SETITEM, load_setitem)
5294 OP(SETITEMS, load_setitems)
5295 OP(PERSID, load_persid)
5296 OP(BINPERSID, load_binpersid)
5297 OP(REDUCE, load_reduce)
5298 OP(PROTO, load_proto)
5299 OP_ARG(EXT1, load_extension, 1)
5300 OP_ARG(EXT2, load_extension, 2)
5301 OP_ARG(EXT4, load_extension, 4)
5302 OP_ARG(NEWTRUE, load_bool, Py_True)
5303 OP_ARG(NEWFALSE, load_bool, Py_False)
5304
5305 case STOP:
5306 break;
5307
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005308 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005309 if (s[0] == '\0')
5310 PyErr_SetNone(PyExc_EOFError);
5311 else
5312 PyErr_Format(UnpicklingError,
5313 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005314 return NULL;
5315 }
5316
5317 break; /* and we are done! */
5318 }
5319
Antoine Pitrou04248a82010-10-12 20:51:21 +00005320 if (_Unpickler_SkipConsumed(self) < 0)
5321 return NULL;
5322
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005323 /* XXX: It is not clear what this is actually for. */
5324 if ((err = PyErr_Occurred())) {
5325 if (err == PyExc_EOFError) {
5326 PyErr_SetNone(PyExc_EOFError);
5327 }
5328 return NULL;
5329 }
5330
5331 PDATA_POP(self->stack, value);
5332 return value;
5333}
5334
/* Docstring of the "load" method; referenced from Unpickler_methods. */
PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");
5341
5342static PyObject *
5343Unpickler_load(UnpicklerObject *self)
5344{
5345 /* Check whether the Unpickler was initialized correctly. This prevents
5346 segfaulting if a subclass overridden __init__ with a function that does
5347 not call Unpickler.__init__(). Here, we simply ensure that self->read
5348 is not NULL. */
5349 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005350 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005351 "Unpickler.__init__() was not called by %s.__init__()",
5352 Py_TYPE(self)->tp_name);
5353 return NULL;
5354 }
5355
5356 return load(self);
5357}
5358
/* The name of find_class() is misleading.  In newer pickle protocols, this
   function is used for loading any global (e.g. functions), not just
   classes.  The name is kept only for backward compatibility. */

/* Docstring of the "find_class" method; referenced from
   Unpickler_methods. */
PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary. Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed. Both arguments passed are str objects.\n");
5372
5373static PyObject *
5374Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5375{
5376 PyObject *global;
5377 PyObject *modules_dict;
5378 PyObject *module;
5379 PyObject *module_name, *global_name;
5380
5381 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5382 &module_name, &global_name))
5383 return NULL;
5384
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005385 /* Try to map the old names used in Python 2.x to the new ones used in
5386 Python 3.x. We do this only with old pickle protocols and when the
5387 user has not disabled the feature. */
5388 if (self->proto < 3 && self->fix_imports) {
5389 PyObject *key;
5390 PyObject *item;
5391
5392 /* Check if the global (i.e., a function or a class) was renamed
5393 or moved to another module. */
5394 key = PyTuple_Pack(2, module_name, global_name);
5395 if (key == NULL)
5396 return NULL;
5397 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5398 Py_DECREF(key);
5399 if (item) {
5400 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5401 PyErr_Format(PyExc_RuntimeError,
5402 "_compat_pickle.NAME_MAPPING values should be "
5403 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5404 return NULL;
5405 }
5406 module_name = PyTuple_GET_ITEM(item, 0);
5407 global_name = PyTuple_GET_ITEM(item, 1);
5408 if (!PyUnicode_Check(module_name) ||
5409 !PyUnicode_Check(global_name)) {
5410 PyErr_Format(PyExc_RuntimeError,
5411 "_compat_pickle.NAME_MAPPING values should be "
5412 "pairs of str, not (%.200s, %.200s)",
5413 Py_TYPE(module_name)->tp_name,
5414 Py_TYPE(global_name)->tp_name);
5415 return NULL;
5416 }
5417 }
5418 else if (PyErr_Occurred()) {
5419 return NULL;
5420 }
5421
5422 /* Check if the module was renamed. */
5423 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5424 if (item) {
5425 if (!PyUnicode_Check(item)) {
5426 PyErr_Format(PyExc_RuntimeError,
5427 "_compat_pickle.IMPORT_MAPPING values should be "
5428 "strings, not %.200s", Py_TYPE(item)->tp_name);
5429 return NULL;
5430 }
5431 module_name = item;
5432 }
5433 else if (PyErr_Occurred()) {
5434 return NULL;
5435 }
5436 }
5437
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005438 modules_dict = PySys_GetObject("modules");
5439 if (modules_dict == NULL)
5440 return NULL;
5441
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005442 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005443 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005444 if (PyErr_Occurred())
5445 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005446 module = PyImport_Import(module_name);
5447 if (module == NULL)
5448 return NULL;
5449 global = PyObject_GetAttr(module, global_name);
5450 Py_DECREF(module);
5451 }
Victor Stinner121aab42011-09-29 23:40:53 +02005452 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005453 global = PyObject_GetAttr(module, global_name);
5454 }
5455 return global;
5456}
5457
/* Method table for _pickle.Unpickler.  load() is registered as taking no
   arguments; find_class() receives its arguments as a positional tuple. */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}  /* sentinel */
};
5465
5466static void
5467Unpickler_dealloc(UnpicklerObject *self)
5468{
5469 PyObject_GC_UnTrack((PyObject *)self);
5470 Py_XDECREF(self->readline);
5471 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005472 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005473 Py_XDECREF(self->stack);
5474 Py_XDECREF(self->pers_func);
5475 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005476 if (self->buffer.buf != NULL) {
5477 PyBuffer_Release(&self->buffer);
5478 self->buffer.buf = NULL;
5479 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005480
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005481 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005482 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005483 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005484 free(self->encoding);
5485 free(self->errors);
5486
5487 Py_TYPE(self)->tp_free((PyObject *)self);
5488}
5489
5490static int
5491Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5492{
5493 Py_VISIT(self->readline);
5494 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005495 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005496 Py_VISIT(self->stack);
5497 Py_VISIT(self->pers_func);
5498 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005499 return 0;
5500}
5501
5502static int
5503Unpickler_clear(UnpicklerObject *self)
5504{
5505 Py_CLEAR(self->readline);
5506 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005507 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005508 Py_CLEAR(self->stack);
5509 Py_CLEAR(self->pers_func);
5510 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005511 if (self->buffer.buf != NULL) {
5512 PyBuffer_Release(&self->buffer);
5513 self->buffer.buf = NULL;
5514 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005515
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005516 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005517 PyMem_Free(self->marks);
5518 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005519 PyMem_Free(self->input_line);
5520 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005521 free(self->encoding);
5522 self->encoding = NULL;
5523 free(self->errors);
5524 self->errors = NULL;
5525
5526 return 0;
5527}
5528
5529PyDoc_STRVAR(Unpickler_doc,
5530"Unpickler(file, *, encoding='ASCII', errors='strict')"
5531"\n"
5532"This takes a binary file for reading a pickle data stream.\n"
5533"\n"
5534"The protocol version of the pickle is detected automatically, so no\n"
5535"proto argument is needed.\n"
5536"\n"
5537"The file-like object must have two methods, a read() method\n"
5538"that takes an integer argument, and a readline() method that\n"
5539"requires no arguments. Both methods should return bytes.\n"
5540"Thus file-like object can be a binary file object opened for\n"
5541"reading, a BytesIO object, or any other custom object that\n"
5542"meets this interface.\n"
5543"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005544"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5545"which are used to control compatiblity support for pickle stream\n"
5546"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5547"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5548"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5549"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5550"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005551
5552static int
5553Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5554{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005555 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005556 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005557 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005558 char *encoding = NULL;
5559 char *errors = NULL;
5560
5561 /* XXX: That is an horrible error message. But, I don't know how to do
5562 better... */
5563 if (Py_SIZE(args) != 1) {
5564 PyErr_Format(PyExc_TypeError,
5565 "%s takes exactly one positional argument (%zd given)",
5566 Py_TYPE(self)->tp_name, Py_SIZE(args));
5567 return -1;
5568 }
5569
5570 /* Arguments parsing needs to be done in the __init__() method to allow
5571 subclasses to define their own __init__() method, which may (or may
5572 not) support Unpickler arguments. However, this means we need to be
5573 extra careful in the other Unpickler methods, since a subclass could
5574 forget to call Unpickler.__init__() thus breaking our internal
5575 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005576 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005577 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005578 return -1;
5579
5580 /* In case of multiple __init__() calls, clear previous content. */
5581 if (self->read != NULL)
5582 (void)Unpickler_clear(self);
5583
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005584 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005585 return -1;
5586
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005587 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005588 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005589
5590 self->fix_imports = PyObject_IsTrue(fix_imports);
5591 if (self->fix_imports == -1)
5592 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005593
5594 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005595 _Py_IDENTIFIER(persistent_load);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005596 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5597 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005598 if (self->pers_func == NULL)
5599 return -1;
5600 }
5601 else {
5602 self->pers_func = NULL;
5603 }
5604
5605 self->stack = (Pdata *)Pdata_New();
5606 if (self->stack == NULL)
5607 return -1;
5608
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005609 self->memo_size = 32;
5610 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005611 if (self->memo == NULL)
5612 return -1;
5613
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005614 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005615 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005616
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005617 return 0;
5618}
5619
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005620/* Define a proxy object for the Unpickler's internal memo object. This is to
5621 * avoid breaking code like:
5622 * unpickler.memo.clear()
5623 * and
5624 * unpickler.memo = saved_memo
5625 * Is this a good idea? Not really, but we don't want to break code that uses
5626 * it. Note that we don't implement the entire mapping API here. This is
5627 * intentional, as these should be treated as black-box implementation details.
5628 *
5629 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005630 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005631 */
5632
/* Instance layout of the memo proxy: it only carries a reference to the
   Unpickler whose memo it gives access to. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler;  /* owned reference to the proxied Unpickler */
} UnpicklerMemoProxyObject;
5637
5638PyDoc_STRVAR(ump_clear_doc,
5639"memo.clear() -> None. Remove all items from memo.");
5640
5641static PyObject *
5642ump_clear(UnpicklerMemoProxyObject *self)
5643{
5644 _Unpickler_MemoCleanup(self->unpickler);
5645 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5646 if (self->unpickler->memo == NULL)
5647 return NULL;
5648 Py_RETURN_NONE;
5649}
5650
5651PyDoc_STRVAR(ump_copy_doc,
5652"memo.copy() -> new_memo. Copy the memo to a new object.");
5653
5654static PyObject *
5655ump_copy(UnpicklerMemoProxyObject *self)
5656{
5657 Py_ssize_t i;
5658 PyObject *new_memo = PyDict_New();
5659 if (new_memo == NULL)
5660 return NULL;
5661
5662 for (i = 0; i < self->unpickler->memo_size; i++) {
5663 int status;
5664 PyObject *key, *value;
5665
5666 value = self->unpickler->memo[i];
5667 if (value == NULL)
5668 continue;
5669
5670 key = PyLong_FromSsize_t(i);
5671 if (key == NULL)
5672 goto error;
5673 status = PyDict_SetItem(new_memo, key, value);
5674 Py_DECREF(key);
5675 if (status < 0)
5676 goto error;
5677 }
5678 return new_memo;
5679
5680error:
5681 Py_DECREF(new_memo);
5682 return NULL;
5683}
5684
5685PyDoc_STRVAR(ump_reduce_doc,
5686"memo.__reduce__(). Pickling support.");
5687
5688static PyObject *
5689ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5690{
5691 PyObject *reduce_value;
5692 PyObject *constructor_args;
5693 PyObject *contents = ump_copy(self);
5694 if (contents == NULL)
5695 return NULL;
5696
5697 reduce_value = PyTuple_New(2);
5698 if (reduce_value == NULL) {
5699 Py_DECREF(contents);
5700 return NULL;
5701 }
5702 constructor_args = PyTuple_New(1);
5703 if (constructor_args == NULL) {
5704 Py_DECREF(contents);
5705 Py_DECREF(reduce_value);
5706 return NULL;
5707 }
5708 PyTuple_SET_ITEM(constructor_args, 0, contents);
5709 Py_INCREF((PyObject *)&PyDict_Type);
5710 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5711 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5712 return reduce_value;
5713}
5714
/* Methods of the Unpickler memo proxy.  Only clear(), copy() and
   __reduce__() are provided; the rest of the mapping API is deliberately
   not implemented. */
static PyMethodDef unpicklerproxy_methods[] = {
    {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
    {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
    {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
    {NULL, NULL}  /* sentinel */
};
5721
5722static void
5723UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5724{
5725 PyObject_GC_UnTrack(self);
5726 Py_XDECREF(self->unpickler);
5727 PyObject_GC_Del((PyObject *)self);
5728}
5729
5730static int
5731UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5732 visitproc visit, void *arg)
5733{
5734 Py_VISIT(self->unpickler);
5735 return 0;
5736}
5737
5738static int
5739UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5740{
5741 Py_CLEAR(self->unpickler);
5742 return 0;
5743}
5744
/* Type object for _pickle.UnpicklerMemoProxy.  Instances are unhashable
   (tp_hash is PyObject_HashNotImplemented), may be subclassed and take
   part in cyclic garbage collection.  Slots after tp_methods are left
   zero-initialized. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /* tp_name */
    sizeof(UnpicklerMemoProxyObject),           /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (formerly tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
5775
5776static PyObject *
5777UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5778{
5779 UnpicklerMemoProxyObject *self;
5780
5781 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5782 &UnpicklerMemoProxyType);
5783 if (self == NULL)
5784 return NULL;
5785 Py_INCREF(unpickler);
5786 self->unpickler = unpickler;
5787 PyObject_GC_Track(self);
5788 return (PyObject *)self;
5789}
5790
5791/*****************************************************************************/
5792
5793
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005794static PyObject *
5795Unpickler_get_memo(UnpicklerObject *self)
5796{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005797 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005798}
5799
5800static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005801Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005802{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005803 PyObject **new_memo;
5804 Py_ssize_t new_memo_size = 0;
5805 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005806
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005807 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005808 PyErr_SetString(PyExc_TypeError,
5809 "attribute deletion is not supported");
5810 return -1;
5811 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005812
5813 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5814 UnpicklerObject *unpickler =
5815 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5816
5817 new_memo_size = unpickler->memo_size;
5818 new_memo = _Unpickler_NewMemo(new_memo_size);
5819 if (new_memo == NULL)
5820 return -1;
5821
5822 for (i = 0; i < new_memo_size; i++) {
5823 Py_XINCREF(unpickler->memo[i]);
5824 new_memo[i] = unpickler->memo[i];
5825 }
5826 }
5827 else if (PyDict_Check(obj)) {
5828 Py_ssize_t i = 0;
5829 PyObject *key, *value;
5830
5831 new_memo_size = PyDict_Size(obj);
5832 new_memo = _Unpickler_NewMemo(new_memo_size);
5833 if (new_memo == NULL)
5834 return -1;
5835
5836 while (PyDict_Next(obj, &i, &key, &value)) {
5837 Py_ssize_t idx;
5838 if (!PyLong_Check(key)) {
5839 PyErr_SetString(PyExc_TypeError,
5840 "memo key must be integers");
5841 goto error;
5842 }
5843 idx = PyLong_AsSsize_t(key);
5844 if (idx == -1 && PyErr_Occurred())
5845 goto error;
5846 if (_Unpickler_MemoPut(self, idx, value) < 0)
5847 goto error;
5848 }
5849 }
5850 else {
5851 PyErr_Format(PyExc_TypeError,
5852 "'memo' attribute must be an UnpicklerMemoProxy object"
5853 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005854 return -1;
5855 }
5856
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005857 _Unpickler_MemoCleanup(self);
5858 self->memo_size = new_memo_size;
5859 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005860
5861 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005862
5863 error:
5864 if (new_memo_size) {
5865 i = new_memo_size;
5866 while (--i >= 0) {
5867 Py_XDECREF(new_memo[i]);
5868 }
5869 PyMem_FREE(new_memo);
5870 }
5871 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005872}
5873
5874static PyObject *
5875Unpickler_get_persload(UnpicklerObject *self)
5876{
5877 if (self->pers_func == NULL)
5878 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5879 else
5880 Py_INCREF(self->pers_func);
5881 return self->pers_func;
5882}
5883
5884static int
5885Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5886{
5887 PyObject *tmp;
5888
5889 if (value == NULL) {
5890 PyErr_SetString(PyExc_TypeError,
5891 "attribute deletion is not supported");
5892 return -1;
5893 }
5894 if (!PyCallable_Check(value)) {
5895 PyErr_SetString(PyExc_TypeError,
5896 "persistent_load must be a callable taking "
5897 "one argument");
5898 return -1;
5899 }
5900
5901 tmp = self->pers_func;
5902 Py_INCREF(value);
5903 self->pers_func = value;
5904 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5905
5906 return 0;
5907}
5908
/* Computed attributes of Unpickler: "memo" and "persistent_load" are both
   readable and writable through the getter/setter pairs below. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
     (setter)Unpickler_set_persload},
    {NULL}  /* sentinel */
};
5915
/* Type object for _pickle.Unpickler: a subclassable, GC-aware type whose
   state is set up in tp_init (Unpickler_init) rather than in tp_new. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5958
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005959PyDoc_STRVAR(pickle_dump_doc,
5960"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5961"\n"
5962"Write a pickled representation of obj to the open file object file. This\n"
5963"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5964"efficient.\n"
5965"\n"
5966"The optional protocol argument tells the pickler to use the given protocol;\n"
5967"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5968"backward-incompatible protocol designed for Python 3.0.\n"
5969"\n"
5970"Specifying a negative protocol version selects the highest protocol version\n"
5971"supported. The higher the protocol used, the more recent the version of\n"
5972"Python needed to read the pickle produced.\n"
5973"\n"
5974"The file argument must have a write() method that accepts a single bytes\n"
5975"argument. It can thus be a file object opened for binary writing, a\n"
5976"io.BytesIO instance, or any other custom object that meets this interface.\n"
5977"\n"
5978"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5979"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5980"so that the pickle data stream is readable with Python 2.x.\n");
5981
5982static PyObject *
5983pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5984{
5985 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5986 PyObject *obj;
5987 PyObject *file;
5988 PyObject *proto = NULL;
5989 PyObject *fix_imports = Py_True;
5990 PicklerObject *pickler;
5991
5992 /* fix_imports is a keyword-only argument. */
5993 if (Py_SIZE(args) > 3) {
5994 PyErr_Format(PyExc_TypeError,
5995 "pickle.dump() takes at most 3 positional "
5996 "argument (%zd given)", Py_SIZE(args));
5997 return NULL;
5998 }
5999
6000 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6001 &obj, &file, &proto, &fix_imports))
6002 return NULL;
6003
6004 pickler = _Pickler_New();
6005 if (pickler == NULL)
6006 return NULL;
6007
6008 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6009 goto error;
6010
6011 if (_Pickler_SetOutputStream(pickler, file) < 0)
6012 goto error;
6013
6014 if (dump(pickler, obj) < 0)
6015 goto error;
6016
6017 if (_Pickler_FlushToFile(pickler) < 0)
6018 goto error;
6019
6020 Py_DECREF(pickler);
6021 Py_RETURN_NONE;
6022
6023 error:
6024 Py_XDECREF(pickler);
6025 return NULL;
6026}
6027
/* Docstring of the module-level dumps() function. */
PyDoc_STRVAR(pickle_dumps_doc,
"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
"\n"
"Return the pickled representation of the object as a bytes\n"
"object, instead of writing it to a file.\n"
"\n"
"The optional protocol argument tells the pickler to use the given protocol;\n"
"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
"backward-incompatible protocol designed for Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest protocol version\n"
"supported. The higher the protocol used, the more recent the version of\n"
"Python needed to read the pickle produced.\n"
"\n"
"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");
6045
6046static PyObject *
6047pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6048{
6049 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6050 PyObject *obj;
6051 PyObject *proto = NULL;
6052 PyObject *result;
6053 PyObject *fix_imports = Py_True;
6054 PicklerObject *pickler;
6055
6056 /* fix_imports is a keyword-only argument. */
6057 if (Py_SIZE(args) > 2) {
6058 PyErr_Format(PyExc_TypeError,
6059 "pickle.dumps() takes at most 2 positional "
6060 "argument (%zd given)", Py_SIZE(args));
6061 return NULL;
6062 }
6063
6064 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6065 &obj, &proto, &fix_imports))
6066 return NULL;
6067
6068 pickler = _Pickler_New();
6069 if (pickler == NULL)
6070 return NULL;
6071
6072 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6073 goto error;
6074
6075 if (dump(pickler, obj) < 0)
6076 goto error;
6077
6078 result = _Pickler_GetString(pickler);
6079 Py_DECREF(pickler);
6080 return result;
6081
6082 error:
6083 Py_XDECREF(pickler);
6084 return NULL;
6085}
6086
6087PyDoc_STRVAR(pickle_load_doc,
6088"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6089"\n"
6090"Read a pickled object representation from the open file object file and\n"
6091"return the reconstituted object hierarchy specified therein. This is\n"
6092"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6093"\n"
6094"The protocol version of the pickle is detected automatically, so no protocol\n"
6095"argument is needed. Bytes past the pickled object's representation are\n"
6096"ignored.\n"
6097"\n"
6098"The argument file must have two methods, a read() method that takes an\n"
6099"integer argument, and a readline() method that requires no arguments. Both\n"
6100"methods should return bytes. Thus *file* can be a binary file object opened\n"
6101"for reading, a BytesIO object, or any other custom object that meets this\n"
6102"interface.\n"
6103"\n"
6104"Optional keyword arguments are fix_imports, encoding and errors,\n"
6105"which are used to control compatiblity support for pickle stream generated\n"
6106"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6107"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6108"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6109"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6110
6111static PyObject *
6112pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6113{
6114 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6115 PyObject *file;
6116 PyObject *fix_imports = Py_True;
6117 PyObject *result;
6118 char *encoding = NULL;
6119 char *errors = NULL;
6120 UnpicklerObject *unpickler;
6121
6122 /* fix_imports, encoding and errors are a keyword-only argument. */
6123 if (Py_SIZE(args) != 1) {
6124 PyErr_Format(PyExc_TypeError,
6125 "pickle.load() takes exactly one positional "
6126 "argument (%zd given)", Py_SIZE(args));
6127 return NULL;
6128 }
6129
6130 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6131 &file, &fix_imports, &encoding, &errors))
6132 return NULL;
6133
6134 unpickler = _Unpickler_New();
6135 if (unpickler == NULL)
6136 return NULL;
6137
6138 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6139 goto error;
6140
6141 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6142 goto error;
6143
6144 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6145 if (unpickler->fix_imports == -1)
6146 goto error;
6147
6148 result = load(unpickler);
6149 Py_DECREF(unpickler);
6150 return result;
6151
6152 error:
6153 Py_XDECREF(unpickler);
6154 return NULL;
6155}
6156
6157PyDoc_STRVAR(pickle_loads_doc,
6158"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6159"\n"
6160"Read a pickled object hierarchy from a bytes object and return the\n"
6161"reconstituted object hierarchy specified therein\n"
6162"\n"
6163"The protocol version of the pickle is detected automatically, so no protocol\n"
6164"argument is needed. Bytes past the pickled object's representation are\n"
6165"ignored.\n"
6166"\n"
6167"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6168"are used to control compatiblity support for pickle stream generated\n"
6169"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6170"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6171"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6172"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6173
6174static PyObject *
6175pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6176{
6177 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6178 PyObject *input;
6179 PyObject *fix_imports = Py_True;
6180 PyObject *result;
6181 char *encoding = NULL;
6182 char *errors = NULL;
6183 UnpicklerObject *unpickler;
6184
6185 /* fix_imports, encoding and errors are a keyword-only argument. */
6186 if (Py_SIZE(args) != 1) {
6187 PyErr_Format(PyExc_TypeError,
6188 "pickle.loads() takes exactly one positional "
6189 "argument (%zd given)", Py_SIZE(args));
6190 return NULL;
6191 }
6192
6193 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6194 &input, &fix_imports, &encoding, &errors))
6195 return NULL;
6196
6197 unpickler = _Unpickler_New();
6198 if (unpickler == NULL)
6199 return NULL;
6200
6201 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6202 goto error;
6203
6204 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6205 goto error;
6206
6207 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6208 if (unpickler->fix_imports == -1)
6209 goto error;
6210
6211 result = load(unpickler);
6212 Py_DECREF(unpickler);
6213 return result;
6214
6215 error:
6216 Py_XDECREF(unpickler);
6217 return NULL;
6218}
6219
6220
/* Module-level functions exported by _pickle.  All four accept both
   positional and keyword arguments. */
static struct PyMethodDef pickle_methods[] = {
    {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
     pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
     pickle_dumps_doc},
    {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
     pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
     pickle_loads_doc},
    {NULL, NULL}  /* sentinel */
};
6232
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006233static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006234initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006235{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006236 PyObject *copyreg = NULL;
6237 PyObject *compat_pickle = NULL;
6238
6239 /* XXX: We should ensure that the types of the dictionaries imported are
6240 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6241 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006242
6243 copyreg = PyImport_ImportModule("copyreg");
6244 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006245 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006246 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6247 if (!dispatch_table)
6248 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006249 extension_registry = \
6250 PyObject_GetAttrString(copyreg, "_extension_registry");
6251 if (!extension_registry)
6252 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006253 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6254 if (!inverted_registry)
6255 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006256 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6257 if (!extension_cache)
6258 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006259 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006260
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006261 /* Load the 2.x -> 3.x stdlib module mapping tables */
6262 compat_pickle = PyImport_ImportModule("_compat_pickle");
6263 if (!compat_pickle)
6264 goto error;
6265 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6266 if (!name_mapping_2to3)
6267 goto error;
6268 if (!PyDict_CheckExact(name_mapping_2to3)) {
6269 PyErr_Format(PyExc_RuntimeError,
6270 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6271 Py_TYPE(name_mapping_2to3)->tp_name);
6272 goto error;
6273 }
6274 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6275 "IMPORT_MAPPING");
6276 if (!import_mapping_2to3)
6277 goto error;
6278 if (!PyDict_CheckExact(import_mapping_2to3)) {
6279 PyErr_Format(PyExc_RuntimeError,
6280 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6281 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6282 goto error;
6283 }
6284 /* ... and the 3.x -> 2.x mapping tables */
6285 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6286 "REVERSE_NAME_MAPPING");
6287 if (!name_mapping_3to2)
6288 goto error;
6289 if (!PyDict_CheckExact(name_mapping_3to2)) {
6290 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006291 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006292 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6293 goto error;
6294 }
6295 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6296 "REVERSE_IMPORT_MAPPING");
6297 if (!import_mapping_3to2)
6298 goto error;
6299 if (!PyDict_CheckExact(import_mapping_3to2)) {
6300 PyErr_Format(PyExc_RuntimeError,
6301 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6302 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6303 goto error;
6304 }
6305 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006306
6307 empty_tuple = PyTuple_New(0);
6308 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006309 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006310 two_tuple = PyTuple_New(2);
6311 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006312 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006313 /* We use this temp container with no regard to refcounts, or to
6314 * keeping containees alive. Exempt from GC, because we don't
6315 * want anything looking at two_tuple() by magic.
6316 */
6317 PyObject_GC_UnTrack(two_tuple);
6318
6319 return 0;
6320
6321 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006322 Py_CLEAR(copyreg);
6323 Py_CLEAR(dispatch_table);
6324 Py_CLEAR(extension_registry);
6325 Py_CLEAR(inverted_registry);
6326 Py_CLEAR(extension_cache);
6327 Py_CLEAR(compat_pickle);
6328 Py_CLEAR(name_mapping_2to3);
6329 Py_CLEAR(import_mapping_2to3);
6330 Py_CLEAR(name_mapping_3to2);
6331 Py_CLEAR(import_mapping_3to2);
6332 Py_CLEAR(empty_tuple);
6333 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006334 return -1;
6335}
6336
6337static struct PyModuleDef _picklemodule = {
6338 PyModuleDef_HEAD_INIT,
6339 "_pickle",
6340 pickle_module_doc,
6341 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006342 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006343 NULL,
6344 NULL,
6345 NULL,
6346 NULL
6347};
6348
6349PyMODINIT_FUNC
6350PyInit__pickle(void)
6351{
6352 PyObject *m;
6353
6354 if (PyType_Ready(&Unpickler_Type) < 0)
6355 return NULL;
6356 if (PyType_Ready(&Pickler_Type) < 0)
6357 return NULL;
6358 if (PyType_Ready(&Pdata_Type) < 0)
6359 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006360 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6361 return NULL;
6362 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6363 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006364
6365 /* Create the module and add the functions. */
6366 m = PyModule_Create(&_picklemodule);
6367 if (m == NULL)
6368 return NULL;
6369
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006370 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006371 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6372 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006373 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006374 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6375 return NULL;
6376
6377 /* Initialize the exceptions. */
6378 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6379 if (PickleError == NULL)
6380 return NULL;
6381 PicklingError = \
6382 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6383 if (PicklingError == NULL)
6384 return NULL;
6385 UnpicklingError = \
6386 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6387 if (UnpicklingError == NULL)
6388 return NULL;
6389
6390 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6391 return NULL;
6392 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6393 return NULL;
6394 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6395 return NULL;
6396
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006397 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006398 return NULL;
6399
6400 return m;
6401}