blob: f79fad3c391de81c6e1b86977f641d94120c790f [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers used by this module.  Bump these when new
   opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes.  Each opcode is a single character.  This table must be
   kept in sync with pickle.py; extensive documentation for every opcode
   lives in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* TRUE and FALSE are not opcodes.  They are the byte sequences used to
 * pickle bools before protocol 2: unpicklers written before bools were
 * introduced read them back as ints, while later unpicklers can recognize
 * that a bool was intended.  Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
86
/* Tunable sizes and limits used by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before emitting
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two diverge, but it is unclear that diverging
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the Pickler's write buffer when pickling to a
       stream.  Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): on failure the reference to O is not released here and the
   caller gets no chance to release it, since the macro returns -- confirm
   whether callers rely on this before changing it. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference added here is dropped again, so nothing is
   leaked, and the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100322 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000323 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000324
325 PyObject *write; /* write() method of the output stream. */
326 PyObject *output_buffer; /* Write into a local bytearray buffer before
327 flushing to the stream. */
328 Py_ssize_t output_len; /* Length of output_buffer. */
329 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000330 int proto; /* Pickle protocol number, >= 0 */
331 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200332 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000333 int fast; /* Enable fast mode if set to a true value.
334 The fast mode disable the usage of memo,
335 therefore speeding the pickling process by
336 not generating superfluous PUT opcodes. It
337 should not be used if with self-referential
338 objects. */
339 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000340 int fix_imports; /* Indicate whether Pickler should fix
341 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000342 PyObject *fast_memo;
343} PicklerObject;
344
345typedef struct UnpicklerObject {
346 PyObject_HEAD
347 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000348
349 /* The unpickler memo is just an array of PyObject *s. Using a dict
350 is unnecessary, since the keys are contiguous ints. */
351 PyObject **memo;
352 Py_ssize_t memo_size;
353
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000354 PyObject *arg;
355 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000356
357 Py_buffer buffer;
358 char *input_buffer;
359 char *input_line;
360 Py_ssize_t input_len;
361 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000362 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000363 PyObject *read; /* read() method of the input stream. */
364 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000365 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000366
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000367 char *encoding; /* Name of the encoding to be used for
368 decoding strings pickled using Python
369 2.x. The default value is "ASCII" */
370 char *errors; /* Name of errors handling scheme to used when
371 decoding strings. The default value is
372 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500373 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000374 objects. */
375 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
376 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000377 int proto; /* Protocol of the pickle loaded. */
378 int fix_imports; /* Indicate whether Unpickler should fix
379 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000380} UnpicklerObject;
381
382/* Forward declarations */
383static int save(PicklerObject *, PyObject *, int);
384static int save_reduce(PicklerObject *, PyObject *, PyObject *);
385static PyTypeObject Pickler_Type;
386static PyTypeObject Unpickler_Type;
387
388
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values.  The
 pickler uses it for memoization.  Using a custom hashtable rather than
 PyDict lets us skip a bunch of unnecessary object creation, which makes a
 huge performance difference. */

/* Number of slots in a freshly created memo table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on every probe. */
#define PERTURB_SHIFT 5
397
398
399static PyMemoTable *
400PyMemoTable_New(void)
401{
402 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
403 if (memo == NULL) {
404 PyErr_NoMemory();
405 return NULL;
406 }
407
408 memo->mt_used = 0;
409 memo->mt_allocated = MT_MINSIZE;
410 memo->mt_mask = MT_MINSIZE - 1;
411 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
412 if (memo->mt_table == NULL) {
413 PyMem_FREE(memo);
414 PyErr_NoMemory();
415 return NULL;
416 }
417 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
418
419 return memo;
420}
421
422static PyMemoTable *
423PyMemoTable_Copy(PyMemoTable *self)
424{
425 Py_ssize_t i;
426 PyMemoTable *new = PyMemoTable_New();
427 if (new == NULL)
428 return NULL;
429
430 new->mt_used = self->mt_used;
431 new->mt_allocated = self->mt_allocated;
432 new->mt_mask = self->mt_mask;
433 /* The table we get from _New() is probably smaller than we wanted.
434 Free it and allocate one that's the right size. */
435 PyMem_FREE(new->mt_table);
436 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
437 if (new->mt_table == NULL) {
438 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200439 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000440 return NULL;
441 }
442 for (i = 0; i < self->mt_allocated; i++) {
443 Py_XINCREF(self->mt_table[i].me_key);
444 }
445 memcpy(new->mt_table, self->mt_table,
446 sizeof(PyMemoEntry) * self->mt_allocated);
447
448 return new;
449}
450
451static Py_ssize_t
452PyMemoTable_Size(PyMemoTable *self)
453{
454 return self->mt_used;
455}
456
457static int
458PyMemoTable_Clear(PyMemoTable *self)
459{
460 Py_ssize_t i = self->mt_allocated;
461
462 while (--i >= 0) {
463 Py_XDECREF(self->mt_table[i].me_key);
464 }
465 self->mt_used = 0;
466 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
467 return 0;
468}
469
470static void
471PyMemoTable_Del(PyMemoTable *self)
472{
473 if (self == NULL)
474 return;
475 PyMemoTable_Clear(self);
476
477 PyMem_FREE(self->mt_table);
478 PyMem_FREE(self);
479}
480
481/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
482 can be considerably simpler than dictobject.c's lookdict(). */
483static PyMemoEntry *
484_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
485{
486 size_t i;
487 size_t perturb;
488 size_t mask = (size_t)self->mt_mask;
489 PyMemoEntry *table = self->mt_table;
490 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000491 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000492
493 i = hash & mask;
494 entry = &table[i];
495 if (entry->me_key == NULL || entry->me_key == key)
496 return entry;
497
498 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
499 i = (i << 2) + i + perturb + 1;
500 entry = &table[i & mask];
501 if (entry->me_key == NULL || entry->me_key == key)
502 return entry;
503 }
504 assert(0); /* Never reached */
505 return NULL;
506}
507
508/* Returns -1 on failure, 0 on success. */
509static int
510_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
511{
512 PyMemoEntry *oldtable = NULL;
513 PyMemoEntry *oldentry, *newentry;
514 Py_ssize_t new_size = MT_MINSIZE;
515 Py_ssize_t to_process;
516
517 assert(min_size > 0);
518
519 /* Find the smallest valid table size >= min_size. */
520 while (new_size < min_size && new_size > 0)
521 new_size <<= 1;
522 if (new_size <= 0) {
523 PyErr_NoMemory();
524 return -1;
525 }
526 /* new_size needs to be a power of two. */
527 assert((new_size & (new_size - 1)) == 0);
528
529 /* Allocate new table. */
530 oldtable = self->mt_table;
531 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
532 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200533 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000534 PyErr_NoMemory();
535 return -1;
536 }
537 self->mt_allocated = new_size;
538 self->mt_mask = new_size - 1;
539 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
540
541 /* Copy entries from the old table. */
542 to_process = self->mt_used;
543 for (oldentry = oldtable; to_process > 0; oldentry++) {
544 if (oldentry->me_key != NULL) {
545 to_process--;
546 /* newentry is a pointer to a chunk of the new
547 mt_table, so we're setting the key:value pair
548 in-place. */
549 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
550 newentry->me_key = oldentry->me_key;
551 newentry->me_value = oldentry->me_value;
552 }
553 }
554
555 /* Deallocate the old table. */
556 PyMem_FREE(oldtable);
557 return 0;
558}
559
560/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200561static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000562PyMemoTable_Get(PyMemoTable *self, PyObject *key)
563{
564 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
565 if (entry->me_key == NULL)
566 return NULL;
567 return &entry->me_value;
568}
569
570/* Returns -1 on failure, 0 on success. */
571static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200572PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000573{
574 PyMemoEntry *entry;
575
576 assert(key != NULL);
577
578 entry = _PyMemoTable_Lookup(self, key);
579 if (entry->me_key != NULL) {
580 entry->me_value = value;
581 return 0;
582 }
583 Py_INCREF(key);
584 entry->me_key = key;
585 entry->me_value = value;
586 self->mt_used++;
587
588 /* If we added a key, we can safely resize. Otherwise just return!
589 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
590 *
591 * Quadrupling the size improves average table sparseness
592 * (reducing collisions) at the cost of some memory. It also halves
593 * the number of expensive resize operations in a growing memo table.
594 *
595 * Very large memo tables (over 50K items) use doubling instead.
596 * This may help applications with severe memory constraints.
597 */
598 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
599 return 0;
600 return _PyMemoTable_ResizeTable(self,
601 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
602}
603
604#undef MT_MINSIZE
605#undef PERTURB_SHIFT
606
607/*************************************************************************/
608
609/* Helpers for creating the argument tuple passed to functions. This has the
Victor Stinner121aab42011-09-29 23:40:53 +0200610 performance advantage of calling PyTuple_New() only once.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000611
612 XXX(avassalotti): Inline directly in _Pickler_FastCall() and
613 _Unpickler_FastCall(). */
/* Store `obj` in slot 0 of the cached one-element tuple (self)->arg,
   creating the tuple first if needed.  The reference to `obj` is consumed
   either way: it is stored in the tuple, or released if the tuple could
   not be created (in which case (self)->arg stays NULL). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Drop the cached tuple if something else holds a reference to it, so
   that it is not reused while still referenced elsewhere. */
#define FREE_ARG_TUP(self) do {                                 \
        if (Py_REFCNT((self)->arg) > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
628
629/* A temporary cleaner API for fast single argument function call.
630
631 XXX: Does caching the argument tuple provides any real performance benefits?
632
633 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
634 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
635 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
636 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
637 (i.e, call PyTuple_New() and store the returned value in an array), to save
638 one second (wall clock time). Either ways, the loading time a pickle stream
639 large enough to generate this number of calls would be massively
640 overwhelmed by other factors, like I/O throughput, the GC traversal and
641 object allocation overhead. So, I really doubt these functions provide any
642 real benefits.
643
644 On the other hand, oprofile reports that pickle spends a lot of time in
645 these functions. But, that is probably more related to the function call
646 overhead, than the argument tuple allocation.
647
648 XXX: And, what is the reference behavior of these? Steal, borrow? At first
649 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000652_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000653{
654 PyObject *result = NULL;
655
656 ARG_TUP(self, arg);
657 if (self->arg) {
658 result = PyObject_Call(func, self->arg, NULL);
659 FREE_ARG_TUP(self);
660 }
661 return result;
662}
663
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000664static int
665_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000666{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000667 Py_CLEAR(self->output_buffer);
668 self->output_buffer =
669 PyBytes_FromStringAndSize(NULL, self->max_output_len);
670 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000671 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000672 self->output_len = 0;
673 return 0;
674}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000676static PyObject *
677_Pickler_GetString(PicklerObject *self)
678{
679 PyObject *output_buffer = self->output_buffer;
680
681 assert(self->output_buffer != NULL);
682 self->output_buffer = NULL;
683 /* Resize down to exact size */
684 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
685 return NULL;
686 return output_buffer;
687}
688
689static int
690_Pickler_FlushToFile(PicklerObject *self)
691{
692 PyObject *output, *result;
693
694 assert(self->write != NULL);
695
696 output = _Pickler_GetString(self);
697 if (output == NULL)
698 return -1;
699
700 result = _Pickler_FastCall(self, self->write, output);
701 Py_XDECREF(result);
702 return (result == NULL) ? -1 : 0;
703}
704
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200705static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000706_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
707{
708 Py_ssize_t i, required;
709 char *buffer;
710
711 assert(s != NULL);
712
713 required = self->output_len + n;
714 if (required > self->max_output_len) {
715 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
716 /* XXX This reallocates a new buffer every time, which is a bit
717 wasteful. */
718 if (_Pickler_FlushToFile(self) < 0)
719 return -1;
720 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000721 return -1;
722 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000723 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
724 /* we already flushed above, so the buffer is empty */
725 PyObject *result;
726 /* XXX we could spare an intermediate copy and pass
727 a memoryview instead */
728 PyObject *output = PyBytes_FromStringAndSize(s, n);
729 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000730 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000731 result = _Pickler_FastCall(self, self->write, output);
732 Py_XDECREF(result);
733 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000734 }
735 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000736 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
737 PyErr_NoMemory();
738 return -1;
739 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200740 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000741 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
742 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000743 }
744 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000745 buffer = PyBytes_AS_STRING(self->output_buffer);
746 if (n < 8) {
747 /* This is faster than memcpy when the string is short. */
748 for (i = 0; i < n; i++) {
749 buffer[self->output_len + i] = s[i];
750 }
751 }
752 else {
753 memcpy(buffer + self->output_len, s, n);
754 }
755 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000756 return n;
757}
758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000759static PicklerObject *
760_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000763
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000764 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
765 if (self == NULL)
766 return NULL;
767
768 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100769 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000770 self->arg = NULL;
771 self->write = NULL;
772 self->proto = 0;
773 self->bin = 0;
774 self->fast = 0;
775 self->fast_nesting = 0;
776 self->fix_imports = 0;
777 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000778 self->max_output_len = WRITE_BUF_SIZE;
779 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200780
781 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000782 self->output_buffer = PyBytes_FromStringAndSize(NULL,
783 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200784
785 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200786 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200829 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000830 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200831 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 if (self->write == NULL) {
833 if (PyErr_ExceptionMatches(PyExc_AttributeError))
834 PyErr_SetString(PyExc_TypeError,
835 "file must have a 'write' attribute");
836 return -1;
837 }
838
839 return 0;
840}
841
842/* See documentation for _Pickler_FastCall(). */
843static PyObject *
844_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
845{
846 PyObject *result = NULL;
847
848 ARG_TUP(self, arg);
849 if (self->arg) {
850 result = PyObject_Call(func, self->arg, NULL);
851 FREE_ARG_TUP(self);
852 }
853 return result;
854}
855
856/* Returns the size of the input on success, -1 on failure. This takes its
857 own reference to `input`. */
858static Py_ssize_t
859_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
860{
861 if (self->buffer.buf != NULL)
862 PyBuffer_Release(&self->buffer);
863 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
864 return -1;
865 self->input_buffer = self->buffer.buf;
866 self->input_len = self->buffer.len;
867 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000868 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869 return self->input_len;
870}
871
Antoine Pitrou04248a82010-10-12 20:51:21 +0000872static int
873_Unpickler_SkipConsumed(UnpicklerObject *self)
874{
875 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
876
877 if (consumed > 0) {
878 PyObject *r;
879 assert(self->peek); /* otherwise we did something wrong */
880 /* This makes an useless copy... */
881 r = PyObject_CallFunction(self->read, "n", consumed);
882 if (r == NULL)
883 return -1;
884 Py_DECREF(r);
885 self->prefetched_idx = self->next_read_idx;
886 }
887 return 0;
888}
889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890static const Py_ssize_t READ_WHOLE_LINE = -1;
891
892/* If reading from a file, we need to only pull the bytes we need, since there
893 may be multiple pickle objects arranged contiguously in the same input
894 buffer.
895
896 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
897 bytes from the input stream/buffer.
898
899 Update the unpickler's input buffer with the newly-read data. Returns -1 on
900 failure; on success, returns the number of bytes read from the file.
901
902 On success, self->input_len will be 0; this is intentional so that when
903 unpickling from a file, the "we've run out of data" code paths will trigger,
904 causing the Unpickler to go back to the file for more data. Use the returned
905 size to tell you how much data you can process. */
906static Py_ssize_t
907_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
908{
909 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000910 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000911
912 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200913
Antoine Pitrou04248a82010-10-12 20:51:21 +0000914 if (_Unpickler_SkipConsumed(self) < 0)
915 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000916
917 if (n == READ_WHOLE_LINE)
918 data = PyObject_Call(self->readline, empty_tuple, NULL);
919 else {
920 PyObject *len = PyLong_FromSsize_t(n);
921 if (len == NULL)
922 return -1;
923 data = _Unpickler_FastCall(self, self->read, len);
924 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000925 if (data == NULL)
926 return -1;
927
Antoine Pitrou04248a82010-10-12 20:51:21 +0000928 /* Prefetch some data without advancing the file pointer, if possible */
929 if (self->peek) {
930 PyObject *len, *prefetched;
931 len = PyLong_FromSsize_t(PREFETCH);
932 if (len == NULL) {
933 Py_DECREF(data);
934 return -1;
935 }
936 prefetched = _Unpickler_FastCall(self, self->peek, len);
937 if (prefetched == NULL) {
938 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
939 /* peek() is probably not supported by the given file object */
940 PyErr_Clear();
941 Py_CLEAR(self->peek);
942 }
943 else {
944 Py_DECREF(data);
945 return -1;
946 }
947 }
948 else {
949 assert(PyBytes_Check(prefetched));
950 prefetched_size = PyBytes_GET_SIZE(prefetched);
951 PyBytes_ConcatAndDel(&data, prefetched);
952 if (data == NULL)
953 return -1;
954 }
955 }
956
957 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000958 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000959 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000960 return read_size;
961}
962
963/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
964
965 This should be used for all data reads, rather than accessing the unpickler's
966 input buffer directly. This method deals correctly with reading from input
967 streams, which the input buffer doesn't deal with.
968
969 Note that when reading from a file-like object, self->next_read_idx won't
970 be updated (it should remain at 0 for the entire unpickling process). You
971 should use this function's return value to know how many bytes you can
972 consume.
973
974 Returns -1 (with an exception set) on failure. On success, return the
975 number of chars read. */
976static Py_ssize_t
977_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
978{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000979 Py_ssize_t num_read;
980
Antoine Pitrou04248a82010-10-12 20:51:21 +0000981 if (self->next_read_idx + n <= self->input_len) {
982 *s = self->input_buffer + self->next_read_idx;
983 self->next_read_idx += n;
984 return n;
985 }
986 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000987 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000988 return -1;
989 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000990 num_read = _Unpickler_ReadFromFile(self, n);
991 if (num_read < 0)
992 return -1;
993 if (num_read < n) {
994 PyErr_Format(PyExc_EOFError, "Ran out of input");
995 return -1;
996 }
997 *s = self->input_buffer;
998 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000999 return n;
1000}
1001
1002static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001003_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1004 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001005{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001007 if (input_line == NULL) {
1008 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001009 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001010 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001011
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001012 memcpy(input_line, line, len);
1013 input_line[len] = '\0';
1014 self->input_line = input_line;
1015 *result = self->input_line;
1016 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001017}
1018
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001019/* Read a line from the input stream/buffer. If we run off the end of the input
1020 before hitting \n, return the data we found.
1021
1022 Returns the number of chars read, or -1 on failure. */
1023static Py_ssize_t
1024_Unpickler_Readline(UnpicklerObject *self, char **result)
1025{
1026 Py_ssize_t i, num_read;
1027
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001028 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001029 if (self->input_buffer[i] == '\n') {
1030 char *line_start = self->input_buffer + self->next_read_idx;
1031 num_read = i - self->next_read_idx + 1;
1032 self->next_read_idx = i + 1;
1033 return _Unpickler_CopyLine(self, line_start, num_read, result);
1034 }
1035 }
1036 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001037 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1038 if (num_read < 0)
1039 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001040 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001041 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001042 }
Victor Stinner121aab42011-09-29 23:40:53 +02001043
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001044 /* If we get here, we've run off the end of the input string. Return the
1045 remaining string and let the caller figure it out. */
1046 *result = self->input_buffer + self->next_read_idx;
1047 num_read = i - self->next_read_idx;
1048 self->next_read_idx = i;
1049 return num_read;
1050}
1051
1052/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1053 will be modified in place. */
1054static int
1055_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1056{
1057 Py_ssize_t i;
1058 PyObject **memo;
1059
1060 assert(new_size > self->memo_size);
1061
1062 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1063 if (memo == NULL) {
1064 PyErr_NoMemory();
1065 return -1;
1066 }
1067 self->memo = memo;
1068 for (i = self->memo_size; i < new_size; i++)
1069 self->memo[i] = NULL;
1070 self->memo_size = new_size;
1071 return 0;
1072}
1073
1074/* Returns NULL if idx is out of bounds. */
1075static PyObject *
1076_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1077{
1078 if (idx < 0 || idx >= self->memo_size)
1079 return NULL;
1080
1081 return self->memo[idx];
1082}
1083
1084/* Returns -1 (with an exception set) on failure, 0 on success.
1085 This takes its own reference to `value`. */
1086static int
1087_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1088{
1089 PyObject *old_item;
1090
1091 if (idx >= self->memo_size) {
1092 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1093 return -1;
1094 assert(idx < self->memo_size);
1095 }
1096 Py_INCREF(value);
1097 old_item = self->memo[idx];
1098 self->memo[idx] = value;
1099 Py_XDECREF(old_item);
1100 return 0;
1101}
1102
1103static PyObject **
1104_Unpickler_NewMemo(Py_ssize_t new_size)
1105{
1106 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001107 if (memo == NULL) {
1108 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001109 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001110 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001111 memset(memo, 0, new_size * sizeof(PyObject *));
1112 return memo;
1113}
1114
1115/* Free the unpickler's memo, taking care to decref any items left in it. */
1116static void
1117_Unpickler_MemoCleanup(UnpicklerObject *self)
1118{
1119 Py_ssize_t i;
1120 PyObject **memo = self->memo;
1121
1122 if (self->memo == NULL)
1123 return;
1124 self->memo = NULL;
1125 i = self->memo_size;
1126 while (--i >= 0) {
1127 Py_XDECREF(memo[i]);
1128 }
1129 PyMem_FREE(memo);
1130}
1131
1132static UnpicklerObject *
1133_Unpickler_New(void)
1134{
1135 UnpicklerObject *self;
1136
1137 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1138 if (self == NULL)
1139 return NULL;
1140
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001141 self->arg = NULL;
1142 self->pers_func = NULL;
1143 self->input_buffer = NULL;
1144 self->input_line = NULL;
1145 self->input_len = 0;
1146 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001147 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001148 self->read = NULL;
1149 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001150 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001151 self->encoding = NULL;
1152 self->errors = NULL;
1153 self->marks = NULL;
1154 self->num_marks = 0;
1155 self->marks_size = 0;
1156 self->proto = 0;
1157 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001158 memset(&self->buffer, 0, sizeof(Py_buffer));
1159 self->memo_size = 32;
1160 self->memo = _Unpickler_NewMemo(self->memo_size);
1161 self->stack = (Pdata *)Pdata_New();
1162
1163 if (self->memo == NULL || self->stack == NULL) {
1164 Py_DECREF(self);
1165 return NULL;
1166 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001176 _Py_IDENTIFIER(peek);
1177 _Py_IDENTIFIER(read);
1178 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001179
1180 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001181 if (self->peek == NULL) {
1182 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1183 PyErr_Clear();
1184 else
1185 return -1;
1186 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001187 self->read = _PyObject_GetAttrId(file, &PyId_read);
1188 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001189 if (self->readline == NULL || self->read == NULL) {
1190 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1191 PyErr_SetString(PyExc_TypeError,
1192 "file must have 'read' and 'readline' attributes");
1193 Py_CLEAR(self->read);
1194 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001195 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001196 return -1;
1197 }
1198 return 0;
1199}
1200
1201/* Returns -1 (with an exception set) on failure, 0 on success. This may
1202 be called once on a freshly created Pickler. */
1203static int
1204_Unpickler_SetInputEncoding(UnpicklerObject *self,
1205 const char *encoding,
1206 const char *errors)
1207{
1208 if (encoding == NULL)
1209 encoding = "ASCII";
1210 if (errors == NULL)
1211 errors = "strict";
1212
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001213 self->encoding = _PyMem_Strdup(encoding);
1214 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001215 if (self->encoding == NULL || self->errors == NULL) {
1216 PyErr_NoMemory();
1217 return -1;
1218 }
1219 return 0;
1220}
1221
1222/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223static int
1224memo_get(PicklerObject *self, PyObject *key)
1225{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001226 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001227 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001228 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001230 value = PyMemoTable_Get(self->memo, key);
1231 if (value == NULL) {
1232 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 return -1;
1234 }
1235
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001236 if (!self->bin) {
1237 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001238 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1239 "%" PY_FORMAT_SIZE_T "d\n", *value);
1240 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001241 }
1242 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001243 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001244 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001245 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001246 len = 2;
1247 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001248 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001249 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001250 pdata[1] = (unsigned char)(*value & 0xff);
1251 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1252 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1253 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001254 len = 5;
1255 }
1256 else { /* unlikely */
1257 PyErr_SetString(PicklingError,
1258 "memo id too large for LONG_BINGET");
1259 return -1;
1260 }
1261 }
1262
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001263 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001264 return -1;
1265
1266 return 0;
1267}
1268
1269/* Store an object in the memo, assign it a new unique ID based on the number
1270 of objects currently stored in the memo and generate a PUT opcode. */
1271static int
1272memo_put(PicklerObject *self, PyObject *obj)
1273{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001274 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001275 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001276 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001277 int status = 0;
1278
1279 if (self->fast)
1280 return 0;
1281
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001282 x = PyMemoTable_Size(self->memo);
1283 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001284 goto error;
1285
1286 if (!self->bin) {
1287 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001288 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1289 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001290 len = strlen(pdata);
1291 }
1292 else {
1293 if (x < 256) {
1294 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001295 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001296 len = 2;
1297 }
1298 else if (x <= 0xffffffffL) {
1299 pdata[0] = LONG_BINPUT;
1300 pdata[1] = (unsigned char)(x & 0xff);
1301 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1302 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1303 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1304 len = 5;
1305 }
1306 else { /* unlikely */
1307 PyErr_SetString(PicklingError,
1308 "memo id too large for LONG_BINPUT");
1309 return -1;
1310 }
1311 }
1312
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001313 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001314 goto error;
1315
1316 if (0) {
1317 error:
1318 status = -1;
1319 }
1320
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001321 return status;
1322}
1323
1324static PyObject *
1325whichmodule(PyObject *global, PyObject *global_name)
1326{
1327 Py_ssize_t i, j;
1328 static PyObject *module_str = NULL;
1329 static PyObject *main_str = NULL;
1330 PyObject *module_name;
1331 PyObject *modules_dict;
1332 PyObject *module;
1333 PyObject *obj;
1334
1335 if (module_str == NULL) {
1336 module_str = PyUnicode_InternFromString("__module__");
1337 if (module_str == NULL)
1338 return NULL;
1339 main_str = PyUnicode_InternFromString("__main__");
1340 if (main_str == NULL)
1341 return NULL;
1342 }
1343
1344 module_name = PyObject_GetAttr(global, module_str);
1345
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001346 /* In some rare cases (e.g., bound methods of extension types),
1347 __module__ can be None. If it is so, then search sys.modules
1348 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001349 if (module_name == Py_None) {
1350 Py_DECREF(module_name);
1351 goto search;
1352 }
1353
1354 if (module_name) {
1355 return module_name;
1356 }
1357 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1358 PyErr_Clear();
1359 else
1360 return NULL;
1361
1362 search:
1363 modules_dict = PySys_GetObject("modules");
Victor Stinner1e53bba2013-07-16 22:26:05 +02001364 if (modules_dict == NULL) {
1365 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001366 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001367 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001368
1369 i = 0;
1370 module_name = NULL;
1371 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001372 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001373 continue;
1374
1375 obj = PyObject_GetAttr(module, global_name);
1376 if (obj == NULL) {
1377 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1378 PyErr_Clear();
1379 else
1380 return NULL;
1381 continue;
1382 }
1383
1384 if (obj != global) {
1385 Py_DECREF(obj);
1386 continue;
1387 }
1388
1389 Py_DECREF(obj);
1390 break;
1391 }
1392
1393 /* If no module is found, use __main__. */
1394 if (!j) {
1395 module_name = main_str;
1396 }
1397
1398 Py_INCREF(module_name);
1399 return module_name;
1400}
1401
1402/* fast_save_enter() and fast_save_leave() are guards against recursive
1403 objects when Pickler is used with the "fast mode" (i.e., with object
1404 memoization disabled). If the nesting of a list or dict object exceed
1405 FAST_NESTING_LIMIT, these guards will start keeping an internal
1406 reference to the seen list or dict objects and check whether these objects
1407 are recursive. These are not strictly necessary, since save() has a
1408 hard-coded recursion limit, but they give a nicer error message than the
1409 typical RuntimeError. */
1410static int
1411fast_save_enter(PicklerObject *self, PyObject *obj)
1412{
1413 /* if fast_nesting < 0, we're doing an error exit. */
1414 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1415 PyObject *key = NULL;
1416 if (self->fast_memo == NULL) {
1417 self->fast_memo = PyDict_New();
1418 if (self->fast_memo == NULL) {
1419 self->fast_nesting = -1;
1420 return 0;
1421 }
1422 }
1423 key = PyLong_FromVoidPtr(obj);
1424 if (key == NULL)
1425 return 0;
1426 if (PyDict_GetItem(self->fast_memo, key)) {
1427 Py_DECREF(key);
1428 PyErr_Format(PyExc_ValueError,
1429 "fast mode: can't pickle cyclic objects "
1430 "including object type %.200s at %p",
1431 obj->ob_type->tp_name, obj);
1432 self->fast_nesting = -1;
1433 return 0;
1434 }
1435 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1436 Py_DECREF(key);
1437 self->fast_nesting = -1;
1438 return 0;
1439 }
1440 Py_DECREF(key);
1441 }
1442 return 1;
1443}
1444
1445static int
1446fast_save_leave(PicklerObject *self, PyObject *obj)
1447{
1448 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1449 PyObject *key = PyLong_FromVoidPtr(obj);
1450 if (key == NULL)
1451 return 0;
1452 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1453 Py_DECREF(key);
1454 return 0;
1455 }
1456 Py_DECREF(key);
1457 }
1458 return 1;
1459}
1460
1461static int
1462save_none(PicklerObject *self, PyObject *obj)
1463{
1464 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001465 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001466 return -1;
1467
1468 return 0;
1469}
1470
1471static int
1472save_bool(PicklerObject *self, PyObject *obj)
1473{
1474 static const char *buf[2] = { FALSE, TRUE };
1475 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1476 int p = (obj == Py_True);
1477
1478 if (self->proto >= 2) {
1479 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001480 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001481 return -1;
1482 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001483 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001484 return -1;
1485
1486 return 0;
1487}
1488
1489static int
1490save_int(PicklerObject *self, long x)
1491{
1492 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001493 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001494
1495 if (!self->bin
1496#if SIZEOF_LONG > 4
1497 || x > 0x7fffffffL || x < -0x80000000L
1498#endif
1499 ) {
1500 /* Text-mode pickle, or long too big to fit in the 4-byte
1501 * signed BININT format: store as a string.
1502 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001503 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1504 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001505 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001506 return -1;
1507 }
1508 else {
1509 /* Binary pickle and x fits in a signed 4-byte int. */
1510 pdata[1] = (unsigned char)(x & 0xff);
1511 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1512 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1513 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1514
1515 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1516 if (pdata[2] == 0) {
1517 pdata[0] = BININT1;
1518 len = 2;
1519 }
1520 else {
1521 pdata[0] = BININT2;
1522 len = 3;
1523 }
1524 }
1525 else {
1526 pdata[0] = BININT;
1527 len = 5;
1528 }
1529
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001530 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001531 return -1;
1532 }
1533
1534 return 0;
1535}
1536
1537static int
1538save_long(PicklerObject *self, PyObject *obj)
1539{
1540 PyObject *repr = NULL;
1541 Py_ssize_t size;
1542 long val = PyLong_AsLong(obj);
1543 int status = 0;
1544
1545 const char long_op = LONG;
1546
1547 if (val == -1 && PyErr_Occurred()) {
1548 /* out of range for int pickling */
1549 PyErr_Clear();
1550 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001551 else
1552#if SIZEOF_LONG > 4
1553 if (val <= 0x7fffffffL && val >= -0x80000000L)
1554#endif
1555 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001556
1557 if (self->proto >= 2) {
1558 /* Linear-time pickling. */
1559 size_t nbits;
1560 size_t nbytes;
1561 unsigned char *pdata;
1562 char header[5];
1563 int i;
1564 int sign = _PyLong_Sign(obj);
1565
1566 if (sign == 0) {
1567 header[0] = LONG1;
1568 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001569 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001570 goto error;
1571 return 0;
1572 }
1573 nbits = _PyLong_NumBits(obj);
1574 if (nbits == (size_t)-1 && PyErr_Occurred())
1575 goto error;
1576 /* How many bytes do we need? There are nbits >> 3 full
1577 * bytes of data, and nbits & 7 leftover bits. If there
1578 * are any leftover bits, then we clearly need another
1579 * byte. Wnat's not so obvious is that we *probably*
1580 * need another byte even if there aren't any leftovers:
1581 * the most-significant bit of the most-significant byte
1582 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001583 * opposite of the one we need. The exception is ints
1584 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001585 * its own 256's-complement, so has the right sign bit
1586 * even without the extra byte. That's a pain to check
1587 * for in advance, though, so we always grab an extra
1588 * byte at the start, and cut it back later if possible.
1589 */
1590 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001591 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001592 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001593 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001594 goto error;
1595 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001596 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001597 if (repr == NULL)
1598 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001599 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001600 i = _PyLong_AsByteArray((PyLongObject *)obj,
1601 pdata, nbytes,
1602 1 /* little endian */ , 1 /* signed */ );
1603 if (i < 0)
1604 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001605 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001606 * needed. This is so iff the MSB is all redundant sign
1607 * bits.
1608 */
1609 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001610 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001611 pdata[nbytes - 1] == 0xff &&
1612 (pdata[nbytes - 2] & 0x80) != 0) {
1613 nbytes--;
1614 }
1615
1616 if (nbytes < 256) {
1617 header[0] = LONG1;
1618 header[1] = (unsigned char)nbytes;
1619 size = 2;
1620 }
1621 else {
1622 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001623 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001624 for (i = 1; i < 5; i++) {
1625 header[i] = (unsigned char)(size & 0xff);
1626 size >>= 8;
1627 }
1628 size = 5;
1629 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001630 if (_Pickler_Write(self, header, size) < 0 ||
1631 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001632 goto error;
1633 }
1634 else {
1635 char *string;
1636
Mark Dickinson8dd05142009-01-20 20:43:58 +00001637 /* proto < 2: write the repr and newline. This is quadratic-time (in
1638 the number of digits), in both directions. We add a trailing 'L'
1639 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001640
1641 repr = PyObject_Repr(obj);
1642 if (repr == NULL)
1643 goto error;
1644
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001645 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001646 if (string == NULL)
1647 goto error;
1648
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001649 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1650 _Pickler_Write(self, string, size) < 0 ||
1651 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001652 goto error;
1653 }
1654
1655 if (0) {
1656 error:
1657 status = -1;
1658 }
1659 Py_XDECREF(repr);
1660
1661 return status;
1662}
1663
1664static int
1665save_float(PicklerObject *self, PyObject *obj)
1666{
1667 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1668
1669 if (self->bin) {
1670 char pdata[9];
1671 pdata[0] = BINFLOAT;
1672 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1673 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001674 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001675 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001676 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001677 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001678 int result = -1;
1679 char *buf = NULL;
1680 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001681
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001682 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001683 goto done;
1684
Mark Dickinson3e09f432009-04-17 08:41:23 +00001685 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 if (!buf) {
1687 PyErr_NoMemory();
1688 goto done;
1689 }
1690
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001691 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001692 goto done;
1693
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001694 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001695 goto done;
1696
1697 result = 0;
1698done:
1699 PyMem_Free(buf);
1700 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001701 }
1702
1703 return 0;
1704}
1705
1706static int
1707save_bytes(PicklerObject *self, PyObject *obj)
1708{
1709 if (self->proto < 3) {
1710 /* Older pickle protocols do not have an opcode for pickling bytes
1711 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001712 the __reduce__ method) to permit bytes object unpickling.
1713
1714 Here we use a hack to be compatible with Python 2. Since in Python
1715 2 'bytes' is just an alias for 'str' (which has different
1716 parameters than the actual bytes object), we use codecs.encode
1717 to create the appropriate 'str' object when unpickled using
1718 Python 2 *and* the appropriate 'bytes' object when unpickled
1719 using Python 3. Again this is a hack and we don't need to do this
1720 with newer protocols. */
1721 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001722 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001723 int status;
1724
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001725 if (codecs_encode == NULL) {
1726 PyObject *codecs_module = PyImport_ImportModule("codecs");
1727 if (codecs_module == NULL) {
1728 return -1;
1729 }
1730 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1731 Py_DECREF(codecs_module);
1732 if (codecs_encode == NULL) {
1733 return -1;
1734 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001735 }
1736
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001737 if (PyBytes_GET_SIZE(obj) == 0) {
1738 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1739 }
1740 else {
1741 static PyObject *latin1 = NULL;
1742 PyObject *unicode_str =
1743 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1744 PyBytes_GET_SIZE(obj),
1745 "strict");
1746 if (unicode_str == NULL)
1747 return -1;
1748 if (latin1 == NULL) {
1749 latin1 = PyUnicode_InternFromString("latin1");
Christian Heimes82e6b942013-06-29 21:37:34 +02001750 if (latin1 == NULL) {
1751 Py_DECREF(unicode_str);
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001752 return -1;
Christian Heimes82e6b942013-06-29 21:37:34 +02001753 }
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001754 }
1755 reduce_value = Py_BuildValue("(O(OO))",
1756 codecs_encode, unicode_str, latin1);
1757 Py_DECREF(unicode_str);
1758 }
1759
1760 if (reduce_value == NULL)
1761 return -1;
1762
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001763 /* save_reduce() will memoize the object automatically. */
1764 status = save_reduce(self, reduce_value, obj);
1765 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001766 return status;
1767 }
1768 else {
1769 Py_ssize_t size;
1770 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001771 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001772
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001773 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001774 if (size < 0)
1775 return -1;
1776
1777 if (size < 256) {
1778 header[0] = SHORT_BINBYTES;
1779 header[1] = (unsigned char)size;
1780 len = 2;
1781 }
1782 else if (size <= 0xffffffffL) {
1783 header[0] = BINBYTES;
1784 header[1] = (unsigned char)(size & 0xff);
1785 header[2] = (unsigned char)((size >> 8) & 0xff);
1786 header[3] = (unsigned char)((size >> 16) & 0xff);
1787 header[4] = (unsigned char)((size >> 24) & 0xff);
1788 len = 5;
1789 }
1790 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001791 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001792 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001793 return -1; /* string too large */
1794 }
1795
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001796 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001797 return -1;
1798
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001799 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001800 return -1;
1801
1802 if (memo_put(self, obj) < 0)
1803 return -1;
1804
1805 return 0;
1806 }
1807}
1808
1809/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1810 backslash and newline characters to \uXXXX escapes. */
1811static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001812raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001813{
1814 PyObject *repr, *result;
1815 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001816 Py_ssize_t i, size, expandsize;
1817 void *data;
1818 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001819
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001820 if (PyUnicode_READY(obj))
1821 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001822
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001823 size = PyUnicode_GET_LENGTH(obj);
1824 data = PyUnicode_DATA(obj);
1825 kind = PyUnicode_KIND(obj);
1826 if (kind == PyUnicode_4BYTE_KIND)
1827 expandsize = 10;
1828 else
1829 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001830
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001831 if (size > PY_SSIZE_T_MAX / expandsize)
1832 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001833 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001834 if (repr == NULL)
1835 return NULL;
1836 if (size == 0)
1837 goto done;
1838
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001839 p = PyByteArray_AS_STRING(repr);
1840 for (i=0; i < size; i++) {
1841 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001842 /* Map 32-bit characters to '\Uxxxxxxxx' */
1843 if (ch >= 0x10000) {
1844 *p++ = '\\';
1845 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001846 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1847 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1848 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1849 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1850 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1851 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1852 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1853 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001854 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001855 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001856 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001857 *p++ = '\\';
1858 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001859 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1860 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1861 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1862 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001863 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001864 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001865 else
1866 *p++ = (char) ch;
1867 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001868 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001869
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001870done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001871 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001872 Py_DECREF(repr);
1873 return result;
1874}
1875
1876static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02001877write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
1878{
1879 char pdata[5];
1880
1881#if SIZEOF_SIZE_T > 4
1882 if (size > 0xffffffffUL) {
1883 /* string too large */
1884 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02001885 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02001886 return -1;
1887 }
1888#endif
1889
1890 pdata[0] = BINUNICODE;
1891 pdata[1] = (unsigned char)(size & 0xff);
1892 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1893 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1894 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1895
1896 if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
1897 return -1;
1898
1899 if (_Pickler_Write(self, data, size) < 0)
1900 return -1;
1901
1902 return 0;
1903}
1904
1905static int
1906write_unicode_binary(PicklerObject *self, PyObject *obj)
1907{
1908 PyObject *encoded = NULL;
1909 Py_ssize_t size;
1910 char *data;
1911 int r;
1912
1913 if (PyUnicode_READY(obj))
1914 return -1;
1915
1916 data = PyUnicode_AsUTF8AndSize(obj, &size);
1917 if (data != NULL)
1918 return write_utf8(self, data, size);
1919
1920 /* Issue #8383: for strings with lone surrogates, fallback on the
1921 "surrogatepass" error handler. */
1922 PyErr_Clear();
1923 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
1924 if (encoded == NULL)
1925 return -1;
1926
1927 r = write_utf8(self, PyBytes_AS_STRING(encoded),
1928 PyBytes_GET_SIZE(encoded));
1929 Py_DECREF(encoded);
1930 return r;
1931}
1932
1933static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001934save_unicode(PicklerObject *self, PyObject *obj)
1935{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001936 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001937 if (write_unicode_binary(self, obj) < 0)
1938 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001939 }
1940 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001941 PyObject *encoded;
1942 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001943 const char unicode_op = UNICODE;
1944
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001945 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001946 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001947 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001948
Antoine Pitrou299978d2013-04-07 17:38:11 +02001949 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
1950 Py_DECREF(encoded);
1951 return -1;
1952 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001953
1954 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02001955 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
1956 Py_DECREF(encoded);
1957 return -1;
1958 }
1959 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001960
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001961 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001962 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001963 }
1964 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001965 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001966
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001967 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001968}
1969
1970/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1971static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001972store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001974 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001975
1976 assert(PyTuple_Size(t) == len);
1977
1978 for (i = 0; i < len; i++) {
1979 PyObject *element = PyTuple_GET_ITEM(t, i);
1980
1981 if (element == NULL)
1982 return -1;
1983 if (save(self, element, 0) < 0)
1984 return -1;
1985 }
1986
1987 return 0;
1988}
1989
1990/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1991 * used across protocols to minimize the space needed to pickle them.
1992 * Tuples are also the only builtin immutable type that can be recursive
1993 * (a tuple can be reached from itself), and that requires some subtle
1994 * magic so that it works in all cases. IOW, this is a long routine.
1995 */
1996static int
1997save_tuple(PicklerObject *self, PyObject *obj)
1998{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001999 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002000
2001 const char mark_op = MARK;
2002 const char tuple_op = TUPLE;
2003 const char pop_op = POP;
2004 const char pop_mark_op = POP_MARK;
2005 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2006
2007 if ((len = PyTuple_Size(obj)) < 0)
2008 return -1;
2009
2010 if (len == 0) {
2011 char pdata[2];
2012
2013 if (self->proto) {
2014 pdata[0] = EMPTY_TUPLE;
2015 len = 1;
2016 }
2017 else {
2018 pdata[0] = MARK;
2019 pdata[1] = TUPLE;
2020 len = 2;
2021 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002022 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002023 return -1;
2024 return 0;
2025 }
2026
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002027 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028 * saving the tuple elements, the tuple must be recursive, in
2029 * which case we'll pop everything we put on the stack, and fetch
2030 * its value from the memo.
2031 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 if (len <= 3 && self->proto >= 2) {
2033 /* Use TUPLE{1,2,3} opcodes. */
2034 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002035 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002036
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002037 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 /* pop the len elements */
2039 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002040 if (_Pickler_Write(self, &pop_op, 1) < 0)
2041 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002042 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002043 if (memo_get(self, obj) < 0)
2044 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002045
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002046 return 0;
2047 }
2048 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002049 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2050 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002051 }
2052 goto memoize;
2053 }
2054
2055 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2056 * Generate MARK e1 e2 ... TUPLE
2057 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002058 if (_Pickler_Write(self, &mark_op, 1) < 0)
2059 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002060
2061 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002062 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002063
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002064 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002065 /* pop the stack stuff we pushed */
2066 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002067 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2068 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002069 }
2070 else {
2071 /* Note that we pop one more than len, to remove
2072 * the MARK too.
2073 */
2074 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002075 if (_Pickler_Write(self, &pop_op, 1) < 0)
2076 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002077 }
2078 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002079 if (memo_get(self, obj) < 0)
2080 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002081
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002082 return 0;
2083 }
2084 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002085 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2086 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002087 }
2088
2089 memoize:
2090 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002091 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002092
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002093 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002094}
2095
2096/* iter is an iterator giving items, and we batch up chunks of
2097 * MARK item item ... item APPENDS
2098 * opcode sequences. Calling code should have arranged to first create an
2099 * empty list, or list-like object, for the APPENDS to operate on.
2100 * Returns 0 on success, <0 on error.
2101 */
2102static int
2103batch_list(PicklerObject *self, PyObject *iter)
2104{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002105 PyObject *obj = NULL;
2106 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002107 int i, n;
2108
2109 const char mark_op = MARK;
2110 const char append_op = APPEND;
2111 const char appends_op = APPENDS;
2112
2113 assert(iter != NULL);
2114
2115 /* XXX: I think this function could be made faster by avoiding the
2116 iterator interface and fetching objects directly from list using
2117 PyList_GET_ITEM.
2118 */
2119
2120 if (self->proto == 0) {
2121 /* APPENDS isn't available; do one at a time. */
2122 for (;;) {
2123 obj = PyIter_Next(iter);
2124 if (obj == NULL) {
2125 if (PyErr_Occurred())
2126 return -1;
2127 break;
2128 }
2129 i = save(self, obj, 0);
2130 Py_DECREF(obj);
2131 if (i < 0)
2132 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002133 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002134 return -1;
2135 }
2136 return 0;
2137 }
2138
2139 /* proto > 0: write in batches of BATCHSIZE. */
2140 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002141 /* Get first item */
2142 firstitem = PyIter_Next(iter);
2143 if (firstitem == NULL) {
2144 if (PyErr_Occurred())
2145 goto error;
2146
2147 /* nothing more to add */
2148 break;
2149 }
2150
2151 /* Try to get a second item */
2152 obj = PyIter_Next(iter);
2153 if (obj == NULL) {
2154 if (PyErr_Occurred())
2155 goto error;
2156
2157 /* Only one item to write */
2158 if (save(self, firstitem, 0) < 0)
2159 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002160 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002161 goto error;
2162 Py_CLEAR(firstitem);
2163 break;
2164 }
2165
2166 /* More than one item to write */
2167
2168 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002169 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002170 goto error;
2171
2172 if (save(self, firstitem, 0) < 0)
2173 goto error;
2174 Py_CLEAR(firstitem);
2175 n = 1;
2176
2177 /* Fetch and save up to BATCHSIZE items */
2178 while (obj) {
2179 if (save(self, obj, 0) < 0)
2180 goto error;
2181 Py_CLEAR(obj);
2182 n += 1;
2183
2184 if (n == BATCHSIZE)
2185 break;
2186
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002187 obj = PyIter_Next(iter);
2188 if (obj == NULL) {
2189 if (PyErr_Occurred())
2190 goto error;
2191 break;
2192 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002193 }
2194
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002195 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002196 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002197
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002198 } while (n == BATCHSIZE);
2199 return 0;
2200
2201 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002202 Py_XDECREF(firstitem);
2203 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002204 return -1;
2205}
2206
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002207/* This is a variant of batch_list() above, specialized for lists (with no
2208 * support for list subclasses). Like batch_list(), we batch up chunks of
2209 * MARK item item ... item APPENDS
2210 * opcode sequences. Calling code should have arranged to first create an
2211 * empty list, or list-like object, for the APPENDS to operate on.
2212 * Returns 0 on success, -1 on error.
2213 *
2214 * This version is considerably faster than batch_list(), if less general.
2215 *
2216 * Note that this only works for protocols > 0.
2217 */
2218static int
2219batch_list_exact(PicklerObject *self, PyObject *obj)
2220{
2221 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002222 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002223
2224 const char append_op = APPEND;
2225 const char appends_op = APPENDS;
2226 const char mark_op = MARK;
2227
2228 assert(obj != NULL);
2229 assert(self->proto > 0);
2230 assert(PyList_CheckExact(obj));
2231
2232 if (PyList_GET_SIZE(obj) == 1) {
2233 item = PyList_GET_ITEM(obj, 0);
2234 if (save(self, item, 0) < 0)
2235 return -1;
2236 if (_Pickler_Write(self, &append_op, 1) < 0)
2237 return -1;
2238 return 0;
2239 }
2240
2241 /* Write in batches of BATCHSIZE. */
2242 total = 0;
2243 do {
2244 this_batch = 0;
2245 if (_Pickler_Write(self, &mark_op, 1) < 0)
2246 return -1;
2247 while (total < PyList_GET_SIZE(obj)) {
2248 item = PyList_GET_ITEM(obj, total);
2249 if (save(self, item, 0) < 0)
2250 return -1;
2251 total++;
2252 if (++this_batch == BATCHSIZE)
2253 break;
2254 }
2255 if (_Pickler_Write(self, &appends_op, 1) < 0)
2256 return -1;
2257
2258 } while (total < PyList_GET_SIZE(obj));
2259
2260 return 0;
2261}
2262
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002263static int
2264save_list(PicklerObject *self, PyObject *obj)
2265{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002266 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002267 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002268 int status = 0;
2269
2270 if (self->fast && !fast_save_enter(self, obj))
2271 goto error;
2272
2273 /* Create an empty list. */
2274 if (self->bin) {
2275 header[0] = EMPTY_LIST;
2276 len = 1;
2277 }
2278 else {
2279 header[0] = MARK;
2280 header[1] = LIST;
2281 len = 2;
2282 }
2283
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002284 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002285 goto error;
2286
2287 /* Get list length, and bow out early if empty. */
2288 if ((len = PyList_Size(obj)) < 0)
2289 goto error;
2290
2291 if (memo_put(self, obj) < 0)
2292 goto error;
2293
2294 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002295 /* Materialize the list elements. */
2296 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002297 if (Py_EnterRecursiveCall(" while pickling an object"))
2298 goto error;
2299 status = batch_list_exact(self, obj);
2300 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002301 } else {
2302 PyObject *iter = PyObject_GetIter(obj);
2303 if (iter == NULL)
2304 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002305
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002306 if (Py_EnterRecursiveCall(" while pickling an object")) {
2307 Py_DECREF(iter);
2308 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002309 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002310 status = batch_list(self, iter);
2311 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002312 Py_DECREF(iter);
2313 }
2314 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002315 if (0) {
2316 error:
2317 status = -1;
2318 }
2319
2320 if (self->fast && !fast_save_leave(self, obj))
2321 status = -1;
2322
2323 return status;
2324}
2325
2326/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2327 * MARK key value ... key value SETITEMS
2328 * opcode sequences. Calling code should have arranged to first create an
2329 * empty dict, or dict-like object, for the SETITEMS to operate on.
2330 * Returns 0 on success, <0 on error.
2331 *
2332 * This is very much like batch_list(). The difference between saving
2333 * elements directly, and picking apart two-tuples, is so long-winded at
2334 * the C level, though, that attempts to combine these routines were too
2335 * ugly to bear.
2336 */
2337static int
2338batch_dict(PicklerObject *self, PyObject *iter)
2339{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002340 PyObject *obj = NULL;
2341 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002342 int i, n;
2343
2344 const char mark_op = MARK;
2345 const char setitem_op = SETITEM;
2346 const char setitems_op = SETITEMS;
2347
2348 assert(iter != NULL);
2349
2350 if (self->proto == 0) {
2351 /* SETITEMS isn't available; do one at a time. */
2352 for (;;) {
2353 obj = PyIter_Next(iter);
2354 if (obj == NULL) {
2355 if (PyErr_Occurred())
2356 return -1;
2357 break;
2358 }
2359 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2360 PyErr_SetString(PyExc_TypeError, "dict items "
2361 "iterator must return 2-tuples");
2362 return -1;
2363 }
2364 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2365 if (i >= 0)
2366 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2367 Py_DECREF(obj);
2368 if (i < 0)
2369 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002370 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002371 return -1;
2372 }
2373 return 0;
2374 }
2375
2376 /* proto > 0: write in batches of BATCHSIZE. */
2377 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002378 /* Get first item */
2379 firstitem = PyIter_Next(iter);
2380 if (firstitem == NULL) {
2381 if (PyErr_Occurred())
2382 goto error;
2383
2384 /* nothing more to add */
2385 break;
2386 }
2387 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2388 PyErr_SetString(PyExc_TypeError, "dict items "
2389 "iterator must return 2-tuples");
2390 goto error;
2391 }
2392
2393 /* Try to get a second item */
2394 obj = PyIter_Next(iter);
2395 if (obj == NULL) {
2396 if (PyErr_Occurred())
2397 goto error;
2398
2399 /* Only one item to write */
2400 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2401 goto error;
2402 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2403 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002404 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002405 goto error;
2406 Py_CLEAR(firstitem);
2407 break;
2408 }
2409
2410 /* More than one item to write */
2411
2412 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002413 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002414 goto error;
2415
2416 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2417 goto error;
2418 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2419 goto error;
2420 Py_CLEAR(firstitem);
2421 n = 1;
2422
2423 /* Fetch and save up to BATCHSIZE items */
2424 while (obj) {
2425 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2426 PyErr_SetString(PyExc_TypeError, "dict items "
2427 "iterator must return 2-tuples");
2428 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002429 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002430 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2431 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2432 goto error;
2433 Py_CLEAR(obj);
2434 n += 1;
2435
2436 if (n == BATCHSIZE)
2437 break;
2438
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002439 obj = PyIter_Next(iter);
2440 if (obj == NULL) {
2441 if (PyErr_Occurred())
2442 goto error;
2443 break;
2444 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002445 }
2446
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002447 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002448 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002449
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002450 } while (n == BATCHSIZE);
2451 return 0;
2452
2453 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002454 Py_XDECREF(firstitem);
2455 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002456 return -1;
2457}
2458
Collin Winter5c9b02d2009-05-25 05:43:30 +00002459/* This is a variant of batch_dict() above that specializes for dicts, with no
2460 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2461 * MARK key value ... key value SETITEMS
2462 * opcode sequences. Calling code should have arranged to first create an
2463 * empty dict, or dict-like object, for the SETITEMS to operate on.
2464 * Returns 0 on success, -1 on error.
2465 *
2466 * Note that this currently doesn't work for protocol 0.
2467 */
2468static int
2469batch_dict_exact(PicklerObject *self, PyObject *obj)
2470{
2471 PyObject *key = NULL, *value = NULL;
2472 int i;
2473 Py_ssize_t dict_size, ppos = 0;
2474
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002475 const char mark_op = MARK;
2476 const char setitem_op = SETITEM;
2477 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002478
2479 assert(obj != NULL);
2480 assert(self->proto > 0);
2481
2482 dict_size = PyDict_Size(obj);
2483
2484 /* Special-case len(d) == 1 to save space. */
2485 if (dict_size == 1) {
2486 PyDict_Next(obj, &ppos, &key, &value);
2487 if (save(self, key, 0) < 0)
2488 return -1;
2489 if (save(self, value, 0) < 0)
2490 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002491 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002492 return -1;
2493 return 0;
2494 }
2495
2496 /* Write in batches of BATCHSIZE. */
2497 do {
2498 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002499 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002500 return -1;
2501 while (PyDict_Next(obj, &ppos, &key, &value)) {
2502 if (save(self, key, 0) < 0)
2503 return -1;
2504 if (save(self, value, 0) < 0)
2505 return -1;
2506 if (++i == BATCHSIZE)
2507 break;
2508 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002509 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002510 return -1;
2511 if (PyDict_Size(obj) != dict_size) {
2512 PyErr_Format(
2513 PyExc_RuntimeError,
2514 "dictionary changed size during iteration");
2515 return -1;
2516 }
2517
2518 } while (i == BATCHSIZE);
2519 return 0;
2520}
2521
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002522static int
2523save_dict(PicklerObject *self, PyObject *obj)
2524{
2525 PyObject *items, *iter;
2526 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002527 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002528 int status = 0;
2529
2530 if (self->fast && !fast_save_enter(self, obj))
2531 goto error;
2532
2533 /* Create an empty dict. */
2534 if (self->bin) {
2535 header[0] = EMPTY_DICT;
2536 len = 1;
2537 }
2538 else {
2539 header[0] = MARK;
2540 header[1] = DICT;
2541 len = 2;
2542 }
2543
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002544 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002545 goto error;
2546
2547 /* Get dict size, and bow out early if empty. */
2548 if ((len = PyDict_Size(obj)) < 0)
2549 goto error;
2550
2551 if (memo_put(self, obj) < 0)
2552 goto error;
2553
2554 if (len != 0) {
2555 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002556 if (PyDict_CheckExact(obj) && self->proto > 0) {
2557 /* We can take certain shortcuts if we know this is a dict and
2558 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002559 if (Py_EnterRecursiveCall(" while pickling an object"))
2560 goto error;
2561 status = batch_dict_exact(self, obj);
2562 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002563 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002564 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002565
2566 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002567 if (items == NULL)
2568 goto error;
2569 iter = PyObject_GetIter(items);
2570 Py_DECREF(items);
2571 if (iter == NULL)
2572 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002573 if (Py_EnterRecursiveCall(" while pickling an object")) {
2574 Py_DECREF(iter);
2575 goto error;
2576 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002577 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002578 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002579 Py_DECREF(iter);
2580 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002581 }
2582
2583 if (0) {
2584 error:
2585 status = -1;
2586 }
2587
2588 if (self->fast && !fast_save_leave(self, obj))
2589 status = -1;
2590
2591 return status;
2592}
2593
2594static int
2595save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2596{
2597 static PyObject *name_str = NULL;
2598 PyObject *global_name = NULL;
2599 PyObject *module_name = NULL;
2600 PyObject *module = NULL;
2601 PyObject *cls;
2602 int status = 0;
2603
2604 const char global_op = GLOBAL;
2605
2606 if (name_str == NULL) {
2607 name_str = PyUnicode_InternFromString("__name__");
2608 if (name_str == NULL)
2609 goto error;
2610 }
2611
2612 if (name) {
2613 global_name = name;
2614 Py_INCREF(global_name);
2615 }
2616 else {
2617 global_name = PyObject_GetAttr(obj, name_str);
2618 if (global_name == NULL)
2619 goto error;
2620 }
2621
2622 module_name = whichmodule(obj, global_name);
2623 if (module_name == NULL)
2624 goto error;
2625
2626 /* XXX: Change to use the import C API directly with level=0 to disallow
2627 relative imports.
2628
2629 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2630 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2631 custom import functions (IMHO, this would be a nice security
2632 feature). The import C API would need to be extended to support the
2633 extra parameters of __import__ to fix that. */
2634 module = PyImport_Import(module_name);
2635 if (module == NULL) {
2636 PyErr_Format(PicklingError,
2637 "Can't pickle %R: import of module %R failed",
2638 obj, module_name);
2639 goto error;
2640 }
2641 cls = PyObject_GetAttr(module, global_name);
2642 if (cls == NULL) {
2643 PyErr_Format(PicklingError,
2644 "Can't pickle %R: attribute lookup %S.%S failed",
2645 obj, module_name, global_name);
2646 goto error;
2647 }
2648 if (cls != obj) {
2649 Py_DECREF(cls);
2650 PyErr_Format(PicklingError,
2651 "Can't pickle %R: it's not the same object as %S.%S",
2652 obj, module_name, global_name);
2653 goto error;
2654 }
2655 Py_DECREF(cls);
2656
2657 if (self->proto >= 2) {
2658 /* See whether this is in the extension registry, and if
2659 * so generate an EXT opcode.
2660 */
2661 PyObject *code_obj; /* extension code as Python object */
2662 long code; /* extension code as C value */
2663 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002664 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002665
2666 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2667 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2668 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2669 /* The object is not registered in the extension registry.
2670 This is the most likely code path. */
2671 if (code_obj == NULL)
2672 goto gen_global;
2673
2674 /* XXX: pickle.py doesn't check neither the type, nor the range
2675 of the value returned by the extension_registry. It should for
2676 consistency. */
2677
2678 /* Verify code_obj has the right type and value. */
2679 if (!PyLong_Check(code_obj)) {
2680 PyErr_Format(PicklingError,
2681 "Can't pickle %R: extension code %R isn't an integer",
2682 obj, code_obj);
2683 goto error;
2684 }
2685 code = PyLong_AS_LONG(code_obj);
2686 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002687 if (!PyErr_Occurred())
2688 PyErr_Format(PicklingError,
2689 "Can't pickle %R: extension code %ld is out of range",
2690 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002691 goto error;
2692 }
2693
2694 /* Generate an EXT opcode. */
2695 if (code <= 0xff) {
2696 pdata[0] = EXT1;
2697 pdata[1] = (unsigned char)code;
2698 n = 2;
2699 }
2700 else if (code <= 0xffff) {
2701 pdata[0] = EXT2;
2702 pdata[1] = (unsigned char)(code & 0xff);
2703 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2704 n = 3;
2705 }
2706 else {
2707 pdata[0] = EXT4;
2708 pdata[1] = (unsigned char)(code & 0xff);
2709 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2710 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2711 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2712 n = 5;
2713 }
2714
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002715 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002716 goto error;
2717 }
2718 else {
2719 /* Generate a normal global opcode if we are using a pickle
2720 protocol <= 2, or if the object is not registered in the
2721 extension registry. */
2722 PyObject *encoded;
2723 PyObject *(*unicode_encoder)(PyObject *);
2724
2725 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002726 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002727 goto error;
2728
2729 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2730 the module name and the global name using UTF-8. We do so only when
2731 we are using the pickle protocol newer than version 3. This is to
2732 ensure compatibility with older Unpickler running on Python 2.x. */
2733 if (self->proto >= 3) {
2734 unicode_encoder = PyUnicode_AsUTF8String;
2735 }
2736 else {
2737 unicode_encoder = PyUnicode_AsASCIIString;
2738 }
2739
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002740 /* For protocol < 3 and if the user didn't request against doing so,
2741 we convert module names to the old 2.x module names. */
2742 if (self->fix_imports) {
2743 PyObject *key;
2744 PyObject *item;
2745
2746 key = PyTuple_Pack(2, module_name, global_name);
2747 if (key == NULL)
2748 goto error;
2749 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2750 Py_DECREF(key);
2751 if (item) {
2752 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2753 PyErr_Format(PyExc_RuntimeError,
2754 "_compat_pickle.REVERSE_NAME_MAPPING values "
2755 "should be 2-tuples, not %.200s",
2756 Py_TYPE(item)->tp_name);
2757 goto error;
2758 }
2759 Py_CLEAR(module_name);
2760 Py_CLEAR(global_name);
2761 module_name = PyTuple_GET_ITEM(item, 0);
2762 global_name = PyTuple_GET_ITEM(item, 1);
2763 if (!PyUnicode_Check(module_name) ||
2764 !PyUnicode_Check(global_name)) {
2765 PyErr_Format(PyExc_RuntimeError,
2766 "_compat_pickle.REVERSE_NAME_MAPPING values "
2767 "should be pairs of str, not (%.200s, %.200s)",
2768 Py_TYPE(module_name)->tp_name,
2769 Py_TYPE(global_name)->tp_name);
2770 goto error;
2771 }
2772 Py_INCREF(module_name);
2773 Py_INCREF(global_name);
2774 }
2775 else if (PyErr_Occurred()) {
2776 goto error;
2777 }
2778
2779 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2780 if (item) {
2781 if (!PyUnicode_Check(item)) {
2782 PyErr_Format(PyExc_RuntimeError,
2783 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2784 "should be strings, not %.200s",
2785 Py_TYPE(item)->tp_name);
2786 goto error;
2787 }
2788 Py_CLEAR(module_name);
2789 module_name = item;
2790 Py_INCREF(module_name);
2791 }
2792 else if (PyErr_Occurred()) {
2793 goto error;
2794 }
2795 }
2796
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002797 /* Save the name of the module. */
2798 encoded = unicode_encoder(module_name);
2799 if (encoded == NULL) {
2800 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2801 PyErr_Format(PicklingError,
2802 "can't pickle module identifier '%S' using "
2803 "pickle protocol %i", module_name, self->proto);
2804 goto error;
2805 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002806 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002807 PyBytes_GET_SIZE(encoded)) < 0) {
2808 Py_DECREF(encoded);
2809 goto error;
2810 }
2811 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002812 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002813 goto error;
2814
2815 /* Save the name of the module. */
2816 encoded = unicode_encoder(global_name);
2817 if (encoded == NULL) {
2818 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2819 PyErr_Format(PicklingError,
2820 "can't pickle global identifier '%S' using "
2821 "pickle protocol %i", global_name, self->proto);
2822 goto error;
2823 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002824 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002825 PyBytes_GET_SIZE(encoded)) < 0) {
2826 Py_DECREF(encoded);
2827 goto error;
2828 }
2829 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002830 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002831 goto error;
2832
2833 /* Memoize the object. */
2834 if (memo_put(self, obj) < 0)
2835 goto error;
2836 }
2837
2838 if (0) {
2839 error:
2840 status = -1;
2841 }
2842 Py_XDECREF(module_name);
2843 Py_XDECREF(global_name);
2844 Py_XDECREF(module);
2845
2846 return status;
2847}
2848
2849static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002850save_ellipsis(PicklerObject *self, PyObject *obj)
2851{
Łukasz Langadbd78252012-03-12 22:59:11 +01002852 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002853 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002854 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002855 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002856 res = save_global(self, Py_Ellipsis, str);
2857 Py_DECREF(str);
2858 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002859}
2860
2861static int
2862save_notimplemented(PicklerObject *self, PyObject *obj)
2863{
Łukasz Langadbd78252012-03-12 22:59:11 +01002864 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002865 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002866 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002867 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002868 res = save_global(self, Py_NotImplemented, str);
2869 Py_DECREF(str);
2870 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002871}
2872
2873static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002874save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2875{
2876 PyObject *pid = NULL;
2877 int status = 0;
2878
2879 const char persid_op = PERSID;
2880 const char binpersid_op = BINPERSID;
2881
2882 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002883 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002884 if (pid == NULL)
2885 return -1;
2886
2887 if (pid != Py_None) {
2888 if (self->bin) {
2889 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002890 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002891 goto error;
2892 }
2893 else {
2894 PyObject *pid_str = NULL;
2895 char *pid_ascii_bytes;
2896 Py_ssize_t size;
2897
2898 pid_str = PyObject_Str(pid);
2899 if (pid_str == NULL)
2900 goto error;
2901
2902 /* XXX: Should it check whether the persistent id only contains
2903 ASCII characters? And what if the pid contains embedded
2904 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002905 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002906 Py_DECREF(pid_str);
2907 if (pid_ascii_bytes == NULL)
2908 goto error;
2909
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002910 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2911 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2912 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002913 goto error;
2914 }
2915 status = 1;
2916 }
2917
2918 if (0) {
2919 error:
2920 status = -1;
2921 }
2922 Py_XDECREF(pid);
2923
2924 return status;
2925}
2926
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002927static PyObject *
2928get_class(PyObject *obj)
2929{
2930 PyObject *cls;
2931 static PyObject *str_class;
2932
2933 if (str_class == NULL) {
2934 str_class = PyUnicode_InternFromString("__class__");
2935 if (str_class == NULL)
2936 return NULL;
2937 }
2938 cls = PyObject_GetAttr(obj, str_class);
2939 if (cls == NULL) {
2940 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2941 PyErr_Clear();
2942 cls = (PyObject *) Py_TYPE(obj);
2943 Py_INCREF(cls);
2944 }
2945 }
2946 return cls;
2947}
2948
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002949/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2950 * appropriate __reduce__ method for obj.
2951 */
2952static int
2953save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2954{
2955 PyObject *callable;
2956 PyObject *argtup;
2957 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002958 PyObject *listitems = Py_None;
2959 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002960 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002961
2962 int use_newobj = self->proto >= 2;
2963
2964 const char reduce_op = REDUCE;
2965 const char build_op = BUILD;
2966 const char newobj_op = NEWOBJ;
2967
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002968 size = PyTuple_Size(args);
2969 if (size < 2 || size > 5) {
2970 PyErr_SetString(PicklingError, "tuple returned by "
2971 "__reduce__ must contain 2 through 5 elements");
2972 return -1;
2973 }
2974
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002975 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2976 &callable, &argtup, &state, &listitems, &dictitems))
2977 return -1;
2978
2979 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002980 PyErr_SetString(PicklingError, "first item of the tuple "
2981 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002982 return -1;
2983 }
2984 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002985 PyErr_SetString(PicklingError, "second item of the tuple "
2986 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002987 return -1;
2988 }
2989
2990 if (state == Py_None)
2991 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002992
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002993 if (listitems == Py_None)
2994 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002995 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07002996 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002997 "returned by __reduce__ must be an iterator, not %s",
2998 Py_TYPE(listitems)->tp_name);
2999 return -1;
3000 }
3001
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003002 if (dictitems == Py_None)
3003 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003004 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003005 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003006 "returned by __reduce__ must be an iterator, not %s",
3007 Py_TYPE(dictitems)->tp_name);
3008 return -1;
3009 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003010
3011 /* Protocol 2 special case: if callable's name is __newobj__, use
3012 NEWOBJ. */
3013 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003014 static PyObject *newobj_str = NULL, *name_str = NULL;
3015 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003016
3017 if (newobj_str == NULL) {
3018 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003019 name_str = PyUnicode_InternFromString("__name__");
3020 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003021 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003022 }
3023
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003024 name = PyObject_GetAttr(callable, name_str);
3025 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003026 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3027 PyErr_Clear();
3028 else
3029 return -1;
3030 use_newobj = 0;
3031 }
3032 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003033 use_newobj = PyUnicode_Check(name) &&
3034 PyUnicode_Compare(name, newobj_str) == 0;
3035 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003036 }
3037 }
3038 if (use_newobj) {
3039 PyObject *cls;
3040 PyObject *newargtup;
3041 PyObject *obj_class;
3042 int p;
3043
3044 /* Sanity checks. */
3045 if (Py_SIZE(argtup) < 1) {
3046 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3047 return -1;
3048 }
3049
3050 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003051 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003052 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003053 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003054 return -1;
3055 }
3056
3057 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003058 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003059 p = obj_class != cls; /* true iff a problem */
3060 Py_DECREF(obj_class);
3061 if (p) {
3062 PyErr_SetString(PicklingError, "args[0] from "
3063 "__newobj__ args has the wrong class");
3064 return -1;
3065 }
3066 }
3067 /* XXX: These calls save() are prone to infinite recursion. Imagine
3068 what happen if the value returned by the __reduce__() method of
3069 some extension type contains another object of the same type. Ouch!
3070
3071 Here is a quick example, that I ran into, to illustrate what I
3072 mean:
3073
3074 >>> import pickle, copyreg
3075 >>> copyreg.dispatch_table.pop(complex)
3076 >>> pickle.dumps(1+2j)
3077 Traceback (most recent call last):
3078 ...
3079 RuntimeError: maximum recursion depth exceeded
3080
3081 Removing the complex class from copyreg.dispatch_table made the
3082 __reduce_ex__() method emit another complex object:
3083
3084 >>> (1+1j).__reduce_ex__(2)
3085 (<function __newobj__ at 0xb7b71c3c>,
3086 (<class 'complex'>, (1+1j)), None, None, None)
3087
3088 Thus when save() was called on newargstup (the 2nd item) recursion
3089 ensued. Of course, the bug was in the complex class which had a
3090 broken __getnewargs__() that emitted another complex object. But,
3091 the point, here, is it is quite easy to end up with a broken reduce
3092 function. */
3093
3094 /* Save the class and its __new__ arguments. */
3095 if (save(self, cls, 0) < 0)
3096 return -1;
3097
3098 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3099 if (newargtup == NULL)
3100 return -1;
3101
3102 p = save(self, newargtup, 0);
3103 Py_DECREF(newargtup);
3104 if (p < 0)
3105 return -1;
3106
3107 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003108 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003109 return -1;
3110 }
3111 else { /* Not using NEWOBJ. */
3112 if (save(self, callable, 0) < 0 ||
3113 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003114 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003115 return -1;
3116 }
3117
3118 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3119 the caller do not want to memoize the object. Not particularly useful,
3120 but that is to mimic the behavior save_reduce() in pickle.py when
3121 obj is None. */
3122 if (obj && memo_put(self, obj) < 0)
3123 return -1;
3124
3125 if (listitems && batch_list(self, listitems) < 0)
3126 return -1;
3127
3128 if (dictitems && batch_dict(self, dictitems) < 0)
3129 return -1;
3130
3131 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003132 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003133 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003134 return -1;
3135 }
3136
3137 return 0;
3138}
3139
3140static int
3141save(PicklerObject *self, PyObject *obj, int pers_save)
3142{
3143 PyTypeObject *type;
3144 PyObject *reduce_func = NULL;
3145 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003146 int status = 0;
3147
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003148 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003149 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003150
3151 /* The extra pers_save argument is necessary to avoid calling save_pers()
3152 on its returned object. */
3153 if (!pers_save && self->pers_func) {
3154 /* save_pers() returns:
3155 -1 to signal an error;
3156 0 if it did nothing successfully;
3157 1 if a persistent id was saved.
3158 */
3159 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3160 goto done;
3161 }
3162
3163 type = Py_TYPE(obj);
3164
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003165 /* The old cPickle had an optimization that used switch-case statement
3166 dispatching on the first letter of the type name. This has was removed
3167 since benchmarks shown that this optimization was actually slowing
3168 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003169
3170 /* Atom types; these aren't memoized, so don't check the memo. */
3171
3172 if (obj == Py_None) {
3173 status = save_none(self, obj);
3174 goto done;
3175 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003176 else if (obj == Py_Ellipsis) {
3177 status = save_ellipsis(self, obj);
3178 goto done;
3179 }
3180 else if (obj == Py_NotImplemented) {
3181 status = save_notimplemented(self, obj);
3182 goto done;
3183 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003184 else if (obj == Py_False || obj == Py_True) {
3185 status = save_bool(self, obj);
3186 goto done;
3187 }
3188 else if (type == &PyLong_Type) {
3189 status = save_long(self, obj);
3190 goto done;
3191 }
3192 else if (type == &PyFloat_Type) {
3193 status = save_float(self, obj);
3194 goto done;
3195 }
3196
3197 /* Check the memo to see if it has the object. If so, generate
3198 a GET (or BINGET) opcode, instead of pickling the object
3199 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003200 if (PyMemoTable_Get(self->memo, obj)) {
3201 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003202 goto error;
3203 goto done;
3204 }
3205
3206 if (type == &PyBytes_Type) {
3207 status = save_bytes(self, obj);
3208 goto done;
3209 }
3210 else if (type == &PyUnicode_Type) {
3211 status = save_unicode(self, obj);
3212 goto done;
3213 }
3214 else if (type == &PyDict_Type) {
3215 status = save_dict(self, obj);
3216 goto done;
3217 }
3218 else if (type == &PyList_Type) {
3219 status = save_list(self, obj);
3220 goto done;
3221 }
3222 else if (type == &PyTuple_Type) {
3223 status = save_tuple(self, obj);
3224 goto done;
3225 }
3226 else if (type == &PyType_Type) {
3227 status = save_global(self, obj, NULL);
3228 goto done;
3229 }
3230 else if (type == &PyFunction_Type) {
3231 status = save_global(self, obj, NULL);
3232 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3233 /* fall back to reduce */
3234 PyErr_Clear();
3235 }
3236 else {
3237 goto done;
3238 }
3239 }
3240 else if (type == &PyCFunction_Type) {
3241 status = save_global(self, obj, NULL);
3242 goto done;
3243 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003244
3245 /* XXX: This part needs some unit tests. */
3246
3247 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003248 * self.dispatch_table, copyreg.dispatch_table, the object's
3249 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003250 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003251 if (self->dispatch_table == NULL) {
3252 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3253 /* PyDict_GetItem() unlike PyObject_GetItem() and
3254 PyObject_GetAttr() returns a borrowed ref */
3255 Py_XINCREF(reduce_func);
3256 } else {
3257 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3258 if (reduce_func == NULL) {
3259 if (PyErr_ExceptionMatches(PyExc_KeyError))
3260 PyErr_Clear();
3261 else
3262 goto error;
3263 }
3264 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003265 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003266 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003267 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003268 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003269 else if (PyType_IsSubtype(type, &PyType_Type)) {
3270 status = save_global(self, obj, NULL);
3271 goto done;
3272 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003273 else {
3274 static PyObject *reduce_str = NULL;
3275 static PyObject *reduce_ex_str = NULL;
3276
3277 /* Cache the name of the reduce methods. */
3278 if (reduce_str == NULL) {
3279 reduce_str = PyUnicode_InternFromString("__reduce__");
3280 if (reduce_str == NULL)
3281 goto error;
3282 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3283 if (reduce_ex_str == NULL)
3284 goto error;
3285 }
3286
3287 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3288 automatically defined as __reduce__. While this is convenient, this
3289 make it impossible to know which method was actually called. Of
3290 course, this is not a big deal. But still, it would be nice to let
3291 the user know which method was called when something go
3292 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3293 don't actually have to check for a __reduce__ method. */
3294
3295 /* Check for a __reduce_ex__ method. */
3296 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3297 if (reduce_func != NULL) {
3298 PyObject *proto;
3299 proto = PyLong_FromLong(self->proto);
3300 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003301 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003302 }
3303 }
3304 else {
3305 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3306 PyErr_Clear();
3307 else
3308 goto error;
3309 /* Check for a __reduce__ method. */
3310 reduce_func = PyObject_GetAttr(obj, reduce_str);
3311 if (reduce_func != NULL) {
3312 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3313 }
3314 else {
3315 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3316 type->tp_name, obj);
3317 goto error;
3318 }
3319 }
3320 }
3321
3322 if (reduce_value == NULL)
3323 goto error;
3324
3325 if (PyUnicode_Check(reduce_value)) {
3326 status = save_global(self, obj, reduce_value);
3327 goto done;
3328 }
3329
3330 if (!PyTuple_Check(reduce_value)) {
3331 PyErr_SetString(PicklingError,
3332 "__reduce__ must return a string or tuple");
3333 goto error;
3334 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003335
3336 status = save_reduce(self, reduce_value, obj);
3337
3338 if (0) {
3339 error:
3340 status = -1;
3341 }
3342 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003343 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003344 Py_XDECREF(reduce_func);
3345 Py_XDECREF(reduce_value);
3346
3347 return status;
3348}
3349
3350static int
3351dump(PicklerObject *self, PyObject *obj)
3352{
3353 const char stop_op = STOP;
3354
3355 if (self->proto >= 2) {
3356 char header[2];
3357
3358 header[0] = PROTO;
3359 assert(self->proto >= 0 && self->proto < 256);
3360 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003361 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003362 return -1;
3363 }
3364
3365 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003366 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367 return -1;
3368
3369 return 0;
3370}
3371
3372PyDoc_STRVAR(Pickler_clear_memo_doc,
3373"clear_memo() -> None. Clears the pickler's \"memo\"."
3374"\n"
3375"The memo is the data structure that remembers which objects the\n"
3376"pickler has already seen, so that shared or recursive objects are\n"
3377"pickled by reference and not by value. This method is useful when\n"
3378"re-using picklers.");
3379
3380static PyObject *
3381Pickler_clear_memo(PicklerObject *self)
3382{
3383 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003384 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003385
3386 Py_RETURN_NONE;
3387}
3388
3389PyDoc_STRVAR(Pickler_dump_doc,
3390"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3391
3392static PyObject *
3393Pickler_dump(PicklerObject *self, PyObject *args)
3394{
3395 PyObject *obj;
3396
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003397 /* Check whether the Pickler was initialized correctly (issue3664).
3398 Developers often forget to call __init__() in their subclasses, which
3399 would trigger a segfault without this check. */
3400 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003401 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003402 "Pickler.__init__() was not called by %s.__init__()",
3403 Py_TYPE(self)->tp_name);
3404 return NULL;
3405 }
3406
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003407 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3408 return NULL;
3409
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003410 if (_Pickler_ClearBuffer(self) < 0)
3411 return NULL;
3412
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003413 if (dump(self, obj) < 0)
3414 return NULL;
3415
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003416 if (_Pickler_FlushToFile(self) < 0)
3417 return NULL;
3418
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003419 Py_RETURN_NONE;
3420}
3421
3422static struct PyMethodDef Pickler_methods[] = {
3423 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3424 Pickler_dump_doc},
3425 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3426 Pickler_clear_memo_doc},
3427 {NULL, NULL} /* sentinel */
3428};
3429
3430static void
3431Pickler_dealloc(PicklerObject *self)
3432{
3433 PyObject_GC_UnTrack(self);
3434
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003435 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003436 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003437 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003438 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003439 Py_XDECREF(self->arg);
3440 Py_XDECREF(self->fast_memo);
3441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003442 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003443
3444 Py_TYPE(self)->tp_free((PyObject *)self);
3445}
3446
3447static int
3448Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3449{
3450 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003451 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003452 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003453 Py_VISIT(self->arg);
3454 Py_VISIT(self->fast_memo);
3455 return 0;
3456}
3457
3458static int
3459Pickler_clear(PicklerObject *self)
3460{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003461 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003462 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003463 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003464 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003465 Py_CLEAR(self->arg);
3466 Py_CLEAR(self->fast_memo);
3467
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003468 if (self->memo != NULL) {
3469 PyMemoTable *memo = self->memo;
3470 self->memo = NULL;
3471 PyMemoTable_Del(memo);
3472 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003473 return 0;
3474}
3475
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003476
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003477PyDoc_STRVAR(Pickler_doc,
3478"Pickler(file, protocol=None)"
3479"\n"
3480"This takes a binary file for writing a pickle data stream.\n"
3481"\n"
3482"The optional protocol argument tells the pickler to use the\n"
3483"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3484"protocol is 3; a backward-incompatible protocol designed for\n"
3485"Python 3.0.\n"
3486"\n"
3487"Specifying a negative protocol version selects the highest\n"
3488"protocol version supported. The higher the protocol used, the\n"
3489"more recent the version of Python needed to read the pickle\n"
3490"produced.\n"
3491"\n"
3492"The file argument must have a write() method that accepts a single\n"
3493"bytes argument. It can thus be a file object opened for binary\n"
3494"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003495"meets this interface.\n"
3496"\n"
3497"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3498"map the new Python 3.x names to the old module names used in Python\n"
3499"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003500
3501static int
3502Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3503{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003504 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003505 PyObject *file;
3506 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003507 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003508 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003509 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003510
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003511 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003512 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003513 return -1;
3514
3515 /* In case of multiple __init__() calls, clear previous content. */
3516 if (self->write != NULL)
3517 (void)Pickler_clear(self);
3518
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003519 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3520 return -1;
3521
3522 if (_Pickler_SetOutputStream(self, file) < 0)
3523 return -1;
3524
3525 /* memo and output_buffer may have already been created in _Pickler_New */
3526 if (self->memo == NULL) {
3527 self->memo = PyMemoTable_New();
3528 if (self->memo == NULL)
3529 return -1;
3530 }
3531 self->output_len = 0;
3532 if (self->output_buffer == NULL) {
3533 self->max_output_len = WRITE_BUF_SIZE;
3534 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3535 self->max_output_len);
3536 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003537 return -1;
3538 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003539
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003540 self->arg = NULL;
3541 self->fast = 0;
3542 self->fast_nesting = 0;
3543 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003544 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003545 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3546 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3547 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003548 if (self->pers_func == NULL)
3549 return -1;
3550 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003551 self->dispatch_table = NULL;
3552 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3553 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3554 &PyId_dispatch_table);
3555 if (self->dispatch_table == NULL)
3556 return -1;
3557 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003558 return 0;
3559}
3560
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003561/* Define a proxy object for the Pickler's internal memo object. This is to
3562 * avoid breaking code like:
3563 * pickler.memo.clear()
3564 * and
3565 * pickler.memo = saved_memo
3566 * Is this a good idea? Not really, but we don't want to break code that uses
3567 * it. Note that we don't implement the entire mapping API here. This is
3568 * intentional, as these should be treated as black-box implementation details.
3569 */
3570
3571typedef struct {
3572 PyObject_HEAD
3573 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3574} PicklerMemoProxyObject;
3575
3576PyDoc_STRVAR(pmp_clear_doc,
3577"memo.clear() -> None. Remove all items from memo.");
3578
3579static PyObject *
3580pmp_clear(PicklerMemoProxyObject *self)
3581{
3582 if (self->pickler->memo)
3583 PyMemoTable_Clear(self->pickler->memo);
3584 Py_RETURN_NONE;
3585}
3586
3587PyDoc_STRVAR(pmp_copy_doc,
3588"memo.copy() -> new_memo. Copy the memo to a new object.");
3589
3590static PyObject *
3591pmp_copy(PicklerMemoProxyObject *self)
3592{
3593 Py_ssize_t i;
3594 PyMemoTable *memo;
3595 PyObject *new_memo = PyDict_New();
3596 if (new_memo == NULL)
3597 return NULL;
3598
3599 memo = self->pickler->memo;
3600 for (i = 0; i < memo->mt_allocated; ++i) {
3601 PyMemoEntry entry = memo->mt_table[i];
3602 if (entry.me_key != NULL) {
3603 int status;
3604 PyObject *key, *value;
3605
3606 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003607 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003608
3609 if (key == NULL || value == NULL) {
3610 Py_XDECREF(key);
3611 Py_XDECREF(value);
3612 goto error;
3613 }
3614 status = PyDict_SetItem(new_memo, key, value);
3615 Py_DECREF(key);
3616 Py_DECREF(value);
3617 if (status < 0)
3618 goto error;
3619 }
3620 }
3621 return new_memo;
3622
3623 error:
3624 Py_XDECREF(new_memo);
3625 return NULL;
3626}
3627
3628PyDoc_STRVAR(pmp_reduce_doc,
3629"memo.__reduce__(). Pickling support.");
3630
3631static PyObject *
3632pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3633{
3634 PyObject *reduce_value, *dict_args;
3635 PyObject *contents = pmp_copy(self);
3636 if (contents == NULL)
3637 return NULL;
3638
3639 reduce_value = PyTuple_New(2);
3640 if (reduce_value == NULL) {
3641 Py_DECREF(contents);
3642 return NULL;
3643 }
3644 dict_args = PyTuple_New(1);
3645 if (dict_args == NULL) {
3646 Py_DECREF(contents);
3647 Py_DECREF(reduce_value);
3648 return NULL;
3649 }
3650 PyTuple_SET_ITEM(dict_args, 0, contents);
3651 Py_INCREF((PyObject *)&PyDict_Type);
3652 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3653 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3654 return reduce_value;
3655}
3656
3657static PyMethodDef picklerproxy_methods[] = {
3658 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3659 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3660 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3661 {NULL, NULL} /* sentinel */
3662};
3663
3664static void
3665PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3666{
3667 PyObject_GC_UnTrack(self);
3668 Py_XDECREF(self->pickler);
3669 PyObject_GC_Del((PyObject *)self);
3670}
3671
3672static int
3673PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3674 visitproc visit, void *arg)
3675{
3676 Py_VISIT(self->pickler);
3677 return 0;
3678}
3679
3680static int
3681PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3682{
3683 Py_CLEAR(self->pickler);
3684 return 0;
3685}
3686
3687static PyTypeObject PicklerMemoProxyType = {
3688 PyVarObject_HEAD_INIT(NULL, 0)
3689 "_pickle.PicklerMemoProxy", /*tp_name*/
3690 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3691 0,
3692 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3693 0, /* tp_print */
3694 0, /* tp_getattr */
3695 0, /* tp_setattr */
3696 0, /* tp_compare */
3697 0, /* tp_repr */
3698 0, /* tp_as_number */
3699 0, /* tp_as_sequence */
3700 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003701 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003702 0, /* tp_call */
3703 0, /* tp_str */
3704 PyObject_GenericGetAttr, /* tp_getattro */
3705 PyObject_GenericSetAttr, /* tp_setattro */
3706 0, /* tp_as_buffer */
3707 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3708 0, /* tp_doc */
3709 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3710 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3711 0, /* tp_richcompare */
3712 0, /* tp_weaklistoffset */
3713 0, /* tp_iter */
3714 0, /* tp_iternext */
3715 picklerproxy_methods, /* tp_methods */
3716};
3717
3718static PyObject *
3719PicklerMemoProxy_New(PicklerObject *pickler)
3720{
3721 PicklerMemoProxyObject *self;
3722
3723 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3724 if (self == NULL)
3725 return NULL;
3726 Py_INCREF(pickler);
3727 self->pickler = pickler;
3728 PyObject_GC_Track(self);
3729 return (PyObject *)self;
3730}
3731
3732/*****************************************************************************/
3733
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003734static PyObject *
3735Pickler_get_memo(PicklerObject *self)
3736{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003737 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003738}
3739
3740static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003741Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003742{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003743 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003744
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003745 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003746 PyErr_SetString(PyExc_TypeError,
3747 "attribute deletion is not supported");
3748 return -1;
3749 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003750
3751 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3752 PicklerObject *pickler =
3753 ((PicklerMemoProxyObject *)obj)->pickler;
3754
3755 new_memo = PyMemoTable_Copy(pickler->memo);
3756 if (new_memo == NULL)
3757 return -1;
3758 }
3759 else if (PyDict_Check(obj)) {
3760 Py_ssize_t i = 0;
3761 PyObject *key, *value;
3762
3763 new_memo = PyMemoTable_New();
3764 if (new_memo == NULL)
3765 return -1;
3766
3767 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003768 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003769 PyObject *memo_obj;
3770
3771 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3772 PyErr_SetString(PyExc_TypeError,
3773 "'memo' values must be 2-item tuples");
3774 goto error;
3775 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003776 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003777 if (memo_id == -1 && PyErr_Occurred())
3778 goto error;
3779 memo_obj = PyTuple_GET_ITEM(value, 1);
3780 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3781 goto error;
3782 }
3783 }
3784 else {
3785 PyErr_Format(PyExc_TypeError,
3786 "'memo' attribute must be an PicklerMemoProxy object"
3787 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003788 return -1;
3789 }
3790
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003791 PyMemoTable_Del(self->memo);
3792 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003793
3794 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003795
3796 error:
3797 if (new_memo)
3798 PyMemoTable_Del(new_memo);
3799 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003800}
3801
3802static PyObject *
3803Pickler_get_persid(PicklerObject *self)
3804{
3805 if (self->pers_func == NULL)
3806 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3807 else
3808 Py_INCREF(self->pers_func);
3809 return self->pers_func;
3810}
3811
3812static int
3813Pickler_set_persid(PicklerObject *self, PyObject *value)
3814{
3815 PyObject *tmp;
3816
3817 if (value == NULL) {
3818 PyErr_SetString(PyExc_TypeError,
3819 "attribute deletion is not supported");
3820 return -1;
3821 }
3822 if (!PyCallable_Check(value)) {
3823 PyErr_SetString(PyExc_TypeError,
3824 "persistent_id must be a callable taking one argument");
3825 return -1;
3826 }
3827
3828 tmp = self->pers_func;
3829 Py_INCREF(value);
3830 self->pers_func = value;
3831 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3832
3833 return 0;
3834}
3835
3836static PyMemberDef Pickler_members[] = {
3837 {"bin", T_INT, offsetof(PicklerObject, bin)},
3838 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003839 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003840 {NULL}
3841};
3842
3843static PyGetSetDef Pickler_getsets[] = {
3844 {"memo", (getter)Pickler_get_memo,
3845 (setter)Pickler_set_memo},
3846 {"persistent_id", (getter)Pickler_get_persid,
3847 (setter)Pickler_set_persid},
3848 {NULL}
3849};
3850
3851static PyTypeObject Pickler_Type = {
3852 PyVarObject_HEAD_INIT(NULL, 0)
3853 "_pickle.Pickler" , /*tp_name*/
3854 sizeof(PicklerObject), /*tp_basicsize*/
3855 0, /*tp_itemsize*/
3856 (destructor)Pickler_dealloc, /*tp_dealloc*/
3857 0, /*tp_print*/
3858 0, /*tp_getattr*/
3859 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003860 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003861 0, /*tp_repr*/
3862 0, /*tp_as_number*/
3863 0, /*tp_as_sequence*/
3864 0, /*tp_as_mapping*/
3865 0, /*tp_hash*/
3866 0, /*tp_call*/
3867 0, /*tp_str*/
3868 0, /*tp_getattro*/
3869 0, /*tp_setattro*/
3870 0, /*tp_as_buffer*/
3871 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3872 Pickler_doc, /*tp_doc*/
3873 (traverseproc)Pickler_traverse, /*tp_traverse*/
3874 (inquiry)Pickler_clear, /*tp_clear*/
3875 0, /*tp_richcompare*/
3876 0, /*tp_weaklistoffset*/
3877 0, /*tp_iter*/
3878 0, /*tp_iternext*/
3879 Pickler_methods, /*tp_methods*/
3880 Pickler_members, /*tp_members*/
3881 Pickler_getsets, /*tp_getset*/
3882 0, /*tp_base*/
3883 0, /*tp_dict*/
3884 0, /*tp_descr_get*/
3885 0, /*tp_descr_set*/
3886 0, /*tp_dictoffset*/
3887 (initproc)Pickler_init, /*tp_init*/
3888 PyType_GenericAlloc, /*tp_alloc*/
3889 PyType_GenericNew, /*tp_new*/
3890 PyObject_GC_Del, /*tp_free*/
3891 0, /*tp_is_gc*/
3892};
3893
Victor Stinner121aab42011-09-29 23:40:53 +02003894/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003895
3896 XXX: It would be nice to able to avoid Python function call overhead, by
3897 using directly the C version of find_class(), when find_class() is not
3898 overridden by a subclass. Although, this could become rather hackish. A
3899 simpler optimization would be to call the C function when self is not a
3900 subclass instance. */
3901static PyObject *
3902find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3903{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003904 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003905
3906 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3907 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003908}
3909
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003910static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003911marker(UnpicklerObject *self)
3912{
3913 if (self->num_marks < 1) {
3914 PyErr_SetString(UnpicklingError, "could not find MARK");
3915 return -1;
3916 }
3917
3918 return self->marks[--self->num_marks];
3919}
3920
3921static int
3922load_none(UnpicklerObject *self)
3923{
3924 PDATA_APPEND(self->stack, Py_None, -1);
3925 return 0;
3926}
3927
3928static int
3929bad_readline(void)
3930{
3931 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3932 return -1;
3933}
3934
3935static int
3936load_int(UnpicklerObject *self)
3937{
3938 PyObject *value;
3939 char *endptr, *s;
3940 Py_ssize_t len;
3941 long x;
3942
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003943 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003944 return -1;
3945 if (len < 2)
3946 return bad_readline();
3947
3948 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003949 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003950 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003951 x = strtol(s, &endptr, 0);
3952
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003953 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003954 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03003955 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003956 errno = 0;
3957 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003958 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003959 if (value == NULL) {
3960 PyErr_SetString(PyExc_ValueError,
3961 "could not convert string to int");
3962 return -1;
3963 }
3964 }
3965 else {
3966 if (len == 3 && (x == 0 || x == 1)) {
3967 if ((value = PyBool_FromLong(x)) == NULL)
3968 return -1;
3969 }
3970 else {
3971 if ((value = PyLong_FromLong(x)) == NULL)
3972 return -1;
3973 }
3974 }
3975
3976 PDATA_PUSH(self->stack, value, -1);
3977 return 0;
3978}
3979
3980static int
3981load_bool(UnpicklerObject *self, PyObject *boolean)
3982{
3983 assert(boolean == Py_True || boolean == Py_False);
3984 PDATA_APPEND(self->stack, boolean, -1);
3985 return 0;
3986}
3987
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003988/* s contains x bytes of an unsigned little-endian integer. Return its value
3989 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3990 */
3991static Py_ssize_t
3992calc_binsize(char *bytes, int size)
3993{
3994 unsigned char *s = (unsigned char *)bytes;
3995 size_t x = 0;
3996
3997 assert(size == 4);
3998
3999 x = (size_t) s[0];
4000 x |= (size_t) s[1] << 8;
4001 x |= (size_t) s[2] << 16;
4002 x |= (size_t) s[3] << 24;
4003
4004 if (x > PY_SSIZE_T_MAX)
4005 return -1;
4006 else
4007 return (Py_ssize_t) x;
4008}
4009
/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are assembled in unsigned arithmetic and the sign of a 4-byte
 * value is applied by subtraction, so no signed shift ever reaches the sign
 * bit and the result does not depend on the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long bits = 0;
    int i;

    for (i = 0; i < size; i++) {
        bits |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: with bit 31 set the value is (low 31 bits) - 2**31.
     * 2**31 itself may not fit in a long, hence the two-step subtraction.
     */
    if (size == 4 && (bits & 0x80000000UL) != 0) {
        return (long)(bits & 0x7fffffffUL) - 0x7fffffffL - 1;
    }

    return (long)bits;
}
4036
4037static int
4038load_binintx(UnpicklerObject *self, char *s, int size)
4039{
4040 PyObject *value;
4041 long x;
4042
4043 x = calc_binint(s, size);
4044
4045 if ((value = PyLong_FromLong(x)) == NULL)
4046 return -1;
4047
4048 PDATA_PUSH(self->stack, value, -1);
4049 return 0;
4050}
4051
4052static int
4053load_binint(UnpicklerObject *self)
4054{
4055 char *s;
4056
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004057 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004058 return -1;
4059
4060 return load_binintx(self, s, 4);
4061}
4062
4063static int
4064load_binint1(UnpicklerObject *self)
4065{
4066 char *s;
4067
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004068 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004069 return -1;
4070
4071 return load_binintx(self, s, 1);
4072}
4073
4074static int
4075load_binint2(UnpicklerObject *self)
4076{
4077 char *s;
4078
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004079 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004080 return -1;
4081
4082 return load_binintx(self, s, 2);
4083}
4084
4085static int
4086load_long(UnpicklerObject *self)
4087{
4088 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004089 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004090 Py_ssize_t len;
4091
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004092 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004093 return -1;
4094 if (len < 2)
4095 return bad_readline();
4096
Mark Dickinson8dd05142009-01-20 20:43:58 +00004097 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4098 the 'L' before calling PyLong_FromString. In order to maintain
4099 compatibility with Python 3.0.0, we don't actually *require*
4100 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004101 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004102 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004103 /* XXX: Should the base argument explicitly set to 10? */
4104 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004105 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004106 return -1;
4107
4108 PDATA_PUSH(self->stack, value, -1);
4109 return 0;
4110}
4111
4112/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4113 * data following.
4114 */
4115static int
4116load_counted_long(UnpicklerObject *self, int size)
4117{
4118 PyObject *value;
4119 char *nbytes;
4120 char *pdata;
4121
4122 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004123 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004124 return -1;
4125
4126 size = calc_binint(nbytes, size);
4127 if (size < 0) {
4128 /* Corrupt or hostile pickle -- we never write one like this */
4129 PyErr_SetString(UnpicklingError,
4130 "LONG pickle has negative byte count");
4131 return -1;
4132 }
4133
4134 if (size == 0)
4135 value = PyLong_FromLong(0L);
4136 else {
4137 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004138 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004139 return -1;
4140 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4141 1 /* little endian */ , 1 /* signed */ );
4142 }
4143 if (value == NULL)
4144 return -1;
4145 PDATA_PUSH(self->stack, value, -1);
4146 return 0;
4147}
4148
4149static int
4150load_float(UnpicklerObject *self)
4151{
4152 PyObject *value;
4153 char *endptr, *s;
4154 Py_ssize_t len;
4155 double d;
4156
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004157 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004158 return -1;
4159 if (len < 2)
4160 return bad_readline();
4161
4162 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004163 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4164 if (d == -1.0 && PyErr_Occurred())
4165 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004166 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004167 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4168 return -1;
4169 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004170 value = PyFloat_FromDouble(d);
4171 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004172 return -1;
4173
4174 PDATA_PUSH(self->stack, value, -1);
4175 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004176}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004177
4178static int
4179load_binfloat(UnpicklerObject *self)
4180{
4181 PyObject *value;
4182 double x;
4183 char *s;
4184
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004185 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004186 return -1;
4187
4188 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4189 if (x == -1.0 && PyErr_Occurred())
4190 return -1;
4191
4192 if ((value = PyFloat_FromDouble(x)) == NULL)
4193 return -1;
4194
4195 PDATA_PUSH(self->stack, value, -1);
4196 return 0;
4197}
4198
4199static int
4200load_string(UnpicklerObject *self)
4201{
4202 PyObject *bytes;
4203 PyObject *str = NULL;
4204 Py_ssize_t len;
4205 char *s, *p;
4206
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004207 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004208 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004209 /* Strip the newline */
4210 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004211 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004212 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004213 p = s + 1;
4214 len -= 2;
4215 }
4216 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004217 PyErr_SetString(UnpicklingError,
4218 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004219 return -1;
4220 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004221 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004222
4223 /* Use the PyBytes API to decode the string, since that is what is used
4224 to encode, and then coerce the result to Unicode. */
4225 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004226 if (bytes == NULL)
4227 return -1;
4228 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4229 Py_DECREF(bytes);
4230 if (str == NULL)
4231 return -1;
4232
4233 PDATA_PUSH(self->stack, str, -1);
4234 return 0;
4235}
4236
4237static int
4238load_binbytes(UnpicklerObject *self)
4239{
4240 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004241 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004242 char *s;
4243
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004244 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004245 return -1;
4246
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004247 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004248 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004249 PyErr_Format(PyExc_OverflowError,
4250 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004251 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004252 return -1;
4253 }
4254
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004255 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004256 return -1;
4257 bytes = PyBytes_FromStringAndSize(s, x);
4258 if (bytes == NULL)
4259 return -1;
4260
4261 PDATA_PUSH(self->stack, bytes, -1);
4262 return 0;
4263}
4264
4265static int
4266load_short_binbytes(UnpicklerObject *self)
4267{
4268 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004269 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004270 char *s;
4271
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004272 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004273 return -1;
4274
4275 x = (unsigned char)s[0];
4276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004277 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004278 return -1;
4279
4280 bytes = PyBytes_FromStringAndSize(s, x);
4281 if (bytes == NULL)
4282 return -1;
4283
4284 PDATA_PUSH(self->stack, bytes, -1);
4285 return 0;
4286}
4287
4288static int
4289load_binstring(UnpicklerObject *self)
4290{
4291 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004292 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004293 char *s;
4294
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004295 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004296 return -1;
4297
4298 x = calc_binint(s, 4);
4299 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004300 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004301 "BINSTRING pickle has negative byte count");
4302 return -1;
4303 }
4304
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004305 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004306 return -1;
4307
4308 /* Convert Python 2.x strings to unicode. */
4309 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4310 if (str == NULL)
4311 return -1;
4312
4313 PDATA_PUSH(self->stack, str, -1);
4314 return 0;
4315}
4316
4317static int
4318load_short_binstring(UnpicklerObject *self)
4319{
4320 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004321 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004322 char *s;
4323
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004324 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004325 return -1;
4326
4327 x = (unsigned char)s[0];
4328
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004329 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004330 return -1;
4331
4332 /* Convert Python 2.x strings to unicode. */
4333 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4334 if (str == NULL)
4335 return -1;
4336
4337 PDATA_PUSH(self->stack, str, -1);
4338 return 0;
4339}
4340
4341static int
4342load_unicode(UnpicklerObject *self)
4343{
4344 PyObject *str;
4345 Py_ssize_t len;
4346 char *s;
4347
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004348 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004349 return -1;
4350 if (len < 1)
4351 return bad_readline();
4352
4353 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4354 if (str == NULL)
4355 return -1;
4356
4357 PDATA_PUSH(self->stack, str, -1);
4358 return 0;
4359}
4360
4361static int
4362load_binunicode(UnpicklerObject *self)
4363{
4364 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004365 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004366 char *s;
4367
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004368 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004369 return -1;
4370
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004371 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004372 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004373 PyErr_Format(PyExc_OverflowError,
4374 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004375 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004376 return -1;
4377 }
4378
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004379
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004380 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004381 return -1;
4382
Victor Stinner485fb562010-04-13 11:07:24 +00004383 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004384 if (str == NULL)
4385 return -1;
4386
4387 PDATA_PUSH(self->stack, str, -1);
4388 return 0;
4389}
4390
4391static int
4392load_tuple(UnpicklerObject *self)
4393{
4394 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004395 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004396
4397 if ((i = marker(self)) < 0)
4398 return -1;
4399
4400 tuple = Pdata_poptuple(self->stack, i);
4401 if (tuple == NULL)
4402 return -1;
4403 PDATA_PUSH(self->stack, tuple, -1);
4404 return 0;
4405}
4406
4407static int
4408load_counted_tuple(UnpicklerObject *self, int len)
4409{
4410 PyObject *tuple;
4411
4412 tuple = PyTuple_New(len);
4413 if (tuple == NULL)
4414 return -1;
4415
4416 while (--len >= 0) {
4417 PyObject *item;
4418
4419 PDATA_POP(self->stack, item);
4420 if (item == NULL)
4421 return -1;
4422 PyTuple_SET_ITEM(tuple, len, item);
4423 }
4424 PDATA_PUSH(self->stack, tuple, -1);
4425 return 0;
4426}
4427
4428static int
4429load_empty_list(UnpicklerObject *self)
4430{
4431 PyObject *list;
4432
4433 if ((list = PyList_New(0)) == NULL)
4434 return -1;
4435 PDATA_PUSH(self->stack, list, -1);
4436 return 0;
4437}
4438
4439static int
4440load_empty_dict(UnpicklerObject *self)
4441{
4442 PyObject *dict;
4443
4444 if ((dict = PyDict_New()) == NULL)
4445 return -1;
4446 PDATA_PUSH(self->stack, dict, -1);
4447 return 0;
4448}
4449
4450static int
4451load_list(UnpicklerObject *self)
4452{
4453 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004454 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004455
4456 if ((i = marker(self)) < 0)
4457 return -1;
4458
4459 list = Pdata_poplist(self->stack, i);
4460 if (list == NULL)
4461 return -1;
4462 PDATA_PUSH(self->stack, list, -1);
4463 return 0;
4464}
4465
4466static int
4467load_dict(UnpicklerObject *self)
4468{
4469 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004470 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004471
4472 if ((i = marker(self)) < 0)
4473 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004474 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004475
4476 if ((dict = PyDict_New()) == NULL)
4477 return -1;
4478
4479 for (k = i + 1; k < j; k += 2) {
4480 key = self->stack->data[k - 1];
4481 value = self->stack->data[k];
4482 if (PyDict_SetItem(dict, key, value) < 0) {
4483 Py_DECREF(dict);
4484 return -1;
4485 }
4486 }
4487 Pdata_clear(self->stack, i);
4488 PDATA_PUSH(self->stack, dict, -1);
4489 return 0;
4490}
4491
4492static PyObject *
4493instantiate(PyObject *cls, PyObject *args)
4494{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004495 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004496 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004497 /* Caller must assure args are a tuple. Normally, args come from
4498 Pdata_poptuple which packs objects from the top of the stack
4499 into a newly created tuple. */
4500 assert(PyTuple_Check(args));
4501 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004502 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004503 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004504 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004505 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004506 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004507
4508 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004509 }
4510 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004511}
4512
4513static int
4514load_obj(UnpicklerObject *self)
4515{
4516 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004517 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004518
4519 if ((i = marker(self)) < 0)
4520 return -1;
4521
4522 args = Pdata_poptuple(self->stack, i + 1);
4523 if (args == NULL)
4524 return -1;
4525
4526 PDATA_POP(self->stack, cls);
4527 if (cls) {
4528 obj = instantiate(cls, args);
4529 Py_DECREF(cls);
4530 }
4531 Py_DECREF(args);
4532 if (obj == NULL)
4533 return -1;
4534
4535 PDATA_PUSH(self->stack, obj, -1);
4536 return 0;
4537}
4538
4539static int
4540load_inst(UnpicklerObject *self)
4541{
4542 PyObject *cls = NULL;
4543 PyObject *args = NULL;
4544 PyObject *obj = NULL;
4545 PyObject *module_name;
4546 PyObject *class_name;
4547 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004548 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004549 char *s;
4550
4551 if ((i = marker(self)) < 0)
4552 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004553 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004554 return -1;
4555 if (len < 2)
4556 return bad_readline();
4557
4558 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4559 identifiers are permitted in Python 3.0, since the INST opcode is only
4560 supported by older protocols on Python 2.x. */
4561 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4562 if (module_name == NULL)
4563 return -1;
4564
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004565 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004566 if (len < 2)
4567 return bad_readline();
4568 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004569 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004570 cls = find_class(self, module_name, class_name);
4571 Py_DECREF(class_name);
4572 }
4573 }
4574 Py_DECREF(module_name);
4575
4576 if (cls == NULL)
4577 return -1;
4578
4579 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4580 obj = instantiate(cls, args);
4581 Py_DECREF(args);
4582 }
4583 Py_DECREF(cls);
4584
4585 if (obj == NULL)
4586 return -1;
4587
4588 PDATA_PUSH(self->stack, obj, -1);
4589 return 0;
4590}
4591
4592static int
4593load_newobj(UnpicklerObject *self)
4594{
4595 PyObject *args = NULL;
4596 PyObject *clsraw = NULL;
4597 PyTypeObject *cls; /* clsraw cast to its true type */
4598 PyObject *obj;
4599
4600 /* Stack is ... cls argtuple, and we want to call
4601 * cls.__new__(cls, *argtuple).
4602 */
4603 PDATA_POP(self->stack, args);
4604 if (args == NULL)
4605 goto error;
4606 if (!PyTuple_Check(args)) {
4607 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4608 goto error;
4609 }
4610
4611 PDATA_POP(self->stack, clsraw);
4612 cls = (PyTypeObject *)clsraw;
4613 if (cls == NULL)
4614 goto error;
4615 if (!PyType_Check(cls)) {
4616 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4617 "isn't a type object");
4618 goto error;
4619 }
4620 if (cls->tp_new == NULL) {
4621 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4622 "has NULL tp_new");
4623 goto error;
4624 }
4625
4626 /* Call __new__. */
4627 obj = cls->tp_new(cls, args, NULL);
4628 if (obj == NULL)
4629 goto error;
4630
4631 Py_DECREF(args);
4632 Py_DECREF(clsraw);
4633 PDATA_PUSH(self->stack, obj, -1);
4634 return 0;
4635
4636 error:
4637 Py_XDECREF(args);
4638 Py_XDECREF(clsraw);
4639 return -1;
4640}
4641
4642static int
4643load_global(UnpicklerObject *self)
4644{
4645 PyObject *global = NULL;
4646 PyObject *module_name;
4647 PyObject *global_name;
4648 Py_ssize_t len;
4649 char *s;
4650
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004651 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004652 return -1;
4653 if (len < 2)
4654 return bad_readline();
4655 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4656 if (!module_name)
4657 return -1;
4658
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004659 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004660 if (len < 2) {
4661 Py_DECREF(module_name);
4662 return bad_readline();
4663 }
4664 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4665 if (global_name) {
4666 global = find_class(self, module_name, global_name);
4667 Py_DECREF(global_name);
4668 }
4669 }
4670 Py_DECREF(module_name);
4671
4672 if (global == NULL)
4673 return -1;
4674 PDATA_PUSH(self->stack, global, -1);
4675 return 0;
4676}
4677
4678static int
4679load_persid(UnpicklerObject *self)
4680{
4681 PyObject *pid;
4682 Py_ssize_t len;
4683 char *s;
4684
4685 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004686 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004687 return -1;
4688 if (len < 2)
4689 return bad_readline();
4690
4691 pid = PyBytes_FromStringAndSize(s, len - 1);
4692 if (pid == NULL)
4693 return -1;
4694
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004695 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004696 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004697 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004698 if (pid == NULL)
4699 return -1;
4700
4701 PDATA_PUSH(self->stack, pid, -1);
4702 return 0;
4703 }
4704 else {
4705 PyErr_SetString(UnpicklingError,
4706 "A load persistent id instruction was encountered,\n"
4707 "but no persistent_load function was specified.");
4708 return -1;
4709 }
4710}
4711
4712static int
4713load_binpersid(UnpicklerObject *self)
4714{
4715 PyObject *pid;
4716
4717 if (self->pers_func) {
4718 PDATA_POP(self->stack, pid);
4719 if (pid == NULL)
4720 return -1;
4721
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004722 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004723 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004724 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004725 if (pid == NULL)
4726 return -1;
4727
4728 PDATA_PUSH(self->stack, pid, -1);
4729 return 0;
4730 }
4731 else {
4732 PyErr_SetString(UnpicklingError,
4733 "A load persistent id instruction was encountered,\n"
4734 "but no persistent_load function was specified.");
4735 return -1;
4736 }
4737}
4738
4739static int
4740load_pop(UnpicklerObject *self)
4741{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004742 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004743
4744 /* Note that we split the (pickle.py) stack into two stacks,
4745 * an object stack and a mark stack. We have to be clever and
4746 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004747 * mark stack first, and only signalling a stack underflow if
4748 * the object stack is empty and the mark stack doesn't match
4749 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004750 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004751 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004753 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004754 len--;
4755 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004756 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004757 } else {
4758 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004760 return 0;
4761}
4762
4763static int
4764load_pop_mark(UnpicklerObject *self)
4765{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004766 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004767
4768 if ((i = marker(self)) < 0)
4769 return -1;
4770
4771 Pdata_clear(self->stack, i);
4772
4773 return 0;
4774}
4775
4776static int
4777load_dup(UnpicklerObject *self)
4778{
4779 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004780 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004781
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004782 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004783 return stack_underflow();
4784 last = self->stack->data[len - 1];
4785 PDATA_APPEND(self->stack, last, -1);
4786 return 0;
4787}
4788
4789static int
4790load_get(UnpicklerObject *self)
4791{
4792 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004793 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004794 Py_ssize_t len;
4795 char *s;
4796
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004797 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004798 return -1;
4799 if (len < 2)
4800 return bad_readline();
4801
4802 key = PyLong_FromString(s, NULL, 10);
4803 if (key == NULL)
4804 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004805 idx = PyLong_AsSsize_t(key);
4806 if (idx == -1 && PyErr_Occurred()) {
4807 Py_DECREF(key);
4808 return -1;
4809 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004810
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004811 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004812 if (value == NULL) {
4813 if (!PyErr_Occurred())
4814 PyErr_SetObject(PyExc_KeyError, key);
4815 Py_DECREF(key);
4816 return -1;
4817 }
4818 Py_DECREF(key);
4819
4820 PDATA_APPEND(self->stack, value, -1);
4821 return 0;
4822}
4823
4824static int
4825load_binget(UnpicklerObject *self)
4826{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004827 PyObject *value;
4828 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004829 char *s;
4830
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004831 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004832 return -1;
4833
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004834 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004836 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004837 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004838 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004839 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004840 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004841 Py_DECREF(key);
4842 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843 return -1;
4844 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004845
4846 PDATA_APPEND(self->stack, value, -1);
4847 return 0;
4848}
4849
4850static int
4851load_long_binget(UnpicklerObject *self)
4852{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004853 PyObject *value;
4854 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004855 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004856
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004857 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004858 return -1;
4859
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004860 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004863 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004864 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004865 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004866 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004867 Py_DECREF(key);
4868 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004869 return -1;
4870 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004871
4872 PDATA_APPEND(self->stack, value, -1);
4873 return 0;
4874}
4875
4876/* Push an object from the extension registry (EXT[124]). nbytes is
4877 * the number of bytes following the opcode, holding the index (code) value.
4878 */
4879static int
4880load_extension(UnpicklerObject *self, int nbytes)
4881{
4882 char *codebytes; /* the nbytes bytes after the opcode */
4883 long code; /* calc_binint returns long */
4884 PyObject *py_code; /* code as a Python int */
4885 PyObject *obj; /* the object to push */
4886 PyObject *pair; /* (module_name, class_name) */
4887 PyObject *module_name, *class_name;
4888
4889 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004890 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004891 return -1;
4892 code = calc_binint(codebytes, nbytes);
4893 if (code <= 0) { /* note that 0 is forbidden */
4894 /* Corrupt or hostile pickle. */
4895 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4896 return -1;
4897 }
4898
4899 /* Look for the code in the cache. */
4900 py_code = PyLong_FromLong(code);
4901 if (py_code == NULL)
4902 return -1;
4903 obj = PyDict_GetItem(extension_cache, py_code);
4904 if (obj != NULL) {
4905 /* Bingo. */
4906 Py_DECREF(py_code);
4907 PDATA_APPEND(self->stack, obj, -1);
4908 return 0;
4909 }
4910
4911 /* Look up the (module_name, class_name) pair. */
4912 pair = PyDict_GetItem(inverted_registry, py_code);
4913 if (pair == NULL) {
4914 Py_DECREF(py_code);
4915 PyErr_Format(PyExc_ValueError, "unregistered extension "
4916 "code %ld", code);
4917 return -1;
4918 }
4919 /* Since the extension registry is manipulable via Python code,
4920 * confirm that pair is really a 2-tuple of strings.
4921 */
4922 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4923 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4924 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4925 Py_DECREF(py_code);
4926 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4927 "isn't a 2-tuple of strings", code);
4928 return -1;
4929 }
4930 /* Load the object. */
4931 obj = find_class(self, module_name, class_name);
4932 if (obj == NULL) {
4933 Py_DECREF(py_code);
4934 return -1;
4935 }
4936 /* Cache code -> obj. */
4937 code = PyDict_SetItem(extension_cache, py_code, obj);
4938 Py_DECREF(py_code);
4939 if (code < 0) {
4940 Py_DECREF(obj);
4941 return -1;
4942 }
4943 PDATA_PUSH(self->stack, obj, -1);
4944 return 0;
4945}
4946
4947static int
4948load_put(UnpicklerObject *self)
4949{
4950 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004951 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004952 Py_ssize_t len;
4953 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004954
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004955 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004956 return -1;
4957 if (len < 2)
4958 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004959 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004961 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004962
4963 key = PyLong_FromString(s, NULL, 10);
4964 if (key == NULL)
4965 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004966 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004967 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004968 if (idx < 0) {
4969 if (!PyErr_Occurred())
4970 PyErr_SetString(PyExc_ValueError,
4971 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004972 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004973 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004974
4975 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004976}
4977
4978static int
4979load_binput(UnpicklerObject *self)
4980{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004981 PyObject *value;
4982 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004983 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004984
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004985 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004986 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004987
4988 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004990 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004991
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004992 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004993
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004994 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004995}
4996
4997static int
4998load_long_binput(UnpicklerObject *self)
4999{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005000 PyObject *value;
5001 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005002 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005003
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005004 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005005 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005006
5007 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005008 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005009 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005011 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005012 if (idx < 0) {
5013 PyErr_SetString(PyExc_ValueError,
5014 "negative LONG_BINPUT argument");
5015 return -1;
5016 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005017
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005018 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005019}
5020
5021static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005022do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005023{
5024 PyObject *value;
5025 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005026 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005027
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005028 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005029 if (x > len || x <= 0)
5030 return stack_underflow();
5031 if (len == x) /* nothing to do */
5032 return 0;
5033
5034 list = self->stack->data[x - 1];
5035
5036 if (PyList_Check(list)) {
5037 PyObject *slice;
5038 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005039 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005040
5041 slice = Pdata_poplist(self->stack, x);
5042 if (!slice)
5043 return -1;
5044 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005045 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005046 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005047 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005048 }
5049 else {
5050 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005051 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005052
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005053 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005054 if (append_func == NULL)
5055 return -1;
5056 for (i = x; i < len; i++) {
5057 PyObject *result;
5058
5059 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005060 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005061 if (result == NULL) {
5062 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005063 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005064 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005065 return -1;
5066 }
5067 Py_DECREF(result);
5068 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005069 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005070 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005071 }
5072
5073 return 0;
5074}
5075
5076static int
5077load_append(UnpicklerObject *self)
5078{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005079 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005080}
5081
5082static int
5083load_appends(UnpicklerObject *self)
5084{
5085 return do_append(self, marker(self));
5086}
5087
5088static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005089do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005090{
5091 PyObject *value, *key;
5092 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005093 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005094 int status = 0;
5095
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005096 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005097 if (x > len || x <= 0)
5098 return stack_underflow();
5099 if (len == x) /* nothing to do */
5100 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005101 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005102 /* Currupt or hostile pickle -- we never write one like this. */
5103 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5104 return -1;
5105 }
5106
5107 /* Here, dict does not actually need to be a PyDict; it could be anything
5108 that supports the __setitem__ attribute. */
5109 dict = self->stack->data[x - 1];
5110
5111 for (i = x + 1; i < len; i += 2) {
5112 key = self->stack->data[i - 1];
5113 value = self->stack->data[i];
5114 if (PyObject_SetItem(dict, key, value) < 0) {
5115 status = -1;
5116 break;
5117 }
5118 }
5119
5120 Pdata_clear(self->stack, x);
5121 return status;
5122}
5123
5124static int
5125load_setitem(UnpicklerObject *self)
5126{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005127 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005128}
5129
5130static int
5131load_setitems(UnpicklerObject *self)
5132{
5133 return do_setitems(self, marker(self));
5134}
5135
5136static int
5137load_build(UnpicklerObject *self)
5138{
5139 PyObject *state, *inst, *slotstate;
5140 PyObject *setstate;
5141 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005142 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005143
5144 /* Stack is ... instance, state. We want to leave instance at
5145 * the stack top, possibly mutated via instance.__setstate__(state).
5146 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005147 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005148 return stack_underflow();
5149
5150 PDATA_POP(self->stack, state);
5151 if (state == NULL)
5152 return -1;
5153
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005154 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005155
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005156 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005157 if (setstate == NULL) {
5158 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5159 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005160 else {
5161 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005162 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005163 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005164 }
5165 else {
5166 PyObject *result;
5167
5168 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005169 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005170 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005171 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005172 Py_DECREF(setstate);
5173 if (result == NULL)
5174 return -1;
5175 Py_DECREF(result);
5176 return 0;
5177 }
5178
5179 /* A default __setstate__. First see whether state embeds a
5180 * slot state dict too (a proto 2 addition).
5181 */
5182 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5183 PyObject *tmp = state;
5184
5185 state = PyTuple_GET_ITEM(tmp, 0);
5186 slotstate = PyTuple_GET_ITEM(tmp, 1);
5187 Py_INCREF(state);
5188 Py_INCREF(slotstate);
5189 Py_DECREF(tmp);
5190 }
5191 else
5192 slotstate = NULL;
5193
5194 /* Set inst.__dict__ from the state dict (if any). */
5195 if (state != Py_None) {
5196 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005197 PyObject *d_key, *d_value;
5198 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005199 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005200
5201 if (!PyDict_Check(state)) {
5202 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5203 goto error;
5204 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005205 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005206 if (dict == NULL)
5207 goto error;
5208
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005209 i = 0;
5210 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5211 /* normally the keys for instance attributes are
5212 interned. we should try to do that here. */
5213 Py_INCREF(d_key);
5214 if (PyUnicode_CheckExact(d_key))
5215 PyUnicode_InternInPlace(&d_key);
5216 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5217 Py_DECREF(d_key);
5218 goto error;
5219 }
5220 Py_DECREF(d_key);
5221 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005222 Py_DECREF(dict);
5223 }
5224
5225 /* Also set instance attributes from the slotstate dict (if any). */
5226 if (slotstate != NULL) {
5227 PyObject *d_key, *d_value;
5228 Py_ssize_t i;
5229
5230 if (!PyDict_Check(slotstate)) {
5231 PyErr_SetString(UnpicklingError,
5232 "slot state is not a dictionary");
5233 goto error;
5234 }
5235 i = 0;
5236 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5237 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5238 goto error;
5239 }
5240 }
5241
5242 if (0) {
5243 error:
5244 status = -1;
5245 }
5246
5247 Py_DECREF(state);
5248 Py_XDECREF(slotstate);
5249 return status;
5250}
5251
5252static int
5253load_mark(UnpicklerObject *self)
5254{
5255
5256 /* Note that we split the (pickle.py) stack into two stacks, an
5257 * object stack and a mark stack. Here we push a mark onto the
5258 * mark stack.
5259 */
5260
5261 if ((self->num_marks + 1) >= self->marks_size) {
5262 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005263 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005264
5265 /* Use the size_t type to check for overflow. */
5266 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005267 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005268 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005269 PyErr_NoMemory();
5270 return -1;
5271 }
5272
5273 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005274 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005275 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005276 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5277 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005278 if (marks == NULL) {
5279 PyErr_NoMemory();
5280 return -1;
5281 }
5282 self->marks = marks;
5283 self->marks_size = (Py_ssize_t)alloc;
5284 }
5285
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005286 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005287
5288 return 0;
5289}
5290
5291static int
5292load_reduce(UnpicklerObject *self)
5293{
5294 PyObject *callable = NULL;
5295 PyObject *argtup = NULL;
5296 PyObject *obj = NULL;
5297
5298 PDATA_POP(self->stack, argtup);
5299 if (argtup == NULL)
5300 return -1;
5301 PDATA_POP(self->stack, callable);
5302 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005303 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005304 Py_DECREF(callable);
5305 }
5306 Py_DECREF(argtup);
5307
5308 if (obj == NULL)
5309 return -1;
5310
5311 PDATA_PUSH(self->stack, obj, -1);
5312 return 0;
5313}
5314
5315/* Just raises an error if we don't know the protocol specified. PROTO
5316 * is the first opcode for protocols >= 2.
5317 */
5318static int
5319load_proto(UnpicklerObject *self)
5320{
5321 char *s;
5322 int i;
5323
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005324 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005325 return -1;
5326
5327 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005328 if (i <= HIGHEST_PROTOCOL) {
5329 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005330 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005331 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005332
5333 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5334 return -1;
5335}
5336
5337static PyObject *
5338load(UnpicklerObject *self)
5339{
5340 PyObject *err;
5341 PyObject *value = NULL;
5342 char *s;
5343
5344 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005345 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005346 Pdata_clear(self->stack, 0);
5347
5348 /* Convenient macros for the dispatch while-switch loop just below. */
5349#define OP(opcode, load_func) \
5350 case opcode: if (load_func(self) < 0) break; continue;
5351
5352#define OP_ARG(opcode, load_func, arg) \
5353 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5354
5355 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005356 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005357 break;
5358
5359 switch ((enum opcode)s[0]) {
5360 OP(NONE, load_none)
5361 OP(BININT, load_binint)
5362 OP(BININT1, load_binint1)
5363 OP(BININT2, load_binint2)
5364 OP(INT, load_int)
5365 OP(LONG, load_long)
5366 OP_ARG(LONG1, load_counted_long, 1)
5367 OP_ARG(LONG4, load_counted_long, 4)
5368 OP(FLOAT, load_float)
5369 OP(BINFLOAT, load_binfloat)
5370 OP(BINBYTES, load_binbytes)
5371 OP(SHORT_BINBYTES, load_short_binbytes)
5372 OP(BINSTRING, load_binstring)
5373 OP(SHORT_BINSTRING, load_short_binstring)
5374 OP(STRING, load_string)
5375 OP(UNICODE, load_unicode)
5376 OP(BINUNICODE, load_binunicode)
5377 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5378 OP_ARG(TUPLE1, load_counted_tuple, 1)
5379 OP_ARG(TUPLE2, load_counted_tuple, 2)
5380 OP_ARG(TUPLE3, load_counted_tuple, 3)
5381 OP(TUPLE, load_tuple)
5382 OP(EMPTY_LIST, load_empty_list)
5383 OP(LIST, load_list)
5384 OP(EMPTY_DICT, load_empty_dict)
5385 OP(DICT, load_dict)
5386 OP(OBJ, load_obj)
5387 OP(INST, load_inst)
5388 OP(NEWOBJ, load_newobj)
5389 OP(GLOBAL, load_global)
5390 OP(APPEND, load_append)
5391 OP(APPENDS, load_appends)
5392 OP(BUILD, load_build)
5393 OP(DUP, load_dup)
5394 OP(BINGET, load_binget)
5395 OP(LONG_BINGET, load_long_binget)
5396 OP(GET, load_get)
5397 OP(MARK, load_mark)
5398 OP(BINPUT, load_binput)
5399 OP(LONG_BINPUT, load_long_binput)
5400 OP(PUT, load_put)
5401 OP(POP, load_pop)
5402 OP(POP_MARK, load_pop_mark)
5403 OP(SETITEM, load_setitem)
5404 OP(SETITEMS, load_setitems)
5405 OP(PERSID, load_persid)
5406 OP(BINPERSID, load_binpersid)
5407 OP(REDUCE, load_reduce)
5408 OP(PROTO, load_proto)
5409 OP_ARG(EXT1, load_extension, 1)
5410 OP_ARG(EXT2, load_extension, 2)
5411 OP_ARG(EXT4, load_extension, 4)
5412 OP_ARG(NEWTRUE, load_bool, Py_True)
5413 OP_ARG(NEWFALSE, load_bool, Py_False)
5414
5415 case STOP:
5416 break;
5417
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005418 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005419 if (s[0] == '\0')
5420 PyErr_SetNone(PyExc_EOFError);
5421 else
5422 PyErr_Format(UnpicklingError,
5423 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005424 return NULL;
5425 }
5426
5427 break; /* and we are done! */
5428 }
5429
Antoine Pitrou04248a82010-10-12 20:51:21 +00005430 if (_Unpickler_SkipConsumed(self) < 0)
5431 return NULL;
5432
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005433 /* XXX: It is not clear what this is actually for. */
5434 if ((err = PyErr_Occurred())) {
5435 if (err == PyExc_EOFError) {
5436 PyErr_SetNone(PyExc_EOFError);
5437 }
5438 return NULL;
5439 }
5440
5441 PDATA_POP(self->stack, value);
5442 return value;
5443}
5444
5445PyDoc_STRVAR(Unpickler_load_doc,
5446"load() -> object. Load a pickle."
5447"\n"
5448"Read a pickled object representation from the open file object given in\n"
5449"the constructor, and return the reconstituted object hierarchy specified\n"
5450"therein.\n");
5451
5452static PyObject *
5453Unpickler_load(UnpicklerObject *self)
5454{
5455 /* Check whether the Unpickler was initialized correctly. This prevents
5456 segfaulting if a subclass overridden __init__ with a function that does
5457 not call Unpickler.__init__(). Here, we simply ensure that self->read
5458 is not NULL. */
5459 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005460 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005461 "Unpickler.__init__() was not called by %s.__init__()",
5462 Py_TYPE(self)->tp_name);
5463 return NULL;
5464 }
5465
5466 return load(self);
5467}
5468
5469/* The name of find_class() is misleading. In newer pickle protocols, this
5470 function is used for loading any global (i.e., functions), not just
5471 classes. The name is kept only for backward compatibility. */
5472
5473PyDoc_STRVAR(Unpickler_find_class_doc,
5474"find_class(module_name, global_name) -> object.\n"
5475"\n"
5476"Return an object from a specified module, importing the module if\n"
5477"necessary. Subclasses may override this method (e.g. to restrict\n"
5478"unpickling of arbitrary classes and functions).\n"
5479"\n"
5480"This method is called whenever a class or a function object is\n"
5481"needed. Both arguments passed are str objects.\n");
5482
5483static PyObject *
5484Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5485{
5486 PyObject *global;
5487 PyObject *modules_dict;
5488 PyObject *module;
5489 PyObject *module_name, *global_name;
5490
5491 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5492 &module_name, &global_name))
5493 return NULL;
5494
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005495 /* Try to map the old names used in Python 2.x to the new ones used in
5496 Python 3.x. We do this only with old pickle protocols and when the
5497 user has not disabled the feature. */
5498 if (self->proto < 3 && self->fix_imports) {
5499 PyObject *key;
5500 PyObject *item;
5501
5502 /* Check if the global (i.e., a function or a class) was renamed
5503 or moved to another module. */
5504 key = PyTuple_Pack(2, module_name, global_name);
5505 if (key == NULL)
5506 return NULL;
5507 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5508 Py_DECREF(key);
5509 if (item) {
5510 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5511 PyErr_Format(PyExc_RuntimeError,
5512 "_compat_pickle.NAME_MAPPING values should be "
5513 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5514 return NULL;
5515 }
5516 module_name = PyTuple_GET_ITEM(item, 0);
5517 global_name = PyTuple_GET_ITEM(item, 1);
5518 if (!PyUnicode_Check(module_name) ||
5519 !PyUnicode_Check(global_name)) {
5520 PyErr_Format(PyExc_RuntimeError,
5521 "_compat_pickle.NAME_MAPPING values should be "
5522 "pairs of str, not (%.200s, %.200s)",
5523 Py_TYPE(module_name)->tp_name,
5524 Py_TYPE(global_name)->tp_name);
5525 return NULL;
5526 }
5527 }
5528 else if (PyErr_Occurred()) {
5529 return NULL;
5530 }
5531
5532 /* Check if the module was renamed. */
5533 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5534 if (item) {
5535 if (!PyUnicode_Check(item)) {
5536 PyErr_Format(PyExc_RuntimeError,
5537 "_compat_pickle.IMPORT_MAPPING values should be "
5538 "strings, not %.200s", Py_TYPE(item)->tp_name);
5539 return NULL;
5540 }
5541 module_name = item;
5542 }
5543 else if (PyErr_Occurred()) {
5544 return NULL;
5545 }
5546 }
5547
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005548 modules_dict = PySys_GetObject("modules");
Victor Stinner1e53bba2013-07-16 22:26:05 +02005549 if (modules_dict == NULL) {
5550 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005551 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02005552 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005553
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005554 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005555 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005556 if (PyErr_Occurred())
5557 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005558 module = PyImport_Import(module_name);
5559 if (module == NULL)
5560 return NULL;
5561 global = PyObject_GetAttr(module, global_name);
5562 Py_DECREF(module);
5563 }
Victor Stinner121aab42011-09-29 23:40:53 +02005564 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005565 global = PyObject_GetAttr(module, global_name);
5566 }
5567 return global;
5568}
5569
5570static struct PyMethodDef Unpickler_methods[] = {
5571 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5572 Unpickler_load_doc},
5573 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5574 Unpickler_find_class_doc},
5575 {NULL, NULL} /* sentinel */
5576};
5577
5578static void
5579Unpickler_dealloc(UnpicklerObject *self)
5580{
5581 PyObject_GC_UnTrack((PyObject *)self);
5582 Py_XDECREF(self->readline);
5583 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005584 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005585 Py_XDECREF(self->stack);
5586 Py_XDECREF(self->pers_func);
5587 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005588 if (self->buffer.buf != NULL) {
5589 PyBuffer_Release(&self->buffer);
5590 self->buffer.buf = NULL;
5591 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005592
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005593 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005594 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005595 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005596 PyMem_Free(self->encoding);
5597 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005598
5599 Py_TYPE(self)->tp_free((PyObject *)self);
5600}
5601
5602static int
5603Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5604{
5605 Py_VISIT(self->readline);
5606 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005607 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005608 Py_VISIT(self->stack);
5609 Py_VISIT(self->pers_func);
5610 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005611 return 0;
5612}
5613
5614static int
5615Unpickler_clear(UnpicklerObject *self)
5616{
5617 Py_CLEAR(self->readline);
5618 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005619 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005620 Py_CLEAR(self->stack);
5621 Py_CLEAR(self->pers_func);
5622 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005623 if (self->buffer.buf != NULL) {
5624 PyBuffer_Release(&self->buffer);
5625 self->buffer.buf = NULL;
5626 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005627
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005628 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005629 PyMem_Free(self->marks);
5630 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005631 PyMem_Free(self->input_line);
5632 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005633 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005634 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005635 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005636 self->errors = NULL;
5637
5638 return 0;
5639}
5640
5641PyDoc_STRVAR(Unpickler_doc,
5642"Unpickler(file, *, encoding='ASCII', errors='strict')"
5643"\n"
5644"This takes a binary file for reading a pickle data stream.\n"
5645"\n"
5646"The protocol version of the pickle is detected automatically, so no\n"
5647"proto argument is needed.\n"
5648"\n"
5649"The file-like object must have two methods, a read() method\n"
5650"that takes an integer argument, and a readline() method that\n"
5651"requires no arguments. Both methods should return bytes.\n"
5652"Thus file-like object can be a binary file object opened for\n"
5653"reading, a BytesIO object, or any other custom object that\n"
5654"meets this interface.\n"
5655"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005656"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5657"which are used to control compatiblity support for pickle stream\n"
5658"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5659"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5660"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5661"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5662"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005663
5664static int
5665Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5666{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005667 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005668 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005669 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005670 char *encoding = NULL;
5671 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005672 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005673
5674 /* XXX: That is an horrible error message. But, I don't know how to do
5675 better... */
5676 if (Py_SIZE(args) != 1) {
5677 PyErr_Format(PyExc_TypeError,
5678 "%s takes exactly one positional argument (%zd given)",
5679 Py_TYPE(self)->tp_name, Py_SIZE(args));
5680 return -1;
5681 }
5682
5683 /* Arguments parsing needs to be done in the __init__() method to allow
5684 subclasses to define their own __init__() method, which may (or may
5685 not) support Unpickler arguments. However, this means we need to be
5686 extra careful in the other Unpickler methods, since a subclass could
5687 forget to call Unpickler.__init__() thus breaking our internal
5688 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005689 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005690 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005691 return -1;
5692
5693 /* In case of multiple __init__() calls, clear previous content. */
5694 if (self->read != NULL)
5695 (void)Unpickler_clear(self);
5696
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005697 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005698 return -1;
5699
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005700 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005701 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005702
5703 self->fix_imports = PyObject_IsTrue(fix_imports);
5704 if (self->fix_imports == -1)
5705 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005706
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005707 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005708 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5709 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005710 if (self->pers_func == NULL)
5711 return -1;
5712 }
5713 else {
5714 self->pers_func = NULL;
5715 }
5716
5717 self->stack = (Pdata *)Pdata_New();
5718 if (self->stack == NULL)
5719 return -1;
5720
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005721 self->memo_size = 32;
5722 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005723 if (self->memo == NULL)
5724 return -1;
5725
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005726 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005727 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005728
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005729 return 0;
5730}
5731
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005732/* Define a proxy object for the Unpickler's internal memo object. This is to
5733 * avoid breaking code like:
5734 * unpickler.memo.clear()
5735 * and
5736 * unpickler.memo = saved_memo
5737 * Is this a good idea? Not really, but we don't want to break code that uses
5738 * it. Note that we don't implement the entire mapping API here. This is
5739 * intentional, as these should be treated as black-box implementation details.
5740 *
5741 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005742 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005743 */
5744
5745typedef struct {
5746 PyObject_HEAD
5747 UnpicklerObject *unpickler;
5748} UnpicklerMemoProxyObject;
5749
5750PyDoc_STRVAR(ump_clear_doc,
5751"memo.clear() -> None. Remove all items from memo.");
5752
5753static PyObject *
5754ump_clear(UnpicklerMemoProxyObject *self)
5755{
5756 _Unpickler_MemoCleanup(self->unpickler);
5757 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5758 if (self->unpickler->memo == NULL)
5759 return NULL;
5760 Py_RETURN_NONE;
5761}
5762
5763PyDoc_STRVAR(ump_copy_doc,
5764"memo.copy() -> new_memo. Copy the memo to a new object.");
5765
5766static PyObject *
5767ump_copy(UnpicklerMemoProxyObject *self)
5768{
5769 Py_ssize_t i;
5770 PyObject *new_memo = PyDict_New();
5771 if (new_memo == NULL)
5772 return NULL;
5773
5774 for (i = 0; i < self->unpickler->memo_size; i++) {
5775 int status;
5776 PyObject *key, *value;
5777
5778 value = self->unpickler->memo[i];
5779 if (value == NULL)
5780 continue;
5781
5782 key = PyLong_FromSsize_t(i);
5783 if (key == NULL)
5784 goto error;
5785 status = PyDict_SetItem(new_memo, key, value);
5786 Py_DECREF(key);
5787 if (status < 0)
5788 goto error;
5789 }
5790 return new_memo;
5791
5792error:
5793 Py_DECREF(new_memo);
5794 return NULL;
5795}
5796
5797PyDoc_STRVAR(ump_reduce_doc,
5798"memo.__reduce__(). Pickling support.");
5799
5800static PyObject *
5801ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5802{
5803 PyObject *reduce_value;
5804 PyObject *constructor_args;
5805 PyObject *contents = ump_copy(self);
5806 if (contents == NULL)
5807 return NULL;
5808
5809 reduce_value = PyTuple_New(2);
5810 if (reduce_value == NULL) {
5811 Py_DECREF(contents);
5812 return NULL;
5813 }
5814 constructor_args = PyTuple_New(1);
5815 if (constructor_args == NULL) {
5816 Py_DECREF(contents);
5817 Py_DECREF(reduce_value);
5818 return NULL;
5819 }
5820 PyTuple_SET_ITEM(constructor_args, 0, contents);
5821 Py_INCREF((PyObject *)&PyDict_Type);
5822 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5823 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5824 return reduce_value;
5825}
5826
5827static PyMethodDef unpicklerproxy_methods[] = {
5828 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5829 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5830 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5831 {NULL, NULL} /* sentinel */
5832};
5833
5834static void
5835UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5836{
5837 PyObject_GC_UnTrack(self);
5838 Py_XDECREF(self->unpickler);
5839 PyObject_GC_Del((PyObject *)self);
5840}
5841
5842static int
5843UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5844 visitproc visit, void *arg)
5845{
5846 Py_VISIT(self->unpickler);
5847 return 0;
5848}
5849
5850static int
5851UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5852{
5853 Py_CLEAR(self->unpickler);
5854 return 0;
5855}
5856
5857static PyTypeObject UnpicklerMemoProxyType = {
5858 PyVarObject_HEAD_INIT(NULL, 0)
5859 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5860 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5861 0,
5862 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5863 0, /* tp_print */
5864 0, /* tp_getattr */
5865 0, /* tp_setattr */
5866 0, /* tp_compare */
5867 0, /* tp_repr */
5868 0, /* tp_as_number */
5869 0, /* tp_as_sequence */
5870 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005871 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005872 0, /* tp_call */
5873 0, /* tp_str */
5874 PyObject_GenericGetAttr, /* tp_getattro */
5875 PyObject_GenericSetAttr, /* tp_setattro */
5876 0, /* tp_as_buffer */
5877 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5878 0, /* tp_doc */
5879 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5880 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5881 0, /* tp_richcompare */
5882 0, /* tp_weaklistoffset */
5883 0, /* tp_iter */
5884 0, /* tp_iternext */
5885 unpicklerproxy_methods, /* tp_methods */
5886};
5887
5888static PyObject *
5889UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5890{
5891 UnpicklerMemoProxyObject *self;
5892
5893 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5894 &UnpicklerMemoProxyType);
5895 if (self == NULL)
5896 return NULL;
5897 Py_INCREF(unpickler);
5898 self->unpickler = unpickler;
5899 PyObject_GC_Track(self);
5900 return (PyObject *)self;
5901}
5902
5903/*****************************************************************************/
5904
5905
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005906static PyObject *
5907Unpickler_get_memo(UnpicklerObject *self)
5908{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005909 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005910}
5911
5912static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005913Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005914{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005915 PyObject **new_memo;
5916 Py_ssize_t new_memo_size = 0;
5917 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005918
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005919 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005920 PyErr_SetString(PyExc_TypeError,
5921 "attribute deletion is not supported");
5922 return -1;
5923 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005924
5925 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5926 UnpicklerObject *unpickler =
5927 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5928
5929 new_memo_size = unpickler->memo_size;
5930 new_memo = _Unpickler_NewMemo(new_memo_size);
5931 if (new_memo == NULL)
5932 return -1;
5933
5934 for (i = 0; i < new_memo_size; i++) {
5935 Py_XINCREF(unpickler->memo[i]);
5936 new_memo[i] = unpickler->memo[i];
5937 }
5938 }
5939 else if (PyDict_Check(obj)) {
5940 Py_ssize_t i = 0;
5941 PyObject *key, *value;
5942
5943 new_memo_size = PyDict_Size(obj);
5944 new_memo = _Unpickler_NewMemo(new_memo_size);
5945 if (new_memo == NULL)
5946 return -1;
5947
5948 while (PyDict_Next(obj, &i, &key, &value)) {
5949 Py_ssize_t idx;
5950 if (!PyLong_Check(key)) {
5951 PyErr_SetString(PyExc_TypeError,
5952 "memo key must be integers");
5953 goto error;
5954 }
5955 idx = PyLong_AsSsize_t(key);
5956 if (idx == -1 && PyErr_Occurred())
5957 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02005958 if (idx < 0) {
5959 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02005960 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02005961 goto error;
5962 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005963 if (_Unpickler_MemoPut(self, idx, value) < 0)
5964 goto error;
5965 }
5966 }
5967 else {
5968 PyErr_Format(PyExc_TypeError,
5969 "'memo' attribute must be an UnpicklerMemoProxy object"
5970 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005971 return -1;
5972 }
5973
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005974 _Unpickler_MemoCleanup(self);
5975 self->memo_size = new_memo_size;
5976 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005977
5978 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005979
5980 error:
5981 if (new_memo_size) {
5982 i = new_memo_size;
5983 while (--i >= 0) {
5984 Py_XDECREF(new_memo[i]);
5985 }
5986 PyMem_FREE(new_memo);
5987 }
5988 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005989}
5990
5991static PyObject *
5992Unpickler_get_persload(UnpicklerObject *self)
5993{
5994 if (self->pers_func == NULL)
5995 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5996 else
5997 Py_INCREF(self->pers_func);
5998 return self->pers_func;
5999}
6000
6001static int
6002Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6003{
6004 PyObject *tmp;
6005
6006 if (value == NULL) {
6007 PyErr_SetString(PyExc_TypeError,
6008 "attribute deletion is not supported");
6009 return -1;
6010 }
6011 if (!PyCallable_Check(value)) {
6012 PyErr_SetString(PyExc_TypeError,
6013 "persistent_load must be a callable taking "
6014 "one argument");
6015 return -1;
6016 }
6017
6018 tmp = self->pers_func;
6019 Py_INCREF(value);
6020 self->pers_func = value;
6021 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6022
6023 return 0;
6024}
6025
6026static PyGetSetDef Unpickler_getsets[] = {
6027 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6028 {"persistent_load", (getter)Unpickler_get_persload,
6029 (setter)Unpickler_set_persload},
6030 {NULL}
6031};
6032
6033static PyTypeObject Unpickler_Type = {
6034 PyVarObject_HEAD_INIT(NULL, 0)
6035 "_pickle.Unpickler", /*tp_name*/
6036 sizeof(UnpicklerObject), /*tp_basicsize*/
6037 0, /*tp_itemsize*/
6038 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6039 0, /*tp_print*/
6040 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006041 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006042 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006043 0, /*tp_repr*/
6044 0, /*tp_as_number*/
6045 0, /*tp_as_sequence*/
6046 0, /*tp_as_mapping*/
6047 0, /*tp_hash*/
6048 0, /*tp_call*/
6049 0, /*tp_str*/
6050 0, /*tp_getattro*/
6051 0, /*tp_setattro*/
6052 0, /*tp_as_buffer*/
6053 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6054 Unpickler_doc, /*tp_doc*/
6055 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6056 (inquiry)Unpickler_clear, /*tp_clear*/
6057 0, /*tp_richcompare*/
6058 0, /*tp_weaklistoffset*/
6059 0, /*tp_iter*/
6060 0, /*tp_iternext*/
6061 Unpickler_methods, /*tp_methods*/
6062 0, /*tp_members*/
6063 Unpickler_getsets, /*tp_getset*/
6064 0, /*tp_base*/
6065 0, /*tp_dict*/
6066 0, /*tp_descr_get*/
6067 0, /*tp_descr_set*/
6068 0, /*tp_dictoffset*/
6069 (initproc)Unpickler_init, /*tp_init*/
6070 PyType_GenericAlloc, /*tp_alloc*/
6071 PyType_GenericNew, /*tp_new*/
6072 PyObject_GC_Del, /*tp_free*/
6073 0, /*tp_is_gc*/
6074};
6075
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006076PyDoc_STRVAR(pickle_dump_doc,
6077"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6078"\n"
6079"Write a pickled representation of obj to the open file object file. This\n"
6080"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6081"efficient.\n"
6082"\n"
6083"The optional protocol argument tells the pickler to use the given protocol;\n"
6084"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6085"backward-incompatible protocol designed for Python 3.0.\n"
6086"\n"
6087"Specifying a negative protocol version selects the highest protocol version\n"
6088"supported. The higher the protocol used, the more recent the version of\n"
6089"Python needed to read the pickle produced.\n"
6090"\n"
6091"The file argument must have a write() method that accepts a single bytes\n"
6092"argument. It can thus be a file object opened for binary writing, a\n"
6093"io.BytesIO instance, or any other custom object that meets this interface.\n"
6094"\n"
6095"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6096"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6097"so that the pickle data stream is readable with Python 2.x.\n");
6098
6099static PyObject *
6100pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6101{
6102 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6103 PyObject *obj;
6104 PyObject *file;
6105 PyObject *proto = NULL;
6106 PyObject *fix_imports = Py_True;
6107 PicklerObject *pickler;
6108
6109 /* fix_imports is a keyword-only argument. */
6110 if (Py_SIZE(args) > 3) {
6111 PyErr_Format(PyExc_TypeError,
6112 "pickle.dump() takes at most 3 positional "
6113 "argument (%zd given)", Py_SIZE(args));
6114 return NULL;
6115 }
6116
6117 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6118 &obj, &file, &proto, &fix_imports))
6119 return NULL;
6120
6121 pickler = _Pickler_New();
6122 if (pickler == NULL)
6123 return NULL;
6124
6125 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6126 goto error;
6127
6128 if (_Pickler_SetOutputStream(pickler, file) < 0)
6129 goto error;
6130
6131 if (dump(pickler, obj) < 0)
6132 goto error;
6133
6134 if (_Pickler_FlushToFile(pickler) < 0)
6135 goto error;
6136
6137 Py_DECREF(pickler);
6138 Py_RETURN_NONE;
6139
6140 error:
6141 Py_XDECREF(pickler);
6142 return NULL;
6143}
6144
6145PyDoc_STRVAR(pickle_dumps_doc,
6146"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6147"\n"
6148"Return the pickled representation of the object as a bytes\n"
6149"object, instead of writing it to a file.\n"
6150"\n"
6151"The optional protocol argument tells the pickler to use the given protocol;\n"
6152"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6153"backward-incompatible protocol designed for Python 3.0.\n"
6154"\n"
6155"Specifying a negative protocol version selects the highest protocol version\n"
6156"supported. The higher the protocol used, the more recent the version of\n"
6157"Python needed to read the pickle produced.\n"
6158"\n"
6159"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6160"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6161"so that the pickle data stream is readable with Python 2.x.\n");
6162
6163static PyObject *
6164pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6165{
6166 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6167 PyObject *obj;
6168 PyObject *proto = NULL;
6169 PyObject *result;
6170 PyObject *fix_imports = Py_True;
6171 PicklerObject *pickler;
6172
6173 /* fix_imports is a keyword-only argument. */
6174 if (Py_SIZE(args) > 2) {
6175 PyErr_Format(PyExc_TypeError,
6176 "pickle.dumps() takes at most 2 positional "
6177 "argument (%zd given)", Py_SIZE(args));
6178 return NULL;
6179 }
6180
6181 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6182 &obj, &proto, &fix_imports))
6183 return NULL;
6184
6185 pickler = _Pickler_New();
6186 if (pickler == NULL)
6187 return NULL;
6188
6189 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6190 goto error;
6191
6192 if (dump(pickler, obj) < 0)
6193 goto error;
6194
6195 result = _Pickler_GetString(pickler);
6196 Py_DECREF(pickler);
6197 return result;
6198
6199 error:
6200 Py_XDECREF(pickler);
6201 return NULL;
6202}
6203
6204PyDoc_STRVAR(pickle_load_doc,
6205"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6206"\n"
6207"Read a pickled object representation from the open file object file and\n"
6208"return the reconstituted object hierarchy specified therein. This is\n"
6209"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6210"\n"
6211"The protocol version of the pickle is detected automatically, so no protocol\n"
6212"argument is needed. Bytes past the pickled object's representation are\n"
6213"ignored.\n"
6214"\n"
6215"The argument file must have two methods, a read() method that takes an\n"
6216"integer argument, and a readline() method that requires no arguments. Both\n"
6217"methods should return bytes. Thus *file* can be a binary file object opened\n"
6218"for reading, a BytesIO object, or any other custom object that meets this\n"
6219"interface.\n"
6220"\n"
6221"Optional keyword arguments are fix_imports, encoding and errors,\n"
6222"which are used to control compatiblity support for pickle stream generated\n"
6223"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6224"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6225"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6226"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6227
6228static PyObject *
6229pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6230{
6231 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6232 PyObject *file;
6233 PyObject *fix_imports = Py_True;
6234 PyObject *result;
6235 char *encoding = NULL;
6236 char *errors = NULL;
6237 UnpicklerObject *unpickler;
6238
6239 /* fix_imports, encoding and errors are a keyword-only argument. */
6240 if (Py_SIZE(args) != 1) {
6241 PyErr_Format(PyExc_TypeError,
6242 "pickle.load() takes exactly one positional "
6243 "argument (%zd given)", Py_SIZE(args));
6244 return NULL;
6245 }
6246
6247 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6248 &file, &fix_imports, &encoding, &errors))
6249 return NULL;
6250
6251 unpickler = _Unpickler_New();
6252 if (unpickler == NULL)
6253 return NULL;
6254
6255 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6256 goto error;
6257
6258 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6259 goto error;
6260
6261 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6262 if (unpickler->fix_imports == -1)
6263 goto error;
6264
6265 result = load(unpickler);
6266 Py_DECREF(unpickler);
6267 return result;
6268
6269 error:
6270 Py_XDECREF(unpickler);
6271 return NULL;
6272}
6273
6274PyDoc_STRVAR(pickle_loads_doc,
6275"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6276"\n"
6277"Read a pickled object hierarchy from a bytes object and return the\n"
6278"reconstituted object hierarchy specified therein\n"
6279"\n"
6280"The protocol version of the pickle is detected automatically, so no protocol\n"
6281"argument is needed. Bytes past the pickled object's representation are\n"
6282"ignored.\n"
6283"\n"
6284"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6285"are used to control compatiblity support for pickle stream generated\n"
6286"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6287"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6288"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6289"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6290
6291static PyObject *
6292pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6293{
6294 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6295 PyObject *input;
6296 PyObject *fix_imports = Py_True;
6297 PyObject *result;
6298 char *encoding = NULL;
6299 char *errors = NULL;
6300 UnpicklerObject *unpickler;
6301
6302 /* fix_imports, encoding and errors are a keyword-only argument. */
6303 if (Py_SIZE(args) != 1) {
6304 PyErr_Format(PyExc_TypeError,
6305 "pickle.loads() takes exactly one positional "
6306 "argument (%zd given)", Py_SIZE(args));
6307 return NULL;
6308 }
6309
6310 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6311 &input, &fix_imports, &encoding, &errors))
6312 return NULL;
6313
6314 unpickler = _Unpickler_New();
6315 if (unpickler == NULL)
6316 return NULL;
6317
6318 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6319 goto error;
6320
6321 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6322 goto error;
6323
6324 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6325 if (unpickler->fix_imports == -1)
6326 goto error;
6327
6328 result = load(unpickler);
6329 Py_DECREF(unpickler);
6330 return result;
6331
6332 error:
6333 Py_XDECREF(unpickler);
6334 return NULL;
6335}
6336
6337
6338static struct PyMethodDef pickle_methods[] = {
6339 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6340 pickle_dump_doc},
6341 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6342 pickle_dumps_doc},
6343 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6344 pickle_load_doc},
6345 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6346 pickle_loads_doc},
6347 {NULL, NULL} /* sentinel */
6348};
6349
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006350static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006351initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006352{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006353 PyObject *copyreg = NULL;
6354 PyObject *compat_pickle = NULL;
6355
6356 /* XXX: We should ensure that the types of the dictionaries imported are
6357 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6358 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006359
6360 copyreg = PyImport_ImportModule("copyreg");
6361 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006362 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006363 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6364 if (!dispatch_table)
6365 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006366 extension_registry = \
6367 PyObject_GetAttrString(copyreg, "_extension_registry");
6368 if (!extension_registry)
6369 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006370 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6371 if (!inverted_registry)
6372 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006373 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6374 if (!extension_cache)
6375 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006376 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006377
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006378 /* Load the 2.x -> 3.x stdlib module mapping tables */
6379 compat_pickle = PyImport_ImportModule("_compat_pickle");
6380 if (!compat_pickle)
6381 goto error;
6382 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6383 if (!name_mapping_2to3)
6384 goto error;
6385 if (!PyDict_CheckExact(name_mapping_2to3)) {
6386 PyErr_Format(PyExc_RuntimeError,
6387 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6388 Py_TYPE(name_mapping_2to3)->tp_name);
6389 goto error;
6390 }
6391 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6392 "IMPORT_MAPPING");
6393 if (!import_mapping_2to3)
6394 goto error;
6395 if (!PyDict_CheckExact(import_mapping_2to3)) {
6396 PyErr_Format(PyExc_RuntimeError,
6397 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6398 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6399 goto error;
6400 }
6401 /* ... and the 3.x -> 2.x mapping tables */
6402 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6403 "REVERSE_NAME_MAPPING");
6404 if (!name_mapping_3to2)
6405 goto error;
6406 if (!PyDict_CheckExact(name_mapping_3to2)) {
6407 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006408 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006409 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6410 goto error;
6411 }
6412 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6413 "REVERSE_IMPORT_MAPPING");
6414 if (!import_mapping_3to2)
6415 goto error;
6416 if (!PyDict_CheckExact(import_mapping_3to2)) {
6417 PyErr_Format(PyExc_RuntimeError,
6418 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6419 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6420 goto error;
6421 }
6422 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006423
6424 empty_tuple = PyTuple_New(0);
6425 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006426 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006427 two_tuple = PyTuple_New(2);
6428 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006429 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006430 /* We use this temp container with no regard to refcounts, or to
6431 * keeping containees alive. Exempt from GC, because we don't
6432 * want anything looking at two_tuple() by magic.
6433 */
6434 PyObject_GC_UnTrack(two_tuple);
6435
6436 return 0;
6437
6438 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006439 Py_CLEAR(copyreg);
6440 Py_CLEAR(dispatch_table);
6441 Py_CLEAR(extension_registry);
6442 Py_CLEAR(inverted_registry);
6443 Py_CLEAR(extension_cache);
6444 Py_CLEAR(compat_pickle);
6445 Py_CLEAR(name_mapping_2to3);
6446 Py_CLEAR(import_mapping_2to3);
6447 Py_CLEAR(name_mapping_3to2);
6448 Py_CLEAR(import_mapping_3to2);
6449 Py_CLEAR(empty_tuple);
6450 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006451 return -1;
6452}
6453
6454static struct PyModuleDef _picklemodule = {
6455 PyModuleDef_HEAD_INIT,
6456 "_pickle",
6457 pickle_module_doc,
6458 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006459 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006460 NULL,
6461 NULL,
6462 NULL,
6463 NULL
6464};
6465
6466PyMODINIT_FUNC
6467PyInit__pickle(void)
6468{
6469 PyObject *m;
6470
6471 if (PyType_Ready(&Unpickler_Type) < 0)
6472 return NULL;
6473 if (PyType_Ready(&Pickler_Type) < 0)
6474 return NULL;
6475 if (PyType_Ready(&Pdata_Type) < 0)
6476 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006477 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6478 return NULL;
6479 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6480 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006481
6482 /* Create the module and add the functions. */
6483 m = PyModule_Create(&_picklemodule);
6484 if (m == NULL)
6485 return NULL;
6486
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006487 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006488 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6489 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006490 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006491 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6492 return NULL;
6493
6494 /* Initialize the exceptions. */
6495 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6496 if (PickleError == NULL)
6497 return NULL;
6498 PicklingError = \
6499 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6500 if (PicklingError == NULL)
6501 return NULL;
6502 UnpicklingError = \
6503 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6504 if (UnpicklingError == NULL)
6505 return NULL;
6506
6507 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6508 return NULL;
6509 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6510 return NULL;
6511 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6512 return NULL;
6513
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006514 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006515 return NULL;
6516
6517 return m;
6518}