blob: a14a258fec7a26726ed176b05db3b73068b174d5 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this implementation.
   Bump these when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol that can be requested */
    DEFAULT_PROTOCOL = 3    /* used when the caller does not choose one */
};
12
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   These must be kept updated with pickle.py; extensive docs are in
   pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* TRUE and FALSE are not opcodes.  They are the byte sequences used to
 * pickle bools before protocol 2: unpicklers written before bools were
 * introduced read them back as ints, while later unpicklers can tell that
 * bools were intended.  Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE.
       Nothing will break if this gets out of synch with pickle.py, but
       it's unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): when the push fails the reference to O is not released
   here; confirm whether callers expect to keep or lose ownership. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference added here is dropped again (it used to be
   leaked) and the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100322 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000323 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000324
325 PyObject *write; /* write() method of the output stream. */
326 PyObject *output_buffer; /* Write into a local bytearray buffer before
327 flushing to the stream. */
328 Py_ssize_t output_len; /* Length of output_buffer. */
329 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000330 int proto; /* Pickle protocol number, >= 0 */
331 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200332 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000333 int fast; /* Enable fast mode if set to a true value.
334 The fast mode disable the usage of memo,
335 therefore speeding the pickling process by
336 not generating superfluous PUT opcodes. It
337 should not be used if with self-referential
338 objects. */
339 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000340 int fix_imports; /* Indicate whether Pickler should fix
341 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000342 PyObject *fast_memo;
343} PicklerObject;
344
345typedef struct UnpicklerObject {
346 PyObject_HEAD
347 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000348
349 /* The unpickler memo is just an array of PyObject *s. Using a dict
350 is unnecessary, since the keys are contiguous ints. */
351 PyObject **memo;
352 Py_ssize_t memo_size;
353
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000354 PyObject *arg;
355 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000356
357 Py_buffer buffer;
358 char *input_buffer;
359 char *input_line;
360 Py_ssize_t input_len;
361 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000362 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000363 PyObject *read; /* read() method of the input stream. */
364 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000365 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000366
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000367 char *encoding; /* Name of the encoding to be used for
368 decoding strings pickled using Python
369 2.x. The default value is "ASCII" */
370 char *errors; /* Name of errors handling scheme to used when
371 decoding strings. The default value is
372 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500373 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000374 objects. */
375 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
376 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000377 int proto; /* Protocol of the pickle loaded. */
378 int fix_imports; /* Indicate whether Unpickler should fix
379 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000380} UnpicklerObject;
381
382/* Forward declarations */
383static int save(PicklerObject *, PyObject *, int);
384static int save_reduce(PicklerObject *, PyObject *, PyObject *);
385static PyTypeObject Pickler_Type;
386static PyTypeObject Unpickler_Type;
387
388
/*************************************************************************
 A custom hashtable mapping void* to Python ints.  This is used by the
 pickler for memoization.  Using a custom hashtable rather than PyDict
 allows us to skip a bunch of unnecessary object creation.  This makes a
 huge performance difference. */

/* Number of slots in a freshly created table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value after each probe. */
#define PERTURB_SHIFT 5
397
398
399static PyMemoTable *
400PyMemoTable_New(void)
401{
402 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
403 if (memo == NULL) {
404 PyErr_NoMemory();
405 return NULL;
406 }
407
408 memo->mt_used = 0;
409 memo->mt_allocated = MT_MINSIZE;
410 memo->mt_mask = MT_MINSIZE - 1;
411 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
412 if (memo->mt_table == NULL) {
413 PyMem_FREE(memo);
414 PyErr_NoMemory();
415 return NULL;
416 }
417 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
418
419 return memo;
420}
421
422static PyMemoTable *
423PyMemoTable_Copy(PyMemoTable *self)
424{
425 Py_ssize_t i;
426 PyMemoTable *new = PyMemoTable_New();
427 if (new == NULL)
428 return NULL;
429
430 new->mt_used = self->mt_used;
431 new->mt_allocated = self->mt_allocated;
432 new->mt_mask = self->mt_mask;
433 /* The table we get from _New() is probably smaller than we wanted.
434 Free it and allocate one that's the right size. */
435 PyMem_FREE(new->mt_table);
436 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
437 if (new->mt_table == NULL) {
438 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200439 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000440 return NULL;
441 }
442 for (i = 0; i < self->mt_allocated; i++) {
443 Py_XINCREF(self->mt_table[i].me_key);
444 }
445 memcpy(new->mt_table, self->mt_table,
446 sizeof(PyMemoEntry) * self->mt_allocated);
447
448 return new;
449}
450
451static Py_ssize_t
452PyMemoTable_Size(PyMemoTable *self)
453{
454 return self->mt_used;
455}
456
457static int
458PyMemoTable_Clear(PyMemoTable *self)
459{
460 Py_ssize_t i = self->mt_allocated;
461
462 while (--i >= 0) {
463 Py_XDECREF(self->mt_table[i].me_key);
464 }
465 self->mt_used = 0;
466 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
467 return 0;
468}
469
470static void
471PyMemoTable_Del(PyMemoTable *self)
472{
473 if (self == NULL)
474 return;
475 PyMemoTable_Clear(self);
476
477 PyMem_FREE(self->mt_table);
478 PyMem_FREE(self);
479}
480
481/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
482 can be considerably simpler than dictobject.c's lookdict(). */
483static PyMemoEntry *
484_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
485{
486 size_t i;
487 size_t perturb;
488 size_t mask = (size_t)self->mt_mask;
489 PyMemoEntry *table = self->mt_table;
490 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000491 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000492
493 i = hash & mask;
494 entry = &table[i];
495 if (entry->me_key == NULL || entry->me_key == key)
496 return entry;
497
498 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
499 i = (i << 2) + i + perturb + 1;
500 entry = &table[i & mask];
501 if (entry->me_key == NULL || entry->me_key == key)
502 return entry;
503 }
504 assert(0); /* Never reached */
505 return NULL;
506}
507
508/* Returns -1 on failure, 0 on success. */
509static int
510_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
511{
512 PyMemoEntry *oldtable = NULL;
513 PyMemoEntry *oldentry, *newentry;
514 Py_ssize_t new_size = MT_MINSIZE;
515 Py_ssize_t to_process;
516
517 assert(min_size > 0);
518
519 /* Find the smallest valid table size >= min_size. */
520 while (new_size < min_size && new_size > 0)
521 new_size <<= 1;
522 if (new_size <= 0) {
523 PyErr_NoMemory();
524 return -1;
525 }
526 /* new_size needs to be a power of two. */
527 assert((new_size & (new_size - 1)) == 0);
528
529 /* Allocate new table. */
530 oldtable = self->mt_table;
531 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
532 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200533 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000534 PyErr_NoMemory();
535 return -1;
536 }
537 self->mt_allocated = new_size;
538 self->mt_mask = new_size - 1;
539 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
540
541 /* Copy entries from the old table. */
542 to_process = self->mt_used;
543 for (oldentry = oldtable; to_process > 0; oldentry++) {
544 if (oldentry->me_key != NULL) {
545 to_process--;
546 /* newentry is a pointer to a chunk of the new
547 mt_table, so we're setting the key:value pair
548 in-place. */
549 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
550 newentry->me_key = oldentry->me_key;
551 newentry->me_value = oldentry->me_value;
552 }
553 }
554
555 /* Deallocate the old table. */
556 PyMem_FREE(oldtable);
557 return 0;
558}
559
560/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200561static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000562PyMemoTable_Get(PyMemoTable *self, PyObject *key)
563{
564 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
565 if (entry->me_key == NULL)
566 return NULL;
567 return &entry->me_value;
568}
569
570/* Returns -1 on failure, 0 on success. */
571static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200572PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000573{
574 PyMemoEntry *entry;
575
576 assert(key != NULL);
577
578 entry = _PyMemoTable_Lookup(self, key);
579 if (entry->me_key != NULL) {
580 entry->me_value = value;
581 return 0;
582 }
583 Py_INCREF(key);
584 entry->me_key = key;
585 entry->me_value = value;
586 self->mt_used++;
587
588 /* If we added a key, we can safely resize. Otherwise just return!
589 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
590 *
591 * Quadrupling the size improves average table sparseness
592 * (reducing collisions) at the cost of some memory. It also halves
593 * the number of expensive resize operations in a growing memo table.
594 *
595 * Very large memo tables (over 50K items) use doubling instead.
596 * This may help applications with severe memory constraints.
597 */
598 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
599 return 0;
600 return _PyMemoTable_ResizeTable(self,
601 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
602}
603
/* The hashtable tuning macros are private to the code above. */
#undef PERTURB_SHIFT
#undef MT_MINSIZE
606
607/*************************************************************************/
608
/* Helpers for creating the argument tuple passed to functions.  This has
   the performance advantage of calling PyTuple_New() only once.

   ARG_TUP stores `obj` (taking over the caller's reference) as the only
   item of the cached 1-tuple self->arg, creating the tuple on first use and
   releasing any previous item.  If the tuple cannot be created, `obj` is
   released and self->arg stays NULL.

   FREE_ARG_TUP drops the cached tuple when something else still holds a
   reference to it, so that it is not modified behind the other holder's
   back on the next call.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

#define FREE_ARG_TUP(self) do {                 \
        if (Py_REFCNT((self)->arg) > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
628
629/* A temporary cleaner API for fast single argument function call.
630
631 XXX: Does caching the argument tuple provides any real performance benefits?
632
633 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
634 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
635 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
636 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
637 (i.e, call PyTuple_New() and store the returned value in an array), to save
638 one second (wall clock time). Either ways, the loading time a pickle stream
639 large enough to generate this number of calls would be massively
640 overwhelmed by other factors, like I/O throughput, the GC traversal and
641 object allocation overhead. So, I really doubt these functions provide any
642 real benefits.
643
644 On the other hand, oprofile reports that pickle spends a lot of time in
645 these functions. But, that is probably more related to the function call
646 overhead, than the argument tuple allocation.
647
648 XXX: And, what is the reference behavior of these? Steal, borrow? At first
649 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000652_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000653{
654 PyObject *result = NULL;
655
656 ARG_TUP(self, arg);
657 if (self->arg) {
658 result = PyObject_Call(func, self->arg, NULL);
659 FREE_ARG_TUP(self);
660 }
661 return result;
662}
663
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000664static int
665_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000666{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000667 Py_CLEAR(self->output_buffer);
668 self->output_buffer =
669 PyBytes_FromStringAndSize(NULL, self->max_output_len);
670 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000671 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000672 self->output_len = 0;
673 return 0;
674}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000676static PyObject *
677_Pickler_GetString(PicklerObject *self)
678{
679 PyObject *output_buffer = self->output_buffer;
680
681 assert(self->output_buffer != NULL);
682 self->output_buffer = NULL;
683 /* Resize down to exact size */
684 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
685 return NULL;
686 return output_buffer;
687}
688
689static int
690_Pickler_FlushToFile(PicklerObject *self)
691{
692 PyObject *output, *result;
693
694 assert(self->write != NULL);
695
696 output = _Pickler_GetString(self);
697 if (output == NULL)
698 return -1;
699
700 result = _Pickler_FastCall(self, self->write, output);
701 Py_XDECREF(result);
702 return (result == NULL) ? -1 : 0;
703}
704
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200705static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000706_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
707{
708 Py_ssize_t i, required;
709 char *buffer;
710
711 assert(s != NULL);
712
713 required = self->output_len + n;
714 if (required > self->max_output_len) {
715 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
716 /* XXX This reallocates a new buffer every time, which is a bit
717 wasteful. */
718 if (_Pickler_FlushToFile(self) < 0)
719 return -1;
720 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000721 return -1;
722 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000723 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
724 /* we already flushed above, so the buffer is empty */
725 PyObject *result;
726 /* XXX we could spare an intermediate copy and pass
727 a memoryview instead */
728 PyObject *output = PyBytes_FromStringAndSize(s, n);
729 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000730 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000731 result = _Pickler_FastCall(self, self->write, output);
732 Py_XDECREF(result);
733 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000734 }
735 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000736 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
737 PyErr_NoMemory();
738 return -1;
739 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200740 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000741 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
742 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000743 }
744 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000745 buffer = PyBytes_AS_STRING(self->output_buffer);
746 if (n < 8) {
747 /* This is faster than memcpy when the string is short. */
748 for (i = 0; i < n; i++) {
749 buffer[self->output_len + i] = s[i];
750 }
751 }
752 else {
753 memcpy(buffer + self->output_len, s, n);
754 }
755 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000756 return n;
757}
758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000759static PicklerObject *
760_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000763
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000764 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
765 if (self == NULL)
766 return NULL;
767
768 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100769 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000770 self->arg = NULL;
771 self->write = NULL;
772 self->proto = 0;
773 self->bin = 0;
774 self->fast = 0;
775 self->fast_nesting = 0;
776 self->fix_imports = 0;
777 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000778 self->max_output_len = WRITE_BUF_SIZE;
779 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200780
781 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000782 self->output_buffer = PyBytes_FromStringAndSize(NULL,
783 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200784
785 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200786 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200829 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000830 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200831 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 if (self->write == NULL) {
833 if (PyErr_ExceptionMatches(PyExc_AttributeError))
834 PyErr_SetString(PyExc_TypeError,
835 "file must have a 'write' attribute");
836 return -1;
837 }
838
839 return 0;
840}
841
842/* See documentation for _Pickler_FastCall(). */
843static PyObject *
844_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
845{
846 PyObject *result = NULL;
847
848 ARG_TUP(self, arg);
849 if (self->arg) {
850 result = PyObject_Call(func, self->arg, NULL);
851 FREE_ARG_TUP(self);
852 }
853 return result;
854}
855
856/* Returns the size of the input on success, -1 on failure. This takes its
857 own reference to `input`. */
858static Py_ssize_t
859_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
860{
861 if (self->buffer.buf != NULL)
862 PyBuffer_Release(&self->buffer);
863 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
864 return -1;
865 self->input_buffer = self->buffer.buf;
866 self->input_len = self->buffer.len;
867 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000868 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869 return self->input_len;
870}
871
Antoine Pitrou04248a82010-10-12 20:51:21 +0000872static int
873_Unpickler_SkipConsumed(UnpicklerObject *self)
874{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100875 Py_ssize_t consumed;
876 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000877
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100878 consumed = self->next_read_idx - self->prefetched_idx;
879 if (consumed <= 0)
880 return 0;
881
882 assert(self->peek); /* otherwise we did something wrong */
883 /* This makes an useless copy... */
884 r = PyObject_CallFunction(self->read, "n", consumed);
885 if (r == NULL)
886 return -1;
887 Py_DECREF(r);
888
889 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000890 return 0;
891}
892
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000893static const Py_ssize_t READ_WHOLE_LINE = -1;
894
895/* If reading from a file, we need to only pull the bytes we need, since there
896 may be multiple pickle objects arranged contiguously in the same input
897 buffer.
898
899 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
900 bytes from the input stream/buffer.
901
902 Update the unpickler's input buffer with the newly-read data. Returns -1 on
903 failure; on success, returns the number of bytes read from the file.
904
905 On success, self->input_len will be 0; this is intentional so that when
906 unpickling from a file, the "we've run out of data" code paths will trigger,
907 causing the Unpickler to go back to the file for more data. Use the returned
908 size to tell you how much data you can process. */
909static Py_ssize_t
910_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
911{
912 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000913 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000914
915 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200916
Antoine Pitrou04248a82010-10-12 20:51:21 +0000917 if (_Unpickler_SkipConsumed(self) < 0)
918 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000919
920 if (n == READ_WHOLE_LINE)
921 data = PyObject_Call(self->readline, empty_tuple, NULL);
922 else {
923 PyObject *len = PyLong_FromSsize_t(n);
924 if (len == NULL)
925 return -1;
926 data = _Unpickler_FastCall(self, self->read, len);
927 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000928 if (data == NULL)
929 return -1;
930
Antoine Pitrou04248a82010-10-12 20:51:21 +0000931 /* Prefetch some data without advancing the file pointer, if possible */
932 if (self->peek) {
933 PyObject *len, *prefetched;
934 len = PyLong_FromSsize_t(PREFETCH);
935 if (len == NULL) {
936 Py_DECREF(data);
937 return -1;
938 }
939 prefetched = _Unpickler_FastCall(self, self->peek, len);
940 if (prefetched == NULL) {
941 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
942 /* peek() is probably not supported by the given file object */
943 PyErr_Clear();
944 Py_CLEAR(self->peek);
945 }
946 else {
947 Py_DECREF(data);
948 return -1;
949 }
950 }
951 else {
952 assert(PyBytes_Check(prefetched));
953 prefetched_size = PyBytes_GET_SIZE(prefetched);
954 PyBytes_ConcatAndDel(&data, prefetched);
955 if (data == NULL)
956 return -1;
957 }
958 }
959
960 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000961 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000962 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000963 return read_size;
964}
965
966/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
967
968 This should be used for all data reads, rather than accessing the unpickler's
969 input buffer directly. This method deals correctly with reading from input
970 streams, which the input buffer doesn't deal with.
971
972 Note that when reading from a file-like object, self->next_read_idx won't
973 be updated (it should remain at 0 for the entire unpickling process). You
974 should use this function's return value to know how many bytes you can
975 consume.
976
977 Returns -1 (with an exception set) on failure. On success, return the
978 number of chars read. */
979static Py_ssize_t
980_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
981{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000982 Py_ssize_t num_read;
983
Antoine Pitrou04248a82010-10-12 20:51:21 +0000984 if (self->next_read_idx + n <= self->input_len) {
985 *s = self->input_buffer + self->next_read_idx;
986 self->next_read_idx += n;
987 return n;
988 }
989 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000990 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000991 return -1;
992 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000993 num_read = _Unpickler_ReadFromFile(self, n);
994 if (num_read < 0)
995 return -1;
996 if (num_read < n) {
997 PyErr_Format(PyExc_EOFError, "Ran out of input");
998 return -1;
999 }
1000 *s = self->input_buffer;
1001 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001002 return n;
1003}
1004
1005static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1007 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001010 if (input_line == NULL) {
1011 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001012 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001013 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001015 memcpy(input_line, line, len);
1016 input_line[len] = '\0';
1017 self->input_line = input_line;
1018 *result = self->input_line;
1019 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001020}
1021
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001022/* Read a line from the input stream/buffer. If we run off the end of the input
1023 before hitting \n, return the data we found.
1024
1025 Returns the number of chars read, or -1 on failure. */
1026static Py_ssize_t
1027_Unpickler_Readline(UnpicklerObject *self, char **result)
1028{
1029 Py_ssize_t i, num_read;
1030
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001031 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001032 if (self->input_buffer[i] == '\n') {
1033 char *line_start = self->input_buffer + self->next_read_idx;
1034 num_read = i - self->next_read_idx + 1;
1035 self->next_read_idx = i + 1;
1036 return _Unpickler_CopyLine(self, line_start, num_read, result);
1037 }
1038 }
1039 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001040 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1041 if (num_read < 0)
1042 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001043 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001044 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001045 }
Victor Stinner121aab42011-09-29 23:40:53 +02001046
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001047 /* If we get here, we've run off the end of the input string. Return the
1048 remaining string and let the caller figure it out. */
1049 *result = self->input_buffer + self->next_read_idx;
1050 num_read = i - self->next_read_idx;
1051 self->next_read_idx = i;
1052 return num_read;
1053}
1054
1055/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1056 will be modified in place. */
1057static int
1058_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1059{
1060 Py_ssize_t i;
1061 PyObject **memo;
1062
1063 assert(new_size > self->memo_size);
1064
1065 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1066 if (memo == NULL) {
1067 PyErr_NoMemory();
1068 return -1;
1069 }
1070 self->memo = memo;
1071 for (i = self->memo_size; i < new_size; i++)
1072 self->memo[i] = NULL;
1073 self->memo_size = new_size;
1074 return 0;
1075}
1076
1077/* Returns NULL if idx is out of bounds. */
1078static PyObject *
1079_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1080{
1081 if (idx < 0 || idx >= self->memo_size)
1082 return NULL;
1083
1084 return self->memo[idx];
1085}
1086
1087/* Returns -1 (with an exception set) on failure, 0 on success.
1088 This takes its own reference to `value`. */
1089static int
1090_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1091{
1092 PyObject *old_item;
1093
1094 if (idx >= self->memo_size) {
1095 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1096 return -1;
1097 assert(idx < self->memo_size);
1098 }
1099 Py_INCREF(value);
1100 old_item = self->memo[idx];
1101 self->memo[idx] = value;
1102 Py_XDECREF(old_item);
1103 return 0;
1104}
1105
1106static PyObject **
1107_Unpickler_NewMemo(Py_ssize_t new_size)
1108{
1109 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001110 if (memo == NULL) {
1111 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001112 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001113 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001114 memset(memo, 0, new_size * sizeof(PyObject *));
1115 return memo;
1116}
1117
1118/* Free the unpickler's memo, taking care to decref any items left in it. */
1119static void
1120_Unpickler_MemoCleanup(UnpicklerObject *self)
1121{
1122 Py_ssize_t i;
1123 PyObject **memo = self->memo;
1124
1125 if (self->memo == NULL)
1126 return;
1127 self->memo = NULL;
1128 i = self->memo_size;
1129 while (--i >= 0) {
1130 Py_XDECREF(memo[i]);
1131 }
1132 PyMem_FREE(memo);
1133}
1134
1135static UnpicklerObject *
1136_Unpickler_New(void)
1137{
1138 UnpicklerObject *self;
1139
1140 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1141 if (self == NULL)
1142 return NULL;
1143
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001144 self->arg = NULL;
1145 self->pers_func = NULL;
1146 self->input_buffer = NULL;
1147 self->input_line = NULL;
1148 self->input_len = 0;
1149 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001150 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001151 self->read = NULL;
1152 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001153 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001154 self->encoding = NULL;
1155 self->errors = NULL;
1156 self->marks = NULL;
1157 self->num_marks = 0;
1158 self->marks_size = 0;
1159 self->proto = 0;
1160 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001161 memset(&self->buffer, 0, sizeof(Py_buffer));
1162 self->memo_size = 32;
1163 self->memo = _Unpickler_NewMemo(self->memo_size);
1164 self->stack = (Pdata *)Pdata_New();
1165
1166 if (self->memo == NULL || self->stack == NULL) {
1167 Py_DECREF(self);
1168 return NULL;
1169 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001170
1171 return self;
1172}
1173
1174/* Returns -1 (with an exception set) on failure, 0 on success. This may
1175 be called once on a freshly created Pickler. */
1176static int
1177_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1178{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001179 _Py_IDENTIFIER(peek);
1180 _Py_IDENTIFIER(read);
1181 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001182
1183 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001184 if (self->peek == NULL) {
1185 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1186 PyErr_Clear();
1187 else
1188 return -1;
1189 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001190 self->read = _PyObject_GetAttrId(file, &PyId_read);
1191 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 if (self->readline == NULL || self->read == NULL) {
1193 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1194 PyErr_SetString(PyExc_TypeError,
1195 "file must have 'read' and 'readline' attributes");
1196 Py_CLEAR(self->read);
1197 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001198 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001199 return -1;
1200 }
1201 return 0;
1202}
1203
1204/* Returns -1 (with an exception set) on failure, 0 on success. This may
1205 be called once on a freshly created Pickler. */
1206static int
1207_Unpickler_SetInputEncoding(UnpicklerObject *self,
1208 const char *encoding,
1209 const char *errors)
1210{
1211 if (encoding == NULL)
1212 encoding = "ASCII";
1213 if (errors == NULL)
1214 errors = "strict";
1215
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001216 self->encoding = _PyMem_Strdup(encoding);
1217 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001218 if (self->encoding == NULL || self->errors == NULL) {
1219 PyErr_NoMemory();
1220 return -1;
1221 }
1222 return 0;
1223}
1224
1225/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001226static int
1227memo_get(PicklerObject *self, PyObject *key)
1228{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001229 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001230 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001231 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001233 value = PyMemoTable_Get(self->memo, key);
1234 if (value == NULL) {
1235 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001236 return -1;
1237 }
1238
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001239 if (!self->bin) {
1240 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001241 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1242 "%" PY_FORMAT_SIZE_T "d\n", *value);
1243 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001244 }
1245 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001247 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001248 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001249 len = 2;
1250 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001251 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001252 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001253 pdata[1] = (unsigned char)(*value & 0xff);
1254 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1255 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1256 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001257 len = 5;
1258 }
1259 else { /* unlikely */
1260 PyErr_SetString(PicklingError,
1261 "memo id too large for LONG_BINGET");
1262 return -1;
1263 }
1264 }
1265
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001266 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001267 return -1;
1268
1269 return 0;
1270}
1271
1272/* Store an object in the memo, assign it a new unique ID based on the number
1273 of objects currently stored in the memo and generate a PUT opcode. */
1274static int
1275memo_put(PicklerObject *self, PyObject *obj)
1276{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001277 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001278 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001279 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 int status = 0;
1281
1282 if (self->fast)
1283 return 0;
1284
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001285 x = PyMemoTable_Size(self->memo);
1286 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001287 goto error;
1288
1289 if (!self->bin) {
1290 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001291 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1292 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001293 len = strlen(pdata);
1294 }
1295 else {
1296 if (x < 256) {
1297 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001298 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001299 len = 2;
1300 }
1301 else if (x <= 0xffffffffL) {
1302 pdata[0] = LONG_BINPUT;
1303 pdata[1] = (unsigned char)(x & 0xff);
1304 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1305 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1306 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1307 len = 5;
1308 }
1309 else { /* unlikely */
1310 PyErr_SetString(PicklingError,
1311 "memo id too large for LONG_BINPUT");
1312 return -1;
1313 }
1314 }
1315
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001316 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 goto error;
1318
1319 if (0) {
1320 error:
1321 status = -1;
1322 }
1323
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001324 return status;
1325}
1326
1327static PyObject *
1328whichmodule(PyObject *global, PyObject *global_name)
1329{
1330 Py_ssize_t i, j;
1331 static PyObject *module_str = NULL;
1332 static PyObject *main_str = NULL;
1333 PyObject *module_name;
1334 PyObject *modules_dict;
1335 PyObject *module;
1336 PyObject *obj;
1337
1338 if (module_str == NULL) {
1339 module_str = PyUnicode_InternFromString("__module__");
1340 if (module_str == NULL)
1341 return NULL;
1342 main_str = PyUnicode_InternFromString("__main__");
1343 if (main_str == NULL)
1344 return NULL;
1345 }
1346
1347 module_name = PyObject_GetAttr(global, module_str);
1348
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001349 /* In some rare cases (e.g., bound methods of extension types),
1350 __module__ can be None. If it is so, then search sys.modules
1351 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001352 if (module_name == Py_None) {
1353 Py_DECREF(module_name);
1354 goto search;
1355 }
1356
1357 if (module_name) {
1358 return module_name;
1359 }
1360 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1361 PyErr_Clear();
1362 else
1363 return NULL;
1364
1365 search:
1366 modules_dict = PySys_GetObject("modules");
Victor Stinner1e53bba2013-07-16 22:26:05 +02001367 if (modules_dict == NULL) {
1368 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001369 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001370 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001371
1372 i = 0;
1373 module_name = NULL;
1374 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001375 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001376 continue;
1377
1378 obj = PyObject_GetAttr(module, global_name);
1379 if (obj == NULL) {
1380 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1381 PyErr_Clear();
1382 else
1383 return NULL;
1384 continue;
1385 }
1386
1387 if (obj != global) {
1388 Py_DECREF(obj);
1389 continue;
1390 }
1391
1392 Py_DECREF(obj);
1393 break;
1394 }
1395
1396 /* If no module is found, use __main__. */
1397 if (!j) {
1398 module_name = main_str;
1399 }
1400
1401 Py_INCREF(module_name);
1402 return module_name;
1403}
1404
1405/* fast_save_enter() and fast_save_leave() are guards against recursive
1406 objects when Pickler is used with the "fast mode" (i.e., with object
1407 memoization disabled). If the nesting of a list or dict object exceed
1408 FAST_NESTING_LIMIT, these guards will start keeping an internal
1409 reference to the seen list or dict objects and check whether these objects
1410 are recursive. These are not strictly necessary, since save() has a
1411 hard-coded recursion limit, but they give a nicer error message than the
1412 typical RuntimeError. */
1413static int
1414fast_save_enter(PicklerObject *self, PyObject *obj)
1415{
1416 /* if fast_nesting < 0, we're doing an error exit. */
1417 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1418 PyObject *key = NULL;
1419 if (self->fast_memo == NULL) {
1420 self->fast_memo = PyDict_New();
1421 if (self->fast_memo == NULL) {
1422 self->fast_nesting = -1;
1423 return 0;
1424 }
1425 }
1426 key = PyLong_FromVoidPtr(obj);
1427 if (key == NULL)
1428 return 0;
1429 if (PyDict_GetItem(self->fast_memo, key)) {
1430 Py_DECREF(key);
1431 PyErr_Format(PyExc_ValueError,
1432 "fast mode: can't pickle cyclic objects "
1433 "including object type %.200s at %p",
1434 obj->ob_type->tp_name, obj);
1435 self->fast_nesting = -1;
1436 return 0;
1437 }
1438 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1439 Py_DECREF(key);
1440 self->fast_nesting = -1;
1441 return 0;
1442 }
1443 Py_DECREF(key);
1444 }
1445 return 1;
1446}
1447
1448static int
1449fast_save_leave(PicklerObject *self, PyObject *obj)
1450{
1451 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1452 PyObject *key = PyLong_FromVoidPtr(obj);
1453 if (key == NULL)
1454 return 0;
1455 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1456 Py_DECREF(key);
1457 return 0;
1458 }
1459 Py_DECREF(key);
1460 }
1461 return 1;
1462}
1463
1464static int
1465save_none(PicklerObject *self, PyObject *obj)
1466{
1467 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001468 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001469 return -1;
1470
1471 return 0;
1472}
1473
1474static int
1475save_bool(PicklerObject *self, PyObject *obj)
1476{
1477 static const char *buf[2] = { FALSE, TRUE };
1478 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1479 int p = (obj == Py_True);
1480
1481 if (self->proto >= 2) {
1482 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001483 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001484 return -1;
1485 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001486 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001487 return -1;
1488
1489 return 0;
1490}
1491
1492static int
1493save_int(PicklerObject *self, long x)
1494{
1495 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001496 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001497
1498 if (!self->bin
1499#if SIZEOF_LONG > 4
1500 || x > 0x7fffffffL || x < -0x80000000L
1501#endif
1502 ) {
1503 /* Text-mode pickle, or long too big to fit in the 4-byte
1504 * signed BININT format: store as a string.
1505 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001506 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1507 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001508 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001509 return -1;
1510 }
1511 else {
1512 /* Binary pickle and x fits in a signed 4-byte int. */
1513 pdata[1] = (unsigned char)(x & 0xff);
1514 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1515 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1516 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1517
1518 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1519 if (pdata[2] == 0) {
1520 pdata[0] = BININT1;
1521 len = 2;
1522 }
1523 else {
1524 pdata[0] = BININT2;
1525 len = 3;
1526 }
1527 }
1528 else {
1529 pdata[0] = BININT;
1530 len = 5;
1531 }
1532
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001533 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001534 return -1;
1535 }
1536
1537 return 0;
1538}
1539
1540static int
1541save_long(PicklerObject *self, PyObject *obj)
1542{
1543 PyObject *repr = NULL;
1544 Py_ssize_t size;
1545 long val = PyLong_AsLong(obj);
1546 int status = 0;
1547
1548 const char long_op = LONG;
1549
1550 if (val == -1 && PyErr_Occurred()) {
1551 /* out of range for int pickling */
1552 PyErr_Clear();
1553 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001554 else
1555#if SIZEOF_LONG > 4
1556 if (val <= 0x7fffffffL && val >= -0x80000000L)
1557#endif
1558 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001559
1560 if (self->proto >= 2) {
1561 /* Linear-time pickling. */
1562 size_t nbits;
1563 size_t nbytes;
1564 unsigned char *pdata;
1565 char header[5];
1566 int i;
1567 int sign = _PyLong_Sign(obj);
1568
1569 if (sign == 0) {
1570 header[0] = LONG1;
1571 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001572 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001573 goto error;
1574 return 0;
1575 }
1576 nbits = _PyLong_NumBits(obj);
1577 if (nbits == (size_t)-1 && PyErr_Occurred())
1578 goto error;
1579 /* How many bytes do we need? There are nbits >> 3 full
1580 * bytes of data, and nbits & 7 leftover bits. If there
1581 * are any leftover bits, then we clearly need another
1582 * byte. Wnat's not so obvious is that we *probably*
1583 * need another byte even if there aren't any leftovers:
1584 * the most-significant bit of the most-significant byte
1585 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001586 * opposite of the one we need. The exception is ints
1587 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001588 * its own 256's-complement, so has the right sign bit
1589 * even without the extra byte. That's a pain to check
1590 * for in advance, though, so we always grab an extra
1591 * byte at the start, and cut it back later if possible.
1592 */
1593 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001594 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001595 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001596 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001597 goto error;
1598 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001599 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001600 if (repr == NULL)
1601 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001602 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001603 i = _PyLong_AsByteArray((PyLongObject *)obj,
1604 pdata, nbytes,
1605 1 /* little endian */ , 1 /* signed */ );
1606 if (i < 0)
1607 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001608 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001609 * needed. This is so iff the MSB is all redundant sign
1610 * bits.
1611 */
1612 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001613 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001614 pdata[nbytes - 1] == 0xff &&
1615 (pdata[nbytes - 2] & 0x80) != 0) {
1616 nbytes--;
1617 }
1618
1619 if (nbytes < 256) {
1620 header[0] = LONG1;
1621 header[1] = (unsigned char)nbytes;
1622 size = 2;
1623 }
1624 else {
1625 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001626 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001627 for (i = 1; i < 5; i++) {
1628 header[i] = (unsigned char)(size & 0xff);
1629 size >>= 8;
1630 }
1631 size = 5;
1632 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001633 if (_Pickler_Write(self, header, size) < 0 ||
1634 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001635 goto error;
1636 }
1637 else {
1638 char *string;
1639
Mark Dickinson8dd05142009-01-20 20:43:58 +00001640 /* proto < 2: write the repr and newline. This is quadratic-time (in
1641 the number of digits), in both directions. We add a trailing 'L'
1642 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001643
1644 repr = PyObject_Repr(obj);
1645 if (repr == NULL)
1646 goto error;
1647
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001648 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001649 if (string == NULL)
1650 goto error;
1651
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001652 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1653 _Pickler_Write(self, string, size) < 0 ||
1654 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001655 goto error;
1656 }
1657
1658 if (0) {
1659 error:
1660 status = -1;
1661 }
1662 Py_XDECREF(repr);
1663
1664 return status;
1665}
1666
1667static int
1668save_float(PicklerObject *self, PyObject *obj)
1669{
1670 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1671
1672 if (self->bin) {
1673 char pdata[9];
1674 pdata[0] = BINFLOAT;
1675 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1676 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001677 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001678 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001679 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001680 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001681 int result = -1;
1682 char *buf = NULL;
1683 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
Mark Dickinson3e09f432009-04-17 08:41:23 +00001688 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 if (!buf) {
1690 PyErr_NoMemory();
1691 goto done;
1692 }
1693
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001694 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001695 goto done;
1696
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001697 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001698 goto done;
1699
1700 result = 0;
1701done:
1702 PyMem_Free(buf);
1703 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001704 }
1705
1706 return 0;
1707}
1708
1709static int
1710save_bytes(PicklerObject *self, PyObject *obj)
1711{
1712 if (self->proto < 3) {
1713 /* Older pickle protocols do not have an opcode for pickling bytes
1714 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001715 the __reduce__ method) to permit bytes object unpickling.
1716
1717 Here we use a hack to be compatible with Python 2. Since in Python
1718 2 'bytes' is just an alias for 'str' (which has different
1719 parameters than the actual bytes object), we use codecs.encode
1720 to create the appropriate 'str' object when unpickled using
1721 Python 2 *and* the appropriate 'bytes' object when unpickled
1722 using Python 3. Again this is a hack and we don't need to do this
1723 with newer protocols. */
1724 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001725 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001726 int status;
1727
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001728 if (codecs_encode == NULL) {
1729 PyObject *codecs_module = PyImport_ImportModule("codecs");
1730 if (codecs_module == NULL) {
1731 return -1;
1732 }
1733 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1734 Py_DECREF(codecs_module);
1735 if (codecs_encode == NULL) {
1736 return -1;
1737 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001738 }
1739
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001740 if (PyBytes_GET_SIZE(obj) == 0) {
1741 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1742 }
1743 else {
1744 static PyObject *latin1 = NULL;
1745 PyObject *unicode_str =
1746 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1747 PyBytes_GET_SIZE(obj),
1748 "strict");
1749 if (unicode_str == NULL)
1750 return -1;
1751 if (latin1 == NULL) {
1752 latin1 = PyUnicode_InternFromString("latin1");
Christian Heimes82e6b942013-06-29 21:37:34 +02001753 if (latin1 == NULL) {
1754 Py_DECREF(unicode_str);
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001755 return -1;
Christian Heimes82e6b942013-06-29 21:37:34 +02001756 }
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001757 }
1758 reduce_value = Py_BuildValue("(O(OO))",
1759 codecs_encode, unicode_str, latin1);
1760 Py_DECREF(unicode_str);
1761 }
1762
1763 if (reduce_value == NULL)
1764 return -1;
1765
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001766 /* save_reduce() will memoize the object automatically. */
1767 status = save_reduce(self, reduce_value, obj);
1768 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001769 return status;
1770 }
1771 else {
1772 Py_ssize_t size;
1773 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001774 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001775
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001776 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001777 if (size < 0)
1778 return -1;
1779
1780 if (size < 256) {
1781 header[0] = SHORT_BINBYTES;
1782 header[1] = (unsigned char)size;
1783 len = 2;
1784 }
1785 else if (size <= 0xffffffffL) {
1786 header[0] = BINBYTES;
1787 header[1] = (unsigned char)(size & 0xff);
1788 header[2] = (unsigned char)((size >> 8) & 0xff);
1789 header[3] = (unsigned char)((size >> 16) & 0xff);
1790 header[4] = (unsigned char)((size >> 24) & 0xff);
1791 len = 5;
1792 }
1793 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001794 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001795 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001796 return -1; /* string too large */
1797 }
1798
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001799 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001800 return -1;
1801
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001802 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001803 return -1;
1804
1805 if (memo_put(self, obj) < 0)
1806 return -1;
1807
1808 return 0;
1809 }
1810}
1811
1812/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1813 backslash and newline characters to \uXXXX escapes. */
1814static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001815raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001816{
1817 PyObject *repr, *result;
1818 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001819 Py_ssize_t i, size, expandsize;
1820 void *data;
1821 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001822
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001823 if (PyUnicode_READY(obj))
1824 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001825
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001826 size = PyUnicode_GET_LENGTH(obj);
1827 data = PyUnicode_DATA(obj);
1828 kind = PyUnicode_KIND(obj);
1829 if (kind == PyUnicode_4BYTE_KIND)
1830 expandsize = 10;
1831 else
1832 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001833
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001834 if (size > PY_SSIZE_T_MAX / expandsize)
1835 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001836 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001837 if (repr == NULL)
1838 return NULL;
1839 if (size == 0)
1840 goto done;
1841
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001842 p = PyByteArray_AS_STRING(repr);
1843 for (i=0; i < size; i++) {
1844 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001845 /* Map 32-bit characters to '\Uxxxxxxxx' */
1846 if (ch >= 0x10000) {
1847 *p++ = '\\';
1848 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001849 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1850 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1851 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1852 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1853 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1854 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1855 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1856 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001857 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001858 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001859 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001860 *p++ = '\\';
1861 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001862 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1863 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1864 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1865 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001866 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001867 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001868 else
1869 *p++ = (char) ch;
1870 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001871 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001872
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001873done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001874 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001875 Py_DECREF(repr);
1876 return result;
1877}
1878
1879static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02001880write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
1881{
1882 char pdata[5];
1883
1884#if SIZEOF_SIZE_T > 4
1885 if (size > 0xffffffffUL) {
1886 /* string too large */
1887 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02001888 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02001889 return -1;
1890 }
1891#endif
1892
1893 pdata[0] = BINUNICODE;
1894 pdata[1] = (unsigned char)(size & 0xff);
1895 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1896 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1897 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1898
1899 if (_Pickler_Write(self, pdata, sizeof(pdata)) < 0)
1900 return -1;
1901
1902 if (_Pickler_Write(self, data, size) < 0)
1903 return -1;
1904
1905 return 0;
1906}
1907
1908static int
1909write_unicode_binary(PicklerObject *self, PyObject *obj)
1910{
1911 PyObject *encoded = NULL;
1912 Py_ssize_t size;
1913 char *data;
1914 int r;
1915
1916 if (PyUnicode_READY(obj))
1917 return -1;
1918
1919 data = PyUnicode_AsUTF8AndSize(obj, &size);
1920 if (data != NULL)
1921 return write_utf8(self, data, size);
1922
1923 /* Issue #8383: for strings with lone surrogates, fallback on the
1924 "surrogatepass" error handler. */
1925 PyErr_Clear();
1926 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
1927 if (encoded == NULL)
1928 return -1;
1929
1930 r = write_utf8(self, PyBytes_AS_STRING(encoded),
1931 PyBytes_GET_SIZE(encoded));
1932 Py_DECREF(encoded);
1933 return r;
1934}
1935
1936static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001937save_unicode(PicklerObject *self, PyObject *obj)
1938{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001939 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001940 if (write_unicode_binary(self, obj) < 0)
1941 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001942 }
1943 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02001944 PyObject *encoded;
1945 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001946 const char unicode_op = UNICODE;
1947
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001948 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001949 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001950 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001951
Antoine Pitrou299978d2013-04-07 17:38:11 +02001952 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
1953 Py_DECREF(encoded);
1954 return -1;
1955 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001956
1957 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02001958 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
1959 Py_DECREF(encoded);
1960 return -1;
1961 }
1962 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001963
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001964 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001965 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001966 }
1967 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02001968 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001970 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001971}
1972
1973/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1974static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001975store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001976{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001977 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978
1979 assert(PyTuple_Size(t) == len);
1980
1981 for (i = 0; i < len; i++) {
1982 PyObject *element = PyTuple_GET_ITEM(t, i);
1983
1984 if (element == NULL)
1985 return -1;
1986 if (save(self, element, 0) < 0)
1987 return -1;
1988 }
1989
1990 return 0;
1991}
1992
1993/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1994 * used across protocols to minimize the space needed to pickle them.
1995 * Tuples are also the only builtin immutable type that can be recursive
1996 * (a tuple can be reached from itself), and that requires some subtle
1997 * magic so that it works in all cases. IOW, this is a long routine.
1998 */
1999static int
2000save_tuple(PicklerObject *self, PyObject *obj)
2001{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002002 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002003
2004 const char mark_op = MARK;
2005 const char tuple_op = TUPLE;
2006 const char pop_op = POP;
2007 const char pop_mark_op = POP_MARK;
2008 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2009
2010 if ((len = PyTuple_Size(obj)) < 0)
2011 return -1;
2012
2013 if (len == 0) {
2014 char pdata[2];
2015
2016 if (self->proto) {
2017 pdata[0] = EMPTY_TUPLE;
2018 len = 1;
2019 }
2020 else {
2021 pdata[0] = MARK;
2022 pdata[1] = TUPLE;
2023 len = 2;
2024 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002025 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026 return -1;
2027 return 0;
2028 }
2029
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002030 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002031 * saving the tuple elements, the tuple must be recursive, in
2032 * which case we'll pop everything we put on the stack, and fetch
2033 * its value from the memo.
2034 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002035 if (len <= 3 && self->proto >= 2) {
2036 /* Use TUPLE{1,2,3} opcodes. */
2037 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002038 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002039
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002040 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002041 /* pop the len elements */
2042 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002043 if (_Pickler_Write(self, &pop_op, 1) < 0)
2044 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002045 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002046 if (memo_get(self, obj) < 0)
2047 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002049 return 0;
2050 }
2051 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002052 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2053 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002054 }
2055 goto memoize;
2056 }
2057
2058 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2059 * Generate MARK e1 e2 ... TUPLE
2060 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002061 if (_Pickler_Write(self, &mark_op, 1) < 0)
2062 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002063
2064 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002065 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002066
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002067 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002068 /* pop the stack stuff we pushed */
2069 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002070 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2071 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002072 }
2073 else {
2074 /* Note that we pop one more than len, to remove
2075 * the MARK too.
2076 */
2077 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002078 if (_Pickler_Write(self, &pop_op, 1) < 0)
2079 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002080 }
2081 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002082 if (memo_get(self, obj) < 0)
2083 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002084
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002085 return 0;
2086 }
2087 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002088 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2089 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002090 }
2091
2092 memoize:
2093 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002094 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002095
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002096 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002097}
2098
2099/* iter is an iterator giving items, and we batch up chunks of
2100 * MARK item item ... item APPENDS
2101 * opcode sequences. Calling code should have arranged to first create an
2102 * empty list, or list-like object, for the APPENDS to operate on.
2103 * Returns 0 on success, <0 on error.
2104 */
2105static int
2106batch_list(PicklerObject *self, PyObject *iter)
2107{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002108 PyObject *obj = NULL;
2109 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002110 int i, n;
2111
2112 const char mark_op = MARK;
2113 const char append_op = APPEND;
2114 const char appends_op = APPENDS;
2115
2116 assert(iter != NULL);
2117
2118 /* XXX: I think this function could be made faster by avoiding the
2119 iterator interface and fetching objects directly from list using
2120 PyList_GET_ITEM.
2121 */
2122
2123 if (self->proto == 0) {
2124 /* APPENDS isn't available; do one at a time. */
2125 for (;;) {
2126 obj = PyIter_Next(iter);
2127 if (obj == NULL) {
2128 if (PyErr_Occurred())
2129 return -1;
2130 break;
2131 }
2132 i = save(self, obj, 0);
2133 Py_DECREF(obj);
2134 if (i < 0)
2135 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002136 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002137 return -1;
2138 }
2139 return 0;
2140 }
2141
2142 /* proto > 0: write in batches of BATCHSIZE. */
2143 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002144 /* Get first item */
2145 firstitem = PyIter_Next(iter);
2146 if (firstitem == NULL) {
2147 if (PyErr_Occurred())
2148 goto error;
2149
2150 /* nothing more to add */
2151 break;
2152 }
2153
2154 /* Try to get a second item */
2155 obj = PyIter_Next(iter);
2156 if (obj == NULL) {
2157 if (PyErr_Occurred())
2158 goto error;
2159
2160 /* Only one item to write */
2161 if (save(self, firstitem, 0) < 0)
2162 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002163 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002164 goto error;
2165 Py_CLEAR(firstitem);
2166 break;
2167 }
2168
2169 /* More than one item to write */
2170
2171 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002172 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002173 goto error;
2174
2175 if (save(self, firstitem, 0) < 0)
2176 goto error;
2177 Py_CLEAR(firstitem);
2178 n = 1;
2179
2180 /* Fetch and save up to BATCHSIZE items */
2181 while (obj) {
2182 if (save(self, obj, 0) < 0)
2183 goto error;
2184 Py_CLEAR(obj);
2185 n += 1;
2186
2187 if (n == BATCHSIZE)
2188 break;
2189
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002190 obj = PyIter_Next(iter);
2191 if (obj == NULL) {
2192 if (PyErr_Occurred())
2193 goto error;
2194 break;
2195 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002196 }
2197
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002198 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002199 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002200
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002201 } while (n == BATCHSIZE);
2202 return 0;
2203
2204 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002205 Py_XDECREF(firstitem);
2206 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002207 return -1;
2208}
2209
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002210/* This is a variant of batch_list() above, specialized for lists (with no
2211 * support for list subclasses). Like batch_list(), we batch up chunks of
2212 * MARK item item ... item APPENDS
2213 * opcode sequences. Calling code should have arranged to first create an
2214 * empty list, or list-like object, for the APPENDS to operate on.
2215 * Returns 0 on success, -1 on error.
2216 *
2217 * This version is considerably faster than batch_list(), if less general.
2218 *
2219 * Note that this only works for protocols > 0.
2220 */
2221static int
2222batch_list_exact(PicklerObject *self, PyObject *obj)
2223{
2224 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002225 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002226
2227 const char append_op = APPEND;
2228 const char appends_op = APPENDS;
2229 const char mark_op = MARK;
2230
2231 assert(obj != NULL);
2232 assert(self->proto > 0);
2233 assert(PyList_CheckExact(obj));
2234
2235 if (PyList_GET_SIZE(obj) == 1) {
2236 item = PyList_GET_ITEM(obj, 0);
2237 if (save(self, item, 0) < 0)
2238 return -1;
2239 if (_Pickler_Write(self, &append_op, 1) < 0)
2240 return -1;
2241 return 0;
2242 }
2243
2244 /* Write in batches of BATCHSIZE. */
2245 total = 0;
2246 do {
2247 this_batch = 0;
2248 if (_Pickler_Write(self, &mark_op, 1) < 0)
2249 return -1;
2250 while (total < PyList_GET_SIZE(obj)) {
2251 item = PyList_GET_ITEM(obj, total);
2252 if (save(self, item, 0) < 0)
2253 return -1;
2254 total++;
2255 if (++this_batch == BATCHSIZE)
2256 break;
2257 }
2258 if (_Pickler_Write(self, &appends_op, 1) < 0)
2259 return -1;
2260
2261 } while (total < PyList_GET_SIZE(obj));
2262
2263 return 0;
2264}
2265
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002266static int
2267save_list(PicklerObject *self, PyObject *obj)
2268{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002269 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002270 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002271 int status = 0;
2272
2273 if (self->fast && !fast_save_enter(self, obj))
2274 goto error;
2275
2276 /* Create an empty list. */
2277 if (self->bin) {
2278 header[0] = EMPTY_LIST;
2279 len = 1;
2280 }
2281 else {
2282 header[0] = MARK;
2283 header[1] = LIST;
2284 len = 2;
2285 }
2286
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002287 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002288 goto error;
2289
2290 /* Get list length, and bow out early if empty. */
2291 if ((len = PyList_Size(obj)) < 0)
2292 goto error;
2293
2294 if (memo_put(self, obj) < 0)
2295 goto error;
2296
2297 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002298 /* Materialize the list elements. */
2299 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002300 if (Py_EnterRecursiveCall(" while pickling an object"))
2301 goto error;
2302 status = batch_list_exact(self, obj);
2303 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002304 } else {
2305 PyObject *iter = PyObject_GetIter(obj);
2306 if (iter == NULL)
2307 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002308
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002309 if (Py_EnterRecursiveCall(" while pickling an object")) {
2310 Py_DECREF(iter);
2311 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002312 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002313 status = batch_list(self, iter);
2314 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002315 Py_DECREF(iter);
2316 }
2317 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002318 if (0) {
2319 error:
2320 status = -1;
2321 }
2322
2323 if (self->fast && !fast_save_leave(self, obj))
2324 status = -1;
2325
2326 return status;
2327}
2328
2329/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2330 * MARK key value ... key value SETITEMS
2331 * opcode sequences. Calling code should have arranged to first create an
2332 * empty dict, or dict-like object, for the SETITEMS to operate on.
2333 * Returns 0 on success, <0 on error.
2334 *
2335 * This is very much like batch_list(). The difference between saving
2336 * elements directly, and picking apart two-tuples, is so long-winded at
2337 * the C level, though, that attempts to combine these routines were too
2338 * ugly to bear.
2339 */
2340static int
2341batch_dict(PicklerObject *self, PyObject *iter)
2342{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002343 PyObject *obj = NULL;
2344 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002345 int i, n;
2346
2347 const char mark_op = MARK;
2348 const char setitem_op = SETITEM;
2349 const char setitems_op = SETITEMS;
2350
2351 assert(iter != NULL);
2352
2353 if (self->proto == 0) {
2354 /* SETITEMS isn't available; do one at a time. */
2355 for (;;) {
2356 obj = PyIter_Next(iter);
2357 if (obj == NULL) {
2358 if (PyErr_Occurred())
2359 return -1;
2360 break;
2361 }
2362 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2363 PyErr_SetString(PyExc_TypeError, "dict items "
2364 "iterator must return 2-tuples");
2365 return -1;
2366 }
2367 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2368 if (i >= 0)
2369 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2370 Py_DECREF(obj);
2371 if (i < 0)
2372 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002373 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002374 return -1;
2375 }
2376 return 0;
2377 }
2378
2379 /* proto > 0: write in batches of BATCHSIZE. */
2380 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002381 /* Get first item */
2382 firstitem = PyIter_Next(iter);
2383 if (firstitem == NULL) {
2384 if (PyErr_Occurred())
2385 goto error;
2386
2387 /* nothing more to add */
2388 break;
2389 }
2390 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2391 PyErr_SetString(PyExc_TypeError, "dict items "
2392 "iterator must return 2-tuples");
2393 goto error;
2394 }
2395
2396 /* Try to get a second item */
2397 obj = PyIter_Next(iter);
2398 if (obj == NULL) {
2399 if (PyErr_Occurred())
2400 goto error;
2401
2402 /* Only one item to write */
2403 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2404 goto error;
2405 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2406 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002407 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002408 goto error;
2409 Py_CLEAR(firstitem);
2410 break;
2411 }
2412
2413 /* More than one item to write */
2414
2415 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002416 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002417 goto error;
2418
2419 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2420 goto error;
2421 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2422 goto error;
2423 Py_CLEAR(firstitem);
2424 n = 1;
2425
2426 /* Fetch and save up to BATCHSIZE items */
2427 while (obj) {
2428 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2429 PyErr_SetString(PyExc_TypeError, "dict items "
2430 "iterator must return 2-tuples");
2431 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002432 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002433 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2434 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2435 goto error;
2436 Py_CLEAR(obj);
2437 n += 1;
2438
2439 if (n == BATCHSIZE)
2440 break;
2441
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002442 obj = PyIter_Next(iter);
2443 if (obj == NULL) {
2444 if (PyErr_Occurred())
2445 goto error;
2446 break;
2447 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002448 }
2449
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002450 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002451 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002452
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002453 } while (n == BATCHSIZE);
2454 return 0;
2455
2456 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002457 Py_XDECREF(firstitem);
2458 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002459 return -1;
2460}
2461
Collin Winter5c9b02d2009-05-25 05:43:30 +00002462/* This is a variant of batch_dict() above that specializes for dicts, with no
2463 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2464 * MARK key value ... key value SETITEMS
2465 * opcode sequences. Calling code should have arranged to first create an
2466 * empty dict, or dict-like object, for the SETITEMS to operate on.
2467 * Returns 0 on success, -1 on error.
2468 *
2469 * Note that this currently doesn't work for protocol 0.
2470 */
2471static int
2472batch_dict_exact(PicklerObject *self, PyObject *obj)
2473{
2474 PyObject *key = NULL, *value = NULL;
2475 int i;
2476 Py_ssize_t dict_size, ppos = 0;
2477
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002478 const char mark_op = MARK;
2479 const char setitem_op = SETITEM;
2480 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002481
2482 assert(obj != NULL);
2483 assert(self->proto > 0);
2484
2485 dict_size = PyDict_Size(obj);
2486
2487 /* Special-case len(d) == 1 to save space. */
2488 if (dict_size == 1) {
2489 PyDict_Next(obj, &ppos, &key, &value);
2490 if (save(self, key, 0) < 0)
2491 return -1;
2492 if (save(self, value, 0) < 0)
2493 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002494 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002495 return -1;
2496 return 0;
2497 }
2498
2499 /* Write in batches of BATCHSIZE. */
2500 do {
2501 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002502 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002503 return -1;
2504 while (PyDict_Next(obj, &ppos, &key, &value)) {
2505 if (save(self, key, 0) < 0)
2506 return -1;
2507 if (save(self, value, 0) < 0)
2508 return -1;
2509 if (++i == BATCHSIZE)
2510 break;
2511 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002512 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002513 return -1;
2514 if (PyDict_Size(obj) != dict_size) {
2515 PyErr_Format(
2516 PyExc_RuntimeError,
2517 "dictionary changed size during iteration");
2518 return -1;
2519 }
2520
2521 } while (i == BATCHSIZE);
2522 return 0;
2523}
2524
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002525static int
2526save_dict(PicklerObject *self, PyObject *obj)
2527{
2528 PyObject *items, *iter;
2529 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002530 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002531 int status = 0;
2532
2533 if (self->fast && !fast_save_enter(self, obj))
2534 goto error;
2535
2536 /* Create an empty dict. */
2537 if (self->bin) {
2538 header[0] = EMPTY_DICT;
2539 len = 1;
2540 }
2541 else {
2542 header[0] = MARK;
2543 header[1] = DICT;
2544 len = 2;
2545 }
2546
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002547 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002548 goto error;
2549
2550 /* Get dict size, and bow out early if empty. */
2551 if ((len = PyDict_Size(obj)) < 0)
2552 goto error;
2553
2554 if (memo_put(self, obj) < 0)
2555 goto error;
2556
2557 if (len != 0) {
2558 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002559 if (PyDict_CheckExact(obj) && self->proto > 0) {
2560 /* We can take certain shortcuts if we know this is a dict and
2561 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002562 if (Py_EnterRecursiveCall(" while pickling an object"))
2563 goto error;
2564 status = batch_dict_exact(self, obj);
2565 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002566 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002567 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002568
2569 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002570 if (items == NULL)
2571 goto error;
2572 iter = PyObject_GetIter(items);
2573 Py_DECREF(items);
2574 if (iter == NULL)
2575 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002576 if (Py_EnterRecursiveCall(" while pickling an object")) {
2577 Py_DECREF(iter);
2578 goto error;
2579 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002580 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002581 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002582 Py_DECREF(iter);
2583 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002584 }
2585
2586 if (0) {
2587 error:
2588 status = -1;
2589 }
2590
2591 if (self->fast && !fast_save_leave(self, obj))
2592 status = -1;
2593
2594 return status;
2595}
2596
2597static int
2598save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2599{
2600 static PyObject *name_str = NULL;
2601 PyObject *global_name = NULL;
2602 PyObject *module_name = NULL;
2603 PyObject *module = NULL;
2604 PyObject *cls;
2605 int status = 0;
2606
2607 const char global_op = GLOBAL;
2608
2609 if (name_str == NULL) {
2610 name_str = PyUnicode_InternFromString("__name__");
2611 if (name_str == NULL)
2612 goto error;
2613 }
2614
2615 if (name) {
2616 global_name = name;
2617 Py_INCREF(global_name);
2618 }
2619 else {
2620 global_name = PyObject_GetAttr(obj, name_str);
2621 if (global_name == NULL)
2622 goto error;
2623 }
2624
2625 module_name = whichmodule(obj, global_name);
2626 if (module_name == NULL)
2627 goto error;
2628
2629 /* XXX: Change to use the import C API directly with level=0 to disallow
2630 relative imports.
2631
2632 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2633 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2634 custom import functions (IMHO, this would be a nice security
2635 feature). The import C API would need to be extended to support the
2636 extra parameters of __import__ to fix that. */
2637 module = PyImport_Import(module_name);
2638 if (module == NULL) {
2639 PyErr_Format(PicklingError,
2640 "Can't pickle %R: import of module %R failed",
2641 obj, module_name);
2642 goto error;
2643 }
2644 cls = PyObject_GetAttr(module, global_name);
2645 if (cls == NULL) {
2646 PyErr_Format(PicklingError,
2647 "Can't pickle %R: attribute lookup %S.%S failed",
2648 obj, module_name, global_name);
2649 goto error;
2650 }
2651 if (cls != obj) {
2652 Py_DECREF(cls);
2653 PyErr_Format(PicklingError,
2654 "Can't pickle %R: it's not the same object as %S.%S",
2655 obj, module_name, global_name);
2656 goto error;
2657 }
2658 Py_DECREF(cls);
2659
2660 if (self->proto >= 2) {
2661 /* See whether this is in the extension registry, and if
2662 * so generate an EXT opcode.
2663 */
2664 PyObject *code_obj; /* extension code as Python object */
2665 long code; /* extension code as C value */
2666 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002667 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002668
2669 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2670 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2671 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2672 /* The object is not registered in the extension registry.
2673 This is the most likely code path. */
2674 if (code_obj == NULL)
2675 goto gen_global;
2676
2677 /* XXX: pickle.py doesn't check neither the type, nor the range
2678 of the value returned by the extension_registry. It should for
2679 consistency. */
2680
2681 /* Verify code_obj has the right type and value. */
2682 if (!PyLong_Check(code_obj)) {
2683 PyErr_Format(PicklingError,
2684 "Can't pickle %R: extension code %R isn't an integer",
2685 obj, code_obj);
2686 goto error;
2687 }
2688 code = PyLong_AS_LONG(code_obj);
2689 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002690 if (!PyErr_Occurred())
2691 PyErr_Format(PicklingError,
2692 "Can't pickle %R: extension code %ld is out of range",
2693 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002694 goto error;
2695 }
2696
2697 /* Generate an EXT opcode. */
2698 if (code <= 0xff) {
2699 pdata[0] = EXT1;
2700 pdata[1] = (unsigned char)code;
2701 n = 2;
2702 }
2703 else if (code <= 0xffff) {
2704 pdata[0] = EXT2;
2705 pdata[1] = (unsigned char)(code & 0xff);
2706 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2707 n = 3;
2708 }
2709 else {
2710 pdata[0] = EXT4;
2711 pdata[1] = (unsigned char)(code & 0xff);
2712 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2713 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2714 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2715 n = 5;
2716 }
2717
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002718 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002719 goto error;
2720 }
2721 else {
2722 /* Generate a normal global opcode if we are using a pickle
2723 protocol <= 2, or if the object is not registered in the
2724 extension registry. */
2725 PyObject *encoded;
2726 PyObject *(*unicode_encoder)(PyObject *);
2727
2728 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002729 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002730 goto error;
2731
2732 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2733 the module name and the global name using UTF-8. We do so only when
2734 we are using the pickle protocol newer than version 3. This is to
2735 ensure compatibility with older Unpickler running on Python 2.x. */
2736 if (self->proto >= 3) {
2737 unicode_encoder = PyUnicode_AsUTF8String;
2738 }
2739 else {
2740 unicode_encoder = PyUnicode_AsASCIIString;
2741 }
2742
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002743 /* For protocol < 3 and if the user didn't request against doing so,
2744 we convert module names to the old 2.x module names. */
2745 if (self->fix_imports) {
2746 PyObject *key;
2747 PyObject *item;
2748
2749 key = PyTuple_Pack(2, module_name, global_name);
2750 if (key == NULL)
2751 goto error;
2752 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2753 Py_DECREF(key);
2754 if (item) {
2755 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2756 PyErr_Format(PyExc_RuntimeError,
2757 "_compat_pickle.REVERSE_NAME_MAPPING values "
2758 "should be 2-tuples, not %.200s",
2759 Py_TYPE(item)->tp_name);
2760 goto error;
2761 }
2762 Py_CLEAR(module_name);
2763 Py_CLEAR(global_name);
2764 module_name = PyTuple_GET_ITEM(item, 0);
2765 global_name = PyTuple_GET_ITEM(item, 1);
2766 if (!PyUnicode_Check(module_name) ||
2767 !PyUnicode_Check(global_name)) {
2768 PyErr_Format(PyExc_RuntimeError,
2769 "_compat_pickle.REVERSE_NAME_MAPPING values "
2770 "should be pairs of str, not (%.200s, %.200s)",
2771 Py_TYPE(module_name)->tp_name,
2772 Py_TYPE(global_name)->tp_name);
2773 goto error;
2774 }
2775 Py_INCREF(module_name);
2776 Py_INCREF(global_name);
2777 }
2778 else if (PyErr_Occurred()) {
2779 goto error;
2780 }
2781
2782 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2783 if (item) {
2784 if (!PyUnicode_Check(item)) {
2785 PyErr_Format(PyExc_RuntimeError,
2786 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2787 "should be strings, not %.200s",
2788 Py_TYPE(item)->tp_name);
2789 goto error;
2790 }
2791 Py_CLEAR(module_name);
2792 module_name = item;
2793 Py_INCREF(module_name);
2794 }
2795 else if (PyErr_Occurred()) {
2796 goto error;
2797 }
2798 }
2799
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002800 /* Save the name of the module. */
2801 encoded = unicode_encoder(module_name);
2802 if (encoded == NULL) {
2803 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2804 PyErr_Format(PicklingError,
2805 "can't pickle module identifier '%S' using "
2806 "pickle protocol %i", module_name, self->proto);
2807 goto error;
2808 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002809 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002810 PyBytes_GET_SIZE(encoded)) < 0) {
2811 Py_DECREF(encoded);
2812 goto error;
2813 }
2814 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002815 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002816 goto error;
2817
2818 /* Save the name of the module. */
2819 encoded = unicode_encoder(global_name);
2820 if (encoded == NULL) {
2821 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2822 PyErr_Format(PicklingError,
2823 "can't pickle global identifier '%S' using "
2824 "pickle protocol %i", global_name, self->proto);
2825 goto error;
2826 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002827 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002828 PyBytes_GET_SIZE(encoded)) < 0) {
2829 Py_DECREF(encoded);
2830 goto error;
2831 }
2832 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002833 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002834 goto error;
2835
2836 /* Memoize the object. */
2837 if (memo_put(self, obj) < 0)
2838 goto error;
2839 }
2840
2841 if (0) {
2842 error:
2843 status = -1;
2844 }
2845 Py_XDECREF(module_name);
2846 Py_XDECREF(global_name);
2847 Py_XDECREF(module);
2848
2849 return status;
2850}
2851
2852static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002853save_ellipsis(PicklerObject *self, PyObject *obj)
2854{
Łukasz Langadbd78252012-03-12 22:59:11 +01002855 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002856 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002857 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002858 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002859 res = save_global(self, Py_Ellipsis, str);
2860 Py_DECREF(str);
2861 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002862}
2863
2864static int
2865save_notimplemented(PicklerObject *self, PyObject *obj)
2866{
Łukasz Langadbd78252012-03-12 22:59:11 +01002867 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002868 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002869 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002870 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002871 res = save_global(self, Py_NotImplemented, str);
2872 Py_DECREF(str);
2873 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002874}
2875
2876static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002877save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2878{
2879 PyObject *pid = NULL;
2880 int status = 0;
2881
2882 const char persid_op = PERSID;
2883 const char binpersid_op = BINPERSID;
2884
2885 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002886 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002887 if (pid == NULL)
2888 return -1;
2889
2890 if (pid != Py_None) {
2891 if (self->bin) {
2892 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002893 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002894 goto error;
2895 }
2896 else {
2897 PyObject *pid_str = NULL;
2898 char *pid_ascii_bytes;
2899 Py_ssize_t size;
2900
2901 pid_str = PyObject_Str(pid);
2902 if (pid_str == NULL)
2903 goto error;
2904
2905 /* XXX: Should it check whether the persistent id only contains
2906 ASCII characters? And what if the pid contains embedded
2907 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002908 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002909 Py_DECREF(pid_str);
2910 if (pid_ascii_bytes == NULL)
2911 goto error;
2912
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002913 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2914 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2915 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002916 goto error;
2917 }
2918 status = 1;
2919 }
2920
2921 if (0) {
2922 error:
2923 status = -1;
2924 }
2925 Py_XDECREF(pid);
2926
2927 return status;
2928}
2929
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002930static PyObject *
2931get_class(PyObject *obj)
2932{
2933 PyObject *cls;
2934 static PyObject *str_class;
2935
2936 if (str_class == NULL) {
2937 str_class = PyUnicode_InternFromString("__class__");
2938 if (str_class == NULL)
2939 return NULL;
2940 }
2941 cls = PyObject_GetAttr(obj, str_class);
2942 if (cls == NULL) {
2943 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2944 PyErr_Clear();
2945 cls = (PyObject *) Py_TYPE(obj);
2946 Py_INCREF(cls);
2947 }
2948 }
2949 return cls;
2950}
2951
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002952/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2953 * appropriate __reduce__ method for obj.
2954 */
2955static int
2956save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2957{
2958 PyObject *callable;
2959 PyObject *argtup;
2960 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002961 PyObject *listitems = Py_None;
2962 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002963 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002964
2965 int use_newobj = self->proto >= 2;
2966
2967 const char reduce_op = REDUCE;
2968 const char build_op = BUILD;
2969 const char newobj_op = NEWOBJ;
2970
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002971 size = PyTuple_Size(args);
2972 if (size < 2 || size > 5) {
2973 PyErr_SetString(PicklingError, "tuple returned by "
2974 "__reduce__ must contain 2 through 5 elements");
2975 return -1;
2976 }
2977
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002978 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2979 &callable, &argtup, &state, &listitems, &dictitems))
2980 return -1;
2981
2982 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002983 PyErr_SetString(PicklingError, "first item of the tuple "
2984 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002985 return -1;
2986 }
2987 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002988 PyErr_SetString(PicklingError, "second item of the tuple "
2989 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002990 return -1;
2991 }
2992
2993 if (state == Py_None)
2994 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002995
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002996 if (listitems == Py_None)
2997 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002998 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07002999 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003000 "returned by __reduce__ must be an iterator, not %s",
3001 Py_TYPE(listitems)->tp_name);
3002 return -1;
3003 }
3004
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003005 if (dictitems == Py_None)
3006 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003007 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003008 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003009 "returned by __reduce__ must be an iterator, not %s",
3010 Py_TYPE(dictitems)->tp_name);
3011 return -1;
3012 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003013
3014 /* Protocol 2 special case: if callable's name is __newobj__, use
3015 NEWOBJ. */
3016 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003017 static PyObject *newobj_str = NULL, *name_str = NULL;
3018 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003019
3020 if (newobj_str == NULL) {
3021 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003022 name_str = PyUnicode_InternFromString("__name__");
3023 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003024 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003025 }
3026
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003027 name = PyObject_GetAttr(callable, name_str);
3028 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003029 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3030 PyErr_Clear();
3031 else
3032 return -1;
3033 use_newobj = 0;
3034 }
3035 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003036 use_newobj = PyUnicode_Check(name) &&
3037 PyUnicode_Compare(name, newobj_str) == 0;
3038 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003039 }
3040 }
3041 if (use_newobj) {
3042 PyObject *cls;
3043 PyObject *newargtup;
3044 PyObject *obj_class;
3045 int p;
3046
3047 /* Sanity checks. */
3048 if (Py_SIZE(argtup) < 1) {
3049 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3050 return -1;
3051 }
3052
3053 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003054 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003055 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003056 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003057 return -1;
3058 }
3059
3060 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003061 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003062 p = obj_class != cls; /* true iff a problem */
3063 Py_DECREF(obj_class);
3064 if (p) {
3065 PyErr_SetString(PicklingError, "args[0] from "
3066 "__newobj__ args has the wrong class");
3067 return -1;
3068 }
3069 }
3070 /* XXX: These calls save() are prone to infinite recursion. Imagine
3071 what happen if the value returned by the __reduce__() method of
3072 some extension type contains another object of the same type. Ouch!
3073
3074 Here is a quick example, that I ran into, to illustrate what I
3075 mean:
3076
3077 >>> import pickle, copyreg
3078 >>> copyreg.dispatch_table.pop(complex)
3079 >>> pickle.dumps(1+2j)
3080 Traceback (most recent call last):
3081 ...
3082 RuntimeError: maximum recursion depth exceeded
3083
3084 Removing the complex class from copyreg.dispatch_table made the
3085 __reduce_ex__() method emit another complex object:
3086
3087 >>> (1+1j).__reduce_ex__(2)
3088 (<function __newobj__ at 0xb7b71c3c>,
3089 (<class 'complex'>, (1+1j)), None, None, None)
3090
3091 Thus when save() was called on newargstup (the 2nd item) recursion
3092 ensued. Of course, the bug was in the complex class which had a
3093 broken __getnewargs__() that emitted another complex object. But,
3094 the point, here, is it is quite easy to end up with a broken reduce
3095 function. */
3096
3097 /* Save the class and its __new__ arguments. */
3098 if (save(self, cls, 0) < 0)
3099 return -1;
3100
3101 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3102 if (newargtup == NULL)
3103 return -1;
3104
3105 p = save(self, newargtup, 0);
3106 Py_DECREF(newargtup);
3107 if (p < 0)
3108 return -1;
3109
3110 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003111 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003112 return -1;
3113 }
3114 else { /* Not using NEWOBJ. */
3115 if (save(self, callable, 0) < 0 ||
3116 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003117 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003118 return -1;
3119 }
3120
3121 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3122 the caller do not want to memoize the object. Not particularly useful,
3123 but that is to mimic the behavior save_reduce() in pickle.py when
3124 obj is None. */
3125 if (obj && memo_put(self, obj) < 0)
3126 return -1;
3127
3128 if (listitems && batch_list(self, listitems) < 0)
3129 return -1;
3130
3131 if (dictitems && batch_dict(self, dictitems) < 0)
3132 return -1;
3133
3134 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003135 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003136 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003137 return -1;
3138 }
3139
3140 return 0;
3141}
3142
3143static int
3144save(PicklerObject *self, PyObject *obj, int pers_save)
3145{
3146 PyTypeObject *type;
3147 PyObject *reduce_func = NULL;
3148 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003149 int status = 0;
3150
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003151 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003152 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003153
3154 /* The extra pers_save argument is necessary to avoid calling save_pers()
3155 on its returned object. */
3156 if (!pers_save && self->pers_func) {
3157 /* save_pers() returns:
3158 -1 to signal an error;
3159 0 if it did nothing successfully;
3160 1 if a persistent id was saved.
3161 */
3162 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3163 goto done;
3164 }
3165
3166 type = Py_TYPE(obj);
3167
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003168 /* The old cPickle had an optimization that used switch-case statement
3169 dispatching on the first letter of the type name. This has was removed
3170 since benchmarks shown that this optimization was actually slowing
3171 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003172
3173 /* Atom types; these aren't memoized, so don't check the memo. */
3174
3175 if (obj == Py_None) {
3176 status = save_none(self, obj);
3177 goto done;
3178 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003179 else if (obj == Py_Ellipsis) {
3180 status = save_ellipsis(self, obj);
3181 goto done;
3182 }
3183 else if (obj == Py_NotImplemented) {
3184 status = save_notimplemented(self, obj);
3185 goto done;
3186 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003187 else if (obj == Py_False || obj == Py_True) {
3188 status = save_bool(self, obj);
3189 goto done;
3190 }
3191 else if (type == &PyLong_Type) {
3192 status = save_long(self, obj);
3193 goto done;
3194 }
3195 else if (type == &PyFloat_Type) {
3196 status = save_float(self, obj);
3197 goto done;
3198 }
3199
3200 /* Check the memo to see if it has the object. If so, generate
3201 a GET (or BINGET) opcode, instead of pickling the object
3202 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003203 if (PyMemoTable_Get(self->memo, obj)) {
3204 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003205 goto error;
3206 goto done;
3207 }
3208
3209 if (type == &PyBytes_Type) {
3210 status = save_bytes(self, obj);
3211 goto done;
3212 }
3213 else if (type == &PyUnicode_Type) {
3214 status = save_unicode(self, obj);
3215 goto done;
3216 }
3217 else if (type == &PyDict_Type) {
3218 status = save_dict(self, obj);
3219 goto done;
3220 }
3221 else if (type == &PyList_Type) {
3222 status = save_list(self, obj);
3223 goto done;
3224 }
3225 else if (type == &PyTuple_Type) {
3226 status = save_tuple(self, obj);
3227 goto done;
3228 }
3229 else if (type == &PyType_Type) {
3230 status = save_global(self, obj, NULL);
3231 goto done;
3232 }
3233 else if (type == &PyFunction_Type) {
3234 status = save_global(self, obj, NULL);
3235 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3236 /* fall back to reduce */
3237 PyErr_Clear();
3238 }
3239 else {
3240 goto done;
3241 }
3242 }
3243 else if (type == &PyCFunction_Type) {
3244 status = save_global(self, obj, NULL);
3245 goto done;
3246 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003247
3248 /* XXX: This part needs some unit tests. */
3249
3250 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003251 * self.dispatch_table, copyreg.dispatch_table, the object's
3252 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003253 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003254 if (self->dispatch_table == NULL) {
3255 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3256 /* PyDict_GetItem() unlike PyObject_GetItem() and
3257 PyObject_GetAttr() returns a borrowed ref */
3258 Py_XINCREF(reduce_func);
3259 } else {
3260 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3261 if (reduce_func == NULL) {
3262 if (PyErr_ExceptionMatches(PyExc_KeyError))
3263 PyErr_Clear();
3264 else
3265 goto error;
3266 }
3267 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003268 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003269 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003270 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003271 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003272 else if (PyType_IsSubtype(type, &PyType_Type)) {
3273 status = save_global(self, obj, NULL);
3274 goto done;
3275 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003276 else {
3277 static PyObject *reduce_str = NULL;
3278 static PyObject *reduce_ex_str = NULL;
3279
3280 /* Cache the name of the reduce methods. */
3281 if (reduce_str == NULL) {
3282 reduce_str = PyUnicode_InternFromString("__reduce__");
3283 if (reduce_str == NULL)
3284 goto error;
3285 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3286 if (reduce_ex_str == NULL)
3287 goto error;
3288 }
3289
3290 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3291 automatically defined as __reduce__. While this is convenient, this
3292 make it impossible to know which method was actually called. Of
3293 course, this is not a big deal. But still, it would be nice to let
3294 the user know which method was called when something go
3295 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3296 don't actually have to check for a __reduce__ method. */
3297
3298 /* Check for a __reduce_ex__ method. */
3299 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3300 if (reduce_func != NULL) {
3301 PyObject *proto;
3302 proto = PyLong_FromLong(self->proto);
3303 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003304 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003305 }
3306 }
3307 else {
3308 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3309 PyErr_Clear();
3310 else
3311 goto error;
3312 /* Check for a __reduce__ method. */
3313 reduce_func = PyObject_GetAttr(obj, reduce_str);
3314 if (reduce_func != NULL) {
3315 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3316 }
3317 else {
3318 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3319 type->tp_name, obj);
3320 goto error;
3321 }
3322 }
3323 }
3324
3325 if (reduce_value == NULL)
3326 goto error;
3327
3328 if (PyUnicode_Check(reduce_value)) {
3329 status = save_global(self, obj, reduce_value);
3330 goto done;
3331 }
3332
3333 if (!PyTuple_Check(reduce_value)) {
3334 PyErr_SetString(PicklingError,
3335 "__reduce__ must return a string or tuple");
3336 goto error;
3337 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003338
3339 status = save_reduce(self, reduce_value, obj);
3340
3341 if (0) {
3342 error:
3343 status = -1;
3344 }
3345 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003346 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003347 Py_XDECREF(reduce_func);
3348 Py_XDECREF(reduce_value);
3349
3350 return status;
3351}
3352
3353static int
3354dump(PicklerObject *self, PyObject *obj)
3355{
3356 const char stop_op = STOP;
3357
3358 if (self->proto >= 2) {
3359 char header[2];
3360
3361 header[0] = PROTO;
3362 assert(self->proto >= 0 && self->proto < 256);
3363 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003364 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003365 return -1;
3366 }
3367
3368 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003369 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003370 return -1;
3371
3372 return 0;
3373}
3374
3375PyDoc_STRVAR(Pickler_clear_memo_doc,
3376"clear_memo() -> None. Clears the pickler's \"memo\"."
3377"\n"
3378"The memo is the data structure that remembers which objects the\n"
3379"pickler has already seen, so that shared or recursive objects are\n"
3380"pickled by reference and not by value. This method is useful when\n"
3381"re-using picklers.");
3382
3383static PyObject *
3384Pickler_clear_memo(PicklerObject *self)
3385{
3386 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003387 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003388
3389 Py_RETURN_NONE;
3390}
3391
3392PyDoc_STRVAR(Pickler_dump_doc,
3393"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3394
3395static PyObject *
3396Pickler_dump(PicklerObject *self, PyObject *args)
3397{
3398 PyObject *obj;
3399
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003400 /* Check whether the Pickler was initialized correctly (issue3664).
3401 Developers often forget to call __init__() in their subclasses, which
3402 would trigger a segfault without this check. */
3403 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003404 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003405 "Pickler.__init__() was not called by %s.__init__()",
3406 Py_TYPE(self)->tp_name);
3407 return NULL;
3408 }
3409
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003410 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3411 return NULL;
3412
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003413 if (_Pickler_ClearBuffer(self) < 0)
3414 return NULL;
3415
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003416 if (dump(self, obj) < 0)
3417 return NULL;
3418
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003419 if (_Pickler_FlushToFile(self) < 0)
3420 return NULL;
3421
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003422 Py_RETURN_NONE;
3423}
3424
3425static struct PyMethodDef Pickler_methods[] = {
3426 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3427 Pickler_dump_doc},
3428 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3429 Pickler_clear_memo_doc},
3430 {NULL, NULL} /* sentinel */
3431};
3432
3433static void
3434Pickler_dealloc(PicklerObject *self)
3435{
3436 PyObject_GC_UnTrack(self);
3437
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003438 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003439 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003440 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003441 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003442 Py_XDECREF(self->arg);
3443 Py_XDECREF(self->fast_memo);
3444
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003445 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003446
3447 Py_TYPE(self)->tp_free((PyObject *)self);
3448}
3449
3450static int
3451Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3452{
3453 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003454 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003455 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003456 Py_VISIT(self->arg);
3457 Py_VISIT(self->fast_memo);
3458 return 0;
3459}
3460
3461static int
3462Pickler_clear(PicklerObject *self)
3463{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003464 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003465 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003466 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003467 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003468 Py_CLEAR(self->arg);
3469 Py_CLEAR(self->fast_memo);
3470
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003471 if (self->memo != NULL) {
3472 PyMemoTable *memo = self->memo;
3473 self->memo = NULL;
3474 PyMemoTable_Del(memo);
3475 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003476 return 0;
3477}
3478
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003479
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003480PyDoc_STRVAR(Pickler_doc,
3481"Pickler(file, protocol=None)"
3482"\n"
3483"This takes a binary file for writing a pickle data stream.\n"
3484"\n"
3485"The optional protocol argument tells the pickler to use the\n"
3486"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3487"protocol is 3; a backward-incompatible protocol designed for\n"
3488"Python 3.0.\n"
3489"\n"
3490"Specifying a negative protocol version selects the highest\n"
3491"protocol version supported. The higher the protocol used, the\n"
3492"more recent the version of Python needed to read the pickle\n"
3493"produced.\n"
3494"\n"
3495"The file argument must have a write() method that accepts a single\n"
3496"bytes argument. It can thus be a file object opened for binary\n"
3497"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003498"meets this interface.\n"
3499"\n"
3500"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3501"map the new Python 3.x names to the old module names used in Python\n"
3502"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003503
3504static int
3505Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3506{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003507 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003508 PyObject *file;
3509 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003510 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003511 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003512 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003513
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003514 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003515 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003516 return -1;
3517
3518 /* In case of multiple __init__() calls, clear previous content. */
3519 if (self->write != NULL)
3520 (void)Pickler_clear(self);
3521
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003522 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3523 return -1;
3524
3525 if (_Pickler_SetOutputStream(self, file) < 0)
3526 return -1;
3527
3528 /* memo and output_buffer may have already been created in _Pickler_New */
3529 if (self->memo == NULL) {
3530 self->memo = PyMemoTable_New();
3531 if (self->memo == NULL)
3532 return -1;
3533 }
3534 self->output_len = 0;
3535 if (self->output_buffer == NULL) {
3536 self->max_output_len = WRITE_BUF_SIZE;
3537 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3538 self->max_output_len);
3539 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003540 return -1;
3541 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003542
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003543 self->arg = NULL;
3544 self->fast = 0;
3545 self->fast_nesting = 0;
3546 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003547 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003548 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3549 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3550 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003551 if (self->pers_func == NULL)
3552 return -1;
3553 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003554 self->dispatch_table = NULL;
3555 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3556 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3557 &PyId_dispatch_table);
3558 if (self->dispatch_table == NULL)
3559 return -1;
3560 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003561 return 0;
3562}
3563
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003564/* Define a proxy object for the Pickler's internal memo object. This is to
3565 * avoid breaking code like:
3566 * pickler.memo.clear()
3567 * and
3568 * pickler.memo = saved_memo
3569 * Is this a good idea? Not really, but we don't want to break code that uses
3570 * it. Note that we don't implement the entire mapping API here. This is
3571 * intentional, as these should be treated as black-box implementation details.
3572 */
3573
3574typedef struct {
3575 PyObject_HEAD
3576 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3577} PicklerMemoProxyObject;
3578
3579PyDoc_STRVAR(pmp_clear_doc,
3580"memo.clear() -> None. Remove all items from memo.");
3581
3582static PyObject *
3583pmp_clear(PicklerMemoProxyObject *self)
3584{
3585 if (self->pickler->memo)
3586 PyMemoTable_Clear(self->pickler->memo);
3587 Py_RETURN_NONE;
3588}
3589
3590PyDoc_STRVAR(pmp_copy_doc,
3591"memo.copy() -> new_memo. Copy the memo to a new object.");
3592
3593static PyObject *
3594pmp_copy(PicklerMemoProxyObject *self)
3595{
3596 Py_ssize_t i;
3597 PyMemoTable *memo;
3598 PyObject *new_memo = PyDict_New();
3599 if (new_memo == NULL)
3600 return NULL;
3601
3602 memo = self->pickler->memo;
3603 for (i = 0; i < memo->mt_allocated; ++i) {
3604 PyMemoEntry entry = memo->mt_table[i];
3605 if (entry.me_key != NULL) {
3606 int status;
3607 PyObject *key, *value;
3608
3609 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003610 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003611
3612 if (key == NULL || value == NULL) {
3613 Py_XDECREF(key);
3614 Py_XDECREF(value);
3615 goto error;
3616 }
3617 status = PyDict_SetItem(new_memo, key, value);
3618 Py_DECREF(key);
3619 Py_DECREF(value);
3620 if (status < 0)
3621 goto error;
3622 }
3623 }
3624 return new_memo;
3625
3626 error:
3627 Py_XDECREF(new_memo);
3628 return NULL;
3629}
3630
3631PyDoc_STRVAR(pmp_reduce_doc,
3632"memo.__reduce__(). Pickling support.");
3633
3634static PyObject *
3635pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3636{
3637 PyObject *reduce_value, *dict_args;
3638 PyObject *contents = pmp_copy(self);
3639 if (contents == NULL)
3640 return NULL;
3641
3642 reduce_value = PyTuple_New(2);
3643 if (reduce_value == NULL) {
3644 Py_DECREF(contents);
3645 return NULL;
3646 }
3647 dict_args = PyTuple_New(1);
3648 if (dict_args == NULL) {
3649 Py_DECREF(contents);
3650 Py_DECREF(reduce_value);
3651 return NULL;
3652 }
3653 PyTuple_SET_ITEM(dict_args, 0, contents);
3654 Py_INCREF((PyObject *)&PyDict_Type);
3655 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3656 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3657 return reduce_value;
3658}
3659
3660static PyMethodDef picklerproxy_methods[] = {
3661 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3662 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3663 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3664 {NULL, NULL} /* sentinel */
3665};
3666
3667static void
3668PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3669{
3670 PyObject_GC_UnTrack(self);
3671 Py_XDECREF(self->pickler);
3672 PyObject_GC_Del((PyObject *)self);
3673}
3674
3675static int
3676PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3677 visitproc visit, void *arg)
3678{
3679 Py_VISIT(self->pickler);
3680 return 0;
3681}
3682
3683static int
3684PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3685{
3686 Py_CLEAR(self->pickler);
3687 return 0;
3688}
3689
3690static PyTypeObject PicklerMemoProxyType = {
3691 PyVarObject_HEAD_INIT(NULL, 0)
3692 "_pickle.PicklerMemoProxy", /*tp_name*/
3693 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3694 0,
3695 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3696 0, /* tp_print */
3697 0, /* tp_getattr */
3698 0, /* tp_setattr */
3699 0, /* tp_compare */
3700 0, /* tp_repr */
3701 0, /* tp_as_number */
3702 0, /* tp_as_sequence */
3703 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003704 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003705 0, /* tp_call */
3706 0, /* tp_str */
3707 PyObject_GenericGetAttr, /* tp_getattro */
3708 PyObject_GenericSetAttr, /* tp_setattro */
3709 0, /* tp_as_buffer */
3710 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3711 0, /* tp_doc */
3712 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3713 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3714 0, /* tp_richcompare */
3715 0, /* tp_weaklistoffset */
3716 0, /* tp_iter */
3717 0, /* tp_iternext */
3718 picklerproxy_methods, /* tp_methods */
3719};
3720
3721static PyObject *
3722PicklerMemoProxy_New(PicklerObject *pickler)
3723{
3724 PicklerMemoProxyObject *self;
3725
3726 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3727 if (self == NULL)
3728 return NULL;
3729 Py_INCREF(pickler);
3730 self->pickler = pickler;
3731 PyObject_GC_Track(self);
3732 return (PyObject *)self;
3733}
3734
3735/*****************************************************************************/
3736
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003737static PyObject *
3738Pickler_get_memo(PicklerObject *self)
3739{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003740 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003741}
3742
3743static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003744Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003745{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003746 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003747
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003748 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003749 PyErr_SetString(PyExc_TypeError,
3750 "attribute deletion is not supported");
3751 return -1;
3752 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003753
3754 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3755 PicklerObject *pickler =
3756 ((PicklerMemoProxyObject *)obj)->pickler;
3757
3758 new_memo = PyMemoTable_Copy(pickler->memo);
3759 if (new_memo == NULL)
3760 return -1;
3761 }
3762 else if (PyDict_Check(obj)) {
3763 Py_ssize_t i = 0;
3764 PyObject *key, *value;
3765
3766 new_memo = PyMemoTable_New();
3767 if (new_memo == NULL)
3768 return -1;
3769
3770 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003771 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003772 PyObject *memo_obj;
3773
3774 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3775 PyErr_SetString(PyExc_TypeError,
3776 "'memo' values must be 2-item tuples");
3777 goto error;
3778 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003779 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003780 if (memo_id == -1 && PyErr_Occurred())
3781 goto error;
3782 memo_obj = PyTuple_GET_ITEM(value, 1);
3783 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3784 goto error;
3785 }
3786 }
3787 else {
3788 PyErr_Format(PyExc_TypeError,
3789 "'memo' attribute must be an PicklerMemoProxy object"
3790 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003791 return -1;
3792 }
3793
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003794 PyMemoTable_Del(self->memo);
3795 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003796
3797 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003798
3799 error:
3800 if (new_memo)
3801 PyMemoTable_Del(new_memo);
3802 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003803}
3804
3805static PyObject *
3806Pickler_get_persid(PicklerObject *self)
3807{
3808 if (self->pers_func == NULL)
3809 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3810 else
3811 Py_INCREF(self->pers_func);
3812 return self->pers_func;
3813}
3814
3815static int
3816Pickler_set_persid(PicklerObject *self, PyObject *value)
3817{
3818 PyObject *tmp;
3819
3820 if (value == NULL) {
3821 PyErr_SetString(PyExc_TypeError,
3822 "attribute deletion is not supported");
3823 return -1;
3824 }
3825 if (!PyCallable_Check(value)) {
3826 PyErr_SetString(PyExc_TypeError,
3827 "persistent_id must be a callable taking one argument");
3828 return -1;
3829 }
3830
3831 tmp = self->pers_func;
3832 Py_INCREF(value);
3833 self->pers_func = value;
3834 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3835
3836 return 0;
3837}
3838
3839static PyMemberDef Pickler_members[] = {
3840 {"bin", T_INT, offsetof(PicklerObject, bin)},
3841 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003842 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003843 {NULL}
3844};
3845
3846static PyGetSetDef Pickler_getsets[] = {
3847 {"memo", (getter)Pickler_get_memo,
3848 (setter)Pickler_set_memo},
3849 {"persistent_id", (getter)Pickler_get_persid,
3850 (setter)Pickler_set_persid},
3851 {NULL}
3852};
3853
3854static PyTypeObject Pickler_Type = {
3855 PyVarObject_HEAD_INIT(NULL, 0)
3856 "_pickle.Pickler" , /*tp_name*/
3857 sizeof(PicklerObject), /*tp_basicsize*/
3858 0, /*tp_itemsize*/
3859 (destructor)Pickler_dealloc, /*tp_dealloc*/
3860 0, /*tp_print*/
3861 0, /*tp_getattr*/
3862 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003863 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003864 0, /*tp_repr*/
3865 0, /*tp_as_number*/
3866 0, /*tp_as_sequence*/
3867 0, /*tp_as_mapping*/
3868 0, /*tp_hash*/
3869 0, /*tp_call*/
3870 0, /*tp_str*/
3871 0, /*tp_getattro*/
3872 0, /*tp_setattro*/
3873 0, /*tp_as_buffer*/
3874 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3875 Pickler_doc, /*tp_doc*/
3876 (traverseproc)Pickler_traverse, /*tp_traverse*/
3877 (inquiry)Pickler_clear, /*tp_clear*/
3878 0, /*tp_richcompare*/
3879 0, /*tp_weaklistoffset*/
3880 0, /*tp_iter*/
3881 0, /*tp_iternext*/
3882 Pickler_methods, /*tp_methods*/
3883 Pickler_members, /*tp_members*/
3884 Pickler_getsets, /*tp_getset*/
3885 0, /*tp_base*/
3886 0, /*tp_dict*/
3887 0, /*tp_descr_get*/
3888 0, /*tp_descr_set*/
3889 0, /*tp_dictoffset*/
3890 (initproc)Pickler_init, /*tp_init*/
3891 PyType_GenericAlloc, /*tp_alloc*/
3892 PyType_GenericNew, /*tp_new*/
3893 PyObject_GC_Del, /*tp_free*/
3894 0, /*tp_is_gc*/
3895};
3896
Victor Stinner121aab42011-09-29 23:40:53 +02003897/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003898
3899 XXX: It would be nice to able to avoid Python function call overhead, by
3900 using directly the C version of find_class(), when find_class() is not
3901 overridden by a subclass. Although, this could become rather hackish. A
3902 simpler optimization would be to call the C function when self is not a
3903 subclass instance. */
3904static PyObject *
3905find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3906{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003907 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003908
3909 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3910 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003911}
3912
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003913static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003914marker(UnpicklerObject *self)
3915{
3916 if (self->num_marks < 1) {
3917 PyErr_SetString(UnpicklingError, "could not find MARK");
3918 return -1;
3919 }
3920
3921 return self->marks[--self->num_marks];
3922}
3923
3924static int
3925load_none(UnpicklerObject *self)
3926{
3927 PDATA_APPEND(self->stack, Py_None, -1);
3928 return 0;
3929}
3930
3931static int
3932bad_readline(void)
3933{
3934 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3935 return -1;
3936}
3937
3938static int
3939load_int(UnpicklerObject *self)
3940{
3941 PyObject *value;
3942 char *endptr, *s;
3943 Py_ssize_t len;
3944 long x;
3945
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003946 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003947 return -1;
3948 if (len < 2)
3949 return bad_readline();
3950
3951 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003952 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003953 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003954 x = strtol(s, &endptr, 0);
3955
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003956 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003957 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03003958 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003959 errno = 0;
3960 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003961 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003962 if (value == NULL) {
3963 PyErr_SetString(PyExc_ValueError,
3964 "could not convert string to int");
3965 return -1;
3966 }
3967 }
3968 else {
3969 if (len == 3 && (x == 0 || x == 1)) {
3970 if ((value = PyBool_FromLong(x)) == NULL)
3971 return -1;
3972 }
3973 else {
3974 if ((value = PyLong_FromLong(x)) == NULL)
3975 return -1;
3976 }
3977 }
3978
3979 PDATA_PUSH(self->stack, value, -1);
3980 return 0;
3981}
3982
3983static int
3984load_bool(UnpicklerObject *self, PyObject *boolean)
3985{
3986 assert(boolean == Py_True || boolean == Py_False);
3987 PDATA_APPEND(self->stack, boolean, -1);
3988 return 0;
3989}
3990
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003991/* s contains x bytes of an unsigned little-endian integer. Return its value
3992 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3993 */
3994static Py_ssize_t
3995calc_binsize(char *bytes, int size)
3996{
3997 unsigned char *s = (unsigned char *)bytes;
3998 size_t x = 0;
3999
4000 assert(size == 4);
4001
4002 x = (size_t) s[0];
4003 x |= (size_t) s[1] << 8;
4004 x |= (size_t) s[2] << 16;
4005 x |= (size_t) s[3] << 24;
4006
4007 if (x > PY_SSIZE_T_MAX)
4008 return -1;
4009 else
4010 return (Py_ssize_t) x;
4011}
4012
/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long, so that no signed value
 * is ever shifted into (or past) the sign bit -- that is undefined
 * behaviour in C and would otherwise happen on platforms whose long is
 * only 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: a set bit 31 means the value is ux - 2**32.  Computing
     * that as -(2**32 - 1 - ux) - 1 keeps every intermediate inside the
     * range of a 32-bit long, whatever the actual width of long is.
     */
    if (size == 4 && (ux & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
4039
4040static int
4041load_binintx(UnpicklerObject *self, char *s, int size)
4042{
4043 PyObject *value;
4044 long x;
4045
4046 x = calc_binint(s, size);
4047
4048 if ((value = PyLong_FromLong(x)) == NULL)
4049 return -1;
4050
4051 PDATA_PUSH(self->stack, value, -1);
4052 return 0;
4053}
4054
4055static int
4056load_binint(UnpicklerObject *self)
4057{
4058 char *s;
4059
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004060 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004061 return -1;
4062
4063 return load_binintx(self, s, 4);
4064}
4065
4066static int
4067load_binint1(UnpicklerObject *self)
4068{
4069 char *s;
4070
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004071 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004072 return -1;
4073
4074 return load_binintx(self, s, 1);
4075}
4076
4077static int
4078load_binint2(UnpicklerObject *self)
4079{
4080 char *s;
4081
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004082 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004083 return -1;
4084
4085 return load_binintx(self, s, 2);
4086}
4087
4088static int
4089load_long(UnpicklerObject *self)
4090{
4091 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004092 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004093 Py_ssize_t len;
4094
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004095 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004096 return -1;
4097 if (len < 2)
4098 return bad_readline();
4099
Mark Dickinson8dd05142009-01-20 20:43:58 +00004100 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4101 the 'L' before calling PyLong_FromString. In order to maintain
4102 compatibility with Python 3.0.0, we don't actually *require*
4103 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004104 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004105 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004106 /* XXX: Should the base argument explicitly set to 10? */
4107 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004108 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004109 return -1;
4110
4111 PDATA_PUSH(self->stack, value, -1);
4112 return 0;
4113}
4114
4115/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4116 * data following.
4117 */
4118static int
4119load_counted_long(UnpicklerObject *self, int size)
4120{
4121 PyObject *value;
4122 char *nbytes;
4123 char *pdata;
4124
4125 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004126 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004127 return -1;
4128
4129 size = calc_binint(nbytes, size);
4130 if (size < 0) {
4131 /* Corrupt or hostile pickle -- we never write one like this */
4132 PyErr_SetString(UnpicklingError,
4133 "LONG pickle has negative byte count");
4134 return -1;
4135 }
4136
4137 if (size == 0)
4138 value = PyLong_FromLong(0L);
4139 else {
4140 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004141 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004142 return -1;
4143 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4144 1 /* little endian */ , 1 /* signed */ );
4145 }
4146 if (value == NULL)
4147 return -1;
4148 PDATA_PUSH(self->stack, value, -1);
4149 return 0;
4150}
4151
4152static int
4153load_float(UnpicklerObject *self)
4154{
4155 PyObject *value;
4156 char *endptr, *s;
4157 Py_ssize_t len;
4158 double d;
4159
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004160 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004161 return -1;
4162 if (len < 2)
4163 return bad_readline();
4164
4165 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004166 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4167 if (d == -1.0 && PyErr_Occurred())
4168 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004169 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004170 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4171 return -1;
4172 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004173 value = PyFloat_FromDouble(d);
4174 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004175 return -1;
4176
4177 PDATA_PUSH(self->stack, value, -1);
4178 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004179}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004180
4181static int
4182load_binfloat(UnpicklerObject *self)
4183{
4184 PyObject *value;
4185 double x;
4186 char *s;
4187
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004188 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004189 return -1;
4190
4191 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4192 if (x == -1.0 && PyErr_Occurred())
4193 return -1;
4194
4195 if ((value = PyFloat_FromDouble(x)) == NULL)
4196 return -1;
4197
4198 PDATA_PUSH(self->stack, value, -1);
4199 return 0;
4200}
4201
4202static int
4203load_string(UnpicklerObject *self)
4204{
4205 PyObject *bytes;
4206 PyObject *str = NULL;
4207 Py_ssize_t len;
4208 char *s, *p;
4209
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004210 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004211 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004212 /* Strip the newline */
4213 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004214 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004215 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004216 p = s + 1;
4217 len -= 2;
4218 }
4219 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004220 PyErr_SetString(UnpicklingError,
4221 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004222 return -1;
4223 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004224 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004225
4226 /* Use the PyBytes API to decode the string, since that is what is used
4227 to encode, and then coerce the result to Unicode. */
4228 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004229 if (bytes == NULL)
4230 return -1;
4231 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4232 Py_DECREF(bytes);
4233 if (str == NULL)
4234 return -1;
4235
4236 PDATA_PUSH(self->stack, str, -1);
4237 return 0;
4238}
4239
4240static int
4241load_binbytes(UnpicklerObject *self)
4242{
4243 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004244 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004245 char *s;
4246
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004247 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004248 return -1;
4249
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004250 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004251 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004252 PyErr_Format(PyExc_OverflowError,
4253 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004254 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004255 return -1;
4256 }
4257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004258 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004259 return -1;
4260 bytes = PyBytes_FromStringAndSize(s, x);
4261 if (bytes == NULL)
4262 return -1;
4263
4264 PDATA_PUSH(self->stack, bytes, -1);
4265 return 0;
4266}
4267
4268static int
4269load_short_binbytes(UnpicklerObject *self)
4270{
4271 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004272 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004273 char *s;
4274
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004275 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004276 return -1;
4277
4278 x = (unsigned char)s[0];
4279
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004280 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004281 return -1;
4282
4283 bytes = PyBytes_FromStringAndSize(s, x);
4284 if (bytes == NULL)
4285 return -1;
4286
4287 PDATA_PUSH(self->stack, bytes, -1);
4288 return 0;
4289}
4290
4291static int
4292load_binstring(UnpicklerObject *self)
4293{
4294 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004295 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004296 char *s;
4297
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004298 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004299 return -1;
4300
4301 x = calc_binint(s, 4);
4302 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004303 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004304 "BINSTRING pickle has negative byte count");
4305 return -1;
4306 }
4307
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004308 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004309 return -1;
4310
4311 /* Convert Python 2.x strings to unicode. */
4312 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4313 if (str == NULL)
4314 return -1;
4315
4316 PDATA_PUSH(self->stack, str, -1);
4317 return 0;
4318}
4319
4320static int
4321load_short_binstring(UnpicklerObject *self)
4322{
4323 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004324 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004325 char *s;
4326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004327 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004328 return -1;
4329
4330 x = (unsigned char)s[0];
4331
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004332 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004333 return -1;
4334
4335 /* Convert Python 2.x strings to unicode. */
4336 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4337 if (str == NULL)
4338 return -1;
4339
4340 PDATA_PUSH(self->stack, str, -1);
4341 return 0;
4342}
4343
4344static int
4345load_unicode(UnpicklerObject *self)
4346{
4347 PyObject *str;
4348 Py_ssize_t len;
4349 char *s;
4350
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004351 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004352 return -1;
4353 if (len < 1)
4354 return bad_readline();
4355
4356 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4357 if (str == NULL)
4358 return -1;
4359
4360 PDATA_PUSH(self->stack, str, -1);
4361 return 0;
4362}
4363
4364static int
4365load_binunicode(UnpicklerObject *self)
4366{
4367 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004368 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004369 char *s;
4370
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004371 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004372 return -1;
4373
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004374 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004375 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004376 PyErr_Format(PyExc_OverflowError,
4377 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004378 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004379 return -1;
4380 }
4381
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004382
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004383 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004384 return -1;
4385
Victor Stinner485fb562010-04-13 11:07:24 +00004386 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004387 if (str == NULL)
4388 return -1;
4389
4390 PDATA_PUSH(self->stack, str, -1);
4391 return 0;
4392}
4393
4394static int
4395load_tuple(UnpicklerObject *self)
4396{
4397 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004398 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004399
4400 if ((i = marker(self)) < 0)
4401 return -1;
4402
4403 tuple = Pdata_poptuple(self->stack, i);
4404 if (tuple == NULL)
4405 return -1;
4406 PDATA_PUSH(self->stack, tuple, -1);
4407 return 0;
4408}
4409
4410static int
4411load_counted_tuple(UnpicklerObject *self, int len)
4412{
4413 PyObject *tuple;
4414
4415 tuple = PyTuple_New(len);
4416 if (tuple == NULL)
4417 return -1;
4418
4419 while (--len >= 0) {
4420 PyObject *item;
4421
4422 PDATA_POP(self->stack, item);
4423 if (item == NULL)
4424 return -1;
4425 PyTuple_SET_ITEM(tuple, len, item);
4426 }
4427 PDATA_PUSH(self->stack, tuple, -1);
4428 return 0;
4429}
4430
4431static int
4432load_empty_list(UnpicklerObject *self)
4433{
4434 PyObject *list;
4435
4436 if ((list = PyList_New(0)) == NULL)
4437 return -1;
4438 PDATA_PUSH(self->stack, list, -1);
4439 return 0;
4440}
4441
4442static int
4443load_empty_dict(UnpicklerObject *self)
4444{
4445 PyObject *dict;
4446
4447 if ((dict = PyDict_New()) == NULL)
4448 return -1;
4449 PDATA_PUSH(self->stack, dict, -1);
4450 return 0;
4451}
4452
4453static int
4454load_list(UnpicklerObject *self)
4455{
4456 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004457 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004458
4459 if ((i = marker(self)) < 0)
4460 return -1;
4461
4462 list = Pdata_poplist(self->stack, i);
4463 if (list == NULL)
4464 return -1;
4465 PDATA_PUSH(self->stack, list, -1);
4466 return 0;
4467}
4468
4469static int
4470load_dict(UnpicklerObject *self)
4471{
4472 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004473 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004474
4475 if ((i = marker(self)) < 0)
4476 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004477 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004478
4479 if ((dict = PyDict_New()) == NULL)
4480 return -1;
4481
4482 for (k = i + 1; k < j; k += 2) {
4483 key = self->stack->data[k - 1];
4484 value = self->stack->data[k];
4485 if (PyDict_SetItem(dict, key, value) < 0) {
4486 Py_DECREF(dict);
4487 return -1;
4488 }
4489 }
4490 Pdata_clear(self->stack, i);
4491 PDATA_PUSH(self->stack, dict, -1);
4492 return 0;
4493}
4494
4495static PyObject *
4496instantiate(PyObject *cls, PyObject *args)
4497{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004498 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004499 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004500 /* Caller must assure args are a tuple. Normally, args come from
4501 Pdata_poptuple which packs objects from the top of the stack
4502 into a newly created tuple. */
4503 assert(PyTuple_Check(args));
4504 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004505 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004506 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004507 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004508 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004509 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004510
4511 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004512 }
4513 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004514}
4515
4516static int
4517load_obj(UnpicklerObject *self)
4518{
4519 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004520 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004521
4522 if ((i = marker(self)) < 0)
4523 return -1;
4524
4525 args = Pdata_poptuple(self->stack, i + 1);
4526 if (args == NULL)
4527 return -1;
4528
4529 PDATA_POP(self->stack, cls);
4530 if (cls) {
4531 obj = instantiate(cls, args);
4532 Py_DECREF(cls);
4533 }
4534 Py_DECREF(args);
4535 if (obj == NULL)
4536 return -1;
4537
4538 PDATA_PUSH(self->stack, obj, -1);
4539 return 0;
4540}
4541
4542static int
4543load_inst(UnpicklerObject *self)
4544{
4545 PyObject *cls = NULL;
4546 PyObject *args = NULL;
4547 PyObject *obj = NULL;
4548 PyObject *module_name;
4549 PyObject *class_name;
4550 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004551 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004552 char *s;
4553
4554 if ((i = marker(self)) < 0)
4555 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004556 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004557 return -1;
4558 if (len < 2)
4559 return bad_readline();
4560
4561 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4562 identifiers are permitted in Python 3.0, since the INST opcode is only
4563 supported by older protocols on Python 2.x. */
4564 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4565 if (module_name == NULL)
4566 return -1;
4567
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004568 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004569 if (len < 2)
4570 return bad_readline();
4571 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004572 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004573 cls = find_class(self, module_name, class_name);
4574 Py_DECREF(class_name);
4575 }
4576 }
4577 Py_DECREF(module_name);
4578
4579 if (cls == NULL)
4580 return -1;
4581
4582 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4583 obj = instantiate(cls, args);
4584 Py_DECREF(args);
4585 }
4586 Py_DECREF(cls);
4587
4588 if (obj == NULL)
4589 return -1;
4590
4591 PDATA_PUSH(self->stack, obj, -1);
4592 return 0;
4593}
4594
4595static int
4596load_newobj(UnpicklerObject *self)
4597{
4598 PyObject *args = NULL;
4599 PyObject *clsraw = NULL;
4600 PyTypeObject *cls; /* clsraw cast to its true type */
4601 PyObject *obj;
4602
4603 /* Stack is ... cls argtuple, and we want to call
4604 * cls.__new__(cls, *argtuple).
4605 */
4606 PDATA_POP(self->stack, args);
4607 if (args == NULL)
4608 goto error;
4609 if (!PyTuple_Check(args)) {
4610 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4611 goto error;
4612 }
4613
4614 PDATA_POP(self->stack, clsraw);
4615 cls = (PyTypeObject *)clsraw;
4616 if (cls == NULL)
4617 goto error;
4618 if (!PyType_Check(cls)) {
4619 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4620 "isn't a type object");
4621 goto error;
4622 }
4623 if (cls->tp_new == NULL) {
4624 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4625 "has NULL tp_new");
4626 goto error;
4627 }
4628
4629 /* Call __new__. */
4630 obj = cls->tp_new(cls, args, NULL);
4631 if (obj == NULL)
4632 goto error;
4633
4634 Py_DECREF(args);
4635 Py_DECREF(clsraw);
4636 PDATA_PUSH(self->stack, obj, -1);
4637 return 0;
4638
4639 error:
4640 Py_XDECREF(args);
4641 Py_XDECREF(clsraw);
4642 return -1;
4643}
4644
4645static int
4646load_global(UnpicklerObject *self)
4647{
4648 PyObject *global = NULL;
4649 PyObject *module_name;
4650 PyObject *global_name;
4651 Py_ssize_t len;
4652 char *s;
4653
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004654 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004655 return -1;
4656 if (len < 2)
4657 return bad_readline();
4658 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4659 if (!module_name)
4660 return -1;
4661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004662 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004663 if (len < 2) {
4664 Py_DECREF(module_name);
4665 return bad_readline();
4666 }
4667 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4668 if (global_name) {
4669 global = find_class(self, module_name, global_name);
4670 Py_DECREF(global_name);
4671 }
4672 }
4673 Py_DECREF(module_name);
4674
4675 if (global == NULL)
4676 return -1;
4677 PDATA_PUSH(self->stack, global, -1);
4678 return 0;
4679}
4680
4681static int
4682load_persid(UnpicklerObject *self)
4683{
4684 PyObject *pid;
4685 Py_ssize_t len;
4686 char *s;
4687
4688 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004689 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004690 return -1;
4691 if (len < 2)
4692 return bad_readline();
4693
4694 pid = PyBytes_FromStringAndSize(s, len - 1);
4695 if (pid == NULL)
4696 return -1;
4697
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004698 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004699 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004700 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004701 if (pid == NULL)
4702 return -1;
4703
4704 PDATA_PUSH(self->stack, pid, -1);
4705 return 0;
4706 }
4707 else {
4708 PyErr_SetString(UnpicklingError,
4709 "A load persistent id instruction was encountered,\n"
4710 "but no persistent_load function was specified.");
4711 return -1;
4712 }
4713}
4714
4715static int
4716load_binpersid(UnpicklerObject *self)
4717{
4718 PyObject *pid;
4719
4720 if (self->pers_func) {
4721 PDATA_POP(self->stack, pid);
4722 if (pid == NULL)
4723 return -1;
4724
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004725 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004726 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004727 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728 if (pid == NULL)
4729 return -1;
4730
4731 PDATA_PUSH(self->stack, pid, -1);
4732 return 0;
4733 }
4734 else {
4735 PyErr_SetString(UnpicklingError,
4736 "A load persistent id instruction was encountered,\n"
4737 "but no persistent_load function was specified.");
4738 return -1;
4739 }
4740}
4741
4742static int
4743load_pop(UnpicklerObject *self)
4744{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004745 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004746
4747 /* Note that we split the (pickle.py) stack into two stacks,
4748 * an object stack and a mark stack. We have to be clever and
4749 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004750 * mark stack first, and only signalling a stack underflow if
4751 * the object stack is empty and the mark stack doesn't match
4752 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004753 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004754 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004755 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004756 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757 len--;
4758 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004759 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004760 } else {
4761 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004762 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004763 return 0;
4764}
4765
4766static int
4767load_pop_mark(UnpicklerObject *self)
4768{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004769 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004770
4771 if ((i = marker(self)) < 0)
4772 return -1;
4773
4774 Pdata_clear(self->stack, i);
4775
4776 return 0;
4777}
4778
4779static int
4780load_dup(UnpicklerObject *self)
4781{
4782 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004783 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004784
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004785 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004786 return stack_underflow();
4787 last = self->stack->data[len - 1];
4788 PDATA_APPEND(self->stack, last, -1);
4789 return 0;
4790}
4791
4792static int
4793load_get(UnpicklerObject *self)
4794{
4795 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004796 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004797 Py_ssize_t len;
4798 char *s;
4799
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004800 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004801 return -1;
4802 if (len < 2)
4803 return bad_readline();
4804
4805 key = PyLong_FromString(s, NULL, 10);
4806 if (key == NULL)
4807 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004808 idx = PyLong_AsSsize_t(key);
4809 if (idx == -1 && PyErr_Occurred()) {
4810 Py_DECREF(key);
4811 return -1;
4812 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004813
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004814 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004815 if (value == NULL) {
4816 if (!PyErr_Occurred())
4817 PyErr_SetObject(PyExc_KeyError, key);
4818 Py_DECREF(key);
4819 return -1;
4820 }
4821 Py_DECREF(key);
4822
4823 PDATA_APPEND(self->stack, value, -1);
4824 return 0;
4825}
4826
4827static int
4828load_binget(UnpicklerObject *self)
4829{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004830 PyObject *value;
4831 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004832 char *s;
4833
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004834 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835 return -1;
4836
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004837 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004838
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004839 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004840 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004841 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004842 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004844 Py_DECREF(key);
4845 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846 return -1;
4847 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848
4849 PDATA_APPEND(self->stack, value, -1);
4850 return 0;
4851}
4852
4853static int
4854load_long_binget(UnpicklerObject *self)
4855{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004856 PyObject *value;
4857 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004858 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004859
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004860 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861 return -1;
4862
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004863 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004865 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004866 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004867 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004868 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004869 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004870 Py_DECREF(key);
4871 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872 return -1;
4873 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874
4875 PDATA_APPEND(self->stack, value, -1);
4876 return 0;
4877}
4878
4879/* Push an object from the extension registry (EXT[124]). nbytes is
4880 * the number of bytes following the opcode, holding the index (code) value.
4881 */
4882static int
4883load_extension(UnpicklerObject *self, int nbytes)
4884{
4885 char *codebytes; /* the nbytes bytes after the opcode */
4886 long code; /* calc_binint returns long */
4887 PyObject *py_code; /* code as a Python int */
4888 PyObject *obj; /* the object to push */
4889 PyObject *pair; /* (module_name, class_name) */
4890 PyObject *module_name, *class_name;
4891
4892 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004893 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004894 return -1;
4895 code = calc_binint(codebytes, nbytes);
4896 if (code <= 0) { /* note that 0 is forbidden */
4897 /* Corrupt or hostile pickle. */
4898 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4899 return -1;
4900 }
4901
4902 /* Look for the code in the cache. */
4903 py_code = PyLong_FromLong(code);
4904 if (py_code == NULL)
4905 return -1;
4906 obj = PyDict_GetItem(extension_cache, py_code);
4907 if (obj != NULL) {
4908 /* Bingo. */
4909 Py_DECREF(py_code);
4910 PDATA_APPEND(self->stack, obj, -1);
4911 return 0;
4912 }
4913
4914 /* Look up the (module_name, class_name) pair. */
4915 pair = PyDict_GetItem(inverted_registry, py_code);
4916 if (pair == NULL) {
4917 Py_DECREF(py_code);
4918 PyErr_Format(PyExc_ValueError, "unregistered extension "
4919 "code %ld", code);
4920 return -1;
4921 }
4922 /* Since the extension registry is manipulable via Python code,
4923 * confirm that pair is really a 2-tuple of strings.
4924 */
4925 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4926 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4927 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4928 Py_DECREF(py_code);
4929 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4930 "isn't a 2-tuple of strings", code);
4931 return -1;
4932 }
4933 /* Load the object. */
4934 obj = find_class(self, module_name, class_name);
4935 if (obj == NULL) {
4936 Py_DECREF(py_code);
4937 return -1;
4938 }
4939 /* Cache code -> obj. */
4940 code = PyDict_SetItem(extension_cache, py_code, obj);
4941 Py_DECREF(py_code);
4942 if (code < 0) {
4943 Py_DECREF(obj);
4944 return -1;
4945 }
4946 PDATA_PUSH(self->stack, obj, -1);
4947 return 0;
4948}
4949
4950static int
4951load_put(UnpicklerObject *self)
4952{
4953 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004954 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004955 Py_ssize_t len;
4956 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004957
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004958 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004959 return -1;
4960 if (len < 2)
4961 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004962 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004963 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004964 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004965
4966 key = PyLong_FromString(s, NULL, 10);
4967 if (key == NULL)
4968 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004969 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004970 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004971 if (idx < 0) {
4972 if (!PyErr_Occurred())
4973 PyErr_SetString(PyExc_ValueError,
4974 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004975 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004976 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004977
4978 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004979}
4980
4981static int
4982load_binput(UnpicklerObject *self)
4983{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004984 PyObject *value;
4985 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004986 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004987
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004988 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004990
4991 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004992 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004994
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004995 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004996
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004997 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004998}
4999
5000static int
5001load_long_binput(UnpicklerObject *self)
5002{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005003 PyObject *value;
5004 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005005 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005006
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005007 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005008 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005009
5010 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005011 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005012 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005013
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005014 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005015 if (idx < 0) {
5016 PyErr_SetString(PyExc_ValueError,
5017 "negative LONG_BINPUT argument");
5018 return -1;
5019 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005020
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005021 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005022}
5023
5024static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005025do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005026{
5027 PyObject *value;
5028 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005029 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005030
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005031 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005032 if (x > len || x <= 0)
5033 return stack_underflow();
5034 if (len == x) /* nothing to do */
5035 return 0;
5036
5037 list = self->stack->data[x - 1];
5038
5039 if (PyList_Check(list)) {
5040 PyObject *slice;
5041 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005042 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005043
5044 slice = Pdata_poplist(self->stack, x);
5045 if (!slice)
5046 return -1;
5047 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005048 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005049 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005050 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005051 }
5052 else {
5053 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005054 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005055
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005056 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005057 if (append_func == NULL)
5058 return -1;
5059 for (i = x; i < len; i++) {
5060 PyObject *result;
5061
5062 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005063 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005064 if (result == NULL) {
5065 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005066 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005067 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005068 return -1;
5069 }
5070 Py_DECREF(result);
5071 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005072 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005073 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005074 }
5075
5076 return 0;
5077}
5078
5079static int
5080load_append(UnpicklerObject *self)
5081{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005082 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005083}
5084
5085static int
5086load_appends(UnpicklerObject *self)
5087{
5088 return do_append(self, marker(self));
5089}
5090
5091static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005092do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005093{
5094 PyObject *value, *key;
5095 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005096 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005097 int status = 0;
5098
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005099 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005100 if (x > len || x <= 0)
5101 return stack_underflow();
5102 if (len == x) /* nothing to do */
5103 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005104 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005105 /* Currupt or hostile pickle -- we never write one like this. */
5106 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5107 return -1;
5108 }
5109
5110 /* Here, dict does not actually need to be a PyDict; it could be anything
5111 that supports the __setitem__ attribute. */
5112 dict = self->stack->data[x - 1];
5113
5114 for (i = x + 1; i < len; i += 2) {
5115 key = self->stack->data[i - 1];
5116 value = self->stack->data[i];
5117 if (PyObject_SetItem(dict, key, value) < 0) {
5118 status = -1;
5119 break;
5120 }
5121 }
5122
5123 Pdata_clear(self->stack, x);
5124 return status;
5125}
5126
5127static int
5128load_setitem(UnpicklerObject *self)
5129{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005130 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005131}
5132
5133static int
5134load_setitems(UnpicklerObject *self)
5135{
5136 return do_setitems(self, marker(self));
5137}
5138
5139static int
5140load_build(UnpicklerObject *self)
5141{
5142 PyObject *state, *inst, *slotstate;
5143 PyObject *setstate;
5144 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005145 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005146
5147 /* Stack is ... instance, state. We want to leave instance at
5148 * the stack top, possibly mutated via instance.__setstate__(state).
5149 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005150 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005151 return stack_underflow();
5152
5153 PDATA_POP(self->stack, state);
5154 if (state == NULL)
5155 return -1;
5156
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005157 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005158
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005159 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005160 if (setstate == NULL) {
5161 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5162 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005163 else {
5164 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005165 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005166 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005167 }
5168 else {
5169 PyObject *result;
5170
5171 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005172 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005173 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005174 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005175 Py_DECREF(setstate);
5176 if (result == NULL)
5177 return -1;
5178 Py_DECREF(result);
5179 return 0;
5180 }
5181
5182 /* A default __setstate__. First see whether state embeds a
5183 * slot state dict too (a proto 2 addition).
5184 */
5185 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5186 PyObject *tmp = state;
5187
5188 state = PyTuple_GET_ITEM(tmp, 0);
5189 slotstate = PyTuple_GET_ITEM(tmp, 1);
5190 Py_INCREF(state);
5191 Py_INCREF(slotstate);
5192 Py_DECREF(tmp);
5193 }
5194 else
5195 slotstate = NULL;
5196
5197 /* Set inst.__dict__ from the state dict (if any). */
5198 if (state != Py_None) {
5199 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005200 PyObject *d_key, *d_value;
5201 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005202 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005203
5204 if (!PyDict_Check(state)) {
5205 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5206 goto error;
5207 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005208 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005209 if (dict == NULL)
5210 goto error;
5211
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005212 i = 0;
5213 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5214 /* normally the keys for instance attributes are
5215 interned. we should try to do that here. */
5216 Py_INCREF(d_key);
5217 if (PyUnicode_CheckExact(d_key))
5218 PyUnicode_InternInPlace(&d_key);
5219 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5220 Py_DECREF(d_key);
5221 goto error;
5222 }
5223 Py_DECREF(d_key);
5224 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005225 Py_DECREF(dict);
5226 }
5227
5228 /* Also set instance attributes from the slotstate dict (if any). */
5229 if (slotstate != NULL) {
5230 PyObject *d_key, *d_value;
5231 Py_ssize_t i;
5232
5233 if (!PyDict_Check(slotstate)) {
5234 PyErr_SetString(UnpicklingError,
5235 "slot state is not a dictionary");
5236 goto error;
5237 }
5238 i = 0;
5239 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5240 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5241 goto error;
5242 }
5243 }
5244
5245 if (0) {
5246 error:
5247 status = -1;
5248 }
5249
5250 Py_DECREF(state);
5251 Py_XDECREF(slotstate);
5252 return status;
5253}
5254
5255static int
5256load_mark(UnpicklerObject *self)
5257{
5258
5259 /* Note that we split the (pickle.py) stack into two stacks, an
5260 * object stack and a mark stack. Here we push a mark onto the
5261 * mark stack.
5262 */
5263
5264 if ((self->num_marks + 1) >= self->marks_size) {
5265 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005266 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005267
5268 /* Use the size_t type to check for overflow. */
5269 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005270 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005271 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005272 PyErr_NoMemory();
5273 return -1;
5274 }
5275
5276 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005277 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005278 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005279 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5280 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005281 if (marks == NULL) {
5282 PyErr_NoMemory();
5283 return -1;
5284 }
5285 self->marks = marks;
5286 self->marks_size = (Py_ssize_t)alloc;
5287 }
5288
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005289 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005290
5291 return 0;
5292}
5293
5294static int
5295load_reduce(UnpicklerObject *self)
5296{
5297 PyObject *callable = NULL;
5298 PyObject *argtup = NULL;
5299 PyObject *obj = NULL;
5300
5301 PDATA_POP(self->stack, argtup);
5302 if (argtup == NULL)
5303 return -1;
5304 PDATA_POP(self->stack, callable);
5305 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005306 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005307 Py_DECREF(callable);
5308 }
5309 Py_DECREF(argtup);
5310
5311 if (obj == NULL)
5312 return -1;
5313
5314 PDATA_PUSH(self->stack, obj, -1);
5315 return 0;
5316}
5317
5318/* Just raises an error if we don't know the protocol specified. PROTO
5319 * is the first opcode for protocols >= 2.
5320 */
5321static int
5322load_proto(UnpicklerObject *self)
5323{
5324 char *s;
5325 int i;
5326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005327 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005328 return -1;
5329
5330 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005331 if (i <= HIGHEST_PROTOCOL) {
5332 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005333 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005334 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005335
5336 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5337 return -1;
5338}
5339
5340static PyObject *
5341load(UnpicklerObject *self)
5342{
5343 PyObject *err;
5344 PyObject *value = NULL;
5345 char *s;
5346
5347 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005348 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005349 Pdata_clear(self->stack, 0);
5350
5351 /* Convenient macros for the dispatch while-switch loop just below. */
5352#define OP(opcode, load_func) \
5353 case opcode: if (load_func(self) < 0) break; continue;
5354
5355#define OP_ARG(opcode, load_func, arg) \
5356 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5357
5358 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005359 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005360 break;
5361
5362 switch ((enum opcode)s[0]) {
5363 OP(NONE, load_none)
5364 OP(BININT, load_binint)
5365 OP(BININT1, load_binint1)
5366 OP(BININT2, load_binint2)
5367 OP(INT, load_int)
5368 OP(LONG, load_long)
5369 OP_ARG(LONG1, load_counted_long, 1)
5370 OP_ARG(LONG4, load_counted_long, 4)
5371 OP(FLOAT, load_float)
5372 OP(BINFLOAT, load_binfloat)
5373 OP(BINBYTES, load_binbytes)
5374 OP(SHORT_BINBYTES, load_short_binbytes)
5375 OP(BINSTRING, load_binstring)
5376 OP(SHORT_BINSTRING, load_short_binstring)
5377 OP(STRING, load_string)
5378 OP(UNICODE, load_unicode)
5379 OP(BINUNICODE, load_binunicode)
5380 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5381 OP_ARG(TUPLE1, load_counted_tuple, 1)
5382 OP_ARG(TUPLE2, load_counted_tuple, 2)
5383 OP_ARG(TUPLE3, load_counted_tuple, 3)
5384 OP(TUPLE, load_tuple)
5385 OP(EMPTY_LIST, load_empty_list)
5386 OP(LIST, load_list)
5387 OP(EMPTY_DICT, load_empty_dict)
5388 OP(DICT, load_dict)
5389 OP(OBJ, load_obj)
5390 OP(INST, load_inst)
5391 OP(NEWOBJ, load_newobj)
5392 OP(GLOBAL, load_global)
5393 OP(APPEND, load_append)
5394 OP(APPENDS, load_appends)
5395 OP(BUILD, load_build)
5396 OP(DUP, load_dup)
5397 OP(BINGET, load_binget)
5398 OP(LONG_BINGET, load_long_binget)
5399 OP(GET, load_get)
5400 OP(MARK, load_mark)
5401 OP(BINPUT, load_binput)
5402 OP(LONG_BINPUT, load_long_binput)
5403 OP(PUT, load_put)
5404 OP(POP, load_pop)
5405 OP(POP_MARK, load_pop_mark)
5406 OP(SETITEM, load_setitem)
5407 OP(SETITEMS, load_setitems)
5408 OP(PERSID, load_persid)
5409 OP(BINPERSID, load_binpersid)
5410 OP(REDUCE, load_reduce)
5411 OP(PROTO, load_proto)
5412 OP_ARG(EXT1, load_extension, 1)
5413 OP_ARG(EXT2, load_extension, 2)
5414 OP_ARG(EXT4, load_extension, 4)
5415 OP_ARG(NEWTRUE, load_bool, Py_True)
5416 OP_ARG(NEWFALSE, load_bool, Py_False)
5417
5418 case STOP:
5419 break;
5420
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005421 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005422 if (s[0] == '\0')
5423 PyErr_SetNone(PyExc_EOFError);
5424 else
5425 PyErr_Format(UnpicklingError,
5426 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005427 return NULL;
5428 }
5429
5430 break; /* and we are done! */
5431 }
5432
5433 /* XXX: It is not clear what this is actually for. */
5434 if ((err = PyErr_Occurred())) {
5435 if (err == PyExc_EOFError) {
5436 PyErr_SetNone(PyExc_EOFError);
5437 }
5438 return NULL;
5439 }
5440
Victor Stinner2ae57e32013-10-31 13:39:23 +01005441 if (_Unpickler_SkipConsumed(self) < 0)
5442 return NULL;
5443
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005444 PDATA_POP(self->stack, value);
5445 return value;
5446}
5447
5448PyDoc_STRVAR(Unpickler_load_doc,
5449"load() -> object. Load a pickle."
5450"\n"
5451"Read a pickled object representation from the open file object given in\n"
5452"the constructor, and return the reconstituted object hierarchy specified\n"
5453"therein.\n");
5454
5455static PyObject *
5456Unpickler_load(UnpicklerObject *self)
5457{
5458 /* Check whether the Unpickler was initialized correctly. This prevents
5459 segfaulting if a subclass overridden __init__ with a function that does
5460 not call Unpickler.__init__(). Here, we simply ensure that self->read
5461 is not NULL. */
5462 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005463 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005464 "Unpickler.__init__() was not called by %s.__init__()",
5465 Py_TYPE(self)->tp_name);
5466 return NULL;
5467 }
5468
5469 return load(self);
5470}
5471
5472/* The name of find_class() is misleading. In newer pickle protocols, this
5473 function is used for loading any global (i.e., functions), not just
5474 classes. The name is kept only for backward compatibility. */
5475
5476PyDoc_STRVAR(Unpickler_find_class_doc,
5477"find_class(module_name, global_name) -> object.\n"
5478"\n"
5479"Return an object from a specified module, importing the module if\n"
5480"necessary. Subclasses may override this method (e.g. to restrict\n"
5481"unpickling of arbitrary classes and functions).\n"
5482"\n"
5483"This method is called whenever a class or a function object is\n"
5484"needed. Both arguments passed are str objects.\n");
5485
5486static PyObject *
5487Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5488{
5489 PyObject *global;
5490 PyObject *modules_dict;
5491 PyObject *module;
5492 PyObject *module_name, *global_name;
5493
5494 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5495 &module_name, &global_name))
5496 return NULL;
5497
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005498 /* Try to map the old names used in Python 2.x to the new ones used in
5499 Python 3.x. We do this only with old pickle protocols and when the
5500 user has not disabled the feature. */
5501 if (self->proto < 3 && self->fix_imports) {
5502 PyObject *key;
5503 PyObject *item;
5504
5505 /* Check if the global (i.e., a function or a class) was renamed
5506 or moved to another module. */
5507 key = PyTuple_Pack(2, module_name, global_name);
5508 if (key == NULL)
5509 return NULL;
5510 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5511 Py_DECREF(key);
5512 if (item) {
5513 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5514 PyErr_Format(PyExc_RuntimeError,
5515 "_compat_pickle.NAME_MAPPING values should be "
5516 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5517 return NULL;
5518 }
5519 module_name = PyTuple_GET_ITEM(item, 0);
5520 global_name = PyTuple_GET_ITEM(item, 1);
5521 if (!PyUnicode_Check(module_name) ||
5522 !PyUnicode_Check(global_name)) {
5523 PyErr_Format(PyExc_RuntimeError,
5524 "_compat_pickle.NAME_MAPPING values should be "
5525 "pairs of str, not (%.200s, %.200s)",
5526 Py_TYPE(module_name)->tp_name,
5527 Py_TYPE(global_name)->tp_name);
5528 return NULL;
5529 }
5530 }
5531 else if (PyErr_Occurred()) {
5532 return NULL;
5533 }
5534
5535 /* Check if the module was renamed. */
5536 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5537 if (item) {
5538 if (!PyUnicode_Check(item)) {
5539 PyErr_Format(PyExc_RuntimeError,
5540 "_compat_pickle.IMPORT_MAPPING values should be "
5541 "strings, not %.200s", Py_TYPE(item)->tp_name);
5542 return NULL;
5543 }
5544 module_name = item;
5545 }
5546 else if (PyErr_Occurred()) {
5547 return NULL;
5548 }
5549 }
5550
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005551 modules_dict = PySys_GetObject("modules");
Victor Stinner1e53bba2013-07-16 22:26:05 +02005552 if (modules_dict == NULL) {
5553 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005554 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02005555 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005556
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005557 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005558 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005559 if (PyErr_Occurred())
5560 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005561 module = PyImport_Import(module_name);
5562 if (module == NULL)
5563 return NULL;
5564 global = PyObject_GetAttr(module, global_name);
5565 Py_DECREF(module);
5566 }
Victor Stinner121aab42011-09-29 23:40:53 +02005567 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005568 global = PyObject_GetAttr(module, global_name);
5569 }
5570 return global;
5571}
5572
5573static struct PyMethodDef Unpickler_methods[] = {
5574 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5575 Unpickler_load_doc},
5576 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5577 Unpickler_find_class_doc},
5578 {NULL, NULL} /* sentinel */
5579};
5580
5581static void
5582Unpickler_dealloc(UnpicklerObject *self)
5583{
5584 PyObject_GC_UnTrack((PyObject *)self);
5585 Py_XDECREF(self->readline);
5586 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005587 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005588 Py_XDECREF(self->stack);
5589 Py_XDECREF(self->pers_func);
5590 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005591 if (self->buffer.buf != NULL) {
5592 PyBuffer_Release(&self->buffer);
5593 self->buffer.buf = NULL;
5594 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005595
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005596 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005597 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005598 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005599 PyMem_Free(self->encoding);
5600 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005601
5602 Py_TYPE(self)->tp_free((PyObject *)self);
5603}
5604
5605static int
5606Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5607{
5608 Py_VISIT(self->readline);
5609 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005610 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005611 Py_VISIT(self->stack);
5612 Py_VISIT(self->pers_func);
5613 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005614 return 0;
5615}
5616
5617static int
5618Unpickler_clear(UnpicklerObject *self)
5619{
5620 Py_CLEAR(self->readline);
5621 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005622 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005623 Py_CLEAR(self->stack);
5624 Py_CLEAR(self->pers_func);
5625 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005626 if (self->buffer.buf != NULL) {
5627 PyBuffer_Release(&self->buffer);
5628 self->buffer.buf = NULL;
5629 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005630
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005631 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005632 PyMem_Free(self->marks);
5633 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005634 PyMem_Free(self->input_line);
5635 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005636 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005637 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02005638 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005639 self->errors = NULL;
5640
5641 return 0;
5642}
5643
5644PyDoc_STRVAR(Unpickler_doc,
5645"Unpickler(file, *, encoding='ASCII', errors='strict')"
5646"\n"
5647"This takes a binary file for reading a pickle data stream.\n"
5648"\n"
5649"The protocol version of the pickle is detected automatically, so no\n"
5650"proto argument is needed.\n"
5651"\n"
5652"The file-like object must have two methods, a read() method\n"
5653"that takes an integer argument, and a readline() method that\n"
5654"requires no arguments. Both methods should return bytes.\n"
5655"Thus file-like object can be a binary file object opened for\n"
5656"reading, a BytesIO object, or any other custom object that\n"
5657"meets this interface.\n"
5658"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005659"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5660"which are used to control compatiblity support for pickle stream\n"
5661"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5662"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5663"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5664"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5665"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005666
5667static int
5668Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5669{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005670 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005671 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005672 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005673 char *encoding = NULL;
5674 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005675 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005676
5677 /* XXX: That is an horrible error message. But, I don't know how to do
5678 better... */
5679 if (Py_SIZE(args) != 1) {
5680 PyErr_Format(PyExc_TypeError,
5681 "%s takes exactly one positional argument (%zd given)",
5682 Py_TYPE(self)->tp_name, Py_SIZE(args));
5683 return -1;
5684 }
5685
5686 /* Arguments parsing needs to be done in the __init__() method to allow
5687 subclasses to define their own __init__() method, which may (or may
5688 not) support Unpickler arguments. However, this means we need to be
5689 extra careful in the other Unpickler methods, since a subclass could
5690 forget to call Unpickler.__init__() thus breaking our internal
5691 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005692 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005693 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005694 return -1;
5695
5696 /* In case of multiple __init__() calls, clear previous content. */
5697 if (self->read != NULL)
5698 (void)Unpickler_clear(self);
5699
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005700 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005701 return -1;
5702
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005703 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005704 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005705
5706 self->fix_imports = PyObject_IsTrue(fix_imports);
5707 if (self->fix_imports == -1)
5708 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005709
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02005710 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005711 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
5712 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005713 if (self->pers_func == NULL)
5714 return -1;
5715 }
5716 else {
5717 self->pers_func = NULL;
5718 }
5719
5720 self->stack = (Pdata *)Pdata_New();
5721 if (self->stack == NULL)
5722 return -1;
5723
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005724 self->memo_size = 32;
5725 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005726 if (self->memo == NULL)
5727 return -1;
5728
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005729 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005730 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005731
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005732 return 0;
5733}
5734
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005735/* Define a proxy object for the Unpickler's internal memo object. This is to
5736 * avoid breaking code like:
5737 * unpickler.memo.clear()
5738 * and
5739 * unpickler.memo = saved_memo
5740 * Is this a good idea? Not really, but we don't want to break code that uses
5741 * it. Note that we don't implement the entire mapping API here. This is
5742 * intentional, as these should be treated as black-box implementation details.
5743 *
5744 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005745 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005746 */
5747
5748typedef struct {
5749 PyObject_HEAD
5750 UnpicklerObject *unpickler;
5751} UnpicklerMemoProxyObject;
5752
5753PyDoc_STRVAR(ump_clear_doc,
5754"memo.clear() -> None. Remove all items from memo.");
5755
5756static PyObject *
5757ump_clear(UnpicklerMemoProxyObject *self)
5758{
5759 _Unpickler_MemoCleanup(self->unpickler);
5760 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5761 if (self->unpickler->memo == NULL)
5762 return NULL;
5763 Py_RETURN_NONE;
5764}
5765
5766PyDoc_STRVAR(ump_copy_doc,
5767"memo.copy() -> new_memo. Copy the memo to a new object.");
5768
5769static PyObject *
5770ump_copy(UnpicklerMemoProxyObject *self)
5771{
5772 Py_ssize_t i;
5773 PyObject *new_memo = PyDict_New();
5774 if (new_memo == NULL)
5775 return NULL;
5776
5777 for (i = 0; i < self->unpickler->memo_size; i++) {
5778 int status;
5779 PyObject *key, *value;
5780
5781 value = self->unpickler->memo[i];
5782 if (value == NULL)
5783 continue;
5784
5785 key = PyLong_FromSsize_t(i);
5786 if (key == NULL)
5787 goto error;
5788 status = PyDict_SetItem(new_memo, key, value);
5789 Py_DECREF(key);
5790 if (status < 0)
5791 goto error;
5792 }
5793 return new_memo;
5794
5795error:
5796 Py_DECREF(new_memo);
5797 return NULL;
5798}
5799
5800PyDoc_STRVAR(ump_reduce_doc,
5801"memo.__reduce__(). Pickling support.");
5802
5803static PyObject *
5804ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5805{
5806 PyObject *reduce_value;
5807 PyObject *constructor_args;
5808 PyObject *contents = ump_copy(self);
5809 if (contents == NULL)
5810 return NULL;
5811
5812 reduce_value = PyTuple_New(2);
5813 if (reduce_value == NULL) {
5814 Py_DECREF(contents);
5815 return NULL;
5816 }
5817 constructor_args = PyTuple_New(1);
5818 if (constructor_args == NULL) {
5819 Py_DECREF(contents);
5820 Py_DECREF(reduce_value);
5821 return NULL;
5822 }
5823 PyTuple_SET_ITEM(constructor_args, 0, contents);
5824 Py_INCREF((PyObject *)&PyDict_Type);
5825 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5826 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5827 return reduce_value;
5828}
5829
5830static PyMethodDef unpicklerproxy_methods[] = {
5831 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5832 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5833 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5834 {NULL, NULL} /* sentinel */
5835};
5836
5837static void
5838UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5839{
5840 PyObject_GC_UnTrack(self);
5841 Py_XDECREF(self->unpickler);
5842 PyObject_GC_Del((PyObject *)self);
5843}
5844
5845static int
5846UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5847 visitproc visit, void *arg)
5848{
5849 Py_VISIT(self->unpickler);
5850 return 0;
5851}
5852
5853static int
5854UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5855{
5856 Py_CLEAR(self->unpickler);
5857 return 0;
5858}
5859
5860static PyTypeObject UnpicklerMemoProxyType = {
5861 PyVarObject_HEAD_INIT(NULL, 0)
5862 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5863 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5864 0,
5865 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5866 0, /* tp_print */
5867 0, /* tp_getattr */
5868 0, /* tp_setattr */
5869 0, /* tp_compare */
5870 0, /* tp_repr */
5871 0, /* tp_as_number */
5872 0, /* tp_as_sequence */
5873 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005874 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005875 0, /* tp_call */
5876 0, /* tp_str */
5877 PyObject_GenericGetAttr, /* tp_getattro */
5878 PyObject_GenericSetAttr, /* tp_setattro */
5879 0, /* tp_as_buffer */
5880 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5881 0, /* tp_doc */
5882 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5883 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5884 0, /* tp_richcompare */
5885 0, /* tp_weaklistoffset */
5886 0, /* tp_iter */
5887 0, /* tp_iternext */
5888 unpicklerproxy_methods, /* tp_methods */
5889};
5890
5891static PyObject *
5892UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5893{
5894 UnpicklerMemoProxyObject *self;
5895
5896 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5897 &UnpicklerMemoProxyType);
5898 if (self == NULL)
5899 return NULL;
5900 Py_INCREF(unpickler);
5901 self->unpickler = unpickler;
5902 PyObject_GC_Track(self);
5903 return (PyObject *)self;
5904}
5905
5906/*****************************************************************************/
5907
5908
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005909static PyObject *
5910Unpickler_get_memo(UnpicklerObject *self)
5911{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005912 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005913}
5914
5915static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005916Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005917{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005918 PyObject **new_memo;
5919 Py_ssize_t new_memo_size = 0;
5920 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005921
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005922 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005923 PyErr_SetString(PyExc_TypeError,
5924 "attribute deletion is not supported");
5925 return -1;
5926 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005927
5928 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5929 UnpicklerObject *unpickler =
5930 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5931
5932 new_memo_size = unpickler->memo_size;
5933 new_memo = _Unpickler_NewMemo(new_memo_size);
5934 if (new_memo == NULL)
5935 return -1;
5936
5937 for (i = 0; i < new_memo_size; i++) {
5938 Py_XINCREF(unpickler->memo[i]);
5939 new_memo[i] = unpickler->memo[i];
5940 }
5941 }
5942 else if (PyDict_Check(obj)) {
5943 Py_ssize_t i = 0;
5944 PyObject *key, *value;
5945
5946 new_memo_size = PyDict_Size(obj);
5947 new_memo = _Unpickler_NewMemo(new_memo_size);
5948 if (new_memo == NULL)
5949 return -1;
5950
5951 while (PyDict_Next(obj, &i, &key, &value)) {
5952 Py_ssize_t idx;
5953 if (!PyLong_Check(key)) {
5954 PyErr_SetString(PyExc_TypeError,
5955 "memo key must be integers");
5956 goto error;
5957 }
5958 idx = PyLong_AsSsize_t(key);
5959 if (idx == -1 && PyErr_Occurred())
5960 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02005961 if (idx < 0) {
5962 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02005963 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02005964 goto error;
5965 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005966 if (_Unpickler_MemoPut(self, idx, value) < 0)
5967 goto error;
5968 }
5969 }
5970 else {
5971 PyErr_Format(PyExc_TypeError,
5972 "'memo' attribute must be an UnpicklerMemoProxy object"
5973 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005974 return -1;
5975 }
5976
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005977 _Unpickler_MemoCleanup(self);
5978 self->memo_size = new_memo_size;
5979 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005980
5981 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005982
5983 error:
5984 if (new_memo_size) {
5985 i = new_memo_size;
5986 while (--i >= 0) {
5987 Py_XDECREF(new_memo[i]);
5988 }
5989 PyMem_FREE(new_memo);
5990 }
5991 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005992}
5993
5994static PyObject *
5995Unpickler_get_persload(UnpicklerObject *self)
5996{
5997 if (self->pers_func == NULL)
5998 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5999 else
6000 Py_INCREF(self->pers_func);
6001 return self->pers_func;
6002}
6003
6004static int
6005Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6006{
6007 PyObject *tmp;
6008
6009 if (value == NULL) {
6010 PyErr_SetString(PyExc_TypeError,
6011 "attribute deletion is not supported");
6012 return -1;
6013 }
6014 if (!PyCallable_Check(value)) {
6015 PyErr_SetString(PyExc_TypeError,
6016 "persistent_load must be a callable taking "
6017 "one argument");
6018 return -1;
6019 }
6020
6021 tmp = self->pers_func;
6022 Py_INCREF(value);
6023 self->pers_func = value;
6024 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6025
6026 return 0;
6027}
6028
6029static PyGetSetDef Unpickler_getsets[] = {
6030 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6031 {"persistent_load", (getter)Unpickler_get_persload,
6032 (setter)Unpickler_set_persload},
6033 {NULL}
6034};
6035
6036static PyTypeObject Unpickler_Type = {
6037 PyVarObject_HEAD_INIT(NULL, 0)
6038 "_pickle.Unpickler", /*tp_name*/
6039 sizeof(UnpicklerObject), /*tp_basicsize*/
6040 0, /*tp_itemsize*/
6041 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6042 0, /*tp_print*/
6043 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006044 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006045 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006046 0, /*tp_repr*/
6047 0, /*tp_as_number*/
6048 0, /*tp_as_sequence*/
6049 0, /*tp_as_mapping*/
6050 0, /*tp_hash*/
6051 0, /*tp_call*/
6052 0, /*tp_str*/
6053 0, /*tp_getattro*/
6054 0, /*tp_setattro*/
6055 0, /*tp_as_buffer*/
6056 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6057 Unpickler_doc, /*tp_doc*/
6058 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6059 (inquiry)Unpickler_clear, /*tp_clear*/
6060 0, /*tp_richcompare*/
6061 0, /*tp_weaklistoffset*/
6062 0, /*tp_iter*/
6063 0, /*tp_iternext*/
6064 Unpickler_methods, /*tp_methods*/
6065 0, /*tp_members*/
6066 Unpickler_getsets, /*tp_getset*/
6067 0, /*tp_base*/
6068 0, /*tp_dict*/
6069 0, /*tp_descr_get*/
6070 0, /*tp_descr_set*/
6071 0, /*tp_dictoffset*/
6072 (initproc)Unpickler_init, /*tp_init*/
6073 PyType_GenericAlloc, /*tp_alloc*/
6074 PyType_GenericNew, /*tp_new*/
6075 PyObject_GC_Del, /*tp_free*/
6076 0, /*tp_is_gc*/
6077};
6078
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006079PyDoc_STRVAR(pickle_dump_doc,
6080"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6081"\n"
6082"Write a pickled representation of obj to the open file object file. This\n"
6083"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6084"efficient.\n"
6085"\n"
6086"The optional protocol argument tells the pickler to use the given protocol;\n"
6087"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6088"backward-incompatible protocol designed for Python 3.0.\n"
6089"\n"
6090"Specifying a negative protocol version selects the highest protocol version\n"
6091"supported. The higher the protocol used, the more recent the version of\n"
6092"Python needed to read the pickle produced.\n"
6093"\n"
6094"The file argument must have a write() method that accepts a single bytes\n"
6095"argument. It can thus be a file object opened for binary writing, a\n"
6096"io.BytesIO instance, or any other custom object that meets this interface.\n"
6097"\n"
6098"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6099"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6100"so that the pickle data stream is readable with Python 2.x.\n");
6101
6102static PyObject *
6103pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6104{
6105 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6106 PyObject *obj;
6107 PyObject *file;
6108 PyObject *proto = NULL;
6109 PyObject *fix_imports = Py_True;
6110 PicklerObject *pickler;
6111
6112 /* fix_imports is a keyword-only argument. */
6113 if (Py_SIZE(args) > 3) {
6114 PyErr_Format(PyExc_TypeError,
6115 "pickle.dump() takes at most 3 positional "
6116 "argument (%zd given)", Py_SIZE(args));
6117 return NULL;
6118 }
6119
6120 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6121 &obj, &file, &proto, &fix_imports))
6122 return NULL;
6123
6124 pickler = _Pickler_New();
6125 if (pickler == NULL)
6126 return NULL;
6127
6128 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6129 goto error;
6130
6131 if (_Pickler_SetOutputStream(pickler, file) < 0)
6132 goto error;
6133
6134 if (dump(pickler, obj) < 0)
6135 goto error;
6136
6137 if (_Pickler_FlushToFile(pickler) < 0)
6138 goto error;
6139
6140 Py_DECREF(pickler);
6141 Py_RETURN_NONE;
6142
6143 error:
6144 Py_XDECREF(pickler);
6145 return NULL;
6146}
6147
6148PyDoc_STRVAR(pickle_dumps_doc,
6149"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6150"\n"
6151"Return the pickled representation of the object as a bytes\n"
6152"object, instead of writing it to a file.\n"
6153"\n"
6154"The optional protocol argument tells the pickler to use the given protocol;\n"
6155"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6156"backward-incompatible protocol designed for Python 3.0.\n"
6157"\n"
6158"Specifying a negative protocol version selects the highest protocol version\n"
6159"supported. The higher the protocol used, the more recent the version of\n"
6160"Python needed to read the pickle produced.\n"
6161"\n"
6162"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6163"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6164"so that the pickle data stream is readable with Python 2.x.\n");
6165
6166static PyObject *
6167pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6168{
6169 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6170 PyObject *obj;
6171 PyObject *proto = NULL;
6172 PyObject *result;
6173 PyObject *fix_imports = Py_True;
6174 PicklerObject *pickler;
6175
6176 /* fix_imports is a keyword-only argument. */
6177 if (Py_SIZE(args) > 2) {
6178 PyErr_Format(PyExc_TypeError,
6179 "pickle.dumps() takes at most 2 positional "
6180 "argument (%zd given)", Py_SIZE(args));
6181 return NULL;
6182 }
6183
6184 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6185 &obj, &proto, &fix_imports))
6186 return NULL;
6187
6188 pickler = _Pickler_New();
6189 if (pickler == NULL)
6190 return NULL;
6191
6192 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6193 goto error;
6194
6195 if (dump(pickler, obj) < 0)
6196 goto error;
6197
6198 result = _Pickler_GetString(pickler);
6199 Py_DECREF(pickler);
6200 return result;
6201
6202 error:
6203 Py_XDECREF(pickler);
6204 return NULL;
6205}
6206
6207PyDoc_STRVAR(pickle_load_doc,
6208"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6209"\n"
6210"Read a pickled object representation from the open file object file and\n"
6211"return the reconstituted object hierarchy specified therein. This is\n"
6212"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6213"\n"
6214"The protocol version of the pickle is detected automatically, so no protocol\n"
6215"argument is needed. Bytes past the pickled object's representation are\n"
6216"ignored.\n"
6217"\n"
6218"The argument file must have two methods, a read() method that takes an\n"
6219"integer argument, and a readline() method that requires no arguments. Both\n"
6220"methods should return bytes. Thus *file* can be a binary file object opened\n"
6221"for reading, a BytesIO object, or any other custom object that meets this\n"
6222"interface.\n"
6223"\n"
6224"Optional keyword arguments are fix_imports, encoding and errors,\n"
6225"which are used to control compatiblity support for pickle stream generated\n"
6226"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6227"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6228"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6229"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6230
6231static PyObject *
6232pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6233{
6234 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6235 PyObject *file;
6236 PyObject *fix_imports = Py_True;
6237 PyObject *result;
6238 char *encoding = NULL;
6239 char *errors = NULL;
6240 UnpicklerObject *unpickler;
6241
6242 /* fix_imports, encoding and errors are a keyword-only argument. */
6243 if (Py_SIZE(args) != 1) {
6244 PyErr_Format(PyExc_TypeError,
6245 "pickle.load() takes exactly one positional "
6246 "argument (%zd given)", Py_SIZE(args));
6247 return NULL;
6248 }
6249
6250 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6251 &file, &fix_imports, &encoding, &errors))
6252 return NULL;
6253
6254 unpickler = _Unpickler_New();
6255 if (unpickler == NULL)
6256 return NULL;
6257
6258 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6259 goto error;
6260
6261 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6262 goto error;
6263
6264 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6265 if (unpickler->fix_imports == -1)
6266 goto error;
6267
6268 result = load(unpickler);
6269 Py_DECREF(unpickler);
6270 return result;
6271
6272 error:
6273 Py_XDECREF(unpickler);
6274 return NULL;
6275}
6276
6277PyDoc_STRVAR(pickle_loads_doc,
6278"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6279"\n"
6280"Read a pickled object hierarchy from a bytes object and return the\n"
6281"reconstituted object hierarchy specified therein\n"
6282"\n"
6283"The protocol version of the pickle is detected automatically, so no protocol\n"
6284"argument is needed. Bytes past the pickled object's representation are\n"
6285"ignored.\n"
6286"\n"
6287"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6288"are used to control compatiblity support for pickle stream generated\n"
6289"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6290"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6291"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6292"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6293
6294static PyObject *
6295pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6296{
6297 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6298 PyObject *input;
6299 PyObject *fix_imports = Py_True;
6300 PyObject *result;
6301 char *encoding = NULL;
6302 char *errors = NULL;
6303 UnpicklerObject *unpickler;
6304
6305 /* fix_imports, encoding and errors are a keyword-only argument. */
6306 if (Py_SIZE(args) != 1) {
6307 PyErr_Format(PyExc_TypeError,
6308 "pickle.loads() takes exactly one positional "
6309 "argument (%zd given)", Py_SIZE(args));
6310 return NULL;
6311 }
6312
6313 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6314 &input, &fix_imports, &encoding, &errors))
6315 return NULL;
6316
6317 unpickler = _Unpickler_New();
6318 if (unpickler == NULL)
6319 return NULL;
6320
6321 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6322 goto error;
6323
6324 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6325 goto error;
6326
6327 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6328 if (unpickler->fix_imports == -1)
6329 goto error;
6330
6331 result = load(unpickler);
6332 Py_DECREF(unpickler);
6333 return result;
6334
6335 error:
6336 Py_XDECREF(unpickler);
6337 return NULL;
6338}
6339
6340
/* Table of the module-level functions exposed by _pickle.  It is referenced
   from the _picklemodule definition.  Every entry is registered with
   METH_VARARGS|METH_KEYWORDS, i.e. the C function receives both the
   positional-argument tuple and the keyword dictionary. */
static struct PyMethodDef pickle_methods[] = {
    {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
     pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
     pickle_dumps_doc},
    {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
     pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
     pickle_loads_doc},
    {NULL, NULL} /* sentinel */
};
6352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006353static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006354initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006355{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006356 PyObject *copyreg = NULL;
6357 PyObject *compat_pickle = NULL;
6358
6359 /* XXX: We should ensure that the types of the dictionaries imported are
6360 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6361 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006362
6363 copyreg = PyImport_ImportModule("copyreg");
6364 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006365 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006366 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6367 if (!dispatch_table)
6368 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006369 extension_registry = \
6370 PyObject_GetAttrString(copyreg, "_extension_registry");
6371 if (!extension_registry)
6372 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006373 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6374 if (!inverted_registry)
6375 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006376 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6377 if (!extension_cache)
6378 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006379 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006380
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006381 /* Load the 2.x -> 3.x stdlib module mapping tables */
6382 compat_pickle = PyImport_ImportModule("_compat_pickle");
6383 if (!compat_pickle)
6384 goto error;
6385 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6386 if (!name_mapping_2to3)
6387 goto error;
6388 if (!PyDict_CheckExact(name_mapping_2to3)) {
6389 PyErr_Format(PyExc_RuntimeError,
6390 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6391 Py_TYPE(name_mapping_2to3)->tp_name);
6392 goto error;
6393 }
6394 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6395 "IMPORT_MAPPING");
6396 if (!import_mapping_2to3)
6397 goto error;
6398 if (!PyDict_CheckExact(import_mapping_2to3)) {
6399 PyErr_Format(PyExc_RuntimeError,
6400 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6401 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6402 goto error;
6403 }
6404 /* ... and the 3.x -> 2.x mapping tables */
6405 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6406 "REVERSE_NAME_MAPPING");
6407 if (!name_mapping_3to2)
6408 goto error;
6409 if (!PyDict_CheckExact(name_mapping_3to2)) {
6410 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006411 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006412 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6413 goto error;
6414 }
6415 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6416 "REVERSE_IMPORT_MAPPING");
6417 if (!import_mapping_3to2)
6418 goto error;
6419 if (!PyDict_CheckExact(import_mapping_3to2)) {
6420 PyErr_Format(PyExc_RuntimeError,
6421 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6422 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6423 goto error;
6424 }
6425 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006426
6427 empty_tuple = PyTuple_New(0);
6428 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006429 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006430 two_tuple = PyTuple_New(2);
6431 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006432 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006433 /* We use this temp container with no regard to refcounts, or to
6434 * keeping containees alive. Exempt from GC, because we don't
6435 * want anything looking at two_tuple() by magic.
6436 */
6437 PyObject_GC_UnTrack(two_tuple);
6438
6439 return 0;
6440
6441 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006442 Py_CLEAR(copyreg);
6443 Py_CLEAR(dispatch_table);
6444 Py_CLEAR(extension_registry);
6445 Py_CLEAR(inverted_registry);
6446 Py_CLEAR(extension_cache);
6447 Py_CLEAR(compat_pickle);
6448 Py_CLEAR(name_mapping_2to3);
6449 Py_CLEAR(import_mapping_2to3);
6450 Py_CLEAR(name_mapping_3to2);
6451 Py_CLEAR(import_mapping_3to2);
6452 Py_CLEAR(empty_tuple);
6453 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006454 return -1;
6455}
6456
/* Module definition passed to PyModule_Create() in PyInit__pickle(). */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",              /* module name */
    pickle_module_doc,      /* module docstring */
    -1,                     /* per-module state size; NOTE(review): -1
                               presumably because state is kept in file-level
                               globals -- confirm against PyModuleDef docs */
    pickle_methods,         /* module-level functions */
    NULL,                   /* remaining optional slots are left unset */
    NULL,
    NULL,
    NULL
};
6468
6469PyMODINIT_FUNC
6470PyInit__pickle(void)
6471{
6472 PyObject *m;
6473
6474 if (PyType_Ready(&Unpickler_Type) < 0)
6475 return NULL;
6476 if (PyType_Ready(&Pickler_Type) < 0)
6477 return NULL;
6478 if (PyType_Ready(&Pdata_Type) < 0)
6479 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006480 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6481 return NULL;
6482 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6483 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006484
6485 /* Create the module and add the functions. */
6486 m = PyModule_Create(&_picklemodule);
6487 if (m == NULL)
6488 return NULL;
6489
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006490 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006491 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6492 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006493 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006494 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6495 return NULL;
6496
6497 /* Initialize the exceptions. */
6498 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6499 if (PickleError == NULL)
6500 return NULL;
6501 PicklingError = \
6502 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6503 if (PicklingError == NULL)
6504 return NULL;
6505 UnpicklingError = \
6506 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6507 if (UnpicklingError == NULL)
6508 return NULL;
6509
6510 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6511 return NULL;
6512 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6513 return NULL;
6514 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6515 return NULL;
6516
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006517 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006518 return NULL;
6519
6520 return m;
6521}