blob: ec8bd6c3bae878e8131bbf5c80333b7e1e6395e5 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol;
   DEFAULT_PROTOCOL is used when the caller does not pick one. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 3
};
12
/* Pickle opcodes.  Each opcode is a single byte; the values below must be
   kept in step with pickle.py.  Extensive docs are in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the byte sequences used to pickle bools before
 * protocol 2.  An unpickler that predates bools reads them as ints, while
 * a newer one can recognize that a bool was intended.  Protocol 2 added
 * direct ways to pickle bools.
 */
#undef FALSE
#undef TRUE
#define FALSE "I00\n"
#define TRUE "I01\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE.
       Nothing breaks if the two drift apart, but it is unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler write buffer when pickling to a stream.
       Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500221 PyMem_RESIZE(data, PyObject *, new_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000222 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000223 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000224
225 self->data = data;
226 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000227 return 0;
228
229 nomemory:
230 PyErr_NoMemory();
231 return -1;
232}
233
234/* D is a Pdata*. Pop the topmost element and store it into V, which
235 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
236 * is raised and V is set to NULL.
237 */
238static PyObject *
239Pdata_pop(Pdata *self)
240{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000241 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000242 PyErr_SetString(UnpicklingError, "bad pickle data");
243 return NULL;
244 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000245 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000246}
247#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
248
249static int
250Pdata_push(Pdata *self, PyObject *obj)
251{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000252 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000253 return -1;
254 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000255 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000256 return 0;
257}
258
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER.  The macro does not
   release O in that case (O is evaluated exactly once). */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference added here is dropped again before the
   enclosing function returns ER, so the caller's own reference count is
   left as it was.  O is evaluated more than once and must therefore be
   free of side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
267
268static PyObject *
269Pdata_poptuple(Pdata *self, Py_ssize_t start)
270{
271 PyObject *tuple;
272 Py_ssize_t len, i, j;
273
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000274 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000275 tuple = PyTuple_New(len);
276 if (tuple == NULL)
277 return NULL;
278 for (i = start, j = 0; j < len; i++, j++)
279 PyTuple_SET_ITEM(tuple, j, self->data[i]);
280
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000281 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000282 return tuple;
283}
284
285static PyObject *
286Pdata_poplist(Pdata *self, Py_ssize_t start)
287{
288 PyObject *list;
289 Py_ssize_t len, i, j;
290
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000291 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000292 list = PyList_New(len);
293 if (list == NULL)
294 return NULL;
295 for (i = start, j = 0; j < len; i++, j++)
296 PyList_SET_ITEM(list, j, self->data[i]);
297
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000298 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000299 return list;
300}
301
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000302typedef struct {
303 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200304 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000305} PyMemoEntry;
306
307typedef struct {
308 Py_ssize_t mt_mask;
309 Py_ssize_t mt_used;
310 Py_ssize_t mt_allocated;
311 PyMemoEntry *mt_table;
312} PyMemoTable;
313
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000314typedef struct PicklerObject {
315 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000316 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000317 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100320 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000322
323 PyObject *write; /* write() method of the output stream. */
324 PyObject *output_buffer; /* Write into a local bytearray buffer before
325 flushing to the stream. */
326 Py_ssize_t output_len; /* Length of output_buffer. */
327 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000328 int proto; /* Pickle protocol number, >= 0 */
329 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200330 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000331 int fast; /* Enable fast mode if set to a true value.
332 The fast mode disable the usage of memo,
333 therefore speeding the pickling process by
334 not generating superfluous PUT opcodes. It
335 should not be used if with self-referential
336 objects. */
337 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000338 int fix_imports; /* Indicate whether Pickler should fix
339 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000340 PyObject *fast_memo;
341} PicklerObject;
342
343typedef struct UnpicklerObject {
344 PyObject_HEAD
345 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000346
347 /* The unpickler memo is just an array of PyObject *s. Using a dict
348 is unnecessary, since the keys are contiguous ints. */
349 PyObject **memo;
350 Py_ssize_t memo_size;
351
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000352 PyObject *arg;
353 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000354
355 Py_buffer buffer;
356 char *input_buffer;
357 char *input_line;
358 Py_ssize_t input_len;
359 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000360 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000361 PyObject *read; /* read() method of the input stream. */
362 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000363 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000364
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000365 char *encoding; /* Name of the encoding to be used for
366 decoding strings pickled using Python
367 2.x. The default value is "ASCII" */
368 char *errors; /* Name of errors handling scheme to used when
369 decoding strings. The default value is
370 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500371 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000372 objects. */
373 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
374 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000375 int proto; /* Protocol of the pickle loaded. */
376 int fix_imports; /* Indicate whether Unpickler should fix
377 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000378} UnpicklerObject;
379
380/* Forward declarations */
381static int save(PicklerObject *, PyObject *, int);
382static int save_reduce(PicklerObject *, PyObject *, PyObject *);
383static PyTypeObject Pickler_Type;
384static PyTypeObject Unpickler_Type;
385
386
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values.  The
 pickler uses it for memoization.  Using a custom hashtable rather than
 PyDict lets us skip a bunch of unnecessary object creation, which makes a
 huge performance difference. */

/* Smallest table size; table sizes are always a power of two. */
#define MT_MINSIZE 8
/* Number of bits the probe perturbation is shifted right by on each
   collision. */
#define PERTURB_SHIFT 5
395
396
397static PyMemoTable *
398PyMemoTable_New(void)
399{
400 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
401 if (memo == NULL) {
402 PyErr_NoMemory();
403 return NULL;
404 }
405
406 memo->mt_used = 0;
407 memo->mt_allocated = MT_MINSIZE;
408 memo->mt_mask = MT_MINSIZE - 1;
409 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
410 if (memo->mt_table == NULL) {
411 PyMem_FREE(memo);
412 PyErr_NoMemory();
413 return NULL;
414 }
415 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
416
417 return memo;
418}
419
420static PyMemoTable *
421PyMemoTable_Copy(PyMemoTable *self)
422{
423 Py_ssize_t i;
424 PyMemoTable *new = PyMemoTable_New();
425 if (new == NULL)
426 return NULL;
427
428 new->mt_used = self->mt_used;
429 new->mt_allocated = self->mt_allocated;
430 new->mt_mask = self->mt_mask;
431 /* The table we get from _New() is probably smaller than we wanted.
432 Free it and allocate one that's the right size. */
433 PyMem_FREE(new->mt_table);
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500434 new->mt_table = PyMem_NEW(PyMemoEntry, self->mt_allocated);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000435 if (new->mt_table == NULL) {
436 PyMem_FREE(new);
437 return NULL;
438 }
439 for (i = 0; i < self->mt_allocated; i++) {
440 Py_XINCREF(self->mt_table[i].me_key);
441 }
442 memcpy(new->mt_table, self->mt_table,
443 sizeof(PyMemoEntry) * self->mt_allocated);
444
445 return new;
446}
447
448static Py_ssize_t
449PyMemoTable_Size(PyMemoTable *self)
450{
451 return self->mt_used;
452}
453
454static int
455PyMemoTable_Clear(PyMemoTable *self)
456{
457 Py_ssize_t i = self->mt_allocated;
458
459 while (--i >= 0) {
460 Py_XDECREF(self->mt_table[i].me_key);
461 }
462 self->mt_used = 0;
463 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
464 return 0;
465}
466
467static void
468PyMemoTable_Del(PyMemoTable *self)
469{
470 if (self == NULL)
471 return;
472 PyMemoTable_Clear(self);
473
474 PyMem_FREE(self->mt_table);
475 PyMem_FREE(self);
476}
477
478/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
479 can be considerably simpler than dictobject.c's lookdict(). */
480static PyMemoEntry *
481_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
482{
483 size_t i;
484 size_t perturb;
485 size_t mask = (size_t)self->mt_mask;
486 PyMemoEntry *table = self->mt_table;
487 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000488 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000489
490 i = hash & mask;
491 entry = &table[i];
492 if (entry->me_key == NULL || entry->me_key == key)
493 return entry;
494
495 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
496 i = (i << 2) + i + perturb + 1;
497 entry = &table[i & mask];
498 if (entry->me_key == NULL || entry->me_key == key)
499 return entry;
500 }
501 assert(0); /* Never reached */
502 return NULL;
503}
504
505/* Returns -1 on failure, 0 on success. */
506static int
507_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
508{
509 PyMemoEntry *oldtable = NULL;
510 PyMemoEntry *oldentry, *newentry;
511 Py_ssize_t new_size = MT_MINSIZE;
512 Py_ssize_t to_process;
513
514 assert(min_size > 0);
515
516 /* Find the smallest valid table size >= min_size. */
517 while (new_size < min_size && new_size > 0)
518 new_size <<= 1;
519 if (new_size <= 0) {
520 PyErr_NoMemory();
521 return -1;
522 }
523 /* new_size needs to be a power of two. */
524 assert((new_size & (new_size - 1)) == 0);
525
526 /* Allocate new table. */
527 oldtable = self->mt_table;
Benjamin Peterson59b08c12015-06-27 13:41:33 -0500528 self->mt_table = PyMem_NEW(PyMemoEntry, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000529 if (self->mt_table == NULL) {
530 PyMem_FREE(oldtable);
531 PyErr_NoMemory();
532 return -1;
533 }
534 self->mt_allocated = new_size;
535 self->mt_mask = new_size - 1;
536 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
537
538 /* Copy entries from the old table. */
539 to_process = self->mt_used;
540 for (oldentry = oldtable; to_process > 0; oldentry++) {
541 if (oldentry->me_key != NULL) {
542 to_process--;
543 /* newentry is a pointer to a chunk of the new
544 mt_table, so we're setting the key:value pair
545 in-place. */
546 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
547 newentry->me_key = oldentry->me_key;
548 newentry->me_value = oldentry->me_value;
549 }
550 }
551
552 /* Deallocate the old table. */
553 PyMem_FREE(oldtable);
554 return 0;
555}
556
557/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200558static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000559PyMemoTable_Get(PyMemoTable *self, PyObject *key)
560{
561 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
562 if (entry->me_key == NULL)
563 return NULL;
564 return &entry->me_value;
565}
566
567/* Returns -1 on failure, 0 on success. */
568static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200569PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000570{
571 PyMemoEntry *entry;
572
573 assert(key != NULL);
574
575 entry = _PyMemoTable_Lookup(self, key);
576 if (entry->me_key != NULL) {
577 entry->me_value = value;
578 return 0;
579 }
580 Py_INCREF(key);
581 entry->me_key = key;
582 entry->me_value = value;
583 self->mt_used++;
584
585 /* If we added a key, we can safely resize. Otherwise just return!
586 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
587 *
588 * Quadrupling the size improves average table sparseness
589 * (reducing collisions) at the cost of some memory. It also halves
590 * the number of expensive resize operations in a growing memo table.
591 *
592 * Very large memo tables (over 50K items) use doubling instead.
593 * This may help applications with severe memory constraints.
594 */
595 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
596 return 0;
597 return _PyMemoTable_ResizeTable(self,
598 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
599}
600
/* The hashtable tuning macros are not needed past this point. */
#undef PERTURB_SHIFT
#undef MT_MINSIZE
603
604/*************************************************************************/
605
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj) puts `obj` into slot 0 of the cached 1-tuple
   self->arg, creating the tuple on first use and releasing whatever the
   slot held before.  The reference to `obj` is consumed in every case: it
   goes to the tuple, or is dropped when the tuple cannot be created (in
   which case self->arg stays NULL).

   FREE_ARG_TUP(self) discards the cached tuple when something else still
   holds a reference to it after the call.  It requires self->arg to be
   non-NULL.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall().  */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {      \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
625
626/* A temporary cleaner API for fast single argument function call.
627
628 XXX: Does caching the argument tuple provides any real performance benefits?
629
630 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
631 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
632 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
633 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
634 (i.e, call PyTuple_New() and store the returned value in an array), to save
635 one second (wall clock time). Either ways, the loading time a pickle stream
636 large enough to generate this number of calls would be massively
637 overwhelmed by other factors, like I/O throughput, the GC traversal and
638 object allocation overhead. So, I really doubt these functions provide any
639 real benefits.
640
641 On the other hand, oprofile reports that pickle spends a lot of time in
642 these functions. But, that is probably more related to the function call
643 overhead, than the argument tuple allocation.
644
645 XXX: And, what is the reference behavior of these? Steal, borrow? At first
646 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000647 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000648static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000649_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000650{
651 PyObject *result = NULL;
652
653 ARG_TUP(self, arg);
654 if (self->arg) {
655 result = PyObject_Call(func, self->arg, NULL);
656 FREE_ARG_TUP(self);
657 }
658 return result;
659}
660
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000661static int
662_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000663{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000664 Py_CLEAR(self->output_buffer);
665 self->output_buffer =
666 PyBytes_FromStringAndSize(NULL, self->max_output_len);
667 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000668 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000669 self->output_len = 0;
670 return 0;
671}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000672
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000673static PyObject *
674_Pickler_GetString(PicklerObject *self)
675{
676 PyObject *output_buffer = self->output_buffer;
677
678 assert(self->output_buffer != NULL);
679 self->output_buffer = NULL;
680 /* Resize down to exact size */
681 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
682 return NULL;
683 return output_buffer;
684}
685
686static int
687_Pickler_FlushToFile(PicklerObject *self)
688{
689 PyObject *output, *result;
690
691 assert(self->write != NULL);
692
693 output = _Pickler_GetString(self);
694 if (output == NULL)
695 return -1;
696
697 result = _Pickler_FastCall(self, self->write, output);
698 Py_XDECREF(result);
699 return (result == NULL) ? -1 : 0;
700}
701
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200702static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000703_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
704{
705 Py_ssize_t i, required;
706 char *buffer;
707
708 assert(s != NULL);
709
710 required = self->output_len + n;
711 if (required > self->max_output_len) {
712 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
713 /* XXX This reallocates a new buffer every time, which is a bit
714 wasteful. */
715 if (_Pickler_FlushToFile(self) < 0)
716 return -1;
717 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000718 return -1;
719 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000720 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
721 /* we already flushed above, so the buffer is empty */
722 PyObject *result;
723 /* XXX we could spare an intermediate copy and pass
724 a memoryview instead */
725 PyObject *output = PyBytes_FromStringAndSize(s, n);
726 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000727 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000728 result = _Pickler_FastCall(self, self->write, output);
729 Py_XDECREF(result);
730 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000731 }
732 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000733 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
734 PyErr_NoMemory();
735 return -1;
736 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200737 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000738 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
739 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000740 }
741 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000742 buffer = PyBytes_AS_STRING(self->output_buffer);
743 if (n < 8) {
744 /* This is faster than memcpy when the string is short. */
745 for (i = 0; i < n; i++) {
746 buffer[self->output_len + i] = s[i];
747 }
748 }
749 else {
750 memcpy(buffer + self->output_len, s, n);
751 }
752 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000753 return n;
754}
755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000756static PicklerObject *
757_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000758{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000759 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000761 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
762 if (self == NULL)
763 return NULL;
764
765 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100766 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200829 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000830 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200831 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000832 if (self->write == NULL) {
833 if (PyErr_ExceptionMatches(PyExc_AttributeError))
834 PyErr_SetString(PyExc_TypeError,
835 "file must have a 'write' attribute");
836 return -1;
837 }
838
839 return 0;
840}
841
842/* See documentation for _Pickler_FastCall(). */
843static PyObject *
844_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
845{
846 PyObject *result = NULL;
847
848 ARG_TUP(self, arg);
849 if (self->arg) {
850 result = PyObject_Call(func, self->arg, NULL);
851 FREE_ARG_TUP(self);
852 }
853 return result;
854}
855
856/* Returns the size of the input on success, -1 on failure. This takes its
857 own reference to `input`. */
858static Py_ssize_t
859_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
860{
861 if (self->buffer.buf != NULL)
862 PyBuffer_Release(&self->buffer);
863 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
864 return -1;
865 self->input_buffer = self->buffer.buf;
866 self->input_len = self->buffer.len;
867 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000868 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000869 return self->input_len;
870}
871
Antoine Pitrou04248a82010-10-12 20:51:21 +0000872static int
873_Unpickler_SkipConsumed(UnpicklerObject *self)
874{
875 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
876
877 if (consumed > 0) {
878 PyObject *r;
879 assert(self->peek); /* otherwise we did something wrong */
880 /* This makes an useless copy... */
881 r = PyObject_CallFunction(self->read, "n", consumed);
882 if (r == NULL)
883 return -1;
884 Py_DECREF(r);
885 self->prefetched_idx = self->next_read_idx;
886 }
887 return 0;
888}
889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000890static const Py_ssize_t READ_WHOLE_LINE = -1;
891
892/* If reading from a file, we need to only pull the bytes we need, since there
893 may be multiple pickle objects arranged contiguously in the same input
894 buffer.
895
896 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
897 bytes from the input stream/buffer.
898
899 Update the unpickler's input buffer with the newly-read data. Returns -1 on
900 failure; on success, returns the number of bytes read from the file.
901
902 On success, self->input_len will be 0; this is intentional so that when
903 unpickling from a file, the "we've run out of data" code paths will trigger,
904 causing the Unpickler to go back to the file for more data. Use the returned
905 size to tell you how much data you can process. */
906static Py_ssize_t
907_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
908{
909 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000910 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000911
912 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200913
Antoine Pitrou04248a82010-10-12 20:51:21 +0000914 if (_Unpickler_SkipConsumed(self) < 0)
915 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000916
917 if (n == READ_WHOLE_LINE)
918 data = PyObject_Call(self->readline, empty_tuple, NULL);
919 else {
920 PyObject *len = PyLong_FromSsize_t(n);
921 if (len == NULL)
922 return -1;
923 data = _Unpickler_FastCall(self, self->read, len);
924 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000925 if (data == NULL)
926 return -1;
927
Antoine Pitrou04248a82010-10-12 20:51:21 +0000928 /* Prefetch some data without advancing the file pointer, if possible */
929 if (self->peek) {
930 PyObject *len, *prefetched;
931 len = PyLong_FromSsize_t(PREFETCH);
932 if (len == NULL) {
933 Py_DECREF(data);
934 return -1;
935 }
936 prefetched = _Unpickler_FastCall(self, self->peek, len);
937 if (prefetched == NULL) {
938 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
939 /* peek() is probably not supported by the given file object */
940 PyErr_Clear();
941 Py_CLEAR(self->peek);
942 }
943 else {
944 Py_DECREF(data);
945 return -1;
946 }
947 }
948 else {
949 assert(PyBytes_Check(prefetched));
950 prefetched_size = PyBytes_GET_SIZE(prefetched);
951 PyBytes_ConcatAndDel(&data, prefetched);
952 if (data == NULL)
953 return -1;
954 }
955 }
956
957 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000958 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000959 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000960 return read_size;
961}
962
963/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
964
965 This should be used for all data reads, rather than accessing the unpickler's
966 input buffer directly. This method deals correctly with reading from input
967 streams, which the input buffer doesn't deal with.
968
969 Note that when reading from a file-like object, self->next_read_idx won't
970 be updated (it should remain at 0 for the entire unpickling process). You
971 should use this function's return value to know how many bytes you can
972 consume.
973
974 Returns -1 (with an exception set) on failure. On success, return the
975 number of chars read. */
976static Py_ssize_t
977_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
978{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000979 Py_ssize_t num_read;
980
Antoine Pitrou04248a82010-10-12 20:51:21 +0000981 if (self->next_read_idx + n <= self->input_len) {
982 *s = self->input_buffer + self->next_read_idx;
983 self->next_read_idx += n;
984 return n;
985 }
986 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000987 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000988 return -1;
989 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000990 num_read = _Unpickler_ReadFromFile(self, n);
991 if (num_read < 0)
992 return -1;
993 if (num_read < n) {
994 PyErr_Format(PyExc_EOFError, "Ran out of input");
995 return -1;
996 }
997 *s = self->input_buffer;
998 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000999 return n;
1000}
1001
1002static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001003_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1004 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001005{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001006 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1007 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008 return -1;
1009
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001010 memcpy(input_line, line, len);
1011 input_line[len] = '\0';
1012 self->input_line = input_line;
1013 *result = self->input_line;
1014 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001015}
1016
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001017/* Read a line from the input stream/buffer. If we run off the end of the input
1018 before hitting \n, return the data we found.
1019
1020 Returns the number of chars read, or -1 on failure. */
1021static Py_ssize_t
1022_Unpickler_Readline(UnpicklerObject *self, char **result)
1023{
1024 Py_ssize_t i, num_read;
1025
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001027 if (self->input_buffer[i] == '\n') {
1028 char *line_start = self->input_buffer + self->next_read_idx;
1029 num_read = i - self->next_read_idx + 1;
1030 self->next_read_idx = i + 1;
1031 return _Unpickler_CopyLine(self, line_start, num_read, result);
1032 }
1033 }
1034 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001035 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1036 if (num_read < 0)
1037 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001039 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001040 }
Victor Stinner121aab42011-09-29 23:40:53 +02001041
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001042 /* If we get here, we've run off the end of the input string. Return the
1043 remaining string and let the caller figure it out. */
1044 *result = self->input_buffer + self->next_read_idx;
1045 num_read = i - self->next_read_idx;
1046 self->next_read_idx = i;
1047 return num_read;
1048}
1049
1050/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1051 will be modified in place. */
1052static int
1053_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1054{
1055 Py_ssize_t i;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001056
1057 assert(new_size > self->memo_size);
1058
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001059 PyMem_RESIZE(self->memo, PyObject *, new_size);
1060 if (self->memo == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001061 PyErr_NoMemory();
1062 return -1;
1063 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001064 for (i = self->memo_size; i < new_size; i++)
1065 self->memo[i] = NULL;
1066 self->memo_size = new_size;
1067 return 0;
1068}
1069
1070/* Returns NULL if idx is out of bounds. */
1071static PyObject *
1072_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1073{
1074 if (idx < 0 || idx >= self->memo_size)
1075 return NULL;
1076
1077 return self->memo[idx];
1078}
1079
1080/* Returns -1 (with an exception set) on failure, 0 on success.
1081 This takes its own reference to `value`. */
1082static int
1083_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1084{
1085 PyObject *old_item;
1086
1087 if (idx >= self->memo_size) {
1088 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1089 return -1;
1090 assert(idx < self->memo_size);
1091 }
1092 Py_INCREF(value);
1093 old_item = self->memo[idx];
1094 self->memo[idx] = value;
1095 Py_XDECREF(old_item);
1096 return 0;
1097}
1098
1099static PyObject **
1100_Unpickler_NewMemo(Py_ssize_t new_size)
1101{
Benjamin Peterson59b08c12015-06-27 13:41:33 -05001102 PyObject **memo = PyMem_NEW(PyObject *, new_size);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001103 if (memo == NULL)
1104 return NULL;
1105 memset(memo, 0, new_size * sizeof(PyObject *));
1106 return memo;
1107}
1108
1109/* Free the unpickler's memo, taking care to decref any items left in it. */
1110static void
1111_Unpickler_MemoCleanup(UnpicklerObject *self)
1112{
1113 Py_ssize_t i;
1114 PyObject **memo = self->memo;
1115
1116 if (self->memo == NULL)
1117 return;
1118 self->memo = NULL;
1119 i = self->memo_size;
1120 while (--i >= 0) {
1121 Py_XDECREF(memo[i]);
1122 }
1123 PyMem_FREE(memo);
1124}
1125
1126static UnpicklerObject *
1127_Unpickler_New(void)
1128{
1129 UnpicklerObject *self;
1130
1131 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1132 if (self == NULL)
1133 return NULL;
1134
1135 self->stack = (Pdata *)Pdata_New();
1136 if (self->stack == NULL) {
1137 Py_DECREF(self);
1138 return NULL;
1139 }
1140 memset(&self->buffer, 0, sizeof(Py_buffer));
1141
1142 self->memo_size = 32;
1143 self->memo = _Unpickler_NewMemo(self->memo_size);
1144 if (self->memo == NULL) {
1145 Py_DECREF(self);
1146 return NULL;
1147 }
1148
1149 self->arg = NULL;
1150 self->pers_func = NULL;
1151 self->input_buffer = NULL;
1152 self->input_line = NULL;
1153 self->input_len = 0;
1154 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001155 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001156 self->read = NULL;
1157 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001158 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001159 self->encoding = NULL;
1160 self->errors = NULL;
1161 self->marks = NULL;
1162 self->num_marks = 0;
1163 self->marks_size = 0;
1164 self->proto = 0;
1165 self->fix_imports = 0;
1166
1167 return self;
1168}
1169
1170/* Returns -1 (with an exception set) on failure, 0 on success. This may
1171 be called once on a freshly created Pickler. */
1172static int
1173_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1174{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001175 _Py_IDENTIFIER(peek);
1176 _Py_IDENTIFIER(read);
1177 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001178
1179 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001180 if (self->peek == NULL) {
1181 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1182 PyErr_Clear();
1183 else
1184 return -1;
1185 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001186 self->read = _PyObject_GetAttrId(file, &PyId_read);
1187 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001188 if (self->readline == NULL || self->read == NULL) {
1189 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1190 PyErr_SetString(PyExc_TypeError,
1191 "file must have 'read' and 'readline' attributes");
1192 Py_CLEAR(self->read);
1193 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001194 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001195 return -1;
1196 }
1197 return 0;
1198}
1199
1200/* Returns -1 (with an exception set) on failure, 0 on success. This may
1201 be called once on a freshly created Pickler. */
1202static int
1203_Unpickler_SetInputEncoding(UnpicklerObject *self,
1204 const char *encoding,
1205 const char *errors)
1206{
1207 if (encoding == NULL)
1208 encoding = "ASCII";
1209 if (errors == NULL)
1210 errors = "strict";
1211
1212 self->encoding = strdup(encoding);
1213 self->errors = strdup(errors);
1214 if (self->encoding == NULL || self->errors == NULL) {
1215 PyErr_NoMemory();
1216 return -1;
1217 }
1218 return 0;
1219}
1220
1221/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001222static int
1223memo_get(PicklerObject *self, PyObject *key)
1224{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001225 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001226 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001227 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001228
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001229 value = PyMemoTable_Get(self->memo, key);
1230 if (value == NULL) {
1231 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 return -1;
1233 }
1234
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001235 if (!self->bin) {
1236 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001237 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1238 "%" PY_FORMAT_SIZE_T "d\n", *value);
1239 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 }
1241 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001242 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001243 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 len = 2;
1246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001247 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001248 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001249 pdata[1] = (unsigned char)(*value & 0xff);
1250 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1251 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1252 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001253 len = 5;
1254 }
1255 else { /* unlikely */
1256 PyErr_SetString(PicklingError,
1257 "memo id too large for LONG_BINGET");
1258 return -1;
1259 }
1260 }
1261
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001262 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001263 return -1;
1264
1265 return 0;
1266}
1267
1268/* Store an object in the memo, assign it a new unique ID based on the number
1269 of objects currently stored in the memo and generate a PUT opcode. */
1270static int
1271memo_put(PicklerObject *self, PyObject *obj)
1272{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001273 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001274 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001275 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001276 int status = 0;
1277
1278 if (self->fast)
1279 return 0;
1280
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001281 x = PyMemoTable_Size(self->memo);
1282 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001283 goto error;
1284
1285 if (!self->bin) {
1286 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001287 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1288 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001289 len = strlen(pdata);
1290 }
1291 else {
1292 if (x < 256) {
1293 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001294 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001295 len = 2;
1296 }
1297 else if (x <= 0xffffffffL) {
1298 pdata[0] = LONG_BINPUT;
1299 pdata[1] = (unsigned char)(x & 0xff);
1300 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1301 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1302 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1303 len = 5;
1304 }
1305 else { /* unlikely */
1306 PyErr_SetString(PicklingError,
1307 "memo id too large for LONG_BINPUT");
1308 return -1;
1309 }
1310 }
1311
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001312 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001313 goto error;
1314
1315 if (0) {
1316 error:
1317 status = -1;
1318 }
1319
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001320 return status;
1321}
1322
1323static PyObject *
1324whichmodule(PyObject *global, PyObject *global_name)
1325{
1326 Py_ssize_t i, j;
1327 static PyObject *module_str = NULL;
1328 static PyObject *main_str = NULL;
1329 PyObject *module_name;
1330 PyObject *modules_dict;
1331 PyObject *module;
1332 PyObject *obj;
1333
1334 if (module_str == NULL) {
1335 module_str = PyUnicode_InternFromString("__module__");
1336 if (module_str == NULL)
1337 return NULL;
1338 main_str = PyUnicode_InternFromString("__main__");
1339 if (main_str == NULL)
1340 return NULL;
1341 }
1342
1343 module_name = PyObject_GetAttr(global, module_str);
1344
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001345 /* In some rare cases (e.g., bound methods of extension types),
1346 __module__ can be None. If it is so, then search sys.modules
1347 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001348 if (module_name == Py_None) {
1349 Py_DECREF(module_name);
1350 goto search;
1351 }
1352
1353 if (module_name) {
1354 return module_name;
1355 }
1356 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1357 PyErr_Clear();
1358 else
1359 return NULL;
1360
1361 search:
1362 modules_dict = PySys_GetObject("modules");
1363 if (modules_dict == NULL)
1364 return NULL;
1365
1366 i = 0;
1367 module_name = NULL;
1368 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001369 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001370 continue;
1371
1372 obj = PyObject_GetAttr(module, global_name);
1373 if (obj == NULL) {
1374 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1375 PyErr_Clear();
1376 else
1377 return NULL;
1378 continue;
1379 }
1380
1381 if (obj != global) {
1382 Py_DECREF(obj);
1383 continue;
1384 }
1385
1386 Py_DECREF(obj);
1387 break;
1388 }
1389
1390 /* If no module is found, use __main__. */
1391 if (!j) {
1392 module_name = main_str;
1393 }
1394
1395 Py_INCREF(module_name);
1396 return module_name;
1397}
1398
1399/* fast_save_enter() and fast_save_leave() are guards against recursive
1400 objects when Pickler is used with the "fast mode" (i.e., with object
1401 memoization disabled). If the nesting of a list or dict object exceed
1402 FAST_NESTING_LIMIT, these guards will start keeping an internal
1403 reference to the seen list or dict objects and check whether these objects
1404 are recursive. These are not strictly necessary, since save() has a
1405 hard-coded recursion limit, but they give a nicer error message than the
1406 typical RuntimeError. */
1407static int
1408fast_save_enter(PicklerObject *self, PyObject *obj)
1409{
1410 /* if fast_nesting < 0, we're doing an error exit. */
1411 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1412 PyObject *key = NULL;
1413 if (self->fast_memo == NULL) {
1414 self->fast_memo = PyDict_New();
1415 if (self->fast_memo == NULL) {
1416 self->fast_nesting = -1;
1417 return 0;
1418 }
1419 }
1420 key = PyLong_FromVoidPtr(obj);
1421 if (key == NULL)
1422 return 0;
1423 if (PyDict_GetItem(self->fast_memo, key)) {
1424 Py_DECREF(key);
1425 PyErr_Format(PyExc_ValueError,
1426 "fast mode: can't pickle cyclic objects "
1427 "including object type %.200s at %p",
1428 obj->ob_type->tp_name, obj);
1429 self->fast_nesting = -1;
1430 return 0;
1431 }
1432 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1433 Py_DECREF(key);
1434 self->fast_nesting = -1;
1435 return 0;
1436 }
1437 Py_DECREF(key);
1438 }
1439 return 1;
1440}
1441
1442static int
1443fast_save_leave(PicklerObject *self, PyObject *obj)
1444{
1445 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1446 PyObject *key = PyLong_FromVoidPtr(obj);
1447 if (key == NULL)
1448 return 0;
1449 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1450 Py_DECREF(key);
1451 return 0;
1452 }
1453 Py_DECREF(key);
1454 }
1455 return 1;
1456}
1457
1458static int
1459save_none(PicklerObject *self, PyObject *obj)
1460{
1461 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001462 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001463 return -1;
1464
1465 return 0;
1466}
1467
1468static int
1469save_bool(PicklerObject *self, PyObject *obj)
1470{
1471 static const char *buf[2] = { FALSE, TRUE };
1472 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1473 int p = (obj == Py_True);
1474
1475 if (self->proto >= 2) {
1476 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001480 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001481 return -1;
1482
1483 return 0;
1484}
1485
1486static int
1487save_int(PicklerObject *self, long x)
1488{
1489 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001490 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001491
1492 if (!self->bin
1493#if SIZEOF_LONG > 4
1494 || x > 0x7fffffffL || x < -0x80000000L
1495#endif
1496 ) {
1497 /* Text-mode pickle, or long too big to fit in the 4-byte
1498 * signed BININT format: store as a string.
1499 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001500 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1501 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001502 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001503 return -1;
1504 }
1505 else {
1506 /* Binary pickle and x fits in a signed 4-byte int. */
1507 pdata[1] = (unsigned char)(x & 0xff);
1508 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1509 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1510 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1511
1512 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1513 if (pdata[2] == 0) {
1514 pdata[0] = BININT1;
1515 len = 2;
1516 }
1517 else {
1518 pdata[0] = BININT2;
1519 len = 3;
1520 }
1521 }
1522 else {
1523 pdata[0] = BININT;
1524 len = 5;
1525 }
1526
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001527 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001528 return -1;
1529 }
1530
1531 return 0;
1532}
1533
/* Pickle the Python int `obj`.

   Values that fit in a C long (and, where long is wider than 4 bytes, in a
   signed 4-byte integer) are delegated to save_int().  Everything else is
   written as LONG1/LONG4 with a little-endian two's-complement payload when
   the protocol is 2 or higher, or as a textual LONG line (repr followed by
   "L\n") for older protocols.

   Returns -1 (with an exception set) on failure, 0 on success. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
    /* When long is wider than 4 bytes, the range test below becomes the body
       of the `else`; otherwise the `else` applies to the return directly. */
#if SIZEOF_LONG > 4
        if (val <= 0x7fffffffL && val >= -0x80000000L)
#endif
            return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (_Pickler_Write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is ints
         * of the form -(2**(8*j-1)) for j > 0.  Such an int is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        /* The payload length has to fit the 4-byte LONG4 size field. */
        if (nbytes > 0x7fffffffL) {
            PyErr_SetString(PyExc_OverflowError,
                            "int too large to pickle");
            goto error;
        }
        /* `repr` is used here as a scratch bytes object for the payload. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the int is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            header[0] = LONG4;
            /* `size` doubles as a shift register while the 4-byte
               little-endian length is written, then becomes the header
               length. */
            size = (Py_ssize_t) nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The error label is only reachable through goto; the normal path skips
       this block and keeps status at 0. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1660
1661static int
1662save_float(PicklerObject *self, PyObject *obj)
1663{
1664 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1665
1666 if (self->bin) {
1667 char pdata[9];
1668 pdata[0] = BINFLOAT;
1669 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1670 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001671 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001672 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001673 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001674 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001675 int result = -1;
1676 char *buf = NULL;
1677 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001678
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001679 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001680 goto done;
1681
Mark Dickinson3e09f432009-04-17 08:41:23 +00001682 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001683 if (!buf) {
1684 PyErr_NoMemory();
1685 goto done;
1686 }
1687
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001688 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 goto done;
1690
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001691 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001692 goto done;
1693
1694 result = 0;
1695done:
1696 PyMem_Free(buf);
1697 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001698 }
1699
1700 return 0;
1701}
1702
1703static int
1704save_bytes(PicklerObject *self, PyObject *obj)
1705{
1706 if (self->proto < 3) {
1707 /* Older pickle protocols do not have an opcode for pickling bytes
1708 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001709 the __reduce__ method) to permit bytes object unpickling.
1710
1711 Here we use a hack to be compatible with Python 2. Since in Python
1712 2 'bytes' is just an alias for 'str' (which has different
1713 parameters than the actual bytes object), we use codecs.encode
1714 to create the appropriate 'str' object when unpickled using
1715 Python 2 *and* the appropriate 'bytes' object when unpickled
1716 using Python 3. Again this is a hack and we don't need to do this
1717 with newer protocols. */
1718 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001719 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001720 int status;
1721
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001722 if (codecs_encode == NULL) {
1723 PyObject *codecs_module = PyImport_ImportModule("codecs");
1724 if (codecs_module == NULL) {
1725 return -1;
1726 }
1727 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1728 Py_DECREF(codecs_module);
1729 if (codecs_encode == NULL) {
1730 return -1;
1731 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001732 }
1733
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001734 if (PyBytes_GET_SIZE(obj) == 0) {
1735 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1736 }
1737 else {
1738 static PyObject *latin1 = NULL;
1739 PyObject *unicode_str =
1740 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1741 PyBytes_GET_SIZE(obj),
1742 "strict");
1743 if (unicode_str == NULL)
1744 return -1;
1745 if (latin1 == NULL) {
1746 latin1 = PyUnicode_InternFromString("latin1");
1747 if (latin1 == NULL)
1748 return -1;
1749 }
1750 reduce_value = Py_BuildValue("(O(OO))",
1751 codecs_encode, unicode_str, latin1);
1752 Py_DECREF(unicode_str);
1753 }
1754
1755 if (reduce_value == NULL)
1756 return -1;
1757
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001758 /* save_reduce() will memoize the object automatically. */
1759 status = save_reduce(self, reduce_value, obj);
1760 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001761 return status;
1762 }
1763 else {
1764 Py_ssize_t size;
1765 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001766 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001767
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001768 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001769 if (size < 0)
1770 return -1;
1771
1772 if (size < 256) {
1773 header[0] = SHORT_BINBYTES;
1774 header[1] = (unsigned char)size;
1775 len = 2;
1776 }
1777 else if (size <= 0xffffffffL) {
1778 header[0] = BINBYTES;
1779 header[1] = (unsigned char)(size & 0xff);
1780 header[2] = (unsigned char)((size >> 8) & 0xff);
1781 header[3] = (unsigned char)((size >> 16) & 0xff);
1782 header[4] = (unsigned char)((size >> 24) & 0xff);
1783 len = 5;
1784 }
1785 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001786 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001787 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001788 return -1; /* string too large */
1789 }
1790
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001791 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001792 return -1;
1793
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001794 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001795 return -1;
1796
1797 if (memo_put(self, obj) < 0)
1798 return -1;
1799
1800 return 0;
1801 }
1802}
1803
1804/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1805 backslash and newline characters to \uXXXX escapes. */
1806static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001807raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001808{
1809 PyObject *repr, *result;
1810 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001811 Py_ssize_t i, size, expandsize;
1812 void *data;
1813 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001814
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001815 if (PyUnicode_READY(obj))
1816 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001817
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001818 size = PyUnicode_GET_LENGTH(obj);
1819 data = PyUnicode_DATA(obj);
1820 kind = PyUnicode_KIND(obj);
1821 if (kind == PyUnicode_4BYTE_KIND)
1822 expandsize = 10;
1823 else
1824 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001825
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001826 if (size > PY_SSIZE_T_MAX / expandsize)
1827 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001828 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001829 if (repr == NULL)
1830 return NULL;
1831 if (size == 0)
1832 goto done;
1833
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001834 p = PyByteArray_AS_STRING(repr);
1835 for (i=0; i < size; i++) {
1836 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001837 /* Map 32-bit characters to '\Uxxxxxxxx' */
1838 if (ch >= 0x10000) {
1839 *p++ = '\\';
1840 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001841 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1842 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1843 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1844 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1845 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1846 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1847 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1848 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001849 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001850 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001851 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001852 *p++ = '\\';
1853 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001854 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1855 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1856 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1857 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001858 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001859 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001860 else
1861 *p++ = (char) ch;
1862 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001863 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001864
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001865done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001866 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001867 Py_DECREF(repr);
1868 return result;
1869}
1870
1871static int
1872save_unicode(PicklerObject *self, PyObject *obj)
1873{
1874 Py_ssize_t size;
1875 PyObject *encoded = NULL;
1876
1877 if (self->bin) {
1878 char pdata[5];
1879
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001880 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001881 if (encoded == NULL)
1882 goto error;
1883
1884 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001885 if (size > 0xffffffffL) {
1886 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001887 "cannot serialize a string larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001888 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001889 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001890
1891 pdata[0] = BINUNICODE;
1892 pdata[1] = (unsigned char)(size & 0xff);
1893 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1894 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1895 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1896
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001897 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001898 goto error;
1899
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001900 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001901 goto error;
1902 }
1903 else {
1904 const char unicode_op = UNICODE;
1905
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001906 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001907 if (encoded == NULL)
1908 goto error;
1909
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001910 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001911 goto error;
1912
1913 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001914 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001915 goto error;
1916
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001917 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001918 goto error;
1919 }
1920 if (memo_put(self, obj) < 0)
1921 goto error;
1922
1923 Py_DECREF(encoded);
1924 return 0;
1925
1926 error:
1927 Py_XDECREF(encoded);
1928 return -1;
1929}
1930
1931/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1932static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001933store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001934{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001935 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001936
1937 assert(PyTuple_Size(t) == len);
1938
1939 for (i = 0; i < len; i++) {
1940 PyObject *element = PyTuple_GET_ITEM(t, i);
1941
1942 if (element == NULL)
1943 return -1;
1944 if (save(self, element, 0) < 0)
1945 return -1;
1946 }
1947
1948 return 0;
1949}
1950
1951/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1952 * used across protocols to minimize the space needed to pickle them.
1953 * Tuples are also the only builtin immutable type that can be recursive
1954 * (a tuple can be reached from itself), and that requires some subtle
1955 * magic so that it works in all cases. IOW, this is a long routine.
1956 */
1957static int
1958save_tuple(PicklerObject *self, PyObject *obj)
1959{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001960 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001961
1962 const char mark_op = MARK;
1963 const char tuple_op = TUPLE;
1964 const char pop_op = POP;
1965 const char pop_mark_op = POP_MARK;
1966 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1967
1968 if ((len = PyTuple_Size(obj)) < 0)
1969 return -1;
1970
1971 if (len == 0) {
1972 char pdata[2];
1973
1974 if (self->proto) {
1975 pdata[0] = EMPTY_TUPLE;
1976 len = 1;
1977 }
1978 else {
1979 pdata[0] = MARK;
1980 pdata[1] = TUPLE;
1981 len = 2;
1982 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001983 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001984 return -1;
1985 return 0;
1986 }
1987
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001988 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001989 * saving the tuple elements, the tuple must be recursive, in
1990 * which case we'll pop everything we put on the stack, and fetch
1991 * its value from the memo.
1992 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001993 if (len <= 3 && self->proto >= 2) {
1994 /* Use TUPLE{1,2,3} opcodes. */
1995 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001996 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001997
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001998 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001999 /* pop the len elements */
2000 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002001 if (_Pickler_Write(self, &pop_op, 1) < 0)
2002 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002003 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002004 if (memo_get(self, obj) < 0)
2005 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002007 return 0;
2008 }
2009 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002010 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2011 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002012 }
2013 goto memoize;
2014 }
2015
2016 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2017 * Generate MARK e1 e2 ... TUPLE
2018 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002019 if (_Pickler_Write(self, &mark_op, 1) < 0)
2020 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021
2022 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002023 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002025 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026 /* pop the stack stuff we pushed */
2027 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002028 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2029 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002030 }
2031 else {
2032 /* Note that we pop one more than len, to remove
2033 * the MARK too.
2034 */
2035 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002036 if (_Pickler_Write(self, &pop_op, 1) < 0)
2037 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 }
2039 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002040 if (memo_get(self, obj) < 0)
2041 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002042
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002043 return 0;
2044 }
2045 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002046 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2047 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048 }
2049
2050 memoize:
2051 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002052 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002053
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002054 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055}
2056
2057/* iter is an iterator giving items, and we batch up chunks of
2058 * MARK item item ... item APPENDS
2059 * opcode sequences. Calling code should have arranged to first create an
2060 * empty list, or list-like object, for the APPENDS to operate on.
2061 * Returns 0 on success, <0 on error.
2062 */
2063static int
2064batch_list(PicklerObject *self, PyObject *iter)
2065{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002066 PyObject *obj = NULL;
2067 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002068 int i, n;
2069
2070 const char mark_op = MARK;
2071 const char append_op = APPEND;
2072 const char appends_op = APPENDS;
2073
2074 assert(iter != NULL);
2075
2076 /* XXX: I think this function could be made faster by avoiding the
2077 iterator interface and fetching objects directly from list using
2078 PyList_GET_ITEM.
2079 */
2080
2081 if (self->proto == 0) {
2082 /* APPENDS isn't available; do one at a time. */
2083 for (;;) {
2084 obj = PyIter_Next(iter);
2085 if (obj == NULL) {
2086 if (PyErr_Occurred())
2087 return -1;
2088 break;
2089 }
2090 i = save(self, obj, 0);
2091 Py_DECREF(obj);
2092 if (i < 0)
2093 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002094 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002095 return -1;
2096 }
2097 return 0;
2098 }
2099
2100 /* proto > 0: write in batches of BATCHSIZE. */
2101 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002102 /* Get first item */
2103 firstitem = PyIter_Next(iter);
2104 if (firstitem == NULL) {
2105 if (PyErr_Occurred())
2106 goto error;
2107
2108 /* nothing more to add */
2109 break;
2110 }
2111
2112 /* Try to get a second item */
2113 obj = PyIter_Next(iter);
2114 if (obj == NULL) {
2115 if (PyErr_Occurred())
2116 goto error;
2117
2118 /* Only one item to write */
2119 if (save(self, firstitem, 0) < 0)
2120 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002121 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002122 goto error;
2123 Py_CLEAR(firstitem);
2124 break;
2125 }
2126
2127 /* More than one item to write */
2128
2129 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002130 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002131 goto error;
2132
2133 if (save(self, firstitem, 0) < 0)
2134 goto error;
2135 Py_CLEAR(firstitem);
2136 n = 1;
2137
2138 /* Fetch and save up to BATCHSIZE items */
2139 while (obj) {
2140 if (save(self, obj, 0) < 0)
2141 goto error;
2142 Py_CLEAR(obj);
2143 n += 1;
2144
2145 if (n == BATCHSIZE)
2146 break;
2147
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002148 obj = PyIter_Next(iter);
2149 if (obj == NULL) {
2150 if (PyErr_Occurred())
2151 goto error;
2152 break;
2153 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002154 }
2155
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002156 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002157 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002158
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002159 } while (n == BATCHSIZE);
2160 return 0;
2161
2162 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002163 Py_XDECREF(firstitem);
2164 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002165 return -1;
2166}
2167
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002168/* This is a variant of batch_list() above, specialized for lists (with no
2169 * support for list subclasses). Like batch_list(), we batch up chunks of
2170 * MARK item item ... item APPENDS
2171 * opcode sequences. Calling code should have arranged to first create an
2172 * empty list, or list-like object, for the APPENDS to operate on.
2173 * Returns 0 on success, -1 on error.
2174 *
2175 * This version is considerably faster than batch_list(), if less general.
2176 *
2177 * Note that this only works for protocols > 0.
2178 */
2179static int
2180batch_list_exact(PicklerObject *self, PyObject *obj)
2181{
2182 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002183 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002184
2185 const char append_op = APPEND;
2186 const char appends_op = APPENDS;
2187 const char mark_op = MARK;
2188
2189 assert(obj != NULL);
2190 assert(self->proto > 0);
2191 assert(PyList_CheckExact(obj));
2192
2193 if (PyList_GET_SIZE(obj) == 1) {
2194 item = PyList_GET_ITEM(obj, 0);
2195 if (save(self, item, 0) < 0)
2196 return -1;
2197 if (_Pickler_Write(self, &append_op, 1) < 0)
2198 return -1;
2199 return 0;
2200 }
2201
2202 /* Write in batches of BATCHSIZE. */
2203 total = 0;
2204 do {
2205 this_batch = 0;
2206 if (_Pickler_Write(self, &mark_op, 1) < 0)
2207 return -1;
2208 while (total < PyList_GET_SIZE(obj)) {
2209 item = PyList_GET_ITEM(obj, total);
2210 if (save(self, item, 0) < 0)
2211 return -1;
2212 total++;
2213 if (++this_batch == BATCHSIZE)
2214 break;
2215 }
2216 if (_Pickler_Write(self, &appends_op, 1) < 0)
2217 return -1;
2218
2219 } while (total < PyList_GET_SIZE(obj));
2220
2221 return 0;
2222}
2223
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002224static int
2225save_list(PicklerObject *self, PyObject *obj)
2226{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002227 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002228 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002229 int status = 0;
2230
2231 if (self->fast && !fast_save_enter(self, obj))
2232 goto error;
2233
2234 /* Create an empty list. */
2235 if (self->bin) {
2236 header[0] = EMPTY_LIST;
2237 len = 1;
2238 }
2239 else {
2240 header[0] = MARK;
2241 header[1] = LIST;
2242 len = 2;
2243 }
2244
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002245 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002246 goto error;
2247
2248 /* Get list length, and bow out early if empty. */
2249 if ((len = PyList_Size(obj)) < 0)
2250 goto error;
2251
2252 if (memo_put(self, obj) < 0)
2253 goto error;
2254
2255 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002256 /* Materialize the list elements. */
2257 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002258 if (Py_EnterRecursiveCall(" while pickling an object"))
2259 goto error;
2260 status = batch_list_exact(self, obj);
2261 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002262 } else {
2263 PyObject *iter = PyObject_GetIter(obj);
2264 if (iter == NULL)
2265 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002266
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002267 if (Py_EnterRecursiveCall(" while pickling an object")) {
2268 Py_DECREF(iter);
2269 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002270 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002271 status = batch_list(self, iter);
2272 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002273 Py_DECREF(iter);
2274 }
2275 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002276 if (0) {
2277 error:
2278 status = -1;
2279 }
2280
2281 if (self->fast && !fast_save_leave(self, obj))
2282 status = -1;
2283
2284 return status;
2285}
2286
2287/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2288 * MARK key value ... key value SETITEMS
2289 * opcode sequences. Calling code should have arranged to first create an
2290 * empty dict, or dict-like object, for the SETITEMS to operate on.
2291 * Returns 0 on success, <0 on error.
2292 *
2293 * This is very much like batch_list(). The difference between saving
2294 * elements directly, and picking apart two-tuples, is so long-winded at
2295 * the C level, though, that attempts to combine these routines were too
2296 * ugly to bear.
2297 */
2298static int
2299batch_dict(PicklerObject *self, PyObject *iter)
2300{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002301 PyObject *obj = NULL;
2302 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002303 int i, n;
2304
2305 const char mark_op = MARK;
2306 const char setitem_op = SETITEM;
2307 const char setitems_op = SETITEMS;
2308
2309 assert(iter != NULL);
2310
2311 if (self->proto == 0) {
2312 /* SETITEMS isn't available; do one at a time. */
2313 for (;;) {
2314 obj = PyIter_Next(iter);
2315 if (obj == NULL) {
2316 if (PyErr_Occurred())
2317 return -1;
2318 break;
2319 }
2320 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2321 PyErr_SetString(PyExc_TypeError, "dict items "
2322 "iterator must return 2-tuples");
2323 return -1;
2324 }
2325 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2326 if (i >= 0)
2327 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2328 Py_DECREF(obj);
2329 if (i < 0)
2330 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002331 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002332 return -1;
2333 }
2334 return 0;
2335 }
2336
2337 /* proto > 0: write in batches of BATCHSIZE. */
2338 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002339 /* Get first item */
2340 firstitem = PyIter_Next(iter);
2341 if (firstitem == NULL) {
2342 if (PyErr_Occurred())
2343 goto error;
2344
2345 /* nothing more to add */
2346 break;
2347 }
2348 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2349 PyErr_SetString(PyExc_TypeError, "dict items "
2350 "iterator must return 2-tuples");
2351 goto error;
2352 }
2353
2354 /* Try to get a second item */
2355 obj = PyIter_Next(iter);
2356 if (obj == NULL) {
2357 if (PyErr_Occurred())
2358 goto error;
2359
2360 /* Only one item to write */
2361 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2362 goto error;
2363 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2364 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002365 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002366 goto error;
2367 Py_CLEAR(firstitem);
2368 break;
2369 }
2370
2371 /* More than one item to write */
2372
2373 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002374 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002375 goto error;
2376
2377 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2378 goto error;
2379 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2380 goto error;
2381 Py_CLEAR(firstitem);
2382 n = 1;
2383
2384 /* Fetch and save up to BATCHSIZE items */
2385 while (obj) {
2386 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2387 PyErr_SetString(PyExc_TypeError, "dict items "
2388 "iterator must return 2-tuples");
2389 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002390 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002391 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2392 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2393 goto error;
2394 Py_CLEAR(obj);
2395 n += 1;
2396
2397 if (n == BATCHSIZE)
2398 break;
2399
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002400 obj = PyIter_Next(iter);
2401 if (obj == NULL) {
2402 if (PyErr_Occurred())
2403 goto error;
2404 break;
2405 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002406 }
2407
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002408 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002409 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002410
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002411 } while (n == BATCHSIZE);
2412 return 0;
2413
2414 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002415 Py_XDECREF(firstitem);
2416 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002417 return -1;
2418}
2419
Collin Winter5c9b02d2009-05-25 05:43:30 +00002420/* This is a variant of batch_dict() above that specializes for dicts, with no
2421 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2422 * MARK key value ... key value SETITEMS
2423 * opcode sequences. Calling code should have arranged to first create an
2424 * empty dict, or dict-like object, for the SETITEMS to operate on.
2425 * Returns 0 on success, -1 on error.
2426 *
2427 * Note that this currently doesn't work for protocol 0.
2428 */
2429static int
2430batch_dict_exact(PicklerObject *self, PyObject *obj)
2431{
2432 PyObject *key = NULL, *value = NULL;
2433 int i;
2434 Py_ssize_t dict_size, ppos = 0;
2435
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002436 const char mark_op = MARK;
2437 const char setitem_op = SETITEM;
2438 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002439
2440 assert(obj != NULL);
2441 assert(self->proto > 0);
2442
2443 dict_size = PyDict_Size(obj);
2444
2445 /* Special-case len(d) == 1 to save space. */
2446 if (dict_size == 1) {
2447 PyDict_Next(obj, &ppos, &key, &value);
2448 if (save(self, key, 0) < 0)
2449 return -1;
2450 if (save(self, value, 0) < 0)
2451 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002452 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002453 return -1;
2454 return 0;
2455 }
2456
2457 /* Write in batches of BATCHSIZE. */
2458 do {
2459 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002460 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002461 return -1;
2462 while (PyDict_Next(obj, &ppos, &key, &value)) {
2463 if (save(self, key, 0) < 0)
2464 return -1;
2465 if (save(self, value, 0) < 0)
2466 return -1;
2467 if (++i == BATCHSIZE)
2468 break;
2469 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002470 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002471 return -1;
2472 if (PyDict_Size(obj) != dict_size) {
2473 PyErr_Format(
2474 PyExc_RuntimeError,
2475 "dictionary changed size during iteration");
2476 return -1;
2477 }
2478
2479 } while (i == BATCHSIZE);
2480 return 0;
2481}
2482
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002483static int
2484save_dict(PicklerObject *self, PyObject *obj)
2485{
2486 PyObject *items, *iter;
2487 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002488 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002489 int status = 0;
2490
2491 if (self->fast && !fast_save_enter(self, obj))
2492 goto error;
2493
2494 /* Create an empty dict. */
2495 if (self->bin) {
2496 header[0] = EMPTY_DICT;
2497 len = 1;
2498 }
2499 else {
2500 header[0] = MARK;
2501 header[1] = DICT;
2502 len = 2;
2503 }
2504
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002505 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002506 goto error;
2507
2508 /* Get dict size, and bow out early if empty. */
2509 if ((len = PyDict_Size(obj)) < 0)
2510 goto error;
2511
2512 if (memo_put(self, obj) < 0)
2513 goto error;
2514
2515 if (len != 0) {
2516 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002517 if (PyDict_CheckExact(obj) && self->proto > 0) {
2518 /* We can take certain shortcuts if we know this is a dict and
2519 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002520 if (Py_EnterRecursiveCall(" while pickling an object"))
2521 goto error;
2522 status = batch_dict_exact(self, obj);
2523 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002524 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002525 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002526
2527 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002528 if (items == NULL)
2529 goto error;
2530 iter = PyObject_GetIter(items);
2531 Py_DECREF(items);
2532 if (iter == NULL)
2533 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002534 if (Py_EnterRecursiveCall(" while pickling an object")) {
2535 Py_DECREF(iter);
2536 goto error;
2537 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002538 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002539 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002540 Py_DECREF(iter);
2541 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002542 }
2543
2544 if (0) {
2545 error:
2546 status = -1;
2547 }
2548
2549 if (self->fast && !fast_save_leave(self, obj))
2550 status = -1;
2551
2552 return status;
2553}
2554
2555static int
2556save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2557{
2558 static PyObject *name_str = NULL;
2559 PyObject *global_name = NULL;
2560 PyObject *module_name = NULL;
2561 PyObject *module = NULL;
2562 PyObject *cls;
2563 int status = 0;
2564
2565 const char global_op = GLOBAL;
2566
2567 if (name_str == NULL) {
2568 name_str = PyUnicode_InternFromString("__name__");
2569 if (name_str == NULL)
2570 goto error;
2571 }
2572
2573 if (name) {
2574 global_name = name;
2575 Py_INCREF(global_name);
2576 }
2577 else {
2578 global_name = PyObject_GetAttr(obj, name_str);
2579 if (global_name == NULL)
2580 goto error;
2581 }
2582
2583 module_name = whichmodule(obj, global_name);
2584 if (module_name == NULL)
2585 goto error;
2586
2587 /* XXX: Change to use the import C API directly with level=0 to disallow
2588 relative imports.
2589
2590 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2591 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2592 custom import functions (IMHO, this would be a nice security
2593 feature). The import C API would need to be extended to support the
2594 extra parameters of __import__ to fix that. */
2595 module = PyImport_Import(module_name);
2596 if (module == NULL) {
2597 PyErr_Format(PicklingError,
2598 "Can't pickle %R: import of module %R failed",
2599 obj, module_name);
2600 goto error;
2601 }
2602 cls = PyObject_GetAttr(module, global_name);
2603 if (cls == NULL) {
2604 PyErr_Format(PicklingError,
2605 "Can't pickle %R: attribute lookup %S.%S failed",
2606 obj, module_name, global_name);
2607 goto error;
2608 }
2609 if (cls != obj) {
2610 Py_DECREF(cls);
2611 PyErr_Format(PicklingError,
2612 "Can't pickle %R: it's not the same object as %S.%S",
2613 obj, module_name, global_name);
2614 goto error;
2615 }
2616 Py_DECREF(cls);
2617
2618 if (self->proto >= 2) {
2619 /* See whether this is in the extension registry, and if
2620 * so generate an EXT opcode.
2621 */
2622 PyObject *code_obj; /* extension code as Python object */
2623 long code; /* extension code as C value */
2624 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002625 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002626
2627 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2628 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2629 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2630 /* The object is not registered in the extension registry.
2631 This is the most likely code path. */
2632 if (code_obj == NULL)
2633 goto gen_global;
2634
        /* XXX: pickle.py checks neither the type nor the range of the
           value returned by the extension_registry. It should, for
           consistency. */
2638
2639 /* Verify code_obj has the right type and value. */
2640 if (!PyLong_Check(code_obj)) {
2641 PyErr_Format(PicklingError,
2642 "Can't pickle %R: extension code %R isn't an integer",
2643 obj, code_obj);
2644 goto error;
2645 }
2646 code = PyLong_AS_LONG(code_obj);
2647 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002648 if (!PyErr_Occurred())
2649 PyErr_Format(PicklingError,
2650 "Can't pickle %R: extension code %ld is out of range",
2651 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002652 goto error;
2653 }
2654
2655 /* Generate an EXT opcode. */
2656 if (code <= 0xff) {
2657 pdata[0] = EXT1;
2658 pdata[1] = (unsigned char)code;
2659 n = 2;
2660 }
2661 else if (code <= 0xffff) {
2662 pdata[0] = EXT2;
2663 pdata[1] = (unsigned char)(code & 0xff);
2664 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2665 n = 3;
2666 }
2667 else {
2668 pdata[0] = EXT4;
2669 pdata[1] = (unsigned char)(code & 0xff);
2670 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2671 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2672 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2673 n = 5;
2674 }
2675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002676 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002677 goto error;
2678 }
2679 else {
        /* Generate a normal global opcode if we are using a pickle
           protocol < 2, or if the object is not registered in the
           extension registry. */
2683 PyObject *encoded;
2684 PyObject *(*unicode_encoder)(PyObject *);
2685
2686 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002687 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002688 goto error;
2689
        /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
           the module name and the global name using UTF-8. We do so only when
           we are using pickle protocol version 3 or newer. This is to
           ensure compatibility with older Unpicklers running on Python 2.x. */
2694 if (self->proto >= 3) {
2695 unicode_encoder = PyUnicode_AsUTF8String;
2696 }
2697 else {
2698 unicode_encoder = PyUnicode_AsASCIIString;
2699 }
2700
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002701 /* For protocol < 3 and if the user didn't request against doing so,
2702 we convert module names to the old 2.x module names. */
2703 if (self->fix_imports) {
2704 PyObject *key;
2705 PyObject *item;
2706
2707 key = PyTuple_Pack(2, module_name, global_name);
2708 if (key == NULL)
2709 goto error;
2710 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2711 Py_DECREF(key);
2712 if (item) {
2713 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2714 PyErr_Format(PyExc_RuntimeError,
2715 "_compat_pickle.REVERSE_NAME_MAPPING values "
2716 "should be 2-tuples, not %.200s",
2717 Py_TYPE(item)->tp_name);
2718 goto error;
2719 }
2720 Py_CLEAR(module_name);
2721 Py_CLEAR(global_name);
2722 module_name = PyTuple_GET_ITEM(item, 0);
2723 global_name = PyTuple_GET_ITEM(item, 1);
2724 if (!PyUnicode_Check(module_name) ||
2725 !PyUnicode_Check(global_name)) {
2726 PyErr_Format(PyExc_RuntimeError,
2727 "_compat_pickle.REVERSE_NAME_MAPPING values "
2728 "should be pairs of str, not (%.200s, %.200s)",
2729 Py_TYPE(module_name)->tp_name,
2730 Py_TYPE(global_name)->tp_name);
2731 goto error;
2732 }
2733 Py_INCREF(module_name);
2734 Py_INCREF(global_name);
2735 }
2736 else if (PyErr_Occurred()) {
2737 goto error;
2738 }
2739
2740 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2741 if (item) {
2742 if (!PyUnicode_Check(item)) {
2743 PyErr_Format(PyExc_RuntimeError,
2744 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2745 "should be strings, not %.200s",
2746 Py_TYPE(item)->tp_name);
2747 goto error;
2748 }
2749 Py_CLEAR(module_name);
2750 module_name = item;
2751 Py_INCREF(module_name);
2752 }
2753 else if (PyErr_Occurred()) {
2754 goto error;
2755 }
2756 }
2757
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002758 /* Save the name of the module. */
2759 encoded = unicode_encoder(module_name);
2760 if (encoded == NULL) {
2761 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2762 PyErr_Format(PicklingError,
2763 "can't pickle module identifier '%S' using "
2764 "pickle protocol %i", module_name, self->proto);
2765 goto error;
2766 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002767 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002768 PyBytes_GET_SIZE(encoded)) < 0) {
2769 Py_DECREF(encoded);
2770 goto error;
2771 }
2772 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002773 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002774 goto error;
2775
        /* Save the name of the global. */
2777 encoded = unicode_encoder(global_name);
2778 if (encoded == NULL) {
2779 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2780 PyErr_Format(PicklingError,
2781 "can't pickle global identifier '%S' using "
2782 "pickle protocol %i", global_name, self->proto);
2783 goto error;
2784 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002785 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002786 PyBytes_GET_SIZE(encoded)) < 0) {
2787 Py_DECREF(encoded);
2788 goto error;
2789 }
2790 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002791 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002792 goto error;
2793
2794 /* Memoize the object. */
2795 if (memo_put(self, obj) < 0)
2796 goto error;
2797 }
2798
2799 if (0) {
2800 error:
2801 status = -1;
2802 }
2803 Py_XDECREF(module_name);
2804 Py_XDECREF(global_name);
2805 Py_XDECREF(module);
2806
2807 return status;
2808}
2809
2810static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002811save_ellipsis(PicklerObject *self, PyObject *obj)
2812{
Łukasz Langadbd78252012-03-12 22:59:11 +01002813 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002814 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002815 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002816 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002817 res = save_global(self, Py_Ellipsis, str);
2818 Py_DECREF(str);
2819 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002820}
2821
2822static int
2823save_notimplemented(PicklerObject *self, PyObject *obj)
2824{
Łukasz Langadbd78252012-03-12 22:59:11 +01002825 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002826 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01002827 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01002828 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05002829 res = save_global(self, Py_NotImplemented, str);
2830 Py_DECREF(str);
2831 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01002832}
2833
2834static int
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08002835save_singleton_type(PicklerObject *self, PyObject *obj, PyObject *singleton)
2836{
2837 PyObject *reduce_value;
2838 int status;
2839
2840 reduce_value = Py_BuildValue("O(O)", &PyType_Type, singleton);
2841 if (reduce_value == NULL) {
2842 return -1;
2843 }
2844 status = save_reduce(self, reduce_value, obj);
2845 Py_DECREF(reduce_value);
2846 return status;
2847}
2848
2849static int
2850save_type(PicklerObject *self, PyObject *obj)
2851{
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08002852 if (obj == (PyObject *)&_PyNone_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08002853 return save_singleton_type(self, obj, Py_None);
2854 }
2855 else if (obj == (PyObject *)&PyEllipsis_Type) {
2856 return save_singleton_type(self, obj, Py_Ellipsis);
2857 }
Alexandre Vassalotti65846c62013-11-30 17:55:48 -08002858 else if (obj == (PyObject *)&_PyNotImplemented_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08002859 return save_singleton_type(self, obj, Py_NotImplemented);
2860 }
2861 return save_global(self, obj, NULL);
2862}
2863
2864static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002865save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2866{
2867 PyObject *pid = NULL;
2868 int status = 0;
2869
2870 const char persid_op = PERSID;
2871 const char binpersid_op = BINPERSID;
2872
2873 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002874 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002875 if (pid == NULL)
2876 return -1;
2877
2878 if (pid != Py_None) {
2879 if (self->bin) {
2880 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002881 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002882 goto error;
2883 }
2884 else {
2885 PyObject *pid_str = NULL;
2886 char *pid_ascii_bytes;
2887 Py_ssize_t size;
2888
2889 pid_str = PyObject_Str(pid);
2890 if (pid_str == NULL)
2891 goto error;
2892
2893 /* XXX: Should it check whether the persistent id only contains
2894 ASCII characters? And what if the pid contains embedded
2895 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002896 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002897 Py_DECREF(pid_str);
2898 if (pid_ascii_bytes == NULL)
2899 goto error;
2900
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002901 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2902 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2903 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002904 goto error;
2905 }
2906 status = 1;
2907 }
2908
2909 if (0) {
2910 error:
2911 status = -1;
2912 }
2913 Py_XDECREF(pid);
2914
2915 return status;
2916}
2917
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002918static PyObject *
2919get_class(PyObject *obj)
2920{
2921 PyObject *cls;
2922 static PyObject *str_class;
2923
2924 if (str_class == NULL) {
2925 str_class = PyUnicode_InternFromString("__class__");
2926 if (str_class == NULL)
2927 return NULL;
2928 }
2929 cls = PyObject_GetAttr(obj, str_class);
2930 if (cls == NULL) {
2931 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2932 PyErr_Clear();
2933 cls = (PyObject *) Py_TYPE(obj);
2934 Py_INCREF(cls);
2935 }
2936 }
2937 return cls;
2938}
2939
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002940/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2941 * appropriate __reduce__ method for obj.
2942 */
2943static int
2944save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2945{
2946 PyObject *callable;
2947 PyObject *argtup;
2948 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002949 PyObject *listitems = Py_None;
2950 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002951 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002952
2953 int use_newobj = self->proto >= 2;
2954
2955 const char reduce_op = REDUCE;
2956 const char build_op = BUILD;
2957 const char newobj_op = NEWOBJ;
2958
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002959 size = PyTuple_Size(args);
2960 if (size < 2 || size > 5) {
2961 PyErr_SetString(PicklingError, "tuple returned by "
2962 "__reduce__ must contain 2 through 5 elements");
2963 return -1;
2964 }
2965
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002966 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2967 &callable, &argtup, &state, &listitems, &dictitems))
2968 return -1;
2969
2970 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002971 PyErr_SetString(PicklingError, "first item of the tuple "
2972 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002973 return -1;
2974 }
2975 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002976 PyErr_SetString(PicklingError, "second item of the tuple "
2977 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002978 return -1;
2979 }
2980
2981 if (state == Py_None)
2982 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002983
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002984 if (listitems == Py_None)
2985 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002986 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07002987 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002988 "returned by __reduce__ must be an iterator, not %s",
2989 Py_TYPE(listitems)->tp_name);
2990 return -1;
2991 }
2992
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002993 if (dictitems == Py_None)
2994 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002995 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07002996 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002997 "returned by __reduce__ must be an iterator, not %s",
2998 Py_TYPE(dictitems)->tp_name);
2999 return -1;
3000 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003001
3002 /* Protocol 2 special case: if callable's name is __newobj__, use
3003 NEWOBJ. */
3004 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003005 static PyObject *newobj_str = NULL, *name_str = NULL;
3006 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003007
3008 if (newobj_str == NULL) {
3009 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003010 name_str = PyUnicode_InternFromString("__name__");
3011 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003012 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003013 }
3014
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003015 name = PyObject_GetAttr(callable, name_str);
3016 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003017 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3018 PyErr_Clear();
3019 else
3020 return -1;
3021 use_newobj = 0;
3022 }
3023 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003024 use_newobj = PyUnicode_Check(name) &&
3025 PyUnicode_Compare(name, newobj_str) == 0;
3026 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003027 }
3028 }
3029 if (use_newobj) {
3030 PyObject *cls;
3031 PyObject *newargtup;
3032 PyObject *obj_class;
3033 int p;
3034
3035 /* Sanity checks. */
3036 if (Py_SIZE(argtup) < 1) {
3037 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3038 return -1;
3039 }
3040
3041 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003042 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003043 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003044 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003045 return -1;
3046 }
3047
3048 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003049 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003050 p = obj_class != cls; /* true iff a problem */
3051 Py_DECREF(obj_class);
3052 if (p) {
3053 PyErr_SetString(PicklingError, "args[0] from "
3054 "__newobj__ args has the wrong class");
3055 return -1;
3056 }
3057 }
3058 /* XXX: These calls save() are prone to infinite recursion. Imagine
3059 what happen if the value returned by the __reduce__() method of
3060 some extension type contains another object of the same type. Ouch!
3061
3062 Here is a quick example, that I ran into, to illustrate what I
3063 mean:
3064
3065 >>> import pickle, copyreg
3066 >>> copyreg.dispatch_table.pop(complex)
3067 >>> pickle.dumps(1+2j)
3068 Traceback (most recent call last):
3069 ...
3070 RuntimeError: maximum recursion depth exceeded
3071
3072 Removing the complex class from copyreg.dispatch_table made the
3073 __reduce_ex__() method emit another complex object:
3074
3075 >>> (1+1j).__reduce_ex__(2)
3076 (<function __newobj__ at 0xb7b71c3c>,
3077 (<class 'complex'>, (1+1j)), None, None, None)
3078
3079 Thus when save() was called on newargstup (the 2nd item) recursion
3080 ensued. Of course, the bug was in the complex class which had a
3081 broken __getnewargs__() that emitted another complex object. But,
3082 the point, here, is it is quite easy to end up with a broken reduce
3083 function. */
3084
3085 /* Save the class and its __new__ arguments. */
3086 if (save(self, cls, 0) < 0)
3087 return -1;
3088
3089 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3090 if (newargtup == NULL)
3091 return -1;
3092
3093 p = save(self, newargtup, 0);
3094 Py_DECREF(newargtup);
3095 if (p < 0)
3096 return -1;
3097
3098 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003099 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003100 return -1;
3101 }
3102 else { /* Not using NEWOBJ. */
3103 if (save(self, callable, 0) < 0 ||
3104 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003105 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003106 return -1;
3107 }
3108
3109 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3110 the caller do not want to memoize the object. Not particularly useful,
3111 but that is to mimic the behavior save_reduce() in pickle.py when
3112 obj is None. */
3113 if (obj && memo_put(self, obj) < 0)
3114 return -1;
3115
3116 if (listitems && batch_list(self, listitems) < 0)
3117 return -1;
3118
3119 if (dictitems && batch_dict(self, dictitems) < 0)
3120 return -1;
3121
3122 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003123 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003124 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003125 return -1;
3126 }
3127
3128 return 0;
3129}
3130
3131static int
3132save(PicklerObject *self, PyObject *obj, int pers_save)
3133{
3134 PyTypeObject *type;
3135 PyObject *reduce_func = NULL;
3136 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003137 int status = 0;
3138
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003139 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003140 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003141
3142 /* The extra pers_save argument is necessary to avoid calling save_pers()
3143 on its returned object. */
3144 if (!pers_save && self->pers_func) {
3145 /* save_pers() returns:
3146 -1 to signal an error;
3147 0 if it did nothing successfully;
3148 1 if a persistent id was saved.
3149 */
3150 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3151 goto done;
3152 }
3153
3154 type = Py_TYPE(obj);
3155
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003156 /* The old cPickle had an optimization that used switch-case statement
3157 dispatching on the first letter of the type name. This has was removed
3158 since benchmarks shown that this optimization was actually slowing
3159 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003160
3161 /* Atom types; these aren't memoized, so don't check the memo. */
3162
3163 if (obj == Py_None) {
3164 status = save_none(self, obj);
3165 goto done;
3166 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003167 else if (obj == Py_Ellipsis) {
3168 status = save_ellipsis(self, obj);
3169 goto done;
3170 }
3171 else if (obj == Py_NotImplemented) {
3172 status = save_notimplemented(self, obj);
3173 goto done;
3174 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003175 else if (obj == Py_False || obj == Py_True) {
3176 status = save_bool(self, obj);
3177 goto done;
3178 }
3179 else if (type == &PyLong_Type) {
3180 status = save_long(self, obj);
3181 goto done;
3182 }
3183 else if (type == &PyFloat_Type) {
3184 status = save_float(self, obj);
3185 goto done;
3186 }
3187
3188 /* Check the memo to see if it has the object. If so, generate
3189 a GET (or BINGET) opcode, instead of pickling the object
3190 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003191 if (PyMemoTable_Get(self->memo, obj)) {
3192 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003193 goto error;
3194 goto done;
3195 }
3196
3197 if (type == &PyBytes_Type) {
3198 status = save_bytes(self, obj);
3199 goto done;
3200 }
3201 else if (type == &PyUnicode_Type) {
3202 status = save_unicode(self, obj);
3203 goto done;
3204 }
3205 else if (type == &PyDict_Type) {
3206 status = save_dict(self, obj);
3207 goto done;
3208 }
3209 else if (type == &PyList_Type) {
3210 status = save_list(self, obj);
3211 goto done;
3212 }
3213 else if (type == &PyTuple_Type) {
3214 status = save_tuple(self, obj);
3215 goto done;
3216 }
3217 else if (type == &PyType_Type) {
Alexandre Vassalotti19b6fa62013-11-30 16:06:39 -08003218 status = save_type(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003219 goto done;
3220 }
3221 else if (type == &PyFunction_Type) {
3222 status = save_global(self, obj, NULL);
3223 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3224 /* fall back to reduce */
3225 PyErr_Clear();
3226 }
3227 else {
3228 goto done;
3229 }
3230 }
3231 else if (type == &PyCFunction_Type) {
3232 status = save_global(self, obj, NULL);
3233 goto done;
3234 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003235
3236 /* XXX: This part needs some unit tests. */
3237
3238 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003239 * self.dispatch_table, copyreg.dispatch_table, the object's
3240 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003241 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003242 if (self->dispatch_table == NULL) {
3243 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3244 /* PyDict_GetItem() unlike PyObject_GetItem() and
3245 PyObject_GetAttr() returns a borrowed ref */
3246 Py_XINCREF(reduce_func);
3247 } else {
3248 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3249 if (reduce_func == NULL) {
3250 if (PyErr_ExceptionMatches(PyExc_KeyError))
3251 PyErr_Clear();
3252 else
3253 goto error;
3254 }
3255 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003256 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003257 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003258 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003259 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003260 else if (PyType_IsSubtype(type, &PyType_Type)) {
3261 status = save_global(self, obj, NULL);
3262 goto done;
3263 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003264 else {
3265 static PyObject *reduce_str = NULL;
3266 static PyObject *reduce_ex_str = NULL;
3267
3268 /* Cache the name of the reduce methods. */
3269 if (reduce_str == NULL) {
3270 reduce_str = PyUnicode_InternFromString("__reduce__");
3271 if (reduce_str == NULL)
3272 goto error;
3273 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3274 if (reduce_ex_str == NULL)
3275 goto error;
3276 }
3277
3278 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3279 automatically defined as __reduce__. While this is convenient, this
3280 make it impossible to know which method was actually called. Of
3281 course, this is not a big deal. But still, it would be nice to let
3282 the user know which method was called when something go
3283 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3284 don't actually have to check for a __reduce__ method. */
3285
3286 /* Check for a __reduce_ex__ method. */
3287 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3288 if (reduce_func != NULL) {
3289 PyObject *proto;
3290 proto = PyLong_FromLong(self->proto);
3291 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003292 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003293 }
3294 }
3295 else {
3296 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3297 PyErr_Clear();
3298 else
3299 goto error;
3300 /* Check for a __reduce__ method. */
3301 reduce_func = PyObject_GetAttr(obj, reduce_str);
3302 if (reduce_func != NULL) {
3303 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3304 }
3305 else {
3306 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3307 type->tp_name, obj);
3308 goto error;
3309 }
3310 }
3311 }
3312
3313 if (reduce_value == NULL)
3314 goto error;
3315
3316 if (PyUnicode_Check(reduce_value)) {
3317 status = save_global(self, obj, reduce_value);
3318 goto done;
3319 }
3320
3321 if (!PyTuple_Check(reduce_value)) {
3322 PyErr_SetString(PicklingError,
3323 "__reduce__ must return a string or tuple");
3324 goto error;
3325 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003326
3327 status = save_reduce(self, reduce_value, obj);
3328
3329 if (0) {
3330 error:
3331 status = -1;
3332 }
3333 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003334 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003335 Py_XDECREF(reduce_func);
3336 Py_XDECREF(reduce_value);
3337
3338 return status;
3339}
3340
3341static int
3342dump(PicklerObject *self, PyObject *obj)
3343{
3344 const char stop_op = STOP;
3345
3346 if (self->proto >= 2) {
3347 char header[2];
3348
3349 header[0] = PROTO;
3350 assert(self->proto >= 0 && self->proto < 256);
3351 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003352 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003353 return -1;
3354 }
3355
3356 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003357 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003358 return -1;
3359
3360 return 0;
3361}
3362
3363PyDoc_STRVAR(Pickler_clear_memo_doc,
3364"clear_memo() -> None. Clears the pickler's \"memo\"."
3365"\n"
3366"The memo is the data structure that remembers which objects the\n"
3367"pickler has already seen, so that shared or recursive objects are\n"
3368"pickled by reference and not by value. This method is useful when\n"
3369"re-using picklers.");
3370
3371static PyObject *
3372Pickler_clear_memo(PicklerObject *self)
3373{
3374 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003375 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003376
3377 Py_RETURN_NONE;
3378}
3379
3380PyDoc_STRVAR(Pickler_dump_doc,
3381"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3382
3383static PyObject *
3384Pickler_dump(PicklerObject *self, PyObject *args)
3385{
3386 PyObject *obj;
3387
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003388 /* Check whether the Pickler was initialized correctly (issue3664).
3389 Developers often forget to call __init__() in their subclasses, which
3390 would trigger a segfault without this check. */
3391 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003392 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003393 "Pickler.__init__() was not called by %s.__init__()",
3394 Py_TYPE(self)->tp_name);
3395 return NULL;
3396 }
3397
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003398 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3399 return NULL;
3400
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003401 if (_Pickler_ClearBuffer(self) < 0)
3402 return NULL;
3403
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003404 if (dump(self, obj) < 0)
3405 return NULL;
3406
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003407 if (_Pickler_FlushToFile(self) < 0)
3408 return NULL;
3409
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003410 Py_RETURN_NONE;
3411}
3412
3413static struct PyMethodDef Pickler_methods[] = {
3414 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3415 Pickler_dump_doc},
3416 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3417 Pickler_clear_memo_doc},
3418 {NULL, NULL} /* sentinel */
3419};
3420
3421static void
3422Pickler_dealloc(PicklerObject *self)
3423{
3424 PyObject_GC_UnTrack(self);
3425
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003426 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003427 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003428 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003429 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003430 Py_XDECREF(self->arg);
3431 Py_XDECREF(self->fast_memo);
3432
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003433 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003434
3435 Py_TYPE(self)->tp_free((PyObject *)self);
3436}
3437
3438static int
3439Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3440{
3441 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003442 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003443 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003444 Py_VISIT(self->arg);
3445 Py_VISIT(self->fast_memo);
3446 return 0;
3447}
3448
3449static int
3450Pickler_clear(PicklerObject *self)
3451{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003452 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003453 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003454 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003455 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003456 Py_CLEAR(self->arg);
3457 Py_CLEAR(self->fast_memo);
3458
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003459 if (self->memo != NULL) {
3460 PyMemoTable *memo = self->memo;
3461 self->memo = NULL;
3462 PyMemoTable_Del(memo);
3463 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003464 return 0;
3465}
3466
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003467
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003468PyDoc_STRVAR(Pickler_doc,
3469"Pickler(file, protocol=None)"
3470"\n"
3471"This takes a binary file for writing a pickle data stream.\n"
3472"\n"
3473"The optional protocol argument tells the pickler to use the\n"
3474"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3475"protocol is 3; a backward-incompatible protocol designed for\n"
3476"Python 3.0.\n"
3477"\n"
3478"Specifying a negative protocol version selects the highest\n"
3479"protocol version supported. The higher the protocol used, the\n"
3480"more recent the version of Python needed to read the pickle\n"
3481"produced.\n"
3482"\n"
3483"The file argument must have a write() method that accepts a single\n"
3484"bytes argument. It can thus be a file object opened for binary\n"
3485"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003486"meets this interface.\n"
3487"\n"
3488"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3489"map the new Python 3.x names to the old module names used in Python\n"
3490"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003491
3492static int
3493Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3494{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003495 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003496 PyObject *file;
3497 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003498 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003499 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003500 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003501
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003502 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003503 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003504 return -1;
3505
3506 /* In case of multiple __init__() calls, clear previous content. */
3507 if (self->write != NULL)
3508 (void)Pickler_clear(self);
3509
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003510 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3511 return -1;
3512
3513 if (_Pickler_SetOutputStream(self, file) < 0)
3514 return -1;
3515
3516 /* memo and output_buffer may have already been created in _Pickler_New */
3517 if (self->memo == NULL) {
3518 self->memo = PyMemoTable_New();
3519 if (self->memo == NULL)
3520 return -1;
3521 }
3522 self->output_len = 0;
3523 if (self->output_buffer == NULL) {
3524 self->max_output_len = WRITE_BUF_SIZE;
3525 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3526 self->max_output_len);
3527 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003528 return -1;
3529 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003530
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003531 self->arg = NULL;
3532 self->fast = 0;
3533 self->fast_nesting = 0;
3534 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003535 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003536 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3537 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3538 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003539 if (self->pers_func == NULL)
3540 return -1;
3541 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003542 self->dispatch_table = NULL;
3543 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3544 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3545 &PyId_dispatch_table);
3546 if (self->dispatch_table == NULL)
3547 return -1;
3548 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003549 return 0;
3550}
3551
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003552/* Define a proxy object for the Pickler's internal memo object. This is to
3553 * avoid breaking code like:
3554 * pickler.memo.clear()
3555 * and
3556 * pickler.memo = saved_memo
3557 * Is this a good idea? Not really, but we don't want to break code that uses
3558 * it. Note that we don't implement the entire mapping API here. This is
3559 * intentional, as these should be treated as black-box implementation details.
3560 */
3561
/* Instance layout of _pickle.PicklerMemoProxy.  The proxy stores no memo
   data of its own; it only keeps a reference to the Pickler it was created
   for (see PicklerMemoProxy_New) and reads that pickler's memo field. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
3566
3567PyDoc_STRVAR(pmp_clear_doc,
3568"memo.clear() -> None. Remove all items from memo.");
3569
3570static PyObject *
3571pmp_clear(PicklerMemoProxyObject *self)
3572{
3573 if (self->pickler->memo)
3574 PyMemoTable_Clear(self->pickler->memo);
3575 Py_RETURN_NONE;
3576}
3577
3578PyDoc_STRVAR(pmp_copy_doc,
3579"memo.copy() -> new_memo. Copy the memo to a new object.");
3580
3581static PyObject *
3582pmp_copy(PicklerMemoProxyObject *self)
3583{
3584 Py_ssize_t i;
3585 PyMemoTable *memo;
3586 PyObject *new_memo = PyDict_New();
3587 if (new_memo == NULL)
3588 return NULL;
3589
3590 memo = self->pickler->memo;
3591 for (i = 0; i < memo->mt_allocated; ++i) {
3592 PyMemoEntry entry = memo->mt_table[i];
3593 if (entry.me_key != NULL) {
3594 int status;
3595 PyObject *key, *value;
3596
3597 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003598 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003599
3600 if (key == NULL || value == NULL) {
3601 Py_XDECREF(key);
3602 Py_XDECREF(value);
3603 goto error;
3604 }
3605 status = PyDict_SetItem(new_memo, key, value);
3606 Py_DECREF(key);
3607 Py_DECREF(value);
3608 if (status < 0)
3609 goto error;
3610 }
3611 }
3612 return new_memo;
3613
3614 error:
3615 Py_XDECREF(new_memo);
3616 return NULL;
3617}
3618
3619PyDoc_STRVAR(pmp_reduce_doc,
3620"memo.__reduce__(). Pickling support.");
3621
3622static PyObject *
3623pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3624{
3625 PyObject *reduce_value, *dict_args;
3626 PyObject *contents = pmp_copy(self);
3627 if (contents == NULL)
3628 return NULL;
3629
3630 reduce_value = PyTuple_New(2);
3631 if (reduce_value == NULL) {
3632 Py_DECREF(contents);
3633 return NULL;
3634 }
3635 dict_args = PyTuple_New(1);
3636 if (dict_args == NULL) {
3637 Py_DECREF(contents);
3638 Py_DECREF(reduce_value);
3639 return NULL;
3640 }
3641 PyTuple_SET_ITEM(dict_args, 0, contents);
3642 Py_INCREF((PyObject *)&PyDict_Type);
3643 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3644 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3645 return reduce_value;
3646}
3647
/* Method table of PicklerMemoProxyType.  Only clear(), copy() and
   __reduce__() are exposed; no other mapping methods are provided. */
static PyMethodDef picklerproxy_methods[] = {
    {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
    {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
    {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
    {NULL, NULL} /* sentinel */
};
3654
3655static void
3656PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3657{
3658 PyObject_GC_UnTrack(self);
3659 Py_XDECREF(self->pickler);
3660 PyObject_GC_Del((PyObject *)self);
3661}
3662
/* tp_traverse slot of PicklerMemoProxyType: the proxied pickler is the only
   object reference a proxy holds, so it is the only one reported to the
   garbage collector. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
3670
/* tp_clear slot of PicklerMemoProxyType: drop the reference to the proxied
   pickler.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
3677
/* Type object for _pickle.PicklerMemoProxy.  Instances are GC-tracked,
   explicitly unhashable, and expose only the methods listed in
   picklerproxy_methods. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /* tp_name */
    sizeof(PicklerMemoProxyObject),             /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
3708
3709static PyObject *
3710PicklerMemoProxy_New(PicklerObject *pickler)
3711{
3712 PicklerMemoProxyObject *self;
3713
3714 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3715 if (self == NULL)
3716 return NULL;
3717 Py_INCREF(pickler);
3718 self->pickler = pickler;
3719 PyObject_GC_Track(self);
3720 return (PyObject *)self;
3721}
3722
3723/*****************************************************************************/
3724
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003725static PyObject *
3726Pickler_get_memo(PicklerObject *self)
3727{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003728 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003729}
3730
3731static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003732Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003733{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003734 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003735
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003736 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003737 PyErr_SetString(PyExc_TypeError,
3738 "attribute deletion is not supported");
3739 return -1;
3740 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003741
3742 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3743 PicklerObject *pickler =
3744 ((PicklerMemoProxyObject *)obj)->pickler;
3745
3746 new_memo = PyMemoTable_Copy(pickler->memo);
3747 if (new_memo == NULL)
3748 return -1;
3749 }
3750 else if (PyDict_Check(obj)) {
3751 Py_ssize_t i = 0;
3752 PyObject *key, *value;
3753
3754 new_memo = PyMemoTable_New();
3755 if (new_memo == NULL)
3756 return -1;
3757
3758 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003759 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003760 PyObject *memo_obj;
3761
3762 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3763 PyErr_SetString(PyExc_TypeError,
3764 "'memo' values must be 2-item tuples");
3765 goto error;
3766 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003767 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003768 if (memo_id == -1 && PyErr_Occurred())
3769 goto error;
3770 memo_obj = PyTuple_GET_ITEM(value, 1);
3771 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3772 goto error;
3773 }
3774 }
3775 else {
3776 PyErr_Format(PyExc_TypeError,
3777 "'memo' attribute must be an PicklerMemoProxy object"
3778 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003779 return -1;
3780 }
3781
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003782 PyMemoTable_Del(self->memo);
3783 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003784
3785 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003786
3787 error:
3788 if (new_memo)
3789 PyMemoTable_Del(new_memo);
3790 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003791}
3792
3793static PyObject *
3794Pickler_get_persid(PicklerObject *self)
3795{
3796 if (self->pers_func == NULL)
3797 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3798 else
3799 Py_INCREF(self->pers_func);
3800 return self->pers_func;
3801}
3802
3803static int
3804Pickler_set_persid(PicklerObject *self, PyObject *value)
3805{
3806 PyObject *tmp;
3807
3808 if (value == NULL) {
3809 PyErr_SetString(PyExc_TypeError,
3810 "attribute deletion is not supported");
3811 return -1;
3812 }
3813 if (!PyCallable_Check(value)) {
3814 PyErr_SetString(PyExc_TypeError,
3815 "persistent_id must be a callable taking one argument");
3816 return -1;
3817 }
3818
3819 tmp = self->pers_func;
3820 Py_INCREF(value);
3821 self->pers_func = value;
3822 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3823
3824 return 0;
3825}
3826
/* Struct fields of PicklerObject exposed directly as Python attributes:
   the int fields bin and fast, and the object field dispatch_table
   (T_OBJECT_EX). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}
};
3833
/* Computed attributes of Pickler: "memo" and "persistent_id", each backed
   by a getter/setter pair defined above. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
3841
/* Type object for _pickle.Pickler: a subclassable, GC-tracked type whose
   instances are allocated by the generic alloc/new functions and set up
   by Pickler_init. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler" ,                 /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3884
Victor Stinner121aab42011-09-29 23:40:53 +02003885/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003886
3887 XXX: It would be nice to able to avoid Python function call overhead, by
3888 using directly the C version of find_class(), when find_class() is not
3889 overridden by a subclass. Although, this could become rather hackish. A
3890 simpler optimization would be to call the C function when self is not a
3891 subclass instance. */
3892static PyObject *
3893find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3894{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003895 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003896
3897 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3898 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003899}
3900
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003901static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003902marker(UnpicklerObject *self)
3903{
3904 if (self->num_marks < 1) {
3905 PyErr_SetString(UnpicklingError, "could not find MARK");
3906 return -1;
3907 }
3908
3909 return self->marks[--self->num_marks];
3910}
3911
3912static int
3913load_none(UnpicklerObject *self)
3914{
3915 PDATA_APPEND(self->stack, Py_None, -1);
3916 return 0;
3917}
3918
3919static int
3920bad_readline(void)
3921{
3922 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3923 return -1;
3924}
3925
3926static int
3927load_int(UnpicklerObject *self)
3928{
3929 PyObject *value;
3930 char *endptr, *s;
3931 Py_ssize_t len;
3932 long x;
3933
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003934 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003935 return -1;
3936 if (len < 2)
3937 return bad_readline();
3938
3939 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003940 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003941 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003942 x = strtol(s, &endptr, 0);
3943
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003944 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003945 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03003946 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003947 errno = 0;
3948 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003949 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003950 if (value == NULL) {
3951 PyErr_SetString(PyExc_ValueError,
3952 "could not convert string to int");
3953 return -1;
3954 }
3955 }
3956 else {
3957 if (len == 3 && (x == 0 || x == 1)) {
3958 if ((value = PyBool_FromLong(x)) == NULL)
3959 return -1;
3960 }
3961 else {
3962 if ((value = PyLong_FromLong(x)) == NULL)
3963 return -1;
3964 }
3965 }
3966
3967 PDATA_PUSH(self->stack, value, -1);
3968 return 0;
3969}
3970
3971static int
3972load_bool(UnpicklerObject *self, PyObject *boolean)
3973{
3974 assert(boolean == Py_True || boolean == Py_False);
3975 PDATA_APPEND(self->stack, boolean, -1);
3976 return 0;
3977}
3978
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003979/* s contains x bytes of an unsigned little-endian integer. Return its value
3980 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3981 */
3982static Py_ssize_t
3983calc_binsize(char *bytes, int size)
3984{
3985 unsigned char *s = (unsigned char *)bytes;
3986 size_t x = 0;
3987
3988 assert(size == 4);
3989
3990 x = (size_t) s[0];
3991 x |= (size_t) s[1] << 8;
3992 x |= (size_t) s[2] << 16;
3993 x |= (size_t) s[3] << 24;
3994
3995 if (x > PY_SSIZE_T_MAX)
3996 return -1;
3997 else
3998 return (Py_ssize_t) x;
3999}
4000
/* `bytes` points at `size` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are assembled in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (undefined behaviour when
 * long is 32 bits wide).  Callers in this file pass a size of 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is x - 2**32, computed here
     * as -(~x mod 2**31) - 1, which stays within the range of long whatever
     * its width.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(~x & 0x7FFFFFFFUL) - 1;
    }

    return (long)x;
}
4027
4028static int
4029load_binintx(UnpicklerObject *self, char *s, int size)
4030{
4031 PyObject *value;
4032 long x;
4033
4034 x = calc_binint(s, size);
4035
4036 if ((value = PyLong_FromLong(x)) == NULL)
4037 return -1;
4038
4039 PDATA_PUSH(self->stack, value, -1);
4040 return 0;
4041}
4042
4043static int
4044load_binint(UnpicklerObject *self)
4045{
4046 char *s;
4047
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004048 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004049 return -1;
4050
4051 return load_binintx(self, s, 4);
4052}
4053
4054static int
4055load_binint1(UnpicklerObject *self)
4056{
4057 char *s;
4058
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004059 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004060 return -1;
4061
4062 return load_binintx(self, s, 1);
4063}
4064
4065static int
4066load_binint2(UnpicklerObject *self)
4067{
4068 char *s;
4069
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004070 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004071 return -1;
4072
4073 return load_binintx(self, s, 2);
4074}
4075
4076static int
4077load_long(UnpicklerObject *self)
4078{
4079 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004080 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004081 Py_ssize_t len;
4082
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004083 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004084 return -1;
4085 if (len < 2)
4086 return bad_readline();
4087
Mark Dickinson8dd05142009-01-20 20:43:58 +00004088 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4089 the 'L' before calling PyLong_FromString. In order to maintain
4090 compatibility with Python 3.0.0, we don't actually *require*
4091 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004092 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004093 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004094 /* XXX: Should the base argument explicitly set to 10? */
4095 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004096 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004097 return -1;
4098
4099 PDATA_PUSH(self->stack, value, -1);
4100 return 0;
4101}
4102
4103/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4104 * data following.
4105 */
4106static int
4107load_counted_long(UnpicklerObject *self, int size)
4108{
4109 PyObject *value;
4110 char *nbytes;
4111 char *pdata;
4112
4113 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004114 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004115 return -1;
4116
4117 size = calc_binint(nbytes, size);
4118 if (size < 0) {
4119 /* Corrupt or hostile pickle -- we never write one like this */
4120 PyErr_SetString(UnpicklingError,
4121 "LONG pickle has negative byte count");
4122 return -1;
4123 }
4124
4125 if (size == 0)
4126 value = PyLong_FromLong(0L);
4127 else {
4128 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004129 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004130 return -1;
4131 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4132 1 /* little endian */ , 1 /* signed */ );
4133 }
4134 if (value == NULL)
4135 return -1;
4136 PDATA_PUSH(self->stack, value, -1);
4137 return 0;
4138}
4139
4140static int
4141load_float(UnpicklerObject *self)
4142{
4143 PyObject *value;
4144 char *endptr, *s;
4145 Py_ssize_t len;
4146 double d;
4147
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004148 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004149 return -1;
4150 if (len < 2)
4151 return bad_readline();
4152
4153 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004154 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4155 if (d == -1.0 && PyErr_Occurred())
4156 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004157 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004158 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4159 return -1;
4160 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004161 value = PyFloat_FromDouble(d);
4162 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004163 return -1;
4164
4165 PDATA_PUSH(self->stack, value, -1);
4166 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004167}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004168
4169static int
4170load_binfloat(UnpicklerObject *self)
4171{
4172 PyObject *value;
4173 double x;
4174 char *s;
4175
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004176 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004177 return -1;
4178
4179 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4180 if (x == -1.0 && PyErr_Occurred())
4181 return -1;
4182
4183 if ((value = PyFloat_FromDouble(x)) == NULL)
4184 return -1;
4185
4186 PDATA_PUSH(self->stack, value, -1);
4187 return 0;
4188}
4189
4190static int
4191load_string(UnpicklerObject *self)
4192{
4193 PyObject *bytes;
4194 PyObject *str = NULL;
4195 Py_ssize_t len;
4196 char *s, *p;
4197
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004198 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004199 return -1;
Antoine Pitrou3034efd2013-04-15 21:51:09 +02004200 if (len < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004201 return bad_readline();
4202 if ((s = strdup(s)) == NULL) {
4203 PyErr_NoMemory();
4204 return -1;
4205 }
4206
4207 /* Strip outermost quotes */
Antoine Pitrou3034efd2013-04-15 21:51:09 +02004208 while (len > 0 && s[len - 1] <= ' ')
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004209 len--;
Antoine Pitrou3034efd2013-04-15 21:51:09 +02004210 if (len > 1 && s[0] == '"' && s[len - 1] == '"') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004211 s[len - 1] = '\0';
4212 p = s + 1;
4213 len -= 2;
4214 }
Antoine Pitrou3034efd2013-04-15 21:51:09 +02004215 else if (len > 1 && s[0] == '\'' && s[len - 1] == '\'') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004216 s[len - 1] = '\0';
4217 p = s + 1;
4218 len -= 2;
4219 }
4220 else {
4221 free(s);
4222 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4223 return -1;
4224 }
4225
4226 /* Use the PyBytes API to decode the string, since that is what is used
4227 to encode, and then coerce the result to Unicode. */
4228 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4229 free(s);
4230 if (bytes == NULL)
4231 return -1;
4232 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4233 Py_DECREF(bytes);
4234 if (str == NULL)
4235 return -1;
4236
4237 PDATA_PUSH(self->stack, str, -1);
4238 return 0;
4239}
4240
4241static int
4242load_binbytes(UnpicklerObject *self)
4243{
4244 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004245 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004246 char *s;
4247
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004248 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004249 return -1;
4250
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004251 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004252 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004253 PyErr_Format(PyExc_OverflowError,
4254 "BINBYTES exceeds system's maximum size of %zd bytes",
4255 PY_SSIZE_T_MAX
4256 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004257 return -1;
4258 }
4259
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004260 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004261 return -1;
4262 bytes = PyBytes_FromStringAndSize(s, x);
4263 if (bytes == NULL)
4264 return -1;
4265
4266 PDATA_PUSH(self->stack, bytes, -1);
4267 return 0;
4268}
4269
4270static int
4271load_short_binbytes(UnpicklerObject *self)
4272{
4273 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004274 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004275 char *s;
4276
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004277 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004278 return -1;
4279
4280 x = (unsigned char)s[0];
4281
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004282 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004283 return -1;
4284
4285 bytes = PyBytes_FromStringAndSize(s, x);
4286 if (bytes == NULL)
4287 return -1;
4288
4289 PDATA_PUSH(self->stack, bytes, -1);
4290 return 0;
4291}
4292
4293static int
4294load_binstring(UnpicklerObject *self)
4295{
4296 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004297 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004298 char *s;
4299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004300 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004301 return -1;
4302
4303 x = calc_binint(s, 4);
4304 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004305 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004306 "BINSTRING pickle has negative byte count");
4307 return -1;
4308 }
4309
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004310 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004311 return -1;
4312
4313 /* Convert Python 2.x strings to unicode. */
4314 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4315 if (str == NULL)
4316 return -1;
4317
4318 PDATA_PUSH(self->stack, str, -1);
4319 return 0;
4320}
4321
4322static int
4323load_short_binstring(UnpicklerObject *self)
4324{
4325 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004326 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004327 char *s;
4328
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004329 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004330 return -1;
4331
4332 x = (unsigned char)s[0];
4333
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004334 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004335 return -1;
4336
4337 /* Convert Python 2.x strings to unicode. */
4338 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4339 if (str == NULL)
4340 return -1;
4341
4342 PDATA_PUSH(self->stack, str, -1);
4343 return 0;
4344}
4345
4346static int
4347load_unicode(UnpicklerObject *self)
4348{
4349 PyObject *str;
4350 Py_ssize_t len;
4351 char *s;
4352
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004353 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004354 return -1;
4355 if (len < 1)
4356 return bad_readline();
4357
4358 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4359 if (str == NULL)
4360 return -1;
4361
4362 PDATA_PUSH(self->stack, str, -1);
4363 return 0;
4364}
4365
4366static int
4367load_binunicode(UnpicklerObject *self)
4368{
4369 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004370 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004371 char *s;
4372
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004373 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004374 return -1;
4375
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004376 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004377 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004378 PyErr_Format(PyExc_OverflowError,
4379 "BINUNICODE exceeds system's maximum size of %zd bytes",
4380 PY_SSIZE_T_MAX
4381 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004382 return -1;
4383 }
4384
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004385
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004386 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004387 return -1;
4388
Victor Stinner485fb562010-04-13 11:07:24 +00004389 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004390 if (str == NULL)
4391 return -1;
4392
4393 PDATA_PUSH(self->stack, str, -1);
4394 return 0;
4395}
4396
4397static int
4398load_tuple(UnpicklerObject *self)
4399{
4400 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004401 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004402
4403 if ((i = marker(self)) < 0)
4404 return -1;
4405
4406 tuple = Pdata_poptuple(self->stack, i);
4407 if (tuple == NULL)
4408 return -1;
4409 PDATA_PUSH(self->stack, tuple, -1);
4410 return 0;
4411}
4412
4413static int
4414load_counted_tuple(UnpicklerObject *self, int len)
4415{
4416 PyObject *tuple;
4417
4418 tuple = PyTuple_New(len);
4419 if (tuple == NULL)
4420 return -1;
4421
4422 while (--len >= 0) {
4423 PyObject *item;
4424
4425 PDATA_POP(self->stack, item);
4426 if (item == NULL)
4427 return -1;
4428 PyTuple_SET_ITEM(tuple, len, item);
4429 }
4430 PDATA_PUSH(self->stack, tuple, -1);
4431 return 0;
4432}
4433
4434static int
4435load_empty_list(UnpicklerObject *self)
4436{
4437 PyObject *list;
4438
4439 if ((list = PyList_New(0)) == NULL)
4440 return -1;
4441 PDATA_PUSH(self->stack, list, -1);
4442 return 0;
4443}
4444
4445static int
4446load_empty_dict(UnpicklerObject *self)
4447{
4448 PyObject *dict;
4449
4450 if ((dict = PyDict_New()) == NULL)
4451 return -1;
4452 PDATA_PUSH(self->stack, dict, -1);
4453 return 0;
4454}
4455
4456static int
4457load_list(UnpicklerObject *self)
4458{
4459 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004460 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004461
4462 if ((i = marker(self)) < 0)
4463 return -1;
4464
4465 list = Pdata_poplist(self->stack, i);
4466 if (list == NULL)
4467 return -1;
4468 PDATA_PUSH(self->stack, list, -1);
4469 return 0;
4470}
4471
4472static int
4473load_dict(UnpicklerObject *self)
4474{
4475 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004476 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004477
4478 if ((i = marker(self)) < 0)
4479 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004480 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004481
4482 if ((dict = PyDict_New()) == NULL)
4483 return -1;
4484
4485 for (k = i + 1; k < j; k += 2) {
4486 key = self->stack->data[k - 1];
4487 value = self->stack->data[k];
4488 if (PyDict_SetItem(dict, key, value) < 0) {
4489 Py_DECREF(dict);
4490 return -1;
4491 }
4492 }
4493 Pdata_clear(self->stack, i);
4494 PDATA_PUSH(self->stack, dict, -1);
4495 return 0;
4496}
4497
4498static PyObject *
4499instantiate(PyObject *cls, PyObject *args)
4500{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004501 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004502 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004503 /* Caller must assure args are a tuple. Normally, args come from
4504 Pdata_poptuple which packs objects from the top of the stack
4505 into a newly created tuple. */
4506 assert(PyTuple_Check(args));
4507 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004508 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004509 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004510 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004511 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004512 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004513
4514 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004515 }
4516 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004517}
4518
4519static int
4520load_obj(UnpicklerObject *self)
4521{
4522 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004523 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004524
4525 if ((i = marker(self)) < 0)
4526 return -1;
4527
4528 args = Pdata_poptuple(self->stack, i + 1);
4529 if (args == NULL)
4530 return -1;
4531
4532 PDATA_POP(self->stack, cls);
4533 if (cls) {
4534 obj = instantiate(cls, args);
4535 Py_DECREF(cls);
4536 }
4537 Py_DECREF(args);
4538 if (obj == NULL)
4539 return -1;
4540
4541 PDATA_PUSH(self->stack, obj, -1);
4542 return 0;
4543}
4544
4545static int
4546load_inst(UnpicklerObject *self)
4547{
4548 PyObject *cls = NULL;
4549 PyObject *args = NULL;
4550 PyObject *obj = NULL;
4551 PyObject *module_name;
4552 PyObject *class_name;
4553 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004554 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004555 char *s;
4556
4557 if ((i = marker(self)) < 0)
4558 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004559 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004560 return -1;
4561 if (len < 2)
4562 return bad_readline();
4563
4564 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4565 identifiers are permitted in Python 3.0, since the INST opcode is only
4566 supported by older protocols on Python 2.x. */
4567 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4568 if (module_name == NULL)
4569 return -1;
4570
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004571 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004572 if (len < 2)
4573 return bad_readline();
4574 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004575 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004576 cls = find_class(self, module_name, class_name);
4577 Py_DECREF(class_name);
4578 }
4579 }
4580 Py_DECREF(module_name);
4581
4582 if (cls == NULL)
4583 return -1;
4584
4585 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4586 obj = instantiate(cls, args);
4587 Py_DECREF(args);
4588 }
4589 Py_DECREF(cls);
4590
4591 if (obj == NULL)
4592 return -1;
4593
4594 PDATA_PUSH(self->stack, obj, -1);
4595 return 0;
4596}
4597
4598static int
4599load_newobj(UnpicklerObject *self)
4600{
4601 PyObject *args = NULL;
4602 PyObject *clsraw = NULL;
4603 PyTypeObject *cls; /* clsraw cast to its true type */
4604 PyObject *obj;
4605
4606 /* Stack is ... cls argtuple, and we want to call
4607 * cls.__new__(cls, *argtuple).
4608 */
4609 PDATA_POP(self->stack, args);
4610 if (args == NULL)
4611 goto error;
4612 if (!PyTuple_Check(args)) {
4613 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4614 goto error;
4615 }
4616
4617 PDATA_POP(self->stack, clsraw);
4618 cls = (PyTypeObject *)clsraw;
4619 if (cls == NULL)
4620 goto error;
4621 if (!PyType_Check(cls)) {
4622 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4623 "isn't a type object");
4624 goto error;
4625 }
4626 if (cls->tp_new == NULL) {
4627 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4628 "has NULL tp_new");
4629 goto error;
4630 }
4631
4632 /* Call __new__. */
4633 obj = cls->tp_new(cls, args, NULL);
4634 if (obj == NULL)
4635 goto error;
4636
4637 Py_DECREF(args);
4638 Py_DECREF(clsraw);
4639 PDATA_PUSH(self->stack, obj, -1);
4640 return 0;
4641
4642 error:
4643 Py_XDECREF(args);
4644 Py_XDECREF(clsraw);
4645 return -1;
4646}
4647
4648static int
4649load_global(UnpicklerObject *self)
4650{
4651 PyObject *global = NULL;
4652 PyObject *module_name;
4653 PyObject *global_name;
4654 Py_ssize_t len;
4655 char *s;
4656
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004657 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004658 return -1;
4659 if (len < 2)
4660 return bad_readline();
4661 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4662 if (!module_name)
4663 return -1;
4664
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004665 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666 if (len < 2) {
4667 Py_DECREF(module_name);
4668 return bad_readline();
4669 }
4670 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4671 if (global_name) {
4672 global = find_class(self, module_name, global_name);
4673 Py_DECREF(global_name);
4674 }
4675 }
4676 Py_DECREF(module_name);
4677
4678 if (global == NULL)
4679 return -1;
4680 PDATA_PUSH(self->stack, global, -1);
4681 return 0;
4682}
4683
4684static int
4685load_persid(UnpicklerObject *self)
4686{
4687 PyObject *pid;
4688 Py_ssize_t len;
4689 char *s;
4690
4691 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004692 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004693 return -1;
Alexandre Vassalotti896414f2013-11-30 13:52:35 -08004694 if (len < 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004695 return bad_readline();
4696
4697 pid = PyBytes_FromStringAndSize(s, len - 1);
4698 if (pid == NULL)
4699 return -1;
4700
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004701 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004702 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004703 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004704 if (pid == NULL)
4705 return -1;
4706
4707 PDATA_PUSH(self->stack, pid, -1);
4708 return 0;
4709 }
4710 else {
4711 PyErr_SetString(UnpicklingError,
4712 "A load persistent id instruction was encountered,\n"
4713 "but no persistent_load function was specified.");
4714 return -1;
4715 }
4716}
4717
4718static int
4719load_binpersid(UnpicklerObject *self)
4720{
4721 PyObject *pid;
4722
4723 if (self->pers_func) {
4724 PDATA_POP(self->stack, pid);
4725 if (pid == NULL)
4726 return -1;
4727
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004728 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004730 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004731 if (pid == NULL)
4732 return -1;
4733
4734 PDATA_PUSH(self->stack, pid, -1);
4735 return 0;
4736 }
4737 else {
4738 PyErr_SetString(UnpicklingError,
4739 "A load persistent id instruction was encountered,\n"
4740 "but no persistent_load function was specified.");
4741 return -1;
4742 }
4743}
4744
4745static int
4746load_pop(UnpicklerObject *self)
4747{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004748 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004749
4750 /* Note that we split the (pickle.py) stack into two stacks,
4751 * an object stack and a mark stack. We have to be clever and
4752 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004753 * mark stack first, and only signalling a stack underflow if
4754 * the object stack is empty and the mark stack doesn't match
4755 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004756 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004757 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004758 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004759 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004760 len--;
4761 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004762 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004763 } else {
4764 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004766 return 0;
4767}
4768
4769static int
4770load_pop_mark(UnpicklerObject *self)
4771{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004772 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004773
4774 if ((i = marker(self)) < 0)
4775 return -1;
4776
4777 Pdata_clear(self->stack, i);
4778
4779 return 0;
4780}
4781
4782static int
4783load_dup(UnpicklerObject *self)
4784{
4785 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004786 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004787
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004788 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004789 return stack_underflow();
4790 last = self->stack->data[len - 1];
4791 PDATA_APPEND(self->stack, last, -1);
4792 return 0;
4793}
4794
4795static int
4796load_get(UnpicklerObject *self)
4797{
4798 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004799 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004800 Py_ssize_t len;
4801 char *s;
4802
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004803 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004804 return -1;
4805 if (len < 2)
4806 return bad_readline();
4807
4808 key = PyLong_FromString(s, NULL, 10);
4809 if (key == NULL)
4810 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004811 idx = PyLong_AsSsize_t(key);
4812 if (idx == -1 && PyErr_Occurred()) {
4813 Py_DECREF(key);
4814 return -1;
4815 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004817 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004818 if (value == NULL) {
4819 if (!PyErr_Occurred())
4820 PyErr_SetObject(PyExc_KeyError, key);
4821 Py_DECREF(key);
4822 return -1;
4823 }
4824 Py_DECREF(key);
4825
4826 PDATA_APPEND(self->stack, value, -1);
4827 return 0;
4828}
4829
4830static int
4831load_binget(UnpicklerObject *self)
4832{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004833 PyObject *value;
4834 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835 char *s;
4836
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004837 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004838 return -1;
4839
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004840 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004841
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004842 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004844 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004845 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004847 Py_DECREF(key);
4848 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004849 return -1;
4850 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004851
4852 PDATA_APPEND(self->stack, value, -1);
4853 return 0;
4854}
4855
4856static int
4857load_long_binget(UnpicklerObject *self)
4858{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859 PyObject *value;
4860 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004861 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004863 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864 return -1;
4865
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004866 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004867
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004868 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004869 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004870 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004871 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02004873 Py_DECREF(key);
4874 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004875 return -1;
4876 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004877
4878 PDATA_APPEND(self->stack, value, -1);
4879 return 0;
4880}
4881
4882/* Push an object from the extension registry (EXT[124]). nbytes is
4883 * the number of bytes following the opcode, holding the index (code) value.
4884 */
4885static int
4886load_extension(UnpicklerObject *self, int nbytes)
4887{
4888 char *codebytes; /* the nbytes bytes after the opcode */
4889 long code; /* calc_binint returns long */
4890 PyObject *py_code; /* code as a Python int */
4891 PyObject *obj; /* the object to push */
4892 PyObject *pair; /* (module_name, class_name) */
4893 PyObject *module_name, *class_name;
4894
4895 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004896 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897 return -1;
4898 code = calc_binint(codebytes, nbytes);
4899 if (code <= 0) { /* note that 0 is forbidden */
4900 /* Corrupt or hostile pickle. */
4901 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4902 return -1;
4903 }
4904
4905 /* Look for the code in the cache. */
4906 py_code = PyLong_FromLong(code);
4907 if (py_code == NULL)
4908 return -1;
4909 obj = PyDict_GetItem(extension_cache, py_code);
4910 if (obj != NULL) {
4911 /* Bingo. */
4912 Py_DECREF(py_code);
4913 PDATA_APPEND(self->stack, obj, -1);
4914 return 0;
4915 }
4916
4917 /* Look up the (module_name, class_name) pair. */
4918 pair = PyDict_GetItem(inverted_registry, py_code);
4919 if (pair == NULL) {
4920 Py_DECREF(py_code);
4921 PyErr_Format(PyExc_ValueError, "unregistered extension "
4922 "code %ld", code);
4923 return -1;
4924 }
4925 /* Since the extension registry is manipulable via Python code,
4926 * confirm that pair is really a 2-tuple of strings.
4927 */
4928 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4929 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4930 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4931 Py_DECREF(py_code);
4932 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4933 "isn't a 2-tuple of strings", code);
4934 return -1;
4935 }
4936 /* Load the object. */
4937 obj = find_class(self, module_name, class_name);
4938 if (obj == NULL) {
4939 Py_DECREF(py_code);
4940 return -1;
4941 }
4942 /* Cache code -> obj. */
4943 code = PyDict_SetItem(extension_cache, py_code, obj);
4944 Py_DECREF(py_code);
4945 if (code < 0) {
4946 Py_DECREF(obj);
4947 return -1;
4948 }
4949 PDATA_PUSH(self->stack, obj, -1);
4950 return 0;
4951}
4952
4953static int
4954load_put(UnpicklerObject *self)
4955{
4956 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004957 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004958 Py_ssize_t len;
4959 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004961 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004962 return -1;
4963 if (len < 2)
4964 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004965 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004966 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004967 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968
4969 key = PyLong_FromString(s, NULL, 10);
4970 if (key == NULL)
4971 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004972 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004973 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004974 if (idx < 0) {
4975 if (!PyErr_Occurred())
4976 PyErr_SetString(PyExc_ValueError,
4977 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004978 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004979 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004980
4981 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004982}
4983
4984static int
4985load_binput(UnpicklerObject *self)
4986{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004987 PyObject *value;
4988 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004990
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004991 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004992 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993
4994 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004995 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004996 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004997
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004998 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004999
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005000 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005001}
5002
5003static int
5004load_long_binput(UnpicklerObject *self)
5005{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005006 PyObject *value;
5007 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005008 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005009
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005010 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005011 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005012
5013 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005014 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005015 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005016
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005017 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005018 if (idx < 0) {
5019 PyErr_SetString(PyExc_ValueError,
5020 "negative LONG_BINPUT argument");
5021 return -1;
5022 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005023
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005024 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005025}
5026
5027static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005028do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005029{
5030 PyObject *value;
5031 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005032 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005033
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005034 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005035 if (x > len || x <= 0)
5036 return stack_underflow();
5037 if (len == x) /* nothing to do */
5038 return 0;
5039
5040 list = self->stack->data[x - 1];
5041
5042 if (PyList_Check(list)) {
5043 PyObject *slice;
5044 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005045 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005046
5047 slice = Pdata_poplist(self->stack, x);
5048 if (!slice)
5049 return -1;
5050 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005051 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005052 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005053 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005054 }
5055 else {
5056 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005057 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005058
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005059 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005060 if (append_func == NULL)
5061 return -1;
5062 for (i = x; i < len; i++) {
5063 PyObject *result;
5064
5065 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005066 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005067 if (result == NULL) {
5068 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005069 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005070 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005071 return -1;
5072 }
5073 Py_DECREF(result);
5074 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005075 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005076 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005077 }
5078
5079 return 0;
5080}
5081
5082static int
5083load_append(UnpicklerObject *self)
5084{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005085 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005086}
5087
5088static int
5089load_appends(UnpicklerObject *self)
5090{
5091 return do_append(self, marker(self));
5092}
5093
5094static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005095do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005096{
5097 PyObject *value, *key;
5098 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005099 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005100 int status = 0;
5101
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005102 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005103 if (x > len || x <= 0)
5104 return stack_underflow();
5105 if (len == x) /* nothing to do */
5106 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005107 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005108 /* Currupt or hostile pickle -- we never write one like this. */
5109 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5110 return -1;
5111 }
5112
5113 /* Here, dict does not actually need to be a PyDict; it could be anything
5114 that supports the __setitem__ attribute. */
5115 dict = self->stack->data[x - 1];
5116
5117 for (i = x + 1; i < len; i += 2) {
5118 key = self->stack->data[i - 1];
5119 value = self->stack->data[i];
5120 if (PyObject_SetItem(dict, key, value) < 0) {
5121 status = -1;
5122 break;
5123 }
5124 }
5125
5126 Pdata_clear(self->stack, x);
5127 return status;
5128}
5129
5130static int
5131load_setitem(UnpicklerObject *self)
5132{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005133 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005134}
5135
5136static int
5137load_setitems(UnpicklerObject *self)
5138{
5139 return do_setitems(self, marker(self));
5140}
5141
5142static int
5143load_build(UnpicklerObject *self)
5144{
5145 PyObject *state, *inst, *slotstate;
5146 PyObject *setstate;
5147 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005148 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005149
5150 /* Stack is ... instance, state. We want to leave instance at
5151 * the stack top, possibly mutated via instance.__setstate__(state).
5152 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005153 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005154 return stack_underflow();
5155
5156 PDATA_POP(self->stack, state);
5157 if (state == NULL)
5158 return -1;
5159
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005160 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005161
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005162 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005163 if (setstate == NULL) {
5164 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5165 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005166 else {
5167 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005168 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005169 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005170 }
5171 else {
5172 PyObject *result;
5173
5174 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005175 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005176 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005177 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005178 Py_DECREF(setstate);
5179 if (result == NULL)
5180 return -1;
5181 Py_DECREF(result);
5182 return 0;
5183 }
5184
5185 /* A default __setstate__. First see whether state embeds a
5186 * slot state dict too (a proto 2 addition).
5187 */
5188 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5189 PyObject *tmp = state;
5190
5191 state = PyTuple_GET_ITEM(tmp, 0);
5192 slotstate = PyTuple_GET_ITEM(tmp, 1);
5193 Py_INCREF(state);
5194 Py_INCREF(slotstate);
5195 Py_DECREF(tmp);
5196 }
5197 else
5198 slotstate = NULL;
5199
5200 /* Set inst.__dict__ from the state dict (if any). */
5201 if (state != Py_None) {
5202 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005203 PyObject *d_key, *d_value;
5204 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005205 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005206
5207 if (!PyDict_Check(state)) {
5208 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5209 goto error;
5210 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005211 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005212 if (dict == NULL)
5213 goto error;
5214
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005215 i = 0;
5216 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5217 /* normally the keys for instance attributes are
5218 interned. we should try to do that here. */
5219 Py_INCREF(d_key);
5220 if (PyUnicode_CheckExact(d_key))
5221 PyUnicode_InternInPlace(&d_key);
5222 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5223 Py_DECREF(d_key);
5224 goto error;
5225 }
5226 Py_DECREF(d_key);
5227 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005228 Py_DECREF(dict);
5229 }
5230
5231 /* Also set instance attributes from the slotstate dict (if any). */
5232 if (slotstate != NULL) {
5233 PyObject *d_key, *d_value;
5234 Py_ssize_t i;
5235
5236 if (!PyDict_Check(slotstate)) {
5237 PyErr_SetString(UnpicklingError,
5238 "slot state is not a dictionary");
5239 goto error;
5240 }
5241 i = 0;
5242 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5243 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5244 goto error;
5245 }
5246 }
5247
5248 if (0) {
5249 error:
5250 status = -1;
5251 }
5252
5253 Py_DECREF(state);
5254 Py_XDECREF(slotstate);
5255 return status;
5256}
5257
5258static int
5259load_mark(UnpicklerObject *self)
5260{
5261
5262 /* Note that we split the (pickle.py) stack into two stacks, an
5263 * object stack and a mark stack. Here we push a mark onto the
5264 * mark stack.
5265 */
5266
5267 if ((self->num_marks + 1) >= self->marks_size) {
5268 size_t alloc;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005269
5270 /* Use the size_t type to check for overflow. */
5271 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005272 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005273 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005274 PyErr_NoMemory();
5275 return -1;
5276 }
5277
5278 if (self->marks == NULL)
Benjamin Peterson59b08c12015-06-27 13:41:33 -05005279 self->marks = PyMem_NEW(Py_ssize_t, alloc);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005280 else
Benjamin Peterson59b08c12015-06-27 13:41:33 -05005281 PyMem_RESIZE(self->marks, Py_ssize_t, alloc);
5282 if (self->marks == NULL) {
5283 self->marks_size = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005284 PyErr_NoMemory();
5285 return -1;
5286 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005287 self->marks_size = (Py_ssize_t)alloc;
5288 }
5289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005290 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005291
5292 return 0;
5293}
5294
5295static int
5296load_reduce(UnpicklerObject *self)
5297{
5298 PyObject *callable = NULL;
5299 PyObject *argtup = NULL;
5300 PyObject *obj = NULL;
5301
5302 PDATA_POP(self->stack, argtup);
5303 if (argtup == NULL)
5304 return -1;
5305 PDATA_POP(self->stack, callable);
5306 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005307 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005308 Py_DECREF(callable);
5309 }
5310 Py_DECREF(argtup);
5311
5312 if (obj == NULL)
5313 return -1;
5314
5315 PDATA_PUSH(self->stack, obj, -1);
5316 return 0;
5317}
5318
5319/* Just raises an error if we don't know the protocol specified. PROTO
5320 * is the first opcode for protocols >= 2.
5321 */
5322static int
5323load_proto(UnpicklerObject *self)
5324{
5325 char *s;
5326 int i;
5327
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005328 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005329 return -1;
5330
5331 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005332 if (i <= HIGHEST_PROTOCOL) {
5333 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005334 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005335 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005336
5337 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5338 return -1;
5339}
5340
5341static PyObject *
5342load(UnpicklerObject *self)
5343{
5344 PyObject *err;
5345 PyObject *value = NULL;
5346 char *s;
5347
5348 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005349 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005350 Pdata_clear(self->stack, 0);
5351
5352 /* Convenient macros for the dispatch while-switch loop just below. */
5353#define OP(opcode, load_func) \
5354 case opcode: if (load_func(self) < 0) break; continue;
5355
5356#define OP_ARG(opcode, load_func, arg) \
5357 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5358
5359 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005360 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005361 break;
5362
5363 switch ((enum opcode)s[0]) {
5364 OP(NONE, load_none)
5365 OP(BININT, load_binint)
5366 OP(BININT1, load_binint1)
5367 OP(BININT2, load_binint2)
5368 OP(INT, load_int)
5369 OP(LONG, load_long)
5370 OP_ARG(LONG1, load_counted_long, 1)
5371 OP_ARG(LONG4, load_counted_long, 4)
5372 OP(FLOAT, load_float)
5373 OP(BINFLOAT, load_binfloat)
5374 OP(BINBYTES, load_binbytes)
5375 OP(SHORT_BINBYTES, load_short_binbytes)
5376 OP(BINSTRING, load_binstring)
5377 OP(SHORT_BINSTRING, load_short_binstring)
5378 OP(STRING, load_string)
5379 OP(UNICODE, load_unicode)
5380 OP(BINUNICODE, load_binunicode)
5381 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5382 OP_ARG(TUPLE1, load_counted_tuple, 1)
5383 OP_ARG(TUPLE2, load_counted_tuple, 2)
5384 OP_ARG(TUPLE3, load_counted_tuple, 3)
5385 OP(TUPLE, load_tuple)
5386 OP(EMPTY_LIST, load_empty_list)
5387 OP(LIST, load_list)
5388 OP(EMPTY_DICT, load_empty_dict)
5389 OP(DICT, load_dict)
5390 OP(OBJ, load_obj)
5391 OP(INST, load_inst)
5392 OP(NEWOBJ, load_newobj)
5393 OP(GLOBAL, load_global)
5394 OP(APPEND, load_append)
5395 OP(APPENDS, load_appends)
5396 OP(BUILD, load_build)
5397 OP(DUP, load_dup)
5398 OP(BINGET, load_binget)
5399 OP(LONG_BINGET, load_long_binget)
5400 OP(GET, load_get)
5401 OP(MARK, load_mark)
5402 OP(BINPUT, load_binput)
5403 OP(LONG_BINPUT, load_long_binput)
5404 OP(PUT, load_put)
5405 OP(POP, load_pop)
5406 OP(POP_MARK, load_pop_mark)
5407 OP(SETITEM, load_setitem)
5408 OP(SETITEMS, load_setitems)
5409 OP(PERSID, load_persid)
5410 OP(BINPERSID, load_binpersid)
5411 OP(REDUCE, load_reduce)
5412 OP(PROTO, load_proto)
5413 OP_ARG(EXT1, load_extension, 1)
5414 OP_ARG(EXT2, load_extension, 2)
5415 OP_ARG(EXT4, load_extension, 4)
5416 OP_ARG(NEWTRUE, load_bool, Py_True)
5417 OP_ARG(NEWFALSE, load_bool, Py_False)
5418
5419 case STOP:
5420 break;
5421
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005422 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005423 if (s[0] == '\0')
5424 PyErr_SetNone(PyExc_EOFError);
5425 else
5426 PyErr_Format(UnpicklingError,
5427 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005428 return NULL;
5429 }
5430
5431 break; /* and we are done! */
5432 }
5433
Antoine Pitrou04248a82010-10-12 20:51:21 +00005434 if (_Unpickler_SkipConsumed(self) < 0)
5435 return NULL;
5436
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005437 /* XXX: It is not clear what this is actually for. */
5438 if ((err = PyErr_Occurred())) {
5439 if (err == PyExc_EOFError) {
5440 PyErr_SetNone(PyExc_EOFError);
5441 }
5442 return NULL;
5443 }
5444
5445 PDATA_POP(self->stack, value);
5446 return value;
5447}
5448
5449PyDoc_STRVAR(Unpickler_load_doc,
5450"load() -> object. Load a pickle."
5451"\n"
5452"Read a pickled object representation from the open file object given in\n"
5453"the constructor, and return the reconstituted object hierarchy specified\n"
5454"therein.\n");
5455
5456static PyObject *
5457Unpickler_load(UnpicklerObject *self)
5458{
5459 /* Check whether the Unpickler was initialized correctly. This prevents
5460 segfaulting if a subclass overridden __init__ with a function that does
5461 not call Unpickler.__init__(). Here, we simply ensure that self->read
5462 is not NULL. */
5463 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005464 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005465 "Unpickler.__init__() was not called by %s.__init__()",
5466 Py_TYPE(self)->tp_name);
5467 return NULL;
5468 }
5469
5470 return load(self);
5471}
5472
5473/* The name of find_class() is misleading. In newer pickle protocols, this
5474 function is used for loading any global (i.e., functions), not just
5475 classes. The name is kept only for backward compatibility. */
5476
5477PyDoc_STRVAR(Unpickler_find_class_doc,
5478"find_class(module_name, global_name) -> object.\n"
5479"\n"
5480"Return an object from a specified module, importing the module if\n"
5481"necessary. Subclasses may override this method (e.g. to restrict\n"
5482"unpickling of arbitrary classes and functions).\n"
5483"\n"
5484"This method is called whenever a class or a function object is\n"
5485"needed. Both arguments passed are str objects.\n");
5486
5487static PyObject *
5488Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5489{
5490 PyObject *global;
5491 PyObject *modules_dict;
5492 PyObject *module;
5493 PyObject *module_name, *global_name;
5494
5495 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5496 &module_name, &global_name))
5497 return NULL;
5498
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005499 /* Try to map the old names used in Python 2.x to the new ones used in
5500 Python 3.x. We do this only with old pickle protocols and when the
5501 user has not disabled the feature. */
5502 if (self->proto < 3 && self->fix_imports) {
5503 PyObject *key;
5504 PyObject *item;
5505
5506 /* Check if the global (i.e., a function or a class) was renamed
5507 or moved to another module. */
5508 key = PyTuple_Pack(2, module_name, global_name);
5509 if (key == NULL)
5510 return NULL;
5511 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5512 Py_DECREF(key);
5513 if (item) {
5514 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5515 PyErr_Format(PyExc_RuntimeError,
5516 "_compat_pickle.NAME_MAPPING values should be "
5517 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5518 return NULL;
5519 }
5520 module_name = PyTuple_GET_ITEM(item, 0);
5521 global_name = PyTuple_GET_ITEM(item, 1);
5522 if (!PyUnicode_Check(module_name) ||
5523 !PyUnicode_Check(global_name)) {
5524 PyErr_Format(PyExc_RuntimeError,
5525 "_compat_pickle.NAME_MAPPING values should be "
5526 "pairs of str, not (%.200s, %.200s)",
5527 Py_TYPE(module_name)->tp_name,
5528 Py_TYPE(global_name)->tp_name);
5529 return NULL;
5530 }
5531 }
5532 else if (PyErr_Occurred()) {
5533 return NULL;
5534 }
5535
5536 /* Check if the module was renamed. */
5537 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5538 if (item) {
5539 if (!PyUnicode_Check(item)) {
5540 PyErr_Format(PyExc_RuntimeError,
5541 "_compat_pickle.IMPORT_MAPPING values should be "
5542 "strings, not %.200s", Py_TYPE(item)->tp_name);
5543 return NULL;
5544 }
5545 module_name = item;
5546 }
5547 else if (PyErr_Occurred()) {
5548 return NULL;
5549 }
5550 }
5551
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005552 modules_dict = PySys_GetObject("modules");
5553 if (modules_dict == NULL)
5554 return NULL;
5555
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005556 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005557 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005558 if (PyErr_Occurred())
5559 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005560 module = PyImport_Import(module_name);
5561 if (module == NULL)
5562 return NULL;
5563 global = PyObject_GetAttr(module, global_name);
5564 Py_DECREF(module);
5565 }
Victor Stinner121aab42011-09-29 23:40:53 +02005566 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005567 global = PyObject_GetAttr(module, global_name);
5568 }
5569 return global;
5570}
5571
/* Method table installed as tp_methods of Unpickler_Type: load() takes no
   arguments, find_class() takes positional arguments only. */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
5579
/* tp_dealloc for Unpickler objects: untrack the object from the GC, drop
 * every owned reference and buffer, then hand the memory back through
 * tp_free.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Untrack first so the collector cannot visit a half-destroyed object. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    /* A non-NULL buf marks a Py_buffer that is still held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    /* NOTE(review): encoding/errors are released with plain free(), so they
       are presumably allocated with malloc()/strdup() -- confirm against
       _Unpickler_SetInputEncoding(). */
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
5603
/* tp_traverse for Unpickler objects: report the PyObject references held
 * directly in the struct to the cyclic garbage collector.
 *
 * NOTE(review): the entries of the self->memo array are not visited here;
 * confirm whether that is intentional.
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->peek);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    return 0;
}
5615
/* tp_clear for Unpickler objects; also called by Unpickler_init() to discard
 * the previous state when __init__() runs again on the same object.
 *
 * Unlike Unpickler_dealloc(), every released pointer is reset to NULL
 * because the object stays alive afterwards.  Always returns 0.
 */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->peek);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    /* A non-NULL buf marks a Py_buffer that is still held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    self->marks = NULL;
    PyMem_Free(self->input_line);
    self->input_line = NULL;
    /* NOTE(review): released with plain free(); presumably allocated with
       malloc()/strdup() -- confirm against _Unpickler_SetInputEncoding(). */
    free(self->encoding);
    self->encoding = NULL;
    free(self->errors);
    self->errors = NULL;

    return 0;
}
5642
5643PyDoc_STRVAR(Unpickler_doc,
5644"Unpickler(file, *, encoding='ASCII', errors='strict')"
5645"\n"
5646"This takes a binary file for reading a pickle data stream.\n"
5647"\n"
5648"The protocol version of the pickle is detected automatically, so no\n"
5649"proto argument is needed.\n"
5650"\n"
5651"The file-like object must have two methods, a read() method\n"
5652"that takes an integer argument, and a readline() method that\n"
5653"requires no arguments. Both methods should return bytes.\n"
5654"Thus file-like object can be a binary file object opened for\n"
5655"reading, a BytesIO object, or any other custom object that\n"
5656"meets this interface.\n"
5657"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005658"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5659"which are used to control compatiblity support for pickle stream\n"
5660"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5661"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5662"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5663"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5664"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005665
/* tp_init for Unpickler objects:
 *     Unpickler(file, *, fix_imports=True, encoding=None, errors=None)
 *
 * Parses the arguments, discards any state left by a previous __init__()
 * call, then sets up the input stream, the string-decoding parameters, the
 * persistent_load hook, the data stack and the memo.
 *
 * Returns 0 on success and -1 with an exception set on failure.
 */
static int
Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
{
    static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
    PyObject *file;
    PyObject *fix_imports = Py_True;
    char *encoding = NULL;
    char *errors = NULL;
    _Py_IDENTIFIER(persistent_load);

    /* Only `file` may be passed positionally; rejecting any other count
       here is what makes the remaining arguments keyword-only. */
    /* XXX: That is an horrible error message. But, I don't know how to do
       better... */
    if (Py_SIZE(args) != 1) {
        PyErr_Format(PyExc_TypeError,
                     "%s takes exactly one positional argument (%zd given)",
                     Py_TYPE(self)->tp_name, Py_SIZE(args));
        return -1;
    }

    /* Arguments parsing needs to be done in the __init__() method to allow
       subclasses to define their own __init__() method, which may (or may
       not) support Unpickler arguments. However, this means we need to be
       extra careful in the other Unpickler methods, since a subclass could
       forget to call Unpickler.__init__() thus breaking our internal
       invariants. */
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
                                     &file, &fix_imports, &encoding, &errors))
        return -1;

    /* In case of multiple __init__() calls, clear previous content. */
    if (self->read != NULL)
        (void)Unpickler_clear(self);

    if (_Unpickler_SetInputStream(self, file) < 0)
        return -1;

    if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
        return -1;

    /* PyObject_IsTrue() returns -1 on error. */
    self->fix_imports = PyObject_IsTrue(fix_imports);
    if (self->fix_imports == -1)
        return -1;

    /* Pick up a persistent_load attribute if one is reachable on self.  For
       the base type this lookup goes through the persistent_load getter
       defined in this file, which itself reads self->pers_func, so
       pers_func must not be modified before the lookup. */
    if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
        self->pers_func = _PyObject_GetAttrId((PyObject *)self,
                                              &PyId_persistent_load);
        if (self->pers_func == NULL)
            return -1;
    }
    else {
        self->pers_func = NULL;
    }

    self->stack = (Pdata *)Pdata_New();
    if (self->stack == NULL)
        return -1;

    /* Initial memo capacity, in entries. */
    self->memo_size = 32;
    self->memo = _Unpickler_NewMemo(self->memo_size);
    if (self->memo == NULL)
        return -1;

    self->arg = NULL;
    self->proto = 0;

    return 0;
}
5733
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005734/* Define a proxy object for the Unpickler's internal memo object. This is to
5735 * avoid breaking code like:
5736 * unpickler.memo.clear()
5737 * and
5738 * unpickler.memo = saved_memo
5739 * Is this a good idea? Not really, but we don't want to break code that uses
5740 * it. Note that we don't implement the entire mapping API here. This is
5741 * intentional, as these should be treated as black-box implementation details.
5742 *
5743 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005744 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005745 */
5746
/* Instance layout of the memo proxy: the proxy holds no data of its own and
   only points back at the Unpickler whose memo it exposes. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler;     /* Strong reference (taken in
                                       UnpicklerMemoProxy_New()). */
} UnpicklerMemoProxyObject;
5751
5752PyDoc_STRVAR(ump_clear_doc,
5753"memo.clear() -> None. Remove all items from memo.");
5754
5755static PyObject *
5756ump_clear(UnpicklerMemoProxyObject *self)
5757{
5758 _Unpickler_MemoCleanup(self->unpickler);
5759 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5760 if (self->unpickler->memo == NULL)
5761 return NULL;
5762 Py_RETURN_NONE;
5763}
5764
5765PyDoc_STRVAR(ump_copy_doc,
5766"memo.copy() -> new_memo. Copy the memo to a new object.");
5767
5768static PyObject *
5769ump_copy(UnpicklerMemoProxyObject *self)
5770{
5771 Py_ssize_t i;
5772 PyObject *new_memo = PyDict_New();
5773 if (new_memo == NULL)
5774 return NULL;
5775
5776 for (i = 0; i < self->unpickler->memo_size; i++) {
5777 int status;
5778 PyObject *key, *value;
5779
5780 value = self->unpickler->memo[i];
5781 if (value == NULL)
5782 continue;
5783
5784 key = PyLong_FromSsize_t(i);
5785 if (key == NULL)
5786 goto error;
5787 status = PyDict_SetItem(new_memo, key, value);
5788 Py_DECREF(key);
5789 if (status < 0)
5790 goto error;
5791 }
5792 return new_memo;
5793
5794error:
5795 Py_DECREF(new_memo);
5796 return NULL;
5797}
5798
5799PyDoc_STRVAR(ump_reduce_doc,
5800"memo.__reduce__(). Pickling support.");
5801
5802static PyObject *
5803ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5804{
5805 PyObject *reduce_value;
5806 PyObject *constructor_args;
5807 PyObject *contents = ump_copy(self);
5808 if (contents == NULL)
5809 return NULL;
5810
5811 reduce_value = PyTuple_New(2);
5812 if (reduce_value == NULL) {
5813 Py_DECREF(contents);
5814 return NULL;
5815 }
5816 constructor_args = PyTuple_New(1);
5817 if (constructor_args == NULL) {
5818 Py_DECREF(contents);
5819 Py_DECREF(reduce_value);
5820 return NULL;
5821 }
5822 PyTuple_SET_ITEM(constructor_args, 0, contents);
5823 Py_INCREF((PyObject *)&PyDict_Type);
5824 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5825 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5826 return reduce_value;
5827}
5828
/* Method table installed as tp_methods of UnpicklerMemoProxyType.  Only this
   small subset of the mapping API is exposed. */
static PyMethodDef unpicklerproxy_methods[] = {
    {"clear",      (PyCFunction)ump_clear,  METH_NOARGS,  ump_clear_doc},
    {"copy",       (PyCFunction)ump_copy,   METH_NOARGS,  ump_copy_doc},
    {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
    {NULL, NULL}    /* sentinel */
};
5835
/* tp_dealloc for memo proxies: untrack from the GC, release the reference to
 * the owning Unpickler and free the object.
 */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    /* XDECREF: tp_clear may already have reset the pointer to NULL. */
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
5843
/* tp_traverse for memo proxies: the owning Unpickler is the only reference
 * held.
 */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
5851
/* tp_clear for memo proxies: drop the reference to the owning Unpickler.
 * Always returns 0.
 */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
5858
/* Type object for the memo proxy.  Instances are GC-tracked, unhashable
   (tp_hash is PyObject_HashNotImplemented) and expose only the methods
   listed in unpicklerproxy_methods.  Slots after tp_methods are left
   zero-initialized. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /* tp_name */
    sizeof(UnpicklerMemoProxyObject),           /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                                /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
5889
5890static PyObject *
5891UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5892{
5893 UnpicklerMemoProxyObject *self;
5894
5895 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5896 &UnpicklerMemoProxyType);
5897 if (self == NULL)
5898 return NULL;
5899 Py_INCREF(unpickler);
5900 self->unpickler = unpickler;
5901 PyObject_GC_Track(self);
5902 return (PyObject *)self;
5903}
5904
5905/*****************************************************************************/
5906
5907
/* Getter for Unpickler.memo: every access returns a new proxy object bound
 * to this Unpickler; the internal memo array itself is never exposed.
 */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self)
{
    return UnpicklerMemoProxy_New(self);
}
5913
5914static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005915Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005916{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005917 PyObject **new_memo;
5918 Py_ssize_t new_memo_size = 0;
5919 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005920
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005921 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005922 PyErr_SetString(PyExc_TypeError,
5923 "attribute deletion is not supported");
5924 return -1;
5925 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005926
5927 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5928 UnpicklerObject *unpickler =
5929 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5930
5931 new_memo_size = unpickler->memo_size;
5932 new_memo = _Unpickler_NewMemo(new_memo_size);
5933 if (new_memo == NULL)
5934 return -1;
5935
5936 for (i = 0; i < new_memo_size; i++) {
5937 Py_XINCREF(unpickler->memo[i]);
5938 new_memo[i] = unpickler->memo[i];
5939 }
5940 }
5941 else if (PyDict_Check(obj)) {
5942 Py_ssize_t i = 0;
5943 PyObject *key, *value;
5944
5945 new_memo_size = PyDict_Size(obj);
5946 new_memo = _Unpickler_NewMemo(new_memo_size);
5947 if (new_memo == NULL)
5948 return -1;
5949
5950 while (PyDict_Next(obj, &i, &key, &value)) {
5951 Py_ssize_t idx;
5952 if (!PyLong_Check(key)) {
5953 PyErr_SetString(PyExc_TypeError,
5954 "memo key must be integers");
5955 goto error;
5956 }
5957 idx = PyLong_AsSsize_t(key);
5958 if (idx == -1 && PyErr_Occurred())
5959 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02005960 if (idx < 0) {
5961 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02005962 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02005963 goto error;
5964 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005965 if (_Unpickler_MemoPut(self, idx, value) < 0)
5966 goto error;
5967 }
5968 }
5969 else {
5970 PyErr_Format(PyExc_TypeError,
5971 "'memo' attribute must be an UnpicklerMemoProxy object"
5972 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005973 return -1;
5974 }
5975
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005976 _Unpickler_MemoCleanup(self);
5977 self->memo_size = new_memo_size;
5978 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005979
5980 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005981
5982 error:
5983 if (new_memo_size) {
5984 i = new_memo_size;
5985 while (--i >= 0) {
5986 Py_XDECREF(new_memo[i]);
5987 }
5988 PyMem_FREE(new_memo);
5989 }
5990 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005991}
5992
5993static PyObject *
5994Unpickler_get_persload(UnpicklerObject *self)
5995{
5996 if (self->pers_func == NULL)
5997 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5998 else
5999 Py_INCREF(self->pers_func);
6000 return self->pers_func;
6001}
6002
6003static int
6004Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6005{
6006 PyObject *tmp;
6007
6008 if (value == NULL) {
6009 PyErr_SetString(PyExc_TypeError,
6010 "attribute deletion is not supported");
6011 return -1;
6012 }
6013 if (!PyCallable_Check(value)) {
6014 PyErr_SetString(PyExc_TypeError,
6015 "persistent_load must be a callable taking "
6016 "one argument");
6017 return -1;
6018 }
6019
6020 tmp = self->pers_func;
6021 Py_INCREF(value);
6022 self->pers_func = value;
6023 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6024
6025 return 0;
6026}
6027
/* Computed attributes of Unpickler objects, installed as tp_getset of
   Unpickler_Type.  Both setters reject attribute deletion. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}      /* sentinel */
};
6034
/* Type object for _pickle.Unpickler: a subclassable, GC-tracked type.
   Argument parsing happens in tp_init (Unpickler_init) rather than in
   tp_new, which is the generic PyType_GenericNew. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
6077
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006078PyDoc_STRVAR(pickle_dump_doc,
6079"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6080"\n"
6081"Write a pickled representation of obj to the open file object file. This\n"
6082"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6083"efficient.\n"
6084"\n"
6085"The optional protocol argument tells the pickler to use the given protocol;\n"
6086"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6087"backward-incompatible protocol designed for Python 3.0.\n"
6088"\n"
6089"Specifying a negative protocol version selects the highest protocol version\n"
6090"supported. The higher the protocol used, the more recent the version of\n"
6091"Python needed to read the pickle produced.\n"
6092"\n"
6093"The file argument must have a write() method that accepts a single bytes\n"
6094"argument. It can thus be a file object opened for binary writing, a\n"
6095"io.BytesIO instance, or any other custom object that meets this interface.\n"
6096"\n"
6097"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6098"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6099"so that the pickle data stream is readable with Python 2.x.\n");
6100
6101static PyObject *
6102pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6103{
6104 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6105 PyObject *obj;
6106 PyObject *file;
6107 PyObject *proto = NULL;
6108 PyObject *fix_imports = Py_True;
6109 PicklerObject *pickler;
6110
6111 /* fix_imports is a keyword-only argument. */
6112 if (Py_SIZE(args) > 3) {
6113 PyErr_Format(PyExc_TypeError,
6114 "pickle.dump() takes at most 3 positional "
6115 "argument (%zd given)", Py_SIZE(args));
6116 return NULL;
6117 }
6118
6119 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6120 &obj, &file, &proto, &fix_imports))
6121 return NULL;
6122
6123 pickler = _Pickler_New();
6124 if (pickler == NULL)
6125 return NULL;
6126
6127 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6128 goto error;
6129
6130 if (_Pickler_SetOutputStream(pickler, file) < 0)
6131 goto error;
6132
6133 if (dump(pickler, obj) < 0)
6134 goto error;
6135
6136 if (_Pickler_FlushToFile(pickler) < 0)
6137 goto error;
6138
6139 Py_DECREF(pickler);
6140 Py_RETURN_NONE;
6141
6142 error:
6143 Py_XDECREF(pickler);
6144 return NULL;
6145}
6146
6147PyDoc_STRVAR(pickle_dumps_doc,
6148"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6149"\n"
6150"Return the pickled representation of the object as a bytes\n"
6151"object, instead of writing it to a file.\n"
6152"\n"
6153"The optional protocol argument tells the pickler to use the given protocol;\n"
6154"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6155"backward-incompatible protocol designed for Python 3.0.\n"
6156"\n"
6157"Specifying a negative protocol version selects the highest protocol version\n"
6158"supported. The higher the protocol used, the more recent the version of\n"
6159"Python needed to read the pickle produced.\n"
6160"\n"
6161"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6162"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6163"so that the pickle data stream is readable with Python 2.x.\n");
6164
6165static PyObject *
6166pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6167{
6168 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6169 PyObject *obj;
6170 PyObject *proto = NULL;
6171 PyObject *result;
6172 PyObject *fix_imports = Py_True;
6173 PicklerObject *pickler;
6174
6175 /* fix_imports is a keyword-only argument. */
6176 if (Py_SIZE(args) > 2) {
6177 PyErr_Format(PyExc_TypeError,
6178 "pickle.dumps() takes at most 2 positional "
6179 "argument (%zd given)", Py_SIZE(args));
6180 return NULL;
6181 }
6182
6183 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6184 &obj, &proto, &fix_imports))
6185 return NULL;
6186
6187 pickler = _Pickler_New();
6188 if (pickler == NULL)
6189 return NULL;
6190
6191 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6192 goto error;
6193
6194 if (dump(pickler, obj) < 0)
6195 goto error;
6196
6197 result = _Pickler_GetString(pickler);
6198 Py_DECREF(pickler);
6199 return result;
6200
6201 error:
6202 Py_XDECREF(pickler);
6203 return NULL;
6204}
6205
6206PyDoc_STRVAR(pickle_load_doc,
6207"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6208"\n"
6209"Read a pickled object representation from the open file object file and\n"
6210"return the reconstituted object hierarchy specified therein. This is\n"
6211"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6212"\n"
6213"The protocol version of the pickle is detected automatically, so no protocol\n"
6214"argument is needed. Bytes past the pickled object's representation are\n"
6215"ignored.\n"
6216"\n"
6217"The argument file must have two methods, a read() method that takes an\n"
6218"integer argument, and a readline() method that requires no arguments. Both\n"
6219"methods should return bytes. Thus *file* can be a binary file object opened\n"
6220"for reading, a BytesIO object, or any other custom object that meets this\n"
6221"interface.\n"
6222"\n"
6223"Optional keyword arguments are fix_imports, encoding and errors,\n"
6224"which are used to control compatiblity support for pickle stream generated\n"
6225"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6226"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6227"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6228"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6229
6230static PyObject *
6231pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6232{
6233 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6234 PyObject *file;
6235 PyObject *fix_imports = Py_True;
6236 PyObject *result;
6237 char *encoding = NULL;
6238 char *errors = NULL;
6239 UnpicklerObject *unpickler;
6240
6241 /* fix_imports, encoding and errors are a keyword-only argument. */
6242 if (Py_SIZE(args) != 1) {
6243 PyErr_Format(PyExc_TypeError,
6244 "pickle.load() takes exactly one positional "
6245 "argument (%zd given)", Py_SIZE(args));
6246 return NULL;
6247 }
6248
6249 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6250 &file, &fix_imports, &encoding, &errors))
6251 return NULL;
6252
6253 unpickler = _Unpickler_New();
6254 if (unpickler == NULL)
6255 return NULL;
6256
6257 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6258 goto error;
6259
6260 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6261 goto error;
6262
6263 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6264 if (unpickler->fix_imports == -1)
6265 goto error;
6266
6267 result = load(unpickler);
6268 Py_DECREF(unpickler);
6269 return result;
6270
6271 error:
6272 Py_XDECREF(unpickler);
6273 return NULL;
6274}
6275
6276PyDoc_STRVAR(pickle_loads_doc,
6277"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6278"\n"
6279"Read a pickled object hierarchy from a bytes object and return the\n"
6280"reconstituted object hierarchy specified therein\n"
6281"\n"
6282"The protocol version of the pickle is detected automatically, so no protocol\n"
6283"argument is needed. Bytes past the pickled object's representation are\n"
6284"ignored.\n"
6285"\n"
6286"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6287"are used to control compatiblity support for pickle stream generated\n"
6288"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6289"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6290"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6291"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6292
6293static PyObject *
6294pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6295{
6296 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6297 PyObject *input;
6298 PyObject *fix_imports = Py_True;
6299 PyObject *result;
6300 char *encoding = NULL;
6301 char *errors = NULL;
6302 UnpicklerObject *unpickler;
6303
6304 /* fix_imports, encoding and errors are a keyword-only argument. */
6305 if (Py_SIZE(args) != 1) {
6306 PyErr_Format(PyExc_TypeError,
6307 "pickle.loads() takes exactly one positional "
6308 "argument (%zd given)", Py_SIZE(args));
6309 return NULL;
6310 }
6311
6312 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6313 &input, &fix_imports, &encoding, &errors))
6314 return NULL;
6315
6316 unpickler = _Unpickler_New();
6317 if (unpickler == NULL)
6318 return NULL;
6319
6320 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6321 goto error;
6322
6323 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6324 goto error;
6325
6326 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6327 if (unpickler->fix_imports == -1)
6328 goto error;
6329
6330 result = load(unpickler);
6331 Py_DECREF(unpickler);
6332 return result;
6333
6334 error:
6335 Py_XDECREF(unpickler);
6336 return NULL;
6337}
6338
6339
6340static struct PyMethodDef pickle_methods[] = {
6341 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6342 pickle_dump_doc},
6343 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6344 pickle_dumps_doc},
6345 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6346 pickle_load_doc},
6347 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6348 pickle_loads_doc},
6349 {NULL, NULL} /* sentinel */
6350};
6351
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006352static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006353initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006354{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006355 PyObject *copyreg = NULL;
6356 PyObject *compat_pickle = NULL;
6357
6358 /* XXX: We should ensure that the types of the dictionaries imported are
6359 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6360 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006361
6362 copyreg = PyImport_ImportModule("copyreg");
6363 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006364 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006365 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6366 if (!dispatch_table)
6367 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006368 extension_registry = \
6369 PyObject_GetAttrString(copyreg, "_extension_registry");
6370 if (!extension_registry)
6371 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006372 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6373 if (!inverted_registry)
6374 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006375 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6376 if (!extension_cache)
6377 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006378 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006379
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006380 /* Load the 2.x -> 3.x stdlib module mapping tables */
6381 compat_pickle = PyImport_ImportModule("_compat_pickle");
6382 if (!compat_pickle)
6383 goto error;
6384 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6385 if (!name_mapping_2to3)
6386 goto error;
6387 if (!PyDict_CheckExact(name_mapping_2to3)) {
6388 PyErr_Format(PyExc_RuntimeError,
6389 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6390 Py_TYPE(name_mapping_2to3)->tp_name);
6391 goto error;
6392 }
6393 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6394 "IMPORT_MAPPING");
6395 if (!import_mapping_2to3)
6396 goto error;
6397 if (!PyDict_CheckExact(import_mapping_2to3)) {
6398 PyErr_Format(PyExc_RuntimeError,
6399 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6400 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6401 goto error;
6402 }
6403 /* ... and the 3.x -> 2.x mapping tables */
6404 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6405 "REVERSE_NAME_MAPPING");
6406 if (!name_mapping_3to2)
6407 goto error;
6408 if (!PyDict_CheckExact(name_mapping_3to2)) {
6409 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006410 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006411 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6412 goto error;
6413 }
6414 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6415 "REVERSE_IMPORT_MAPPING");
6416 if (!import_mapping_3to2)
6417 goto error;
6418 if (!PyDict_CheckExact(import_mapping_3to2)) {
6419 PyErr_Format(PyExc_RuntimeError,
6420 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6421 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6422 goto error;
6423 }
6424 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006425
6426 empty_tuple = PyTuple_New(0);
6427 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006428 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006429 two_tuple = PyTuple_New(2);
6430 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006431 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006432 /* We use this temp container with no regard to refcounts, or to
6433 * keeping containees alive. Exempt from GC, because we don't
6434 * want anything looking at two_tuple() by magic.
6435 */
6436 PyObject_GC_UnTrack(two_tuple);
6437
6438 return 0;
6439
6440 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006441 Py_CLEAR(copyreg);
6442 Py_CLEAR(dispatch_table);
6443 Py_CLEAR(extension_registry);
6444 Py_CLEAR(inverted_registry);
6445 Py_CLEAR(extension_cache);
6446 Py_CLEAR(compat_pickle);
6447 Py_CLEAR(name_mapping_2to3);
6448 Py_CLEAR(import_mapping_2to3);
6449 Py_CLEAR(name_mapping_3to2);
6450 Py_CLEAR(import_mapping_3to2);
6451 Py_CLEAR(empty_tuple);
6452 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006453 return -1;
6454}
6455
6456static struct PyModuleDef _picklemodule = {
6457 PyModuleDef_HEAD_INIT,
6458 "_pickle",
6459 pickle_module_doc,
6460 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006461 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006462 NULL,
6463 NULL,
6464 NULL,
6465 NULL
6466};
6467
6468PyMODINIT_FUNC
6469PyInit__pickle(void)
6470{
6471 PyObject *m;
6472
6473 if (PyType_Ready(&Unpickler_Type) < 0)
6474 return NULL;
6475 if (PyType_Ready(&Pickler_Type) < 0)
6476 return NULL;
6477 if (PyType_Ready(&Pdata_Type) < 0)
6478 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006479 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6480 return NULL;
6481 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6482 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006483
6484 /* Create the module and add the functions. */
6485 m = PyModule_Create(&_picklemodule);
6486 if (m == NULL)
6487 return NULL;
6488
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006489 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006490 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6491 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006492 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006493 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6494 return NULL;
6495
6496 /* Initialize the exceptions. */
6497 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6498 if (PickleError == NULL)
6499 return NULL;
6500 PicklingError = \
6501 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6502 if (PicklingError == NULL)
6503 return NULL;
6504 UnpicklingError = \
6505 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6506 if (UnpicklingError == NULL)
6507 return NULL;
6508
6509 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6510 return NULL;
6511 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6512 return NULL;
6513 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6514 return NULL;
6515
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006516 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006517 return NULL;
6518
6519 return m;
6520}