blob: d75df3e3f272e037ff2ddfdaa4637f4d1e0ff401 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004/*[clinic]
5module _pickle
6class _pickle.Pickler
7class _pickle.PicklerMemoProxy
8class _pickle.Unpickler
9class _pickle.UnpicklerMemoProxy
10[clinic]*/
11/*[clinic checksum: da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
12
13/*[python]
14class PicklerObject_converter(self_converter):
15 type = "PicklerObject *"
16
17class PicklerMemoProxyObject_converter(self_converter):
18 type = "PicklerMemoProxyObject *"
19
20class UnpicklerObject_converter(self_converter):
21 type = "UnpicklerObject *"
22
23class UnpicklerMemoProxyObject_converter(self_converter):
24 type = "UnpicklerMemoProxyObject *"
25[python]*/
26/*[python checksum: da39a3ee5e6b4b0d3255bfef95601890afd80709]*/
27
Alexandre Vassalottica2d6102008-06-12 18:26:05 +000028PyDoc_STRVAR(pickle_module_doc,
29"Optimized C implementation for the Python pickle module.");
30
/* Protocol numbers known to this module.  HIGHEST_PROTOCOL has to be bumped
   whenever new opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 4
};
36
/* Pickle opcodes.  Each opcode is a single byte, written here as a character
   literal.  This list must be kept updated with pickle.py; extensive
   documentation for every opcode lives in pickletools.py. */
enum opcode {
    MARK             = '(',
    STOP             = '.',
    POP              = '0',
    POP_MARK         = '1',
    DUP              = '2',
    FLOAT            = 'F',
    INT              = 'I',
    BININT           = 'J',
    BININT1          = 'K',
    LONG             = 'L',
    BININT2          = 'M',
    NONE             = 'N',
    PERSID           = 'P',
    BINPERSID        = 'Q',
    REDUCE           = 'R',
    STRING           = 'S',
    BINSTRING        = 'T',
    SHORT_BINSTRING  = 'U',
    UNICODE          = 'V',
    BINUNICODE       = 'X',
    APPEND           = 'a',
    BUILD            = 'b',
    GLOBAL           = 'c',
    DICT             = 'd',
    EMPTY_DICT       = '}',
    APPENDS          = 'e',
    GET              = 'g',
    BINGET           = 'h',
    INST             = 'i',
    LONG_BINGET      = 'j',
    LIST             = 'l',
    EMPTY_LIST       = ']',
    OBJ              = 'o',
    PUT              = 'p',
    BINPUT           = 'q',
    LONG_BINPUT      = 'r',
    SETITEM          = 's',
    TUPLE            = 't',
    EMPTY_TUPLE      = ')',
    SETITEMS         = 'u',
    BINFLOAT         = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO            = '\x80',
    NEWOBJ           = '\x81',
    EXT1             = '\x82',
    EXT2             = '\x83',
    EXT4             = '\x84',
    TUPLE1           = '\x85',
    TUPLE2           = '\x86',
    TUPLE3           = '\x87',
    NEWTRUE          = '\x88',
    NEWFALSE         = '\x89',
    LONG1            = '\x8a',
    LONG4            = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES         = 'B',
    SHORT_BINBYTES   = 'C',

    /* Opcodes introduced by protocol 4. */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95'
};
112
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE.
       Nothing will break if this gets out of sync with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16,

    /* Frame payload size at which _Pickler_OpcodeBoundary() commits the
       current frame. */
    FRAME_SIZE_TARGET = 64 * 1024,

    /* Bytes reserved for a frame header: the FRAME opcode byte followed by
       an 8-byte length. */
    FRAME_HEADER_SIZE = 9
};
134
135/* Exception classes for pickle. These should override the ones defined in
136 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *PickleError = NULL;
138static PyObject *PicklingError = NULL;
139static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000140
141/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000142static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000143/* For EXT[124] opcodes. */
144/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000145static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000146/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000147static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000148/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000149static PyObject *extension_cache = NULL;
150
151/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
152static PyObject *name_mapping_2to3 = NULL;
153/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
154static PyObject *import_mapping_2to3 = NULL;
155/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
156static PyObject *name_mapping_3to2 = NULL;
157static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000158
159/* XXX: Are these really nescessary? */
160/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000161static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000162/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000163static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000164
165static int
166stack_underflow(void)
167{
168 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
169 return -1;
170}
171
172/* Internal data type used as the unpickling stack. */
173typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000174 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000175 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000176 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000177} Pdata;
178
179static void
180Pdata_dealloc(Pdata *self)
181{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200182 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000183 while (--i >= 0) {
184 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000185 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000186 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000187 PyObject_Del(self);
188}
189
190static PyTypeObject Pdata_Type = {
191 PyVarObject_HEAD_INIT(NULL, 0)
192 "_pickle.Pdata", /*tp_name*/
193 sizeof(Pdata), /*tp_basicsize*/
194 0, /*tp_itemsize*/
195 (destructor)Pdata_dealloc, /*tp_dealloc*/
196};
197
198static PyObject *
199Pdata_New(void)
200{
201 Pdata *self;
202
203 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
204 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = 0;
206 self->allocated = 8;
207 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000208 if (self->data)
209 return (PyObject *)self;
210 Py_DECREF(self);
211 return PyErr_NoMemory();
212}
213
214
215/* Retain only the initial clearto items. If clearto >= the current
216 * number of items, this is a (non-erroneous) NOP.
217 */
218static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200219Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000220{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200221 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222
223 if (clearto < 0)
224 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000225 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000226 return 0;
227
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000228 while (--i >= clearto) {
229 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000230 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000231 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000232 return 0;
233}
234
235static int
236Pdata_grow(Pdata *self)
237{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000238 PyObject **data = self->data;
239 Py_ssize_t allocated = self->allocated;
240 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000241
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000242 new_allocated = (allocated >> 3) + 6;
243 /* check for integer overflow */
244 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000245 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000246 new_allocated += allocated;
247 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000249 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
250 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000251 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000252
253 self->data = data;
254 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return 0;
256
257 nomemory:
258 PyErr_NoMemory();
259 return -1;
260}
261
262/* D is a Pdata*. Pop the topmost element and store it into V, which
263 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
264 * is raised and V is set to NULL.
265 */
266static PyObject *
267Pdata_pop(Pdata *self)
268{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000269 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000270 PyErr_SetString(UnpicklingError, "bad pickle data");
271 return NULL;
272 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000273 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000274}
275#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
276
277static int
278Pdata_push(Pdata *self, PyObject *obj)
279{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000280 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000281 return -1;
282 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return 0;
285}
286
/* Push object O on stack D, transferring its ownership to the stack.
   Makes the enclosing function return ER if the push fails. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push object O on stack D, adding a new reference to the object.
   Makes the enclosing function return ER if the push fails. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
295
296static PyObject *
297Pdata_poptuple(Pdata *self, Py_ssize_t start)
298{
299 PyObject *tuple;
300 Py_ssize_t len, i, j;
301
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000302 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000303 tuple = PyTuple_New(len);
304 if (tuple == NULL)
305 return NULL;
306 for (i = start, j = 0; j < len; i++, j++)
307 PyTuple_SET_ITEM(tuple, j, self->data[i]);
308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000309 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000310 return tuple;
311}
312
313static PyObject *
314Pdata_poplist(Pdata *self, Py_ssize_t start)
315{
316 PyObject *list;
317 Py_ssize_t len, i, j;
318
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000319 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000320 list = PyList_New(len);
321 if (list == NULL)
322 return NULL;
323 for (i = start, j = 0; j < len; i++, j++)
324 PyList_SET_ITEM(list, j, self->data[i]);
325
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000326 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000327 return list;
328}
329
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000330typedef struct {
331 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200332 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000333} PyMemoEntry;
334
335typedef struct {
336 Py_ssize_t mt_mask;
337 Py_ssize_t mt_used;
338 Py_ssize_t mt_allocated;
339 PyMemoEntry *mt_table;
340} PyMemoTable;
341
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000342typedef struct PicklerObject {
343 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000344 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000345 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000346 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000347 PyObject *pers_func; /* persistent_id() method, can be NULL */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100348 PyObject *dispatch_table; /* private dispatch_table, can be NULL */
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800349 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000350
351 PyObject *write; /* write() method of the output stream. */
352 PyObject *output_buffer; /* Write into a local bytearray buffer before
353 flushing to the stream. */
354 Py_ssize_t output_len; /* Length of output_buffer. */
355 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000356 int proto; /* Pickle protocol number, >= 0 */
357 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100358 int framing; /* True when framing is enabled, proto >= 4 */
359 Py_ssize_t frame_start; /* Position in output_buffer where the
360 where the current frame begins. -1 if there
361 is no frame currently open. */
362
363 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000364 int fast; /* Enable fast mode if set to a true value.
365 The fast mode disable the usage of memo,
366 therefore speeding the pickling process by
367 not generating superfluous PUT opcodes. It
368 should not be used if with self-referential
369 objects. */
370 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000371 int fix_imports; /* Indicate whether Pickler should fix
372 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 PyObject *fast_memo;
374} PicklerObject;
375
376typedef struct UnpicklerObject {
377 PyObject_HEAD
378 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000379
380 /* The unpickler memo is just an array of PyObject *s. Using a dict
381 is unnecessary, since the keys are contiguous ints. */
382 PyObject **memo;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100383 Py_ssize_t memo_size; /* Capacity of the memo array */
384 Py_ssize_t memo_len; /* Number of objects in the memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000385
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800386 PyObject *arg;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000387 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000388
389 Py_buffer buffer;
390 char *input_buffer;
391 char *input_line;
392 Py_ssize_t input_len;
393 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000394 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100395
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000396 PyObject *read; /* read() method of the input stream. */
397 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000398 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000399
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000400 char *encoding; /* Name of the encoding to be used for
401 decoding strings pickled using Python
402 2.x. The default value is "ASCII" */
403 char *errors; /* Name of errors handling scheme to used when
404 decoding strings. The default value is
405 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500406 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000407 objects. */
408 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
409 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000410 int proto; /* Protocol of the pickle loaded. */
411 int fix_imports; /* Indicate whether Unpickler should fix
412 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000413} UnpicklerObject;
414
415/* Forward declarations */
416static int save(PicklerObject *, PyObject *, int);
417static int save_reduce(PicklerObject *, PyObject *, PyObject *);
418static PyTypeObject Pickler_Type;
419static PyTypeObject Unpickler_Type;
420
421
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000422/*************************************************************************
Serhiy Storchaka95949422013-08-27 19:40:23 +0300423 A custom hashtable mapping void* to Python ints. This is used by the pickler
424 for memoization. Using a custom hashtable rather than PyDict allows us to skip
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000425 a bunch of unnecessary object creation. This makes a huge performance
426 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000427
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000428#define MT_MINSIZE 8
429#define PERTURB_SHIFT 5
430
431
432static PyMemoTable *
433PyMemoTable_New(void)
434{
435 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
436 if (memo == NULL) {
437 PyErr_NoMemory();
438 return NULL;
439 }
440
441 memo->mt_used = 0;
442 memo->mt_allocated = MT_MINSIZE;
443 memo->mt_mask = MT_MINSIZE - 1;
444 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
445 if (memo->mt_table == NULL) {
446 PyMem_FREE(memo);
447 PyErr_NoMemory();
448 return NULL;
449 }
450 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
451
452 return memo;
453}
454
455static PyMemoTable *
456PyMemoTable_Copy(PyMemoTable *self)
457{
458 Py_ssize_t i;
459 PyMemoTable *new = PyMemoTable_New();
460 if (new == NULL)
461 return NULL;
462
463 new->mt_used = self->mt_used;
464 new->mt_allocated = self->mt_allocated;
465 new->mt_mask = self->mt_mask;
466 /* The table we get from _New() is probably smaller than we wanted.
467 Free it and allocate one that's the right size. */
468 PyMem_FREE(new->mt_table);
469 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
470 if (new->mt_table == NULL) {
471 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200472 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000473 return NULL;
474 }
475 for (i = 0; i < self->mt_allocated; i++) {
476 Py_XINCREF(self->mt_table[i].me_key);
477 }
478 memcpy(new->mt_table, self->mt_table,
479 sizeof(PyMemoEntry) * self->mt_allocated);
480
481 return new;
482}
483
484static Py_ssize_t
485PyMemoTable_Size(PyMemoTable *self)
486{
487 return self->mt_used;
488}
489
490static int
491PyMemoTable_Clear(PyMemoTable *self)
492{
493 Py_ssize_t i = self->mt_allocated;
494
495 while (--i >= 0) {
496 Py_XDECREF(self->mt_table[i].me_key);
497 }
498 self->mt_used = 0;
499 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
500 return 0;
501}
502
503static void
504PyMemoTable_Del(PyMemoTable *self)
505{
506 if (self == NULL)
507 return;
508 PyMemoTable_Clear(self);
509
510 PyMem_FREE(self->mt_table);
511 PyMem_FREE(self);
512}
513
514/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
515 can be considerably simpler than dictobject.c's lookdict(). */
516static PyMemoEntry *
517_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
518{
519 size_t i;
520 size_t perturb;
521 size_t mask = (size_t)self->mt_mask;
522 PyMemoEntry *table = self->mt_table;
523 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000524 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000525
526 i = hash & mask;
527 entry = &table[i];
528 if (entry->me_key == NULL || entry->me_key == key)
529 return entry;
530
531 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
532 i = (i << 2) + i + perturb + 1;
533 entry = &table[i & mask];
534 if (entry->me_key == NULL || entry->me_key == key)
535 return entry;
536 }
537 assert(0); /* Never reached */
538 return NULL;
539}
540
541/* Returns -1 on failure, 0 on success. */
542static int
543_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
544{
545 PyMemoEntry *oldtable = NULL;
546 PyMemoEntry *oldentry, *newentry;
547 Py_ssize_t new_size = MT_MINSIZE;
548 Py_ssize_t to_process;
549
550 assert(min_size > 0);
551
552 /* Find the smallest valid table size >= min_size. */
553 while (new_size < min_size && new_size > 0)
554 new_size <<= 1;
555 if (new_size <= 0) {
556 PyErr_NoMemory();
557 return -1;
558 }
559 /* new_size needs to be a power of two. */
560 assert((new_size & (new_size - 1)) == 0);
561
562 /* Allocate new table. */
563 oldtable = self->mt_table;
564 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
565 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200566 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000567 PyErr_NoMemory();
568 return -1;
569 }
570 self->mt_allocated = new_size;
571 self->mt_mask = new_size - 1;
572 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
573
574 /* Copy entries from the old table. */
575 to_process = self->mt_used;
576 for (oldentry = oldtable; to_process > 0; oldentry++) {
577 if (oldentry->me_key != NULL) {
578 to_process--;
579 /* newentry is a pointer to a chunk of the new
580 mt_table, so we're setting the key:value pair
581 in-place. */
582 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
583 newentry->me_key = oldentry->me_key;
584 newentry->me_value = oldentry->me_value;
585 }
586 }
587
588 /* Deallocate the old table. */
589 PyMem_FREE(oldtable);
590 return 0;
591}
592
593/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200594static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000595PyMemoTable_Get(PyMemoTable *self, PyObject *key)
596{
597 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
598 if (entry->me_key == NULL)
599 return NULL;
600 return &entry->me_value;
601}
602
603/* Returns -1 on failure, 0 on success. */
604static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200605PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000606{
607 PyMemoEntry *entry;
608
609 assert(key != NULL);
610
611 entry = _PyMemoTable_Lookup(self, key);
612 if (entry->me_key != NULL) {
613 entry->me_value = value;
614 return 0;
615 }
616 Py_INCREF(key);
617 entry->me_key = key;
618 entry->me_value = value;
619 self->mt_used++;
620
621 /* If we added a key, we can safely resize. Otherwise just return!
622 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
623 *
624 * Quadrupling the size improves average table sparseness
625 * (reducing collisions) at the cost of some memory. It also halves
626 * the number of expensive resize operations in a growing memo table.
627 *
628 * Very large memo tables (over 50K items) use doubling instead.
629 * This may help applications with severe memory constraints.
630 */
631 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
632 return 0;
633 return _PyMemoTable_ResizeTable(self,
634 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
635}
636
637#undef MT_MINSIZE
638#undef PERTURB_SHIFT
639
640/*************************************************************************/
641
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* Store obj as the only item of the cached 1-tuple (self)->arg, creating
   the tuple on first use and releasing whatever item it held before.  The
   reference to obj is consumed in every case: if the tuple cannot be
   created, obj is released and (self)->arg stays NULL. */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {      \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple when its reference count shows that it is also
   referenced from elsewhere.  (self)->arg must not be NULL. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
661
662/* A temporary cleaner API for fast single argument function call.
663
664 XXX: Does caching the argument tuple provides any real performance benefits?
665
666 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
667 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
668 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
669 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
670 (i.e, call PyTuple_New() and store the returned value in an array), to save
671 one second (wall clock time). Either ways, the loading time a pickle stream
672 large enough to generate this number of calls would be massively
673 overwhelmed by other factors, like I/O throughput, the GC traversal and
674 object allocation overhead. So, I really doubt these functions provide any
675 real benefits.
676
677 On the other hand, oprofile reports that pickle spends a lot of time in
678 these functions. But, that is probably more related to the function call
679 overhead, than the argument tuple allocation.
680
681 XXX: And, what is the reference behavior of these? Steal, borrow? At first
682 glance, it seems to steal the reference of 'arg' and borrow the reference
683 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000684static PyObject *
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800685_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000686{
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800687 PyObject *result = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000688
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800689 ARG_TUP(self, arg);
690 if (self->arg) {
691 result = PyObject_Call(func, self->arg, NULL);
692 FREE_ARG_TUP(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000693 }
694 return result;
695}
696
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000697static int
698_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000699{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000700 Py_CLEAR(self->output_buffer);
701 self->output_buffer =
702 PyBytes_FromStringAndSize(NULL, self->max_output_len);
703 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000704 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000705 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100706 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000707 return 0;
708}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000709
/* Store value into out[0..7] as an 8-byte little-endian integer.
   out must have room for 8 bytes.  When size_t is narrower than 8 bytes the
   remaining high-order bytes are set to zero. */
static void
_write_size64(char *out, size_t value)
{
    /* Unsigned index: it is compared against sizeof(), which is unsigned. */
    size_t i;

    assert(sizeof(size_t) <= 8);

    for (i = 0; i < sizeof(size_t); i++) {
        out[i] = (unsigned char)((value >> (8 * i)) & 0xff);
    }
    /* Zero-pad up to 8 bytes on platforms with a narrower size_t. */
    for (i = sizeof(size_t); i < 8; i++) {
        out[i] = 0;
    }
}
724
725static void
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100726_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
727{
Antoine Pitrou8f2ee6e2013-11-23 21:05:08 +0100728 qdata[0] = FRAME;
729 _write_size64(qdata + 1, frame_len);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100730}
731
732static int
733_Pickler_CommitFrame(PicklerObject *self)
734{
735 size_t frame_len;
736 char *qdata;
737
738 if (!self->framing || self->frame_start == -1)
739 return 0;
740 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
741 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
742 _Pickler_WriteFrameHeader(self, qdata, frame_len);
743 self->frame_start = -1;
744 return 0;
745}
746
747static int
748_Pickler_OpcodeBoundary(PicklerObject *self)
749{
750 Py_ssize_t frame_len;
751
752 if (!self->framing || self->frame_start == -1)
753 return 0;
754 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
755 if (frame_len >= FRAME_SIZE_TARGET)
756 return _Pickler_CommitFrame(self);
757 else
758 return 0;
759}
760
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000761static PyObject *
762_Pickler_GetString(PicklerObject *self)
763{
764 PyObject *output_buffer = self->output_buffer;
765
766 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100767
768 if (_Pickler_CommitFrame(self))
769 return NULL;
770
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000771 self->output_buffer = NULL;
772 /* Resize down to exact size */
773 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
774 return NULL;
775 return output_buffer;
776}
777
778static int
779_Pickler_FlushToFile(PicklerObject *self)
780{
781 PyObject *output, *result;
782
783 assert(self->write != NULL);
784
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100785 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000786 output = _Pickler_GetString(self);
787 if (output == NULL)
788 return -1;
789
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800790 result = _Pickler_FastCall(self, self->write, output);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000791 Py_XDECREF(result);
792 return (result == NULL) ? -1 : 0;
793}
794
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200795static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100796_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000797{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100798 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000799 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100800 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000801
802 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100803 need_new_frame = (self->framing && self->frame_start == -1);
804
805 if (need_new_frame)
806 n = data_len + FRAME_HEADER_SIZE;
807 else
808 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000809
810 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100811 if (required > self->max_output_len) {
812 /* Make place in buffer for the pickle chunk */
813 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
814 PyErr_NoMemory();
815 return -1;
816 }
817 self->max_output_len = (self->output_len + n) / 2 * 3;
818 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
819 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100822 if (need_new_frame) {
823 /* Setup new frame */
824 Py_ssize_t frame_start = self->output_len;
825 self->frame_start = frame_start;
826 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
827 /* Write an invalid value, for debugging */
828 buffer[frame_start + i] = 0xFE;
829 }
830 self->output_len += FRAME_HEADER_SIZE;
831 }
832 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000833 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100834 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000835 buffer[self->output_len + i] = s[i];
836 }
837 }
838 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100839 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000840 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100841 self->output_len += data_len;
842 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000843}
844
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000845static PicklerObject *
846_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000847{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000848 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000849
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000850 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
851 if (self == NULL)
852 return NULL;
853
854 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100855 self->dispatch_table = NULL;
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800856 self->arg = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000857 self->write = NULL;
858 self->proto = 0;
859 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100860 self->framing = 0;
861 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000862 self->fast = 0;
863 self->fast_nesting = 0;
864 self->fix_imports = 0;
865 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000866 self->max_output_len = WRITE_BUF_SIZE;
867 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200868
869 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000870 self->output_buffer = PyBytes_FromStringAndSize(NULL,
871 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200872
873 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200874 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000875 return NULL;
876 }
877 return self;
878}
879
880static int
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800881_Pickler_SetProtocol(PicklerObject *self, PyObject *protocol, int fix_imports)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000882{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800883 long proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000884
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800885 if (protocol == NULL || protocol == Py_None) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000886 proto = DEFAULT_PROTOCOL;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800887 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000888 else {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800889 proto = PyLong_AsLong(protocol);
890 if (proto < 0) {
891 if (proto == -1 && PyErr_Occurred())
892 return -1;
893 proto = HIGHEST_PROTOCOL;
894 }
895 else if (proto > HIGHEST_PROTOCOL) {
896 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
897 HIGHEST_PROTOCOL);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000898 return -1;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800899 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000900 }
Alexandre Vassalottied8c9062013-11-24 12:25:48 -0800901 self->proto = (int)proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000902 self->bin = proto > 0;
903 self->fix_imports = fix_imports && proto < 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000904 return 0;
905}
906
907/* Returns -1 (with an exception set) on failure, 0 on success. This may
908 be called once on a freshly created Pickler. */
909static int
910_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
911{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200912 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000913 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200914 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915 if (self->write == NULL) {
916 if (PyErr_ExceptionMatches(PyExc_AttributeError))
917 PyErr_SetString(PyExc_TypeError,
918 "file must have a 'write' attribute");
919 return -1;
920 }
921
922 return 0;
923}
924
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -0800925/* See documentation for _Pickler_FastCall(). */
926static PyObject *
927_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
928{
929 PyObject *result = NULL;
930
931 ARG_TUP(self, arg);
932 if (self->arg) {
933 result = PyObject_Call(func, self->arg, NULL);
934 FREE_ARG_TUP(self);
935 }
936 return result;
937}
938
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000939/* Returns the size of the input on success, -1 on failure. This takes its
940 own reference to `input`. */
941static Py_ssize_t
942_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
943{
944 if (self->buffer.buf != NULL)
945 PyBuffer_Release(&self->buffer);
946 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
947 return -1;
948 self->input_buffer = self->buffer.buf;
949 self->input_len = self->buffer.len;
950 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000951 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000952 return self->input_len;
953}
954
Antoine Pitrou04248a82010-10-12 20:51:21 +0000955static int
956_Unpickler_SkipConsumed(UnpicklerObject *self)
957{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100958 Py_ssize_t consumed;
959 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000960
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100961 consumed = self->next_read_idx - self->prefetched_idx;
962 if (consumed <= 0)
963 return 0;
964
965 assert(self->peek); /* otherwise we did something wrong */
966 /* This makes an useless copy... */
967 r = PyObject_CallFunction(self->read, "n", consumed);
968 if (r == NULL)
969 return -1;
970 Py_DECREF(r);
971
972 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000973 return 0;
974}
975
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000976static const Py_ssize_t READ_WHOLE_LINE = -1;
977
978/* If reading from a file, we need to only pull the bytes we need, since there
979 may be multiple pickle objects arranged contiguously in the same input
980 buffer.
981
982 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
983 bytes from the input stream/buffer.
984
985 Update the unpickler's input buffer with the newly-read data. Returns -1 on
986 failure; on success, returns the number of bytes read from the file.
987
988 On success, self->input_len will be 0; this is intentional so that when
989 unpickling from a file, the "we've run out of data" code paths will trigger,
990 causing the Unpickler to go back to the file for more data. Use the returned
991 size to tell you how much data you can process. */
992static Py_ssize_t
993_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
994{
995 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000996 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000997
998 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200999
Antoine Pitrou04248a82010-10-12 20:51:21 +00001000 if (_Unpickler_SkipConsumed(self) < 0)
1001 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002
1003 if (n == READ_WHOLE_LINE)
1004 data = PyObject_Call(self->readline, empty_tuple, NULL);
1005 else {
1006 PyObject *len = PyLong_FromSsize_t(n);
1007 if (len == NULL)
1008 return -1;
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08001009 data = _Unpickler_FastCall(self, self->read, len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001010 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001011 if (data == NULL)
1012 return -1;
1013
Antoine Pitrou04248a82010-10-12 20:51:21 +00001014 /* Prefetch some data without advancing the file pointer, if possible */
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08001015 if (self->peek) {
Antoine Pitrou04248a82010-10-12 20:51:21 +00001016 PyObject *len, *prefetched;
1017 len = PyLong_FromSsize_t(PREFETCH);
1018 if (len == NULL) {
1019 Py_DECREF(data);
1020 return -1;
1021 }
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08001022 prefetched = _Unpickler_FastCall(self, self->peek, len);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001023 if (prefetched == NULL) {
1024 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
1025 /* peek() is probably not supported by the given file object */
1026 PyErr_Clear();
1027 Py_CLEAR(self->peek);
1028 }
1029 else {
1030 Py_DECREF(data);
1031 return -1;
1032 }
1033 }
1034 else {
1035 assert(PyBytes_Check(prefetched));
1036 prefetched_size = PyBytes_GET_SIZE(prefetched);
1037 PyBytes_ConcatAndDel(&data, prefetched);
1038 if (data == NULL)
1039 return -1;
1040 }
1041 }
1042
1043 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001044 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001045 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001046 return read_size;
1047}
1048
1049/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
1050
1051 This should be used for all data reads, rather than accessing the unpickler's
1052 input buffer directly. This method deals correctly with reading from input
1053 streams, which the input buffer doesn't deal with.
1054
1055 Note that when reading from a file-like object, self->next_read_idx won't
1056 be updated (it should remain at 0 for the entire unpickling process). You
1057 should use this function's return value to know how many bytes you can
1058 consume.
1059
1060 Returns -1 (with an exception set) on failure. On success, return the
1061 number of chars read. */
1062static Py_ssize_t
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08001063_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001064{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001065 Py_ssize_t num_read;
1066
Antoine Pitrou04248a82010-10-12 20:51:21 +00001067 if (self->next_read_idx + n <= self->input_len) {
1068 *s = self->input_buffer + self->next_read_idx;
1069 self->next_read_idx += n;
1070 return n;
1071 }
1072 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001073 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +00001074 return -1;
1075 }
Antoine Pitrou04248a82010-10-12 20:51:21 +00001076 num_read = _Unpickler_ReadFromFile(self, n);
1077 if (num_read < 0)
1078 return -1;
1079 if (num_read < n) {
1080 PyErr_Format(PyExc_EOFError, "Ran out of input");
1081 return -1;
1082 }
1083 *s = self->input_buffer;
1084 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001085 return n;
1086}
1087
1088static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001089_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1090 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001091{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001092 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001093 if (input_line == NULL) {
1094 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001095 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001096 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001097
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001098 memcpy(input_line, line, len);
1099 input_line[len] = '\0';
1100 self->input_line = input_line;
1101 *result = self->input_line;
1102 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001103}
1104
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001105/* Read a line from the input stream/buffer. If we run off the end of the input
1106 before hitting \n, return the data we found.
1107
1108 Returns the number of chars read, or -1 on failure. */
1109static Py_ssize_t
1110_Unpickler_Readline(UnpicklerObject *self, char **result)
1111{
1112 Py_ssize_t i, num_read;
1113
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001114 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001115 if (self->input_buffer[i] == '\n') {
1116 char *line_start = self->input_buffer + self->next_read_idx;
1117 num_read = i - self->next_read_idx + 1;
1118 self->next_read_idx = i + 1;
1119 return _Unpickler_CopyLine(self, line_start, num_read, result);
1120 }
1121 }
1122 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001123 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1124 if (num_read < 0)
1125 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001126 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001127 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001128 }
Victor Stinner121aab42011-09-29 23:40:53 +02001129
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001130 /* If we get here, we've run off the end of the input string. Return the
1131 remaining string and let the caller figure it out. */
1132 *result = self->input_buffer + self->next_read_idx;
1133 num_read = i - self->next_read_idx;
1134 self->next_read_idx = i;
1135 return num_read;
1136}
1137
1138/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1139 will be modified in place. */
1140static int
1141_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1142{
1143 Py_ssize_t i;
1144 PyObject **memo;
1145
1146 assert(new_size > self->memo_size);
1147
1148 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1149 if (memo == NULL) {
1150 PyErr_NoMemory();
1151 return -1;
1152 }
1153 self->memo = memo;
1154 for (i = self->memo_size; i < new_size; i++)
1155 self->memo[i] = NULL;
1156 self->memo_size = new_size;
1157 return 0;
1158}
1159
1160/* Returns NULL if idx is out of bounds. */
1161static PyObject *
1162_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1163{
1164 if (idx < 0 || idx >= self->memo_size)
1165 return NULL;
1166
1167 return self->memo[idx];
1168}
1169
1170/* Returns -1 (with an exception set) on failure, 0 on success.
1171 This takes its own reference to `value`. */
1172static int
1173_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1174{
1175 PyObject *old_item;
1176
1177 if (idx >= self->memo_size) {
1178 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1179 return -1;
1180 assert(idx < self->memo_size);
1181 }
1182 Py_INCREF(value);
1183 old_item = self->memo[idx];
1184 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001185 if (old_item != NULL) {
1186 Py_DECREF(old_item);
1187 }
1188 else {
1189 self->memo_len++;
1190 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001191 return 0;
1192}
1193
1194static PyObject **
1195_Unpickler_NewMemo(Py_ssize_t new_size)
1196{
1197 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001198 if (memo == NULL) {
1199 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001200 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001201 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001202 memset(memo, 0, new_size * sizeof(PyObject *));
1203 return memo;
1204}
1205
1206/* Free the unpickler's memo, taking care to decref any items left in it. */
1207static void
1208_Unpickler_MemoCleanup(UnpicklerObject *self)
1209{
1210 Py_ssize_t i;
1211 PyObject **memo = self->memo;
1212
1213 if (self->memo == NULL)
1214 return;
1215 self->memo = NULL;
1216 i = self->memo_size;
1217 while (--i >= 0) {
1218 Py_XDECREF(memo[i]);
1219 }
1220 PyMem_FREE(memo);
1221}
1222
1223static UnpicklerObject *
1224_Unpickler_New(void)
1225{
1226 UnpicklerObject *self;
1227
1228 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1229 if (self == NULL)
1230 return NULL;
1231
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08001232 self->arg = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001233 self->pers_func = NULL;
1234 self->input_buffer = NULL;
1235 self->input_line = NULL;
1236 self->input_len = 0;
1237 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001238 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 self->read = NULL;
1240 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001241 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001242 self->encoding = NULL;
1243 self->errors = NULL;
1244 self->marks = NULL;
1245 self->num_marks = 0;
1246 self->marks_size = 0;
1247 self->proto = 0;
1248 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001249 memset(&self->buffer, 0, sizeof(Py_buffer));
1250 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001251 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001252 self->memo = _Unpickler_NewMemo(self->memo_size);
1253 self->stack = (Pdata *)Pdata_New();
1254
1255 if (self->memo == NULL || self->stack == NULL) {
1256 Py_DECREF(self);
1257 return NULL;
1258 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259
1260 return self;
1261}
1262
1263/* Returns -1 (with an exception set) on failure, 0 on success. This may
1264 be called once on a freshly created Pickler. */
1265static int
1266_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1267{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001268 _Py_IDENTIFIER(peek);
1269 _Py_IDENTIFIER(read);
1270 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001271
1272 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001273 if (self->peek == NULL) {
1274 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1275 PyErr_Clear();
1276 else
1277 return -1;
1278 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001279 self->read = _PyObject_GetAttrId(file, &PyId_read);
1280 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001281 if (self->readline == NULL || self->read == NULL) {
1282 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1283 PyErr_SetString(PyExc_TypeError,
1284 "file must have 'read' and 'readline' attributes");
1285 Py_CLEAR(self->read);
1286 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001287 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001288 return -1;
1289 }
1290 return 0;
1291}
1292
1293/* Returns -1 (with an exception set) on failure, 0 on success. This may
1294 be called once on a freshly created Pickler. */
1295static int
1296_Unpickler_SetInputEncoding(UnpicklerObject *self,
1297 const char *encoding,
1298 const char *errors)
1299{
1300 if (encoding == NULL)
1301 encoding = "ASCII";
1302 if (errors == NULL)
1303 errors = "strict";
1304
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001305 self->encoding = _PyMem_Strdup(encoding);
1306 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001307 if (self->encoding == NULL || self->errors == NULL) {
1308 PyErr_NoMemory();
1309 return -1;
1310 }
1311 return 0;
1312}
1313
1314/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001315static int
1316memo_get(PicklerObject *self, PyObject *key)
1317{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001318 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001319 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001320 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001321
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001322 value = PyMemoTable_Get(self->memo, key);
1323 if (value == NULL) {
1324 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001325 return -1;
1326 }
1327
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001328 if (!self->bin) {
1329 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001330 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1331 "%" PY_FORMAT_SIZE_T "d\n", *value);
1332 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001333 }
1334 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001335 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001336 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001337 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001338 len = 2;
1339 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001340 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001341 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001342 pdata[1] = (unsigned char)(*value & 0xff);
1343 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1344 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1345 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001346 len = 5;
1347 }
1348 else { /* unlikely */
1349 PyErr_SetString(PicklingError,
1350 "memo id too large for LONG_BINGET");
1351 return -1;
1352 }
1353 }
1354
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001355 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001356 return -1;
1357
1358 return 0;
1359}
1360
1361/* Store an object in the memo, assign it a new unique ID based on the number
1362 of objects currently stored in the memo and generate a PUT opcode. */
1363static int
1364memo_put(PicklerObject *self, PyObject *obj)
1365{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001366 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001367 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001368 Py_ssize_t idx;
1369
1370 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001371
1372 if (self->fast)
1373 return 0;
1374
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001375 idx = PyMemoTable_Size(self->memo);
1376 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1377 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001378
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001379 if (self->proto >= 4) {
1380 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1381 return -1;
1382 return 0;
1383 }
1384 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001385 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001386 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001387 "%" PY_FORMAT_SIZE_T "d\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001388 len = strlen(pdata);
1389 }
1390 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001391 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001392 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001393 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001394 len = 2;
1395 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001396 else if (idx <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001397 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001398 pdata[1] = (unsigned char)(idx & 0xff);
1399 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1400 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1401 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001402 len = 5;
1403 }
1404 else { /* unlikely */
1405 PyErr_SetString(PicklingError,
1406 "memo id too large for LONG_BINPUT");
1407 return -1;
1408 }
1409 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001410 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001411 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001412
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001413 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001414}
1415
1416static PyObject *
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001417getattribute(PyObject *obj, PyObject *name, int allow_qualname) {
1418 PyObject *dotted_path;
1419 Py_ssize_t i;
1420 _Py_static_string(PyId_dot, ".");
1421 _Py_static_string(PyId_locals, "<locals>");
1422
1423 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1424 if (dotted_path == NULL) {
1425 return NULL;
1426 }
1427 assert(Py_SIZE(dotted_path) >= 1);
1428 if (!allow_qualname && Py_SIZE(dotted_path) > 1) {
1429 PyErr_Format(PyExc_AttributeError,
1430 "Can't get qualified attribute %R on %R;"
1431 "use protocols >= 4 to enable support",
1432 name, obj);
1433 Py_DECREF(dotted_path);
1434 return NULL;
1435 }
1436 Py_INCREF(obj);
1437 for (i = 0; i < Py_SIZE(dotted_path); i++) {
1438 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1439 PyObject *tmp;
1440 PyObject *result = PyUnicode_RichCompare(
1441 subpath, _PyUnicode_FromId(&PyId_locals), Py_EQ);
1442 int is_equal = (result == Py_True);
1443 assert(PyBool_Check(result));
1444 Py_DECREF(result);
1445 if (is_equal) {
1446 PyErr_Format(PyExc_AttributeError,
1447 "Can't get local attribute %R on %R", name, obj);
1448 Py_DECREF(dotted_path);
1449 Py_DECREF(obj);
1450 return NULL;
1451 }
1452 tmp = PyObject_GetAttr(obj, subpath);
1453 Py_DECREF(obj);
1454 if (tmp == NULL) {
1455 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
1456 PyErr_Clear();
1457 PyErr_Format(PyExc_AttributeError,
1458 "Can't get attribute %R on %R", name, obj);
1459 }
1460 Py_DECREF(dotted_path);
1461 return NULL;
1462 }
1463 obj = tmp;
1464 }
1465 Py_DECREF(dotted_path);
1466 return obj;
1467}
1468
1469static PyObject *
1470whichmodule(PyObject *global, PyObject *global_name, int allow_qualname)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001471{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001472 PyObject *module_name;
1473 PyObject *modules_dict;
1474 PyObject *module;
1475 PyObject *obj;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001476 Py_ssize_t i, j;
1477 _Py_IDENTIFIER(__module__);
1478 _Py_IDENTIFIER(modules);
1479 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001480
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001481 module_name = _PyObject_GetAttrId(global, &PyId___module__);
1482
1483 if (module_name == NULL) {
1484 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001485 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001486 PyErr_Clear();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001487 }
1488 else {
1489 /* In some rare cases (e.g., bound methods of extension types),
1490 __module__ can be None. If it is so, then search sys.modules for
1491 the module of global. */
1492 if (module_name != Py_None)
1493 return module_name;
1494 Py_CLEAR(module_name);
1495 }
1496 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001497
Victor Stinnerbb520202013-11-06 22:40:41 +01001498 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02001499 if (modules_dict == NULL) {
1500 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001501 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001502 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001503
1504 i = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001505 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001506 PyObject *result = PyUnicode_RichCompare(
1507 module_name, _PyUnicode_FromId(&PyId___main__), Py_EQ);
1508 int is_equal = (result == Py_True);
1509 assert(PyBool_Check(result));
1510 Py_DECREF(result);
1511 if (is_equal)
1512 continue;
1513 if (module == Py_None)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001514 continue;
1515
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001516 obj = getattribute(module, global_name, allow_qualname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001517 if (obj == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001518 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001519 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001520 PyErr_Clear();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001521 continue;
1522 }
1523
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001524 if (obj == global) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 Py_DECREF(obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001526 Py_INCREF(module_name);
1527 return module_name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001528 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001529 Py_DECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001530 }
1531
1532 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001533 module_name = _PyUnicode_FromId(&PyId___main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001534 Py_INCREF(module_name);
1535 return module_name;
1536}
1537
1538/* fast_save_enter() and fast_save_leave() are guards against recursive
1539 objects when Pickler is used with the "fast mode" (i.e., with object
1540 memoization disabled). If the nesting of a list or dict object exceed
1541 FAST_NESTING_LIMIT, these guards will start keeping an internal
1542 reference to the seen list or dict objects and check whether these objects
1543 are recursive. These are not strictly necessary, since save() has a
1544 hard-coded recursion limit, but they give a nicer error message than the
1545 typical RuntimeError. */
1546static int
1547fast_save_enter(PicklerObject *self, PyObject *obj)
1548{
1549 /* if fast_nesting < 0, we're doing an error exit. */
1550 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1551 PyObject *key = NULL;
1552 if (self->fast_memo == NULL) {
1553 self->fast_memo = PyDict_New();
1554 if (self->fast_memo == NULL) {
1555 self->fast_nesting = -1;
1556 return 0;
1557 }
1558 }
1559 key = PyLong_FromVoidPtr(obj);
1560 if (key == NULL)
1561 return 0;
1562 if (PyDict_GetItem(self->fast_memo, key)) {
1563 Py_DECREF(key);
1564 PyErr_Format(PyExc_ValueError,
1565 "fast mode: can't pickle cyclic objects "
1566 "including object type %.200s at %p",
1567 obj->ob_type->tp_name, obj);
1568 self->fast_nesting = -1;
1569 return 0;
1570 }
1571 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1572 Py_DECREF(key);
1573 self->fast_nesting = -1;
1574 return 0;
1575 }
1576 Py_DECREF(key);
1577 }
1578 return 1;
1579}
1580
1581static int
1582fast_save_leave(PicklerObject *self, PyObject *obj)
1583{
1584 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1585 PyObject *key = PyLong_FromVoidPtr(obj);
1586 if (key == NULL)
1587 return 0;
1588 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1589 Py_DECREF(key);
1590 return 0;
1591 }
1592 Py_DECREF(key);
1593 }
1594 return 1;
1595}
1596
1597static int
1598save_none(PicklerObject *self, PyObject *obj)
1599{
1600 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001601 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001602 return -1;
1603
1604 return 0;
1605}
1606
1607static int
1608save_bool(PicklerObject *self, PyObject *obj)
1609{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001610 if (self->proto >= 2) {
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08001611 const char bool_op = (obj == Py_True) ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001612 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001613 return -1;
1614 }
Alexandre Vassalotti8a67f522013-11-24 21:40:18 -08001615 else {
1616 /* These aren't opcodes -- they're ways to pickle bools before protocol 2
1617 * so that unpicklers written before bools were introduced unpickle them
1618 * as ints, but unpicklers after can recognize that bools were intended.
1619 * Note that protocol 2 added direct ways to pickle bools.
1620 */
1621 const char *bool_str = (obj == Py_True) ? "I01\n" : "I00\n";
1622 if (_Pickler_Write(self, bool_str, strlen(bool_str)) < 0)
1623 return -1;
1624 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001625 return 0;
1626}
1627
1628static int
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001629save_long(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001630{
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001631 PyObject *repr = NULL;
1632 Py_ssize_t size;
1633 long val;
1634 int status = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001635
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001636 const char long_op = LONG;
1637
1638 val= PyLong_AsLong(obj);
1639 if (val == -1 && PyErr_Occurred()) {
1640 /* out of range for int pickling */
1641 PyErr_Clear();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001642 }
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001643 else if (self->bin &&
1644 (sizeof(long) <= 4 ||
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08001645 (val <= 0x7fffffffL && val >= (-0x7fffffffL - 1)))) {
1646 /* result fits in a signed 4-byte integer.
1647
1648 Note: we can't use -0x80000000L in the above condition because some
1649 compilers (e.g., MSVC) will promote 0x80000000L to an unsigned type
1650 before applying the unary minus when sizeof(long) <= 4. The
1651 resulting value stays unsigned which is commonly not what we want,
1652 so MSVC happily warns us about it. However, that result would have
1653 been fine because we guard for sizeof(long) <= 4 which turns the
1654 condition true in that particular case. */
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001655 char pdata[32];
1656 Py_ssize_t len = 0;
1657
1658 pdata[1] = (unsigned char)(val & 0xff);
1659 pdata[2] = (unsigned char)((val >> 8) & 0xff);
1660 pdata[3] = (unsigned char)((val >> 16) & 0xff);
1661 pdata[4] = (unsigned char)((val >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001662
1663 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1664 if (pdata[2] == 0) {
1665 pdata[0] = BININT1;
1666 len = 2;
1667 }
1668 else {
1669 pdata[0] = BININT2;
1670 len = 3;
1671 }
1672 }
1673 else {
1674 pdata[0] = BININT;
1675 len = 5;
1676 }
1677
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001678 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001679 return -1;
Alexandre Vassalottided929b2013-11-24 22:41:13 -08001680
1681 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001682 }
1683
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684 if (self->proto >= 2) {
1685 /* Linear-time pickling. */
1686 size_t nbits;
1687 size_t nbytes;
1688 unsigned char *pdata;
1689 char header[5];
1690 int i;
1691 int sign = _PyLong_Sign(obj);
1692
1693 if (sign == 0) {
1694 header[0] = LONG1;
1695 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001696 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001697 goto error;
1698 return 0;
1699 }
1700 nbits = _PyLong_NumBits(obj);
1701 if (nbits == (size_t)-1 && PyErr_Occurred())
1702 goto error;
1703 /* How many bytes do we need? There are nbits >> 3 full
1704 * bytes of data, and nbits & 7 leftover bits. If there
1705 * are any leftover bits, then we clearly need another
1706 * byte. Wnat's not so obvious is that we *probably*
1707 * need another byte even if there aren't any leftovers:
1708 * the most-significant bit of the most-significant byte
1709 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001710 * opposite of the one we need. The exception is ints
1711 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001712 * its own 256's-complement, so has the right sign bit
1713 * even without the extra byte. That's a pain to check
1714 * for in advance, though, so we always grab an extra
1715 * byte at the start, and cut it back later if possible.
1716 */
1717 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001718 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001719 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001720 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001721 goto error;
1722 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001723 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001724 if (repr == NULL)
1725 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001726 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001727 i = _PyLong_AsByteArray((PyLongObject *)obj,
1728 pdata, nbytes,
1729 1 /* little endian */ , 1 /* signed */ );
1730 if (i < 0)
1731 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001732 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001733 * needed. This is so iff the MSB is all redundant sign
1734 * bits.
1735 */
1736 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001737 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001738 pdata[nbytes - 1] == 0xff &&
1739 (pdata[nbytes - 2] & 0x80) != 0) {
1740 nbytes--;
1741 }
1742
1743 if (nbytes < 256) {
1744 header[0] = LONG1;
1745 header[1] = (unsigned char)nbytes;
1746 size = 2;
1747 }
1748 else {
1749 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001750 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001751 for (i = 1; i < 5; i++) {
1752 header[i] = (unsigned char)(size & 0xff);
1753 size >>= 8;
1754 }
1755 size = 5;
1756 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001757 if (_Pickler_Write(self, header, size) < 0 ||
1758 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001759 goto error;
1760 }
1761 else {
1762 char *string;
1763
Mark Dickinson8dd05142009-01-20 20:43:58 +00001764 /* proto < 2: write the repr and newline. This is quadratic-time (in
1765 the number of digits), in both directions. We add a trailing 'L'
1766 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001767
1768 repr = PyObject_Repr(obj);
1769 if (repr == NULL)
1770 goto error;
1771
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001772 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001773 if (string == NULL)
1774 goto error;
1775
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001776 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1777 _Pickler_Write(self, string, size) < 0 ||
1778 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001779 goto error;
1780 }
1781
1782 if (0) {
1783 error:
1784 status = -1;
1785 }
1786 Py_XDECREF(repr);
1787
1788 return status;
1789}
1790
1791static int
1792save_float(PicklerObject *self, PyObject *obj)
1793{
1794 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1795
1796 if (self->bin) {
1797 char pdata[9];
1798 pdata[0] = BINFLOAT;
1799 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1800 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001801 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001802 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001803 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001804 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001805 int result = -1;
1806 char *buf = NULL;
1807 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001808
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001809 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001810 goto done;
1811
Mark Dickinson3e09f432009-04-17 08:41:23 +00001812 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001813 if (!buf) {
1814 PyErr_NoMemory();
1815 goto done;
1816 }
1817
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001818 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001819 goto done;
1820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001821 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001822 goto done;
1823
1824 result = 0;
1825done:
1826 PyMem_Free(buf);
1827 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001828 }
1829
1830 return 0;
1831}
1832
1833static int
1834save_bytes(PicklerObject *self, PyObject *obj)
1835{
1836 if (self->proto < 3) {
1837 /* Older pickle protocols do not have an opcode for pickling bytes
1838 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001839 the __reduce__ method) to permit bytes object unpickling.
1840
1841 Here we use a hack to be compatible with Python 2. Since in Python
1842 2 'bytes' is just an alias for 'str' (which has different
1843 parameters than the actual bytes object), we use codecs.encode
1844 to create the appropriate 'str' object when unpickled using
1845 Python 2 *and* the appropriate 'bytes' object when unpickled
1846 using Python 3. Again this is a hack and we don't need to do this
1847 with newer protocols. */
1848 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001849 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001850 int status;
1851
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001852 if (codecs_encode == NULL) {
1853 PyObject *codecs_module = PyImport_ImportModule("codecs");
1854 if (codecs_module == NULL) {
1855 return -1;
1856 }
1857 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1858 Py_DECREF(codecs_module);
1859 if (codecs_encode == NULL) {
1860 return -1;
1861 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001862 }
1863
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001864 if (PyBytes_GET_SIZE(obj) == 0) {
1865 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1866 }
1867 else {
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001868 PyObject *unicode_str =
1869 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1870 PyBytes_GET_SIZE(obj),
1871 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001872 _Py_IDENTIFIER(latin1);
1873
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001874 if (unicode_str == NULL)
1875 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001876 reduce_value = Py_BuildValue("(O(OO))",
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001877 codecs_encode, unicode_str,
1878 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001879 Py_DECREF(unicode_str);
1880 }
1881
1882 if (reduce_value == NULL)
1883 return -1;
1884
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001885 /* save_reduce() will memoize the object automatically. */
1886 status = save_reduce(self, reduce_value, obj);
1887 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001888 return status;
1889 }
1890 else {
1891 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001892 char header[9];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001893 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001894
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001895 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001896 if (size < 0)
1897 return -1;
1898
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001899 if (size <= 0xff) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001900 header[0] = SHORT_BINBYTES;
1901 header[1] = (unsigned char)size;
1902 len = 2;
1903 }
1904 else if (size <= 0xffffffffL) {
1905 header[0] = BINBYTES;
1906 header[1] = (unsigned char)(size & 0xff);
1907 header[2] = (unsigned char)((size >> 8) & 0xff);
1908 header[3] = (unsigned char)((size >> 16) & 0xff);
1909 header[4] = (unsigned char)((size >> 24) & 0xff);
1910 len = 5;
1911 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001912 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001913 header[0] = BINBYTES8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08001914 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001915 len = 8;
1916 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001917 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001918 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001919 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001920 return -1; /* string too large */
1921 }
1922
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001923 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001924 return -1;
1925
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001926 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001927 return -1;
1928
1929 if (memo_put(self, obj) < 0)
1930 return -1;
1931
1932 return 0;
1933 }
1934}
1935
1936/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1937 backslash and newline characters to \uXXXX escapes. */
1938static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001939raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001940{
1941 PyObject *repr, *result;
1942 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001943 Py_ssize_t i, size, expandsize;
1944 void *data;
1945 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001946
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001947 if (PyUnicode_READY(obj))
1948 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001949
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001950 size = PyUnicode_GET_LENGTH(obj);
1951 data = PyUnicode_DATA(obj);
1952 kind = PyUnicode_KIND(obj);
1953 if (kind == PyUnicode_4BYTE_KIND)
1954 expandsize = 10;
1955 else
1956 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001957
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001958 if (size > PY_SSIZE_T_MAX / expandsize)
1959 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001960 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001961 if (repr == NULL)
1962 return NULL;
1963 if (size == 0)
1964 goto done;
1965
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001966 p = PyByteArray_AS_STRING(repr);
1967 for (i=0; i < size; i++) {
1968 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969 /* Map 32-bit characters to '\Uxxxxxxxx' */
1970 if (ch >= 0x10000) {
1971 *p++ = '\\';
1972 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001973 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1974 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1975 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1976 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1977 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1978 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1979 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1980 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001981 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001982 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001983 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001984 *p++ = '\\';
1985 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001986 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1987 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1988 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1989 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001991 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992 else
1993 *p++ = (char) ch;
1994 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001995 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001997done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001998 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001999 Py_DECREF(repr);
2000 return result;
2001}
2002
2003static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02002004write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
2005{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002006 char header[9];
2007 Py_ssize_t len;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002008
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002009 if (size <= 0xff && self->proto >= 4) {
2010 header[0] = SHORT_BINUNICODE;
2011 header[1] = (unsigned char)(size & 0xff);
2012 len = 2;
2013 }
2014 else if (size <= 0xffffffffUL) {
2015 header[0] = BINUNICODE;
2016 header[1] = (unsigned char)(size & 0xff);
2017 header[2] = (unsigned char)((size >> 8) & 0xff);
2018 header[3] = (unsigned char)((size >> 16) & 0xff);
2019 header[4] = (unsigned char)((size >> 24) & 0xff);
2020 len = 5;
2021 }
2022 else if (self->proto >= 4) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002023 header[0] = BINUNICODE8;
Alexandre Vassalotti1048fb52013-11-25 11:35:46 -08002024 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002025 len = 9;
2026 }
2027 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002028 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02002029 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02002030 return -1;
2031 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002032
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002033 if (_Pickler_Write(self, header, len) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002034 return -1;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002035 if (_Pickler_Write(self, data, size) < 0)
2036 return -1;
2037
2038 return 0;
2039}
2040
2041static int
2042write_unicode_binary(PicklerObject *self, PyObject *obj)
2043{
2044 PyObject *encoded = NULL;
2045 Py_ssize_t size;
2046 char *data;
2047 int r;
2048
2049 if (PyUnicode_READY(obj))
2050 return -1;
2051
2052 data = PyUnicode_AsUTF8AndSize(obj, &size);
2053 if (data != NULL)
2054 return write_utf8(self, data, size);
2055
2056 /* Issue #8383: for strings with lone surrogates, fallback on the
2057 "surrogatepass" error handler. */
2058 PyErr_Clear();
2059 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2060 if (encoded == NULL)
2061 return -1;
2062
2063 r = write_utf8(self, PyBytes_AS_STRING(encoded),
2064 PyBytes_GET_SIZE(encoded));
2065 Py_DECREF(encoded);
2066 return r;
2067}
2068
2069static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002070save_unicode(PicklerObject *self, PyObject *obj)
2071{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002072 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002073 if (write_unicode_binary(self, obj) < 0)
2074 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002075 }
2076 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002077 PyObject *encoded;
2078 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002079 const char unicode_op = UNICODE;
2080
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002081 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002082 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002083 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002084
Antoine Pitrou299978d2013-04-07 17:38:11 +02002085 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2086 Py_DECREF(encoded);
2087 return -1;
2088 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002089
2090 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002091 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2092 Py_DECREF(encoded);
2093 return -1;
2094 }
2095 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002096
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002097 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002098 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002099 }
2100 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002101 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002102
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002103 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002104}
2105
2106/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2107static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002108store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002109{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002110 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002111
2112 assert(PyTuple_Size(t) == len);
2113
2114 for (i = 0; i < len; i++) {
2115 PyObject *element = PyTuple_GET_ITEM(t, i);
2116
2117 if (element == NULL)
2118 return -1;
2119 if (save(self, element, 0) < 0)
2120 return -1;
2121 }
2122
2123 return 0;
2124}
2125
2126/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2127 * used across protocols to minimize the space needed to pickle them.
2128 * Tuples are also the only builtin immutable type that can be recursive
2129 * (a tuple can be reached from itself), and that requires some subtle
2130 * magic so that it works in all cases. IOW, this is a long routine.
2131 */
2132static int
2133save_tuple(PicklerObject *self, PyObject *obj)
2134{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002135 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002136
2137 const char mark_op = MARK;
2138 const char tuple_op = TUPLE;
2139 const char pop_op = POP;
2140 const char pop_mark_op = POP_MARK;
2141 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2142
2143 if ((len = PyTuple_Size(obj)) < 0)
2144 return -1;
2145
2146 if (len == 0) {
2147 char pdata[2];
2148
2149 if (self->proto) {
2150 pdata[0] = EMPTY_TUPLE;
2151 len = 1;
2152 }
2153 else {
2154 pdata[0] = MARK;
2155 pdata[1] = TUPLE;
2156 len = 2;
2157 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002158 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002159 return -1;
2160 return 0;
2161 }
2162
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002163 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002164 * saving the tuple elements, the tuple must be recursive, in
2165 * which case we'll pop everything we put on the stack, and fetch
2166 * its value from the memo.
2167 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002168 if (len <= 3 && self->proto >= 2) {
2169 /* Use TUPLE{1,2,3} opcodes. */
2170 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002171 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002172
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002173 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002174 /* pop the len elements */
2175 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002176 if (_Pickler_Write(self, &pop_op, 1) < 0)
2177 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002178 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002179 if (memo_get(self, obj) < 0)
2180 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002181
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002182 return 0;
2183 }
2184 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002185 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2186 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002187 }
2188 goto memoize;
2189 }
2190
2191 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2192 * Generate MARK e1 e2 ... TUPLE
2193 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002194 if (_Pickler_Write(self, &mark_op, 1) < 0)
2195 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002196
2197 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002198 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002200 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002201 /* pop the stack stuff we pushed */
2202 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002203 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2204 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002205 }
2206 else {
2207 /* Note that we pop one more than len, to remove
2208 * the MARK too.
2209 */
2210 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002211 if (_Pickler_Write(self, &pop_op, 1) < 0)
2212 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002213 }
2214 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002215 if (memo_get(self, obj) < 0)
2216 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002217
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002218 return 0;
2219 }
2220 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002221 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2222 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002223 }
2224
2225 memoize:
2226 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002227 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002228
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002229 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002230}
2231
2232/* iter is an iterator giving items, and we batch up chunks of
2233 * MARK item item ... item APPENDS
2234 * opcode sequences. Calling code should have arranged to first create an
2235 * empty list, or list-like object, for the APPENDS to operate on.
2236 * Returns 0 on success, <0 on error.
2237 */
2238static int
2239batch_list(PicklerObject *self, PyObject *iter)
2240{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002241 PyObject *obj = NULL;
2242 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002243 int i, n;
2244
2245 const char mark_op = MARK;
2246 const char append_op = APPEND;
2247 const char appends_op = APPENDS;
2248
2249 assert(iter != NULL);
2250
2251 /* XXX: I think this function could be made faster by avoiding the
2252 iterator interface and fetching objects directly from list using
2253 PyList_GET_ITEM.
2254 */
2255
2256 if (self->proto == 0) {
2257 /* APPENDS isn't available; do one at a time. */
2258 for (;;) {
2259 obj = PyIter_Next(iter);
2260 if (obj == NULL) {
2261 if (PyErr_Occurred())
2262 return -1;
2263 break;
2264 }
2265 i = save(self, obj, 0);
2266 Py_DECREF(obj);
2267 if (i < 0)
2268 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002269 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002270 return -1;
2271 }
2272 return 0;
2273 }
2274
2275 /* proto > 0: write in batches of BATCHSIZE. */
2276 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002277 /* Get first item */
2278 firstitem = PyIter_Next(iter);
2279 if (firstitem == NULL) {
2280 if (PyErr_Occurred())
2281 goto error;
2282
2283 /* nothing more to add */
2284 break;
2285 }
2286
2287 /* Try to get a second item */
2288 obj = PyIter_Next(iter);
2289 if (obj == NULL) {
2290 if (PyErr_Occurred())
2291 goto error;
2292
2293 /* Only one item to write */
2294 if (save(self, firstitem, 0) < 0)
2295 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002296 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002297 goto error;
2298 Py_CLEAR(firstitem);
2299 break;
2300 }
2301
2302 /* More than one item to write */
2303
2304 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002305 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002306 goto error;
2307
2308 if (save(self, firstitem, 0) < 0)
2309 goto error;
2310 Py_CLEAR(firstitem);
2311 n = 1;
2312
2313 /* Fetch and save up to BATCHSIZE items */
2314 while (obj) {
2315 if (save(self, obj, 0) < 0)
2316 goto error;
2317 Py_CLEAR(obj);
2318 n += 1;
2319
2320 if (n == BATCHSIZE)
2321 break;
2322
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002323 obj = PyIter_Next(iter);
2324 if (obj == NULL) {
2325 if (PyErr_Occurred())
2326 goto error;
2327 break;
2328 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002329 }
2330
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002331 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002332 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002333
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002334 } while (n == BATCHSIZE);
2335 return 0;
2336
2337 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002338 Py_XDECREF(firstitem);
2339 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002340 return -1;
2341}
2342
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002343/* This is a variant of batch_list() above, specialized for lists (with no
2344 * support for list subclasses). Like batch_list(), we batch up chunks of
2345 * MARK item item ... item APPENDS
2346 * opcode sequences. Calling code should have arranged to first create an
2347 * empty list, or list-like object, for the APPENDS to operate on.
2348 * Returns 0 on success, -1 on error.
2349 *
2350 * This version is considerably faster than batch_list(), if less general.
2351 *
2352 * Note that this only works for protocols > 0.
2353 */
2354static int
2355batch_list_exact(PicklerObject *self, PyObject *obj)
2356{
2357 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002358 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002359
2360 const char append_op = APPEND;
2361 const char appends_op = APPENDS;
2362 const char mark_op = MARK;
2363
2364 assert(obj != NULL);
2365 assert(self->proto > 0);
2366 assert(PyList_CheckExact(obj));
2367
2368 if (PyList_GET_SIZE(obj) == 1) {
2369 item = PyList_GET_ITEM(obj, 0);
2370 if (save(self, item, 0) < 0)
2371 return -1;
2372 if (_Pickler_Write(self, &append_op, 1) < 0)
2373 return -1;
2374 return 0;
2375 }
2376
2377 /* Write in batches of BATCHSIZE. */
2378 total = 0;
2379 do {
2380 this_batch = 0;
2381 if (_Pickler_Write(self, &mark_op, 1) < 0)
2382 return -1;
2383 while (total < PyList_GET_SIZE(obj)) {
2384 item = PyList_GET_ITEM(obj, total);
2385 if (save(self, item, 0) < 0)
2386 return -1;
2387 total++;
2388 if (++this_batch == BATCHSIZE)
2389 break;
2390 }
2391 if (_Pickler_Write(self, &appends_op, 1) < 0)
2392 return -1;
2393
2394 } while (total < PyList_GET_SIZE(obj));
2395
2396 return 0;
2397}
2398
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002399static int
2400save_list(PicklerObject *self, PyObject *obj)
2401{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002402 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002403 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002404 int status = 0;
2405
2406 if (self->fast && !fast_save_enter(self, obj))
2407 goto error;
2408
2409 /* Create an empty list. */
2410 if (self->bin) {
2411 header[0] = EMPTY_LIST;
2412 len = 1;
2413 }
2414 else {
2415 header[0] = MARK;
2416 header[1] = LIST;
2417 len = 2;
2418 }
2419
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002420 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002421 goto error;
2422
2423 /* Get list length, and bow out early if empty. */
2424 if ((len = PyList_Size(obj)) < 0)
2425 goto error;
2426
2427 if (memo_put(self, obj) < 0)
2428 goto error;
2429
2430 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002431 /* Materialize the list elements. */
2432 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002433 if (Py_EnterRecursiveCall(" while pickling an object"))
2434 goto error;
2435 status = batch_list_exact(self, obj);
2436 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002437 } else {
2438 PyObject *iter = PyObject_GetIter(obj);
2439 if (iter == NULL)
2440 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002441
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002442 if (Py_EnterRecursiveCall(" while pickling an object")) {
2443 Py_DECREF(iter);
2444 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002445 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002446 status = batch_list(self, iter);
2447 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002448 Py_DECREF(iter);
2449 }
2450 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002451 if (0) {
2452 error:
2453 status = -1;
2454 }
2455
2456 if (self->fast && !fast_save_leave(self, obj))
2457 status = -1;
2458
2459 return status;
2460}
2461
2462/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2463 * MARK key value ... key value SETITEMS
2464 * opcode sequences. Calling code should have arranged to first create an
2465 * empty dict, or dict-like object, for the SETITEMS to operate on.
2466 * Returns 0 on success, <0 on error.
2467 *
2468 * This is very much like batch_list(). The difference between saving
2469 * elements directly, and picking apart two-tuples, is so long-winded at
2470 * the C level, though, that attempts to combine these routines were too
2471 * ugly to bear.
2472 */
2473static int
2474batch_dict(PicklerObject *self, PyObject *iter)
2475{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002476 PyObject *obj = NULL;
2477 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002478 int i, n;
2479
2480 const char mark_op = MARK;
2481 const char setitem_op = SETITEM;
2482 const char setitems_op = SETITEMS;
2483
2484 assert(iter != NULL);
2485
2486 if (self->proto == 0) {
2487 /* SETITEMS isn't available; do one at a time. */
2488 for (;;) {
2489 obj = PyIter_Next(iter);
2490 if (obj == NULL) {
2491 if (PyErr_Occurred())
2492 return -1;
2493 break;
2494 }
2495 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2496 PyErr_SetString(PyExc_TypeError, "dict items "
2497 "iterator must return 2-tuples");
2498 return -1;
2499 }
2500 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2501 if (i >= 0)
2502 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2503 Py_DECREF(obj);
2504 if (i < 0)
2505 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002506 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002507 return -1;
2508 }
2509 return 0;
2510 }
2511
2512 /* proto > 0: write in batches of BATCHSIZE. */
2513 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002514 /* Get first item */
2515 firstitem = PyIter_Next(iter);
2516 if (firstitem == NULL) {
2517 if (PyErr_Occurred())
2518 goto error;
2519
2520 /* nothing more to add */
2521 break;
2522 }
2523 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2524 PyErr_SetString(PyExc_TypeError, "dict items "
2525 "iterator must return 2-tuples");
2526 goto error;
2527 }
2528
2529 /* Try to get a second item */
2530 obj = PyIter_Next(iter);
2531 if (obj == NULL) {
2532 if (PyErr_Occurred())
2533 goto error;
2534
2535 /* Only one item to write */
2536 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2537 goto error;
2538 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2539 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002540 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002541 goto error;
2542 Py_CLEAR(firstitem);
2543 break;
2544 }
2545
2546 /* More than one item to write */
2547
2548 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002549 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002550 goto error;
2551
2552 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2553 goto error;
2554 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2555 goto error;
2556 Py_CLEAR(firstitem);
2557 n = 1;
2558
2559 /* Fetch and save up to BATCHSIZE items */
2560 while (obj) {
2561 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2562 PyErr_SetString(PyExc_TypeError, "dict items "
2563 "iterator must return 2-tuples");
2564 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002565 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002566 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2567 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2568 goto error;
2569 Py_CLEAR(obj);
2570 n += 1;
2571
2572 if (n == BATCHSIZE)
2573 break;
2574
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002575 obj = PyIter_Next(iter);
2576 if (obj == NULL) {
2577 if (PyErr_Occurred())
2578 goto error;
2579 break;
2580 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002581 }
2582
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002583 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002584 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002585
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002586 } while (n == BATCHSIZE);
2587 return 0;
2588
2589 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002590 Py_XDECREF(firstitem);
2591 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002592 return -1;
2593}
2594
Collin Winter5c9b02d2009-05-25 05:43:30 +00002595/* This is a variant of batch_dict() above that specializes for dicts, with no
2596 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2597 * MARK key value ... key value SETITEMS
2598 * opcode sequences. Calling code should have arranged to first create an
2599 * empty dict, or dict-like object, for the SETITEMS to operate on.
2600 * Returns 0 on success, -1 on error.
2601 *
2602 * Note that this currently doesn't work for protocol 0.
2603 */
2604static int
2605batch_dict_exact(PicklerObject *self, PyObject *obj)
2606{
2607 PyObject *key = NULL, *value = NULL;
2608 int i;
2609 Py_ssize_t dict_size, ppos = 0;
2610
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002611 const char mark_op = MARK;
2612 const char setitem_op = SETITEM;
2613 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002614
2615 assert(obj != NULL);
2616 assert(self->proto > 0);
2617
2618 dict_size = PyDict_Size(obj);
2619
2620 /* Special-case len(d) == 1 to save space. */
2621 if (dict_size == 1) {
2622 PyDict_Next(obj, &ppos, &key, &value);
2623 if (save(self, key, 0) < 0)
2624 return -1;
2625 if (save(self, value, 0) < 0)
2626 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002627 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002628 return -1;
2629 return 0;
2630 }
2631
2632 /* Write in batches of BATCHSIZE. */
2633 do {
2634 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002635 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002636 return -1;
2637 while (PyDict_Next(obj, &ppos, &key, &value)) {
2638 if (save(self, key, 0) < 0)
2639 return -1;
2640 if (save(self, value, 0) < 0)
2641 return -1;
2642 if (++i == BATCHSIZE)
2643 break;
2644 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002645 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002646 return -1;
2647 if (PyDict_Size(obj) != dict_size) {
2648 PyErr_Format(
2649 PyExc_RuntimeError,
2650 "dictionary changed size during iteration");
2651 return -1;
2652 }
2653
2654 } while (i == BATCHSIZE);
2655 return 0;
2656}
2657
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002658static int
2659save_dict(PicklerObject *self, PyObject *obj)
2660{
2661 PyObject *items, *iter;
2662 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002663 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002664 int status = 0;
2665
2666 if (self->fast && !fast_save_enter(self, obj))
2667 goto error;
2668
2669 /* Create an empty dict. */
2670 if (self->bin) {
2671 header[0] = EMPTY_DICT;
2672 len = 1;
2673 }
2674 else {
2675 header[0] = MARK;
2676 header[1] = DICT;
2677 len = 2;
2678 }
2679
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002680 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002681 goto error;
2682
2683 /* Get dict size, and bow out early if empty. */
2684 if ((len = PyDict_Size(obj)) < 0)
2685 goto error;
2686
2687 if (memo_put(self, obj) < 0)
2688 goto error;
2689
2690 if (len != 0) {
2691 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002692 if (PyDict_CheckExact(obj) && self->proto > 0) {
2693 /* We can take certain shortcuts if we know this is a dict and
2694 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002695 if (Py_EnterRecursiveCall(" while pickling an object"))
2696 goto error;
2697 status = batch_dict_exact(self, obj);
2698 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002699 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002700 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002701
2702 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002703 if (items == NULL)
2704 goto error;
2705 iter = PyObject_GetIter(items);
2706 Py_DECREF(items);
2707 if (iter == NULL)
2708 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002709 if (Py_EnterRecursiveCall(" while pickling an object")) {
2710 Py_DECREF(iter);
2711 goto error;
2712 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002713 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002714 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002715 Py_DECREF(iter);
2716 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002717 }
2718
2719 if (0) {
2720 error:
2721 status = -1;
2722 }
2723
2724 if (self->fast && !fast_save_leave(self, obj))
2725 status = -1;
2726
2727 return status;
2728}
2729
2730static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002731save_set(PicklerObject *self, PyObject *obj)
2732{
2733 PyObject *item;
2734 int i;
2735 Py_ssize_t set_size, ppos = 0;
2736 Py_hash_t hash;
2737
2738 const char empty_set_op = EMPTY_SET;
2739 const char mark_op = MARK;
2740 const char additems_op = ADDITEMS;
2741
2742 if (self->proto < 4) {
2743 PyObject *items;
2744 PyObject *reduce_value;
2745 int status;
2746
2747 items = PySequence_List(obj);
2748 if (items == NULL) {
2749 return -1;
2750 }
2751 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
2752 Py_DECREF(items);
2753 if (reduce_value == NULL) {
2754 return -1;
2755 }
2756 /* save_reduce() will memoize the object automatically. */
2757 status = save_reduce(self, reduce_value, obj);
2758 Py_DECREF(reduce_value);
2759 return status;
2760 }
2761
2762 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
2763 return -1;
2764
2765 if (memo_put(self, obj) < 0)
2766 return -1;
2767
2768 set_size = PySet_GET_SIZE(obj);
2769 if (set_size == 0)
2770 return 0; /* nothing to do */
2771
2772 /* Write in batches of BATCHSIZE. */
2773 do {
2774 i = 0;
2775 if (_Pickler_Write(self, &mark_op, 1) < 0)
2776 return -1;
2777 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
2778 if (save(self, item, 0) < 0)
2779 return -1;
2780 if (++i == BATCHSIZE)
2781 break;
2782 }
2783 if (_Pickler_Write(self, &additems_op, 1) < 0)
2784 return -1;
2785 if (PySet_GET_SIZE(obj) != set_size) {
2786 PyErr_Format(
2787 PyExc_RuntimeError,
2788 "set changed size during iteration");
2789 return -1;
2790 }
2791 } while (i == BATCHSIZE);
2792
2793 return 0;
2794}
2795
2796static int
2797save_frozenset(PicklerObject *self, PyObject *obj)
2798{
2799 PyObject *iter;
2800
2801 const char mark_op = MARK;
2802 const char frozenset_op = FROZENSET;
2803
2804 if (self->fast && !fast_save_enter(self, obj))
2805 return -1;
2806
2807 if (self->proto < 4) {
2808 PyObject *items;
2809 PyObject *reduce_value;
2810 int status;
2811
2812 items = PySequence_List(obj);
2813 if (items == NULL) {
2814 return -1;
2815 }
2816 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
2817 items);
2818 Py_DECREF(items);
2819 if (reduce_value == NULL) {
2820 return -1;
2821 }
2822 /* save_reduce() will memoize the object automatically. */
2823 status = save_reduce(self, reduce_value, obj);
2824 Py_DECREF(reduce_value);
2825 return status;
2826 }
2827
2828 if (_Pickler_Write(self, &mark_op, 1) < 0)
2829 return -1;
2830
2831 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01002832 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01002833 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01002834 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002835 for (;;) {
2836 PyObject *item;
2837
2838 item = PyIter_Next(iter);
2839 if (item == NULL) {
2840 if (PyErr_Occurred()) {
2841 Py_DECREF(iter);
2842 return -1;
2843 }
2844 break;
2845 }
2846 if (save(self, item, 0) < 0) {
2847 Py_DECREF(item);
2848 Py_DECREF(iter);
2849 return -1;
2850 }
2851 Py_DECREF(item);
2852 }
2853 Py_DECREF(iter);
2854
2855 /* If the object is already in the memo, this means it is
2856 recursive. In this case, throw away everything we put on the
2857 stack, and fetch the object back from the memo. */
2858 if (PyMemoTable_Get(self->memo, obj)) {
2859 const char pop_mark_op = POP_MARK;
2860
2861 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2862 return -1;
2863 if (memo_get(self, obj) < 0)
2864 return -1;
2865 return 0;
2866 }
2867
2868 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
2869 return -1;
2870 if (memo_put(self, obj) < 0)
2871 return -1;
2872
2873 return 0;
2874}
2875
2876static int
2877fix_imports(PyObject **module_name, PyObject **global_name)
2878{
2879 PyObject *key;
2880 PyObject *item;
2881
2882 key = PyTuple_Pack(2, *module_name, *global_name);
2883 if (key == NULL)
2884 return -1;
2885 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2886 Py_DECREF(key);
2887 if (item) {
2888 PyObject *fixed_module_name;
2889 PyObject *fixed_global_name;
2890
2891 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2892 PyErr_Format(PyExc_RuntimeError,
2893 "_compat_pickle.REVERSE_NAME_MAPPING values "
2894 "should be 2-tuples, not %.200s",
2895 Py_TYPE(item)->tp_name);
2896 return -1;
2897 }
2898 fixed_module_name = PyTuple_GET_ITEM(item, 0);
2899 fixed_global_name = PyTuple_GET_ITEM(item, 1);
2900 if (!PyUnicode_Check(fixed_module_name) ||
2901 !PyUnicode_Check(fixed_global_name)) {
2902 PyErr_Format(PyExc_RuntimeError,
2903 "_compat_pickle.REVERSE_NAME_MAPPING values "
2904 "should be pairs of str, not (%.200s, %.200s)",
2905 Py_TYPE(fixed_module_name)->tp_name,
2906 Py_TYPE(fixed_global_name)->tp_name);
2907 return -1;
2908 }
2909
2910 Py_CLEAR(*module_name);
2911 Py_CLEAR(*global_name);
2912 Py_INCREF(fixed_module_name);
2913 Py_INCREF(fixed_global_name);
2914 *module_name = fixed_module_name;
2915 *global_name = fixed_global_name;
2916 }
2917 else if (PyErr_Occurred()) {
2918 return -1;
2919 }
2920
2921 item = PyDict_GetItemWithError(import_mapping_3to2, *module_name);
2922 if (item) {
2923 if (!PyUnicode_Check(item)) {
2924 PyErr_Format(PyExc_RuntimeError,
2925 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2926 "should be strings, not %.200s",
2927 Py_TYPE(item)->tp_name);
2928 return -1;
2929 }
2930 Py_CLEAR(*module_name);
2931 Py_INCREF(item);
2932 *module_name = item;
2933 }
2934 else if (PyErr_Occurred()) {
2935 return -1;
2936 }
2937
2938 return 0;
2939}
2940
2941static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002942save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2943{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002944 PyObject *global_name = NULL;
2945 PyObject *module_name = NULL;
2946 PyObject *module = NULL;
2947 PyObject *cls;
2948 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002949 _Py_IDENTIFIER(__name__);
2950 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002951
2952 const char global_op = GLOBAL;
2953
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002954 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002955 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002956 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002957 }
2958 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002959 if (self->proto >= 4) {
2960 global_name = _PyObject_GetAttrId(obj, &PyId___qualname__);
2961 if (global_name == NULL) {
2962 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
2963 goto error;
2964 PyErr_Clear();
2965 }
2966 }
2967 if (global_name == NULL) {
2968 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
2969 if (global_name == NULL)
2970 goto error;
2971 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002972 }
2973
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002974 module_name = whichmodule(obj, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002975 if (module_name == NULL)
2976 goto error;
2977
2978 /* XXX: Change to use the import C API directly with level=0 to disallow
2979 relative imports.
2980
2981 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2982 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2983 custom import functions (IMHO, this would be a nice security
2984 feature). The import C API would need to be extended to support the
2985 extra parameters of __import__ to fix that. */
2986 module = PyImport_Import(module_name);
2987 if (module == NULL) {
2988 PyErr_Format(PicklingError,
2989 "Can't pickle %R: import of module %R failed",
2990 obj, module_name);
2991 goto error;
2992 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002993 cls = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002994 if (cls == NULL) {
2995 PyErr_Format(PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002996 "Can't pickle %R: attribute lookup %S on %S failed",
2997 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002998 goto error;
2999 }
3000 if (cls != obj) {
3001 Py_DECREF(cls);
3002 PyErr_Format(PicklingError,
3003 "Can't pickle %R: it's not the same object as %S.%S",
3004 obj, module_name, global_name);
3005 goto error;
3006 }
3007 Py_DECREF(cls);
3008
3009 if (self->proto >= 2) {
3010 /* See whether this is in the extension registry, and if
3011 * so generate an EXT opcode.
3012 */
3013 PyObject *code_obj; /* extension code as Python object */
3014 long code; /* extension code as C value */
3015 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003016 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003017
3018 PyTuple_SET_ITEM(two_tuple, 0, module_name);
3019 PyTuple_SET_ITEM(two_tuple, 1, global_name);
3020 code_obj = PyDict_GetItem(extension_registry, two_tuple);
3021 /* The object is not registered in the extension registry.
3022 This is the most likely code path. */
3023 if (code_obj == NULL)
3024 goto gen_global;
3025
3026 /* XXX: pickle.py doesn't check neither the type, nor the range
3027 of the value returned by the extension_registry. It should for
3028 consistency. */
3029
3030 /* Verify code_obj has the right type and value. */
3031 if (!PyLong_Check(code_obj)) {
3032 PyErr_Format(PicklingError,
3033 "Can't pickle %R: extension code %R isn't an integer",
3034 obj, code_obj);
3035 goto error;
3036 }
3037 code = PyLong_AS_LONG(code_obj);
3038 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003039 if (!PyErr_Occurred())
3040 PyErr_Format(PicklingError,
3041 "Can't pickle %R: extension code %ld is out of range",
3042 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003043 goto error;
3044 }
3045
3046 /* Generate an EXT opcode. */
3047 if (code <= 0xff) {
3048 pdata[0] = EXT1;
3049 pdata[1] = (unsigned char)code;
3050 n = 2;
3051 }
3052 else if (code <= 0xffff) {
3053 pdata[0] = EXT2;
3054 pdata[1] = (unsigned char)(code & 0xff);
3055 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3056 n = 3;
3057 }
3058 else {
3059 pdata[0] = EXT4;
3060 pdata[1] = (unsigned char)(code & 0xff);
3061 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3062 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3063 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3064 n = 5;
3065 }
3066
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003067 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003068 goto error;
3069 }
3070 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003071 gen_global:
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003072 if (self->proto >= 4) {
3073 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003074
Christian Heimese8b1ba12013-11-23 21:13:39 +01003075 if (save(self, module_name, 0) < 0)
3076 goto error;
3077 if (save(self, global_name, 0) < 0)
3078 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003079
3080 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3081 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003082 }
3083 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003084 /* Generate a normal global opcode if we are using a pickle
3085 protocol < 4, or if the object is not registered in the
3086 extension registry. */
3087 PyObject *encoded;
3088 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003089
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003090 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003091 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003092
3093 /* For protocol < 3 and if the user didn't request against doing
3094 so, we convert module names to the old 2.x module names. */
3095 if (self->proto < 3 && self->fix_imports) {
3096 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003097 goto error;
3098 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003099 }
3100
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003101 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3102 both the module name and the global name using UTF-8. We do so
3103 only when we are using the pickle protocol newer than version
3104 3. This is to ensure compatibility with older Unpickler running
3105 on Python 2.x. */
3106 if (self->proto == 3) {
3107 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003108 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003109 else {
3110 unicode_encoder = PyUnicode_AsASCIIString;
3111 }
3112 encoded = unicode_encoder(module_name);
3113 if (encoded == NULL) {
3114 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3115 PyErr_Format(PicklingError,
3116 "can't pickle module identifier '%S' using "
3117 "pickle protocol %i",
3118 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003119 goto error;
3120 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003121 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3122 PyBytes_GET_SIZE(encoded)) < 0) {
3123 Py_DECREF(encoded);
3124 goto error;
3125 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003126 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003127 if(_Pickler_Write(self, "\n", 1) < 0)
3128 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003129
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003130 /* Save the name of the module. */
3131 encoded = unicode_encoder(global_name);
3132 if (encoded == NULL) {
3133 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3134 PyErr_Format(PicklingError,
3135 "can't pickle global identifier '%S' using "
3136 "pickle protocol %i",
3137 global_name, self->proto);
3138 goto error;
3139 }
3140 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3141 PyBytes_GET_SIZE(encoded)) < 0) {
3142 Py_DECREF(encoded);
3143 goto error;
3144 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003145 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003146 if (_Pickler_Write(self, "\n", 1) < 0)
3147 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003148 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003149 /* Memoize the object. */
3150 if (memo_put(self, obj) < 0)
3151 goto error;
3152 }
3153
3154 if (0) {
3155 error:
3156 status = -1;
3157 }
3158 Py_XDECREF(module_name);
3159 Py_XDECREF(global_name);
3160 Py_XDECREF(module);
3161
3162 return status;
3163}
3164
3165static int
3166save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
3167{
3168 PyObject *pid = NULL;
3169 int status = 0;
3170
3171 const char persid_op = PERSID;
3172 const char binpersid_op = BINPERSID;
3173
3174 Py_INCREF(obj);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003175 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003176 if (pid == NULL)
3177 return -1;
3178
3179 if (pid != Py_None) {
3180 if (self->bin) {
3181 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003182 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003183 goto error;
3184 }
3185 else {
3186 PyObject *pid_str = NULL;
3187 char *pid_ascii_bytes;
3188 Py_ssize_t size;
3189
3190 pid_str = PyObject_Str(pid);
3191 if (pid_str == NULL)
3192 goto error;
3193
3194 /* XXX: Should it check whether the persistent id only contains
3195 ASCII characters? And what if the pid contains embedded
3196 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003197 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003198 Py_DECREF(pid_str);
3199 if (pid_ascii_bytes == NULL)
3200 goto error;
3201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003202 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3203 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
3204 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003205 goto error;
3206 }
3207 status = 1;
3208 }
3209
3210 if (0) {
3211 error:
3212 status = -1;
3213 }
3214 Py_XDECREF(pid);
3215
3216 return status;
3217}
3218
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003219static PyObject *
3220get_class(PyObject *obj)
3221{
3222 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003223 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003224
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003225 cls = _PyObject_GetAttrId(obj, &PyId___class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003226 if (cls == NULL) {
3227 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
3228 PyErr_Clear();
3229 cls = (PyObject *) Py_TYPE(obj);
3230 Py_INCREF(cls);
3231 }
3232 }
3233 return cls;
3234}
3235
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003236/* We're saving obj, and args is the 2-thru-5 tuple returned by the
3237 * appropriate __reduce__ method for obj.
3238 */
3239static int
3240save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3241{
3242 PyObject *callable;
3243 PyObject *argtup;
3244 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003245 PyObject *listitems = Py_None;
3246 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003247 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003248 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003249
3250 const char reduce_op = REDUCE;
3251 const char build_op = BUILD;
3252 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003253 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003254
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003255 size = PyTuple_Size(args);
3256 if (size < 2 || size > 5) {
3257 PyErr_SetString(PicklingError, "tuple returned by "
3258 "__reduce__ must contain 2 through 5 elements");
3259 return -1;
3260 }
3261
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003262 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3263 &callable, &argtup, &state, &listitems, &dictitems))
3264 return -1;
3265
3266 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003267 PyErr_SetString(PicklingError, "first item of the tuple "
3268 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003269 return -1;
3270 }
3271 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003272 PyErr_SetString(PicklingError, "second item of the tuple "
3273 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003274 return -1;
3275 }
3276
3277 if (state == Py_None)
3278 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003279
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003280 if (listitems == Py_None)
3281 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003282 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003283 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003284 "returned by __reduce__ must be an iterator, not %s",
3285 Py_TYPE(listitems)->tp_name);
3286 return -1;
3287 }
3288
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003289 if (dictitems == Py_None)
3290 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003291 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003292 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003293 "returned by __reduce__ must be an iterator, not %s",
3294 Py_TYPE(dictitems)->tp_name);
3295 return -1;
3296 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003297
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003298 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003299 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003300 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003301
Victor Stinner804e05e2013-11-14 01:26:17 +01003302 name = _PyObject_GetAttrId(callable, &PyId___name__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003303 if (name == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003304 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003305 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003306 }
3307 PyErr_Clear();
3308 }
3309 else if (self->proto >= 4) {
3310 _Py_IDENTIFIER(__newobj_ex__);
3311 use_newobj_ex = PyUnicode_Check(name) &&
3312 PyUnicode_Compare(
3313 name, _PyUnicode_FromId(&PyId___newobj_ex__)) == 0;
3314 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003315 }
3316 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003317 _Py_IDENTIFIER(__newobj__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003318 use_newobj = PyUnicode_Check(name) &&
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003319 PyUnicode_Compare(
3320 name, _PyUnicode_FromId(&PyId___newobj__)) == 0;
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003321 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003322 }
3323 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003324
3325 if (use_newobj_ex) {
3326 PyObject *cls;
3327 PyObject *args;
3328 PyObject *kwargs;
3329
3330 if (Py_SIZE(argtup) != 3) {
3331 PyErr_Format(PicklingError,
3332 "length of the NEWOBJ_EX argument tuple must be "
3333 "exactly 3, not %zd", Py_SIZE(argtup));
3334 return -1;
3335 }
3336
3337 cls = PyTuple_GET_ITEM(argtup, 0);
3338 if (!PyType_Check(cls)) {
3339 PyErr_Format(PicklingError,
3340 "first item from NEWOBJ_EX argument tuple must "
3341 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3342 return -1;
3343 }
3344 args = PyTuple_GET_ITEM(argtup, 1);
3345 if (!PyTuple_Check(args)) {
3346 PyErr_Format(PicklingError,
3347 "second item from NEWOBJ_EX argument tuple must "
3348 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3349 return -1;
3350 }
3351 kwargs = PyTuple_GET_ITEM(argtup, 2);
3352 if (!PyDict_Check(kwargs)) {
3353 PyErr_Format(PicklingError,
3354 "third item from NEWOBJ_EX argument tuple must "
3355 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3356 return -1;
3357 }
3358
3359 if (save(self, cls, 0) < 0 ||
3360 save(self, args, 0) < 0 ||
3361 save(self, kwargs, 0) < 0 ||
3362 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3363 return -1;
3364 }
3365 }
3366 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367 PyObject *cls;
3368 PyObject *newargtup;
3369 PyObject *obj_class;
3370 int p;
3371
3372 /* Sanity checks. */
3373 if (Py_SIZE(argtup) < 1) {
3374 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3375 return -1;
3376 }
3377
3378 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003379 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003380 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003381 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003382 return -1;
3383 }
3384
3385 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003386 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003387 p = obj_class != cls; /* true iff a problem */
3388 Py_DECREF(obj_class);
3389 if (p) {
3390 PyErr_SetString(PicklingError, "args[0] from "
3391 "__newobj__ args has the wrong class");
3392 return -1;
3393 }
3394 }
3395 /* XXX: These calls save() are prone to infinite recursion. Imagine
3396 what happen if the value returned by the __reduce__() method of
3397 some extension type contains another object of the same type. Ouch!
3398
3399 Here is a quick example, that I ran into, to illustrate what I
3400 mean:
3401
3402 >>> import pickle, copyreg
3403 >>> copyreg.dispatch_table.pop(complex)
3404 >>> pickle.dumps(1+2j)
3405 Traceback (most recent call last):
3406 ...
3407 RuntimeError: maximum recursion depth exceeded
3408
3409 Removing the complex class from copyreg.dispatch_table made the
3410 __reduce_ex__() method emit another complex object:
3411
3412 >>> (1+1j).__reduce_ex__(2)
3413 (<function __newobj__ at 0xb7b71c3c>,
3414 (<class 'complex'>, (1+1j)), None, None, None)
3415
3416 Thus when save() was called on newargstup (the 2nd item) recursion
3417 ensued. Of course, the bug was in the complex class which had a
3418 broken __getnewargs__() that emitted another complex object. But,
3419 the point, here, is it is quite easy to end up with a broken reduce
3420 function. */
3421
3422 /* Save the class and its __new__ arguments. */
3423 if (save(self, cls, 0) < 0)
3424 return -1;
3425
3426 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3427 if (newargtup == NULL)
3428 return -1;
3429
3430 p = save(self, newargtup, 0);
3431 Py_DECREF(newargtup);
3432 if (p < 0)
3433 return -1;
3434
3435 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003436 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003437 return -1;
3438 }
3439 else { /* Not using NEWOBJ. */
3440 if (save(self, callable, 0) < 0 ||
3441 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003442 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003443 return -1;
3444 }
3445
3446 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3447 the caller do not want to memoize the object. Not particularly useful,
3448 but that is to mimic the behavior save_reduce() in pickle.py when
3449 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003450 if (obj != NULL) {
3451 /* If the object is already in the memo, this means it is
3452 recursive. In this case, throw away everything we put on the
3453 stack, and fetch the object back from the memo. */
3454 if (PyMemoTable_Get(self->memo, obj)) {
3455 const char pop_op = POP;
3456
3457 if (_Pickler_Write(self, &pop_op, 1) < 0)
3458 return -1;
3459 if (memo_get(self, obj) < 0)
3460 return -1;
3461
3462 return 0;
3463 }
3464 else if (memo_put(self, obj) < 0)
3465 return -1;
3466 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003467
3468 if (listitems && batch_list(self, listitems) < 0)
3469 return -1;
3470
3471 if (dictitems && batch_dict(self, dictitems) < 0)
3472 return -1;
3473
3474 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003475 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003476 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003477 return -1;
3478 }
3479
3480 return 0;
3481}
3482
3483static int
3484save(PicklerObject *self, PyObject *obj, int pers_save)
3485{
3486 PyTypeObject *type;
3487 PyObject *reduce_func = NULL;
3488 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003489 int status = 0;
3490
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08003491 if (_Pickler_OpcodeBoundary(self) < 0)
3492 return -1;
3493
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003494 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003495 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003496
3497 /* The extra pers_save argument is necessary to avoid calling save_pers()
3498 on its returned object. */
3499 if (!pers_save && self->pers_func) {
3500 /* save_pers() returns:
3501 -1 to signal an error;
3502 0 if it did nothing successfully;
3503 1 if a persistent id was saved.
3504 */
3505 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3506 goto done;
3507 }
3508
3509 type = Py_TYPE(obj);
3510
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003511 /* The old cPickle had an optimization that used switch-case statement
3512 dispatching on the first letter of the type name. This has was removed
3513 since benchmarks shown that this optimization was actually slowing
3514 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003515
3516 /* Atom types; these aren't memoized, so don't check the memo. */
3517
3518 if (obj == Py_None) {
3519 status = save_none(self, obj);
3520 goto done;
3521 }
3522 else if (obj == Py_False || obj == Py_True) {
3523 status = save_bool(self, obj);
3524 goto done;
3525 }
3526 else if (type == &PyLong_Type) {
3527 status = save_long(self, obj);
3528 goto done;
3529 }
3530 else if (type == &PyFloat_Type) {
3531 status = save_float(self, obj);
3532 goto done;
3533 }
3534
3535 /* Check the memo to see if it has the object. If so, generate
3536 a GET (or BINGET) opcode, instead of pickling the object
3537 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003538 if (PyMemoTable_Get(self->memo, obj)) {
3539 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003540 goto error;
3541 goto done;
3542 }
3543
3544 if (type == &PyBytes_Type) {
3545 status = save_bytes(self, obj);
3546 goto done;
3547 }
3548 else if (type == &PyUnicode_Type) {
3549 status = save_unicode(self, obj);
3550 goto done;
3551 }
3552 else if (type == &PyDict_Type) {
3553 status = save_dict(self, obj);
3554 goto done;
3555 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003556 else if (type == &PySet_Type) {
3557 status = save_set(self, obj);
3558 goto done;
3559 }
3560 else if (type == &PyFrozenSet_Type) {
3561 status = save_frozenset(self, obj);
3562 goto done;
3563 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003564 else if (type == &PyList_Type) {
3565 status = save_list(self, obj);
3566 goto done;
3567 }
3568 else if (type == &PyTuple_Type) {
3569 status = save_tuple(self, obj);
3570 goto done;
3571 }
3572 else if (type == &PyType_Type) {
3573 status = save_global(self, obj, NULL);
3574 goto done;
3575 }
3576 else if (type == &PyFunction_Type) {
3577 status = save_global(self, obj, NULL);
Alexandre Vassalottifc912852013-11-24 03:07:35 -08003578 goto done;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003579 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003580
3581 /* XXX: This part needs some unit tests. */
3582
3583 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003584 * self.dispatch_table, copyreg.dispatch_table, the object's
3585 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003586 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003587 if (self->dispatch_table == NULL) {
3588 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3589 /* PyDict_GetItem() unlike PyObject_GetItem() and
3590 PyObject_GetAttr() returns a borrowed ref */
3591 Py_XINCREF(reduce_func);
3592 } else {
3593 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3594 if (reduce_func == NULL) {
3595 if (PyErr_ExceptionMatches(PyExc_KeyError))
3596 PyErr_Clear();
3597 else
3598 goto error;
3599 }
3600 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003601 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003602 Py_INCREF(obj);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003603 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003604 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003605 else if (PyType_IsSubtype(type, &PyType_Type)) {
3606 status = save_global(self, obj, NULL);
3607 goto done;
3608 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003609 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003610 _Py_IDENTIFIER(__reduce__);
3611 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003612
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003613
3614 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3615 automatically defined as __reduce__. While this is convenient, this
3616 make it impossible to know which method was actually called. Of
3617 course, this is not a big deal. But still, it would be nice to let
3618 the user know which method was called when something go
3619 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3620 don't actually have to check for a __reduce__ method. */
3621
3622 /* Check for a __reduce_ex__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003623 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003624 if (reduce_func != NULL) {
3625 PyObject *proto;
3626 proto = PyLong_FromLong(self->proto);
3627 if (proto != NULL) {
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003628 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003629 }
3630 }
3631 else {
3632 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3633 PyErr_Clear();
3634 else
3635 goto error;
3636 /* Check for a __reduce__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003637 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003638 if (reduce_func != NULL) {
3639 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3640 }
3641 else {
3642 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3643 type->tp_name, obj);
3644 goto error;
3645 }
3646 }
3647 }
3648
3649 if (reduce_value == NULL)
3650 goto error;
3651
3652 if (PyUnicode_Check(reduce_value)) {
3653 status = save_global(self, obj, reduce_value);
3654 goto done;
3655 }
3656
3657 if (!PyTuple_Check(reduce_value)) {
3658 PyErr_SetString(PicklingError,
3659 "__reduce__ must return a string or tuple");
3660 goto error;
3661 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003662
3663 status = save_reduce(self, reduce_value, obj);
3664
3665 if (0) {
3666 error:
3667 status = -1;
3668 }
3669 done:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08003670
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003671 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003672 Py_XDECREF(reduce_func);
3673 Py_XDECREF(reduce_value);
3674
3675 return status;
3676}
3677
3678static int
3679dump(PicklerObject *self, PyObject *obj)
3680{
3681 const char stop_op = STOP;
3682
3683 if (self->proto >= 2) {
3684 char header[2];
3685
3686 header[0] = PROTO;
3687 assert(self->proto >= 0 && self->proto < 256);
3688 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003689 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003690 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003691 if (self->proto >= 4)
3692 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003693 }
3694
3695 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003696 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003697 return -1;
3698
3699 return 0;
3700}
3701
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003702/*[clinic]
3703
3704_pickle.Pickler.clear_memo
3705
3706 self: PicklerObject
3707
3708Clears the pickler's "memo".
3709
3710The memo is the data structure that remembers which objects the
3711pickler has already seen, so that shared or recursive objects are
3712pickled by reference and not by value. This method is useful when
3713re-using picklers.
3714[clinic]*/
3715
3716PyDoc_STRVAR(_pickle_Pickler_clear_memo__doc__,
3717"clear_memo()\n"
3718"Clears the pickler\'s \"memo\".\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003719"\n"
3720"The memo is the data structure that remembers which objects the\n"
3721"pickler has already seen, so that shared or recursive objects are\n"
3722"pickled by reference and not by value. This method is useful when\n"
3723"re-using picklers.");
3724
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003725#define _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF \
3726 {"clear_memo", (PyCFunction)_pickle_Pickler_clear_memo, METH_NOARGS, _pickle_Pickler_clear_memo__doc__},
3727
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003728static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003729_pickle_Pickler_clear_memo(PicklerObject *self)
3730/*[clinic checksum: 9c32be7e7a17ff82a81aae409d0d4f469033a5b2]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003731{
3732 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003733 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003734
3735 Py_RETURN_NONE;
3736}
3737
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003738/*[clinic]
3739
3740_pickle.Pickler.dump
3741
3742 self: PicklerObject
3743 obj: object
3744 /
3745
3746Write a pickled representation of the given object to the open file.
3747[clinic]*/
3748
3749PyDoc_STRVAR(_pickle_Pickler_dump__doc__,
3750"dump(obj)\n"
3751"Write a pickled representation of the given object to the open file.");
3752
3753#define _PICKLE_PICKLER_DUMP_METHODDEF \
3754 {"dump", (PyCFunction)_pickle_Pickler_dump, METH_O, _pickle_Pickler_dump__doc__},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003755
3756static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003757_pickle_Pickler_dump(PicklerObject *self, PyObject *obj)
3758/*[clinic checksum: b72a69ec98737fabf66dae7c5a3210178bdbd3e6]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003759{
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003760 /* Check whether the Pickler was initialized correctly (issue3664).
3761 Developers often forget to call __init__() in their subclasses, which
3762 would trigger a segfault without this check. */
3763 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003764 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003765 "Pickler.__init__() was not called by %s.__init__()",
3766 Py_TYPE(self)->tp_name);
3767 return NULL;
3768 }
3769
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003770 if (_Pickler_ClearBuffer(self) < 0)
3771 return NULL;
3772
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003773 if (dump(self, obj) < 0)
3774 return NULL;
3775
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003776 if (_Pickler_FlushToFile(self) < 0)
3777 return NULL;
3778
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003779 Py_RETURN_NONE;
3780}
3781
3782static struct PyMethodDef Pickler_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003783 _PICKLE_PICKLER_DUMP_METHODDEF
3784 _PICKLE_PICKLER_CLEAR_MEMO_METHODDEF
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003785 {NULL, NULL} /* sentinel */
3786};
3787
3788static void
3789Pickler_dealloc(PicklerObject *self)
3790{
3791 PyObject_GC_UnTrack(self);
3792
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003793 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003794 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003795 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003796 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003797 Py_XDECREF(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003798 Py_XDECREF(self->fast_memo);
3799
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003800 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003801
3802 Py_TYPE(self)->tp_free((PyObject *)self);
3803}
3804
3805static int
3806Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3807{
3808 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003809 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003810 Py_VISIT(self->dispatch_table);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003811 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003812 Py_VISIT(self->fast_memo);
3813 return 0;
3814}
3815
3816static int
3817Pickler_clear(PicklerObject *self)
3818{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003819 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003820 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003821 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003822 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003823 Py_CLEAR(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003824 Py_CLEAR(self->fast_memo);
3825
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003826 if (self->memo != NULL) {
3827 PyMemoTable *memo = self->memo;
3828 self->memo = NULL;
3829 PyMemoTable_Del(memo);
3830 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003831 return 0;
3832}
3833
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003834
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003835/*[clinic]
3836
3837_pickle.Pickler.__init__
3838
3839 self: PicklerObject
3840 file: object
3841 protocol: object = NULL
3842 fix_imports: bool = True
3843
3844This takes a binary file for writing a pickle data stream.
3845
3846The optional protocol argument tells the pickler to use the
3847given protocol; supported protocols are 0, 1, 2, 3 and 4. The
3848default protocol is 3; a backward-incompatible protocol designed for
3849Python 3.
3850
3851Specifying a negative protocol version selects the highest
3852protocol version supported. The higher the protocol used, the
3853more recent the version of Python needed to read the pickle
3854produced.
3855
3856The file argument must have a write() method that accepts a single
3857bytes argument. It can thus be a file object opened for binary
3858writing, a io.BytesIO instance, or any other custom object that
3859meets this interface.
3860
3861If fix_imports is True and protocol is less than 3, pickle will try to
3862map the new Python 3 names to the old module names used in Python 2,
3863so that the pickle data stream is readable with Python 2.
3864[clinic]*/
3865
3866PyDoc_STRVAR(_pickle_Pickler___init____doc__,
3867"__init__(file, protocol=None, fix_imports=True)\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003868"This takes a binary file for writing a pickle data stream.\n"
3869"\n"
3870"The optional protocol argument tells the pickler to use the\n"
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003871"given protocol; supported protocols are 0, 1, 2, 3 and 4. The\n"
3872"default protocol is 3; a backward-incompatible protocol designed for\n"
3873"Python 3.\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003874"\n"
3875"Specifying a negative protocol version selects the highest\n"
3876"protocol version supported. The higher the protocol used, the\n"
3877"more recent the version of Python needed to read the pickle\n"
3878"produced.\n"
3879"\n"
3880"The file argument must have a write() method that accepts a single\n"
3881"bytes argument. It can thus be a file object opened for binary\n"
3882"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003883"meets this interface.\n"
3884"\n"
3885"If fix_imports is True and protocol is less than 3, pickle will try to\n"
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003886"map the new Python 3 names to the old module names used in Python 2,\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003887"so that the pickle data stream is readable with Python 2.");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003888
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003889#define _PICKLE_PICKLER___INIT___METHODDEF \
3890 {"__init__", (PyCFunction)_pickle_Pickler___init__, METH_VARARGS|METH_KEYWORDS, _pickle_Pickler___init____doc__},
3891
3892static PyObject *
3893_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, PyObject *protocol, int fix_imports);
3894
3895static PyObject *
3896_pickle_Pickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003897{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003898 PyObject *return_value = NULL;
3899 static char *_keywords[] = {"file", "protocol", "fix_imports", NULL};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003900 PyObject *file;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003901 PyObject *protocol = NULL;
3902 int fix_imports = 1;
3903
3904 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
3905 "O|Op:__init__", _keywords,
3906 &file, &protocol, &fix_imports))
3907 goto exit;
3908 return_value = _pickle_Pickler___init___impl((PicklerObject *)self, file, protocol, fix_imports);
3909
3910exit:
3911 return return_value;
3912}
3913
3914static PyObject *
3915_pickle_Pickler___init___impl(PicklerObject *self, PyObject *file, PyObject *protocol, int fix_imports)
3916/*[clinic checksum: c99ff417bd703a74affc4b708167e56e135e8969]*/
3917{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003918 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003919 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003920
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003921 /* In case of multiple __init__() calls, clear previous content. */
3922 if (self->write != NULL)
3923 (void)Pickler_clear(self);
3924
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003925 if (_Pickler_SetProtocol(self, protocol, fix_imports) < 0)
3926 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003927
3928 if (_Pickler_SetOutputStream(self, file) < 0)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003929 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003930
3931 /* memo and output_buffer may have already been created in _Pickler_New */
3932 if (self->memo == NULL) {
3933 self->memo = PyMemoTable_New();
3934 if (self->memo == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003935 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003936 }
3937 self->output_len = 0;
3938 if (self->output_buffer == NULL) {
3939 self->max_output_len = WRITE_BUF_SIZE;
3940 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3941 self->max_output_len);
3942 if (self->output_buffer == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003943 return NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003944 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003945
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08003946 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003947 self->fast = 0;
3948 self->fast_nesting = 0;
3949 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003950 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003951 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3952 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3953 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003954 if (self->pers_func == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003955 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003956 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003957 self->dispatch_table = NULL;
3958 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3959 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3960 &PyId_dispatch_table);
3961 if (self->dispatch_table == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003962 return NULL;
3963 }
3964 return Py_None;
3965}
3966
3967/* XXX Slight hack to slot a Clinic generated signature in tp_init. */
3968static int
3969Pickler_init(PyObject *self, PyObject *args, PyObject *kwargs)
3970{
3971 if (_pickle_Pickler___init__(self, args, kwargs) == NULL) {
3972 return -1;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003973 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003974 return 0;
3975}
3976
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003977/* Define a proxy object for the Pickler's internal memo object. This is to
3978 * avoid breaking code like:
3979 * pickler.memo.clear()
3980 * and
3981 * pickler.memo = saved_memo
3982 * Is this a good idea? Not really, but we don't want to break code that uses
3983 * it. Note that we don't implement the entire mapping API here. This is
3984 * intentional, as these should be treated as black-box implementation details.
3985 */
3986
3987typedef struct {
3988 PyObject_HEAD
3989 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3990} PicklerMemoProxyObject;
3991
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08003992/*[clinic]
3993_pickle.PicklerMemoProxy.clear
3994
3995 self: PicklerMemoProxyObject
3996
3997Remove all items from memo.
3998[clinic]*/
3999
4000PyDoc_STRVAR(_pickle_PicklerMemoProxy_clear__doc__,
4001"clear()\n"
4002"Remove all items from memo.");
4003
4004#define _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF \
4005 {"clear", (PyCFunction)_pickle_PicklerMemoProxy_clear, METH_NOARGS, _pickle_PicklerMemoProxy_clear__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004006
4007static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004008_pickle_PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4009/*[clinic checksum: 507f13938721992e175a3e58b5ad02620045a1cc]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004010{
4011 if (self->pickler->memo)
4012 PyMemoTable_Clear(self->pickler->memo);
4013 Py_RETURN_NONE;
4014}
4015
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004016/*[clinic]
4017_pickle.PicklerMemoProxy.copy
4018
4019 self: PicklerMemoProxyObject
4020
4021Copy the memo to a new object.
4022[clinic]*/
4023
4024PyDoc_STRVAR(_pickle_PicklerMemoProxy_copy__doc__,
4025"copy()\n"
4026"Copy the memo to a new object.");
4027
4028#define _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF \
4029 {"copy", (PyCFunction)_pickle_PicklerMemoProxy_copy, METH_NOARGS, _pickle_PicklerMemoProxy_copy__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004030
4031static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004032_pickle_PicklerMemoProxy_copy(PicklerMemoProxyObject *self)
4033/*[clinic checksum: 73a5117ab354290ebdbe07bd0bf7232d0936a69d]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004034{
4035 Py_ssize_t i;
4036 PyMemoTable *memo;
4037 PyObject *new_memo = PyDict_New();
4038 if (new_memo == NULL)
4039 return NULL;
4040
4041 memo = self->pickler->memo;
4042 for (i = 0; i < memo->mt_allocated; ++i) {
4043 PyMemoEntry entry = memo->mt_table[i];
4044 if (entry.me_key != NULL) {
4045 int status;
4046 PyObject *key, *value;
4047
4048 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004049 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004050
4051 if (key == NULL || value == NULL) {
4052 Py_XDECREF(key);
4053 Py_XDECREF(value);
4054 goto error;
4055 }
4056 status = PyDict_SetItem(new_memo, key, value);
4057 Py_DECREF(key);
4058 Py_DECREF(value);
4059 if (status < 0)
4060 goto error;
4061 }
4062 }
4063 return new_memo;
4064
4065 error:
4066 Py_XDECREF(new_memo);
4067 return NULL;
4068}
4069
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004070/*[clinic]
4071_pickle.PicklerMemoProxy.__reduce__
4072
4073 self: PicklerMemoProxyObject
4074
4075Implement pickle support.
4076[clinic]*/
4077
4078PyDoc_STRVAR(_pickle_PicklerMemoProxy___reduce____doc__,
4079"__reduce__()\n"
4080"Implement pickle support.");
4081
4082#define _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF \
4083 {"__reduce__", (PyCFunction)_pickle_PicklerMemoProxy___reduce__, METH_NOARGS, _pickle_PicklerMemoProxy___reduce____doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004084
4085static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004086_pickle_PicklerMemoProxy___reduce__(PicklerMemoProxyObject *self)
4087/*[clinic checksum: 40f0bf7a9b161e77130674f0481bda0a0184dcce]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004088{
4089 PyObject *reduce_value, *dict_args;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004090 PyObject *contents = _pickle_PicklerMemoProxy_copy(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004091 if (contents == NULL)
4092 return NULL;
4093
4094 reduce_value = PyTuple_New(2);
4095 if (reduce_value == NULL) {
4096 Py_DECREF(contents);
4097 return NULL;
4098 }
4099 dict_args = PyTuple_New(1);
4100 if (dict_args == NULL) {
4101 Py_DECREF(contents);
4102 Py_DECREF(reduce_value);
4103 return NULL;
4104 }
4105 PyTuple_SET_ITEM(dict_args, 0, contents);
4106 Py_INCREF((PyObject *)&PyDict_Type);
4107 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4108 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4109 return reduce_value;
4110}
4111
4112static PyMethodDef picklerproxy_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08004113 _PICKLE_PICKLERMEMOPROXY_CLEAR_METHODDEF
4114 _PICKLE_PICKLERMEMOPROXY_COPY_METHODDEF
4115 _PICKLE_PICKLERMEMOPROXY___REDUCE___METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004116 {NULL, NULL} /* sentinel */
4117};
4118
4119static void
4120PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4121{
4122 PyObject_GC_UnTrack(self);
4123 Py_XDECREF(self->pickler);
4124 PyObject_GC_Del((PyObject *)self);
4125}
4126
/* tp_traverse for PicklerMemoProxy: the pickler is the only object
   reference the proxy holds, so it is the only one reported to the GC.
   Py_VISIT relies on the parameters being named exactly `visit` and `arg`
   and returns early if the visitor reports a non-zero result. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
4134
/* tp_clear for PicklerMemoProxy: drop the reference to the pickler and
   reset the field to NULL.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
4141
/* Type object for the proxy returned by the Pickler `memo` getter.  Only the
   slots up to tp_methods are spelled out; the remaining ones are left
   zero-initialized by the compiler. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
4172
4173static PyObject *
4174PicklerMemoProxy_New(PicklerObject *pickler)
4175{
4176 PicklerMemoProxyObject *self;
4177
4178 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4179 if (self == NULL)
4180 return NULL;
4181 Py_INCREF(pickler);
4182 self->pickler = pickler;
4183 PyObject_GC_Track(self);
4184 return (PyObject *)self;
4185}
4186
4187/*****************************************************************************/
4188
/* Getter for the Pickler `memo` attribute: hands out a fresh
   PicklerMemoProxy bound to this pickler (NULL if it cannot be created). */
static PyObject *
Pickler_get_memo(PicklerObject *self)
{
    return PicklerMemoProxy_New(self);
}
4194
4195static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004196Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004197{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004198 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004200 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004201 PyErr_SetString(PyExc_TypeError,
4202 "attribute deletion is not supported");
4203 return -1;
4204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004205
4206 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4207 PicklerObject *pickler =
4208 ((PicklerMemoProxyObject *)obj)->pickler;
4209
4210 new_memo = PyMemoTable_Copy(pickler->memo);
4211 if (new_memo == NULL)
4212 return -1;
4213 }
4214 else if (PyDict_Check(obj)) {
4215 Py_ssize_t i = 0;
4216 PyObject *key, *value;
4217
4218 new_memo = PyMemoTable_New();
4219 if (new_memo == NULL)
4220 return -1;
4221
4222 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004223 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004224 PyObject *memo_obj;
4225
4226 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
4227 PyErr_SetString(PyExc_TypeError,
4228 "'memo' values must be 2-item tuples");
4229 goto error;
4230 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004231 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004232 if (memo_id == -1 && PyErr_Occurred())
4233 goto error;
4234 memo_obj = PyTuple_GET_ITEM(value, 1);
4235 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4236 goto error;
4237 }
4238 }
4239 else {
4240 PyErr_Format(PyExc_TypeError,
4241 "'memo' attribute must be an PicklerMemoProxy object"
4242 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004243 return -1;
4244 }
4245
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004246 PyMemoTable_Del(self->memo);
4247 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004248
4249 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004250
4251 error:
4252 if (new_memo)
4253 PyMemoTable_Del(new_memo);
4254 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004255}
4256
4257static PyObject *
4258Pickler_get_persid(PicklerObject *self)
4259{
4260 if (self->pers_func == NULL)
4261 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4262 else
4263 Py_INCREF(self->pers_func);
4264 return self->pers_func;
4265}
4266
4267static int
4268Pickler_set_persid(PicklerObject *self, PyObject *value)
4269{
4270 PyObject *tmp;
4271
4272 if (value == NULL) {
4273 PyErr_SetString(PyExc_TypeError,
4274 "attribute deletion is not supported");
4275 return -1;
4276 }
4277 if (!PyCallable_Check(value)) {
4278 PyErr_SetString(PyExc_TypeError,
4279 "persistent_id must be a callable taking one argument");
4280 return -1;
4281 }
4282
4283 tmp = self->pers_func;
4284 Py_INCREF(value);
4285 self->pers_func = value;
4286 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4287
4288 return 0;
4289}
4290
/* PicklerObject fields exposed directly as Python attributes (installed as
   tp_members of Pickler_Type).  `bin` and `fast` are plain C ints;
   `dispatch_table` is an object slot declared T_OBJECT_EX, which per the
   structmember documentation raises AttributeError while the slot is NULL. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
    {NULL}  /* sentinel */
};
4297
/* Computed attributes of Pickler objects (installed as tp_getset of
   Pickler_Type): `memo` and `persistent_id`, each backed by the
   getter/setter pair defined above. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel */
};
4305
/* Type object for _pickle.Pickler.  It is subclassable and GC-aware (see
   tp_flags), exposes its attributes through Pickler_members and
   Pickler_getsets, and is initialized by Pickler_init. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    _pickle_Pickler___init____doc__,    /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    Pickler_init,                       /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4348
Victor Stinner121aab42011-09-29 23:40:53 +02004349/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004350
4351 XXX: It would be nice to able to avoid Python function call overhead, by
4352 using directly the C version of find_class(), when find_class() is not
4353 overridden by a subclass. Although, this could become rather hackish. A
4354 simpler optimization would be to call the C function when self is not a
4355 subclass instance. */
4356static PyObject *
4357find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4358{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004359 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004360
4361 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
4362 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004363}
4364
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004365static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004366marker(UnpicklerObject *self)
4367{
4368 if (self->num_marks < 1) {
4369 PyErr_SetString(UnpicklingError, "could not find MARK");
4370 return -1;
4371 }
4372
4373 return self->marks[--self->num_marks];
4374}
4375
/* Append Py_None to the unpickler's stack.  PDATA_APPEND is a macro defined
   elsewhere in this file; its last argument is presumably the value it makes
   this function return on failure. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
4382
/* Report a truncated pickle: set UnpicklingError and return -1, so that
   callers can simply write `return bad_readline();`. */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
4389
4390static int
4391load_int(UnpicklerObject *self)
4392{
4393 PyObject *value;
4394 char *endptr, *s;
4395 Py_ssize_t len;
4396 long x;
4397
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004398 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004399 return -1;
4400 if (len < 2)
4401 return bad_readline();
4402
4403 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004404 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004405 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004406 x = strtol(s, &endptr, 0);
4407
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004408 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004409 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03004410 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004411 errno = 0;
4412 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004413 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004414 if (value == NULL) {
4415 PyErr_SetString(PyExc_ValueError,
4416 "could not convert string to int");
4417 return -1;
4418 }
4419 }
4420 else {
4421 if (len == 3 && (x == 0 || x == 1)) {
4422 if ((value = PyBool_FromLong(x)) == NULL)
4423 return -1;
4424 }
4425 else {
4426 if ((value = PyLong_FromLong(x)) == NULL)
4427 return -1;
4428 }
4429 }
4430
4431 PDATA_PUSH(self->stack, value, -1);
4432 return 0;
4433}
4434
/* Append the given bool singleton (Py_True or Py_False, checked by the
   assert) to the unpickler's stack. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
4442
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004443/* s contains x bytes of an unsigned little-endian integer. Return its value
4444 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4445 */
4446static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004447calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004448{
4449 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004450 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004451 size_t x = 0;
4452
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004453 for (i = 0; i < nbytes && i < sizeof(size_t); i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004454 x |= (size_t) s[i] << (8 * i);
4455 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004456
4457 if (x > PY_SSIZE_T_MAX)
4458 return -1;
4459 else
4460 return (Py_ssize_t) x;
4461}
4462
/* `bytes` holds `nbytes` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no bit is ever shifted
 * into the sign bit of a signed type (which is undefined behaviour where
 * long is 32 bits wide).
 */
static long
calc_binint(char *bytes, int nbytes)
{
    unsigned char *s = (unsigned char *)bytes;
    int i;
    unsigned long x = 0;

    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.
     */
    if (sizeof(long) > 4 && nbytes == 4) {
        if (x & (1UL << 31))
            x |= ~0xffffffffUL;
    }

    /* Map the two's-complement bit pattern back to a signed long without
       relying on an out-of-range unsigned-to-signed conversion. */
    if (x > (unsigned long)LONG_MAX)
        return -(long)(~x) - 1;
    return (long)x;
}
4489
4490static int
4491load_binintx(UnpicklerObject *self, char *s, int size)
4492{
4493 PyObject *value;
4494 long x;
4495
4496 x = calc_binint(s, size);
4497
4498 if ((value = PyLong_FromLong(x)) == NULL)
4499 return -1;
4500
4501 PDATA_PUSH(self->stack, value, -1);
4502 return 0;
4503}
4504
4505static int
4506load_binint(UnpicklerObject *self)
4507{
4508 char *s;
4509
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004510 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004511 return -1;
4512
4513 return load_binintx(self, s, 4);
4514}
4515
4516static int
4517load_binint1(UnpicklerObject *self)
4518{
4519 char *s;
4520
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004521 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004522 return -1;
4523
4524 return load_binintx(self, s, 1);
4525}
4526
4527static int
4528load_binint2(UnpicklerObject *self)
4529{
4530 char *s;
4531
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004532 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004533 return -1;
4534
4535 return load_binintx(self, s, 2);
4536}
4537
4538static int
4539load_long(UnpicklerObject *self)
4540{
4541 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004542 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004543 Py_ssize_t len;
4544
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004545 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004546 return -1;
4547 if (len < 2)
4548 return bad_readline();
4549
Mark Dickinson8dd05142009-01-20 20:43:58 +00004550 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4551 the 'L' before calling PyLong_FromString. In order to maintain
4552 compatibility with Python 3.0.0, we don't actually *require*
4553 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004554 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004555 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004556 /* XXX: Should the base argument explicitly set to 10? */
4557 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004558 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004559 return -1;
4560
4561 PDATA_PUSH(self->stack, value, -1);
4562 return 0;
4563}
4564
4565/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4566 * data following.
4567 */
4568static int
4569load_counted_long(UnpicklerObject *self, int size)
4570{
4571 PyObject *value;
4572 char *nbytes;
4573 char *pdata;
4574
4575 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004576 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004577 return -1;
4578
4579 size = calc_binint(nbytes, size);
4580 if (size < 0) {
4581 /* Corrupt or hostile pickle -- we never write one like this */
4582 PyErr_SetString(UnpicklingError,
4583 "LONG pickle has negative byte count");
4584 return -1;
4585 }
4586
4587 if (size == 0)
4588 value = PyLong_FromLong(0L);
4589 else {
4590 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004591 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004592 return -1;
4593 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4594 1 /* little endian */ , 1 /* signed */ );
4595 }
4596 if (value == NULL)
4597 return -1;
4598 PDATA_PUSH(self->stack, value, -1);
4599 return 0;
4600}
4601
4602static int
4603load_float(UnpicklerObject *self)
4604{
4605 PyObject *value;
4606 char *endptr, *s;
4607 Py_ssize_t len;
4608 double d;
4609
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004610 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004611 return -1;
4612 if (len < 2)
4613 return bad_readline();
4614
4615 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004616 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4617 if (d == -1.0 && PyErr_Occurred())
4618 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004619 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004620 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4621 return -1;
4622 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004623 value = PyFloat_FromDouble(d);
4624 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625 return -1;
4626
4627 PDATA_PUSH(self->stack, value, -1);
4628 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004629}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004630
4631static int
4632load_binfloat(UnpicklerObject *self)
4633{
4634 PyObject *value;
4635 double x;
4636 char *s;
4637
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004638 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004639 return -1;
4640
4641 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4642 if (x == -1.0 && PyErr_Occurred())
4643 return -1;
4644
4645 if ((value = PyFloat_FromDouble(x)) == NULL)
4646 return -1;
4647
4648 PDATA_PUSH(self->stack, value, -1);
4649 return 0;
4650}
4651
4652static int
4653load_string(UnpicklerObject *self)
4654{
4655 PyObject *bytes;
4656 PyObject *str = NULL;
4657 Py_ssize_t len;
4658 char *s, *p;
4659
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004660 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004661 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004662 /* Strip the newline */
4663 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004664 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004665 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666 p = s + 1;
4667 len -= 2;
4668 }
4669 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004670 PyErr_SetString(UnpicklingError,
4671 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004672 return -1;
4673 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004674 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004675
4676 /* Use the PyBytes API to decode the string, since that is what is used
4677 to encode, and then coerce the result to Unicode. */
4678 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004679 if (bytes == NULL)
4680 return -1;
4681 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4682 Py_DECREF(bytes);
4683 if (str == NULL)
4684 return -1;
4685
4686 PDATA_PUSH(self->stack, str, -1);
4687 return 0;
4688}
4689
4690static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004691load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004692{
4693 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004694 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004695 char *s;
4696
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004697 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004698 return -1;
4699
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004700 size = calc_binsize(s, nbytes);
4701 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004702 PyErr_Format(PyExc_OverflowError,
4703 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004704 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004705 return -1;
4706 }
4707
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004708 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004709 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004710
4711 bytes = PyBytes_FromStringAndSize(s, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004712 if (bytes == NULL)
4713 return -1;
4714
4715 PDATA_PUSH(self->stack, bytes, -1);
4716 return 0;
4717}
4718
4719static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004720load_counted_binstring(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004721{
4722 PyObject *str;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004723 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004724 char *s;
4725
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004726 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727 return -1;
4728
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004729 size = calc_binsize(s, nbytes);
4730 if (size < 0) {
4731 PyErr_Format(UnpicklingError,
4732 "BINSTRING exceeds system's maximum size of %zd bytes",
4733 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004734 return -1;
4735 }
4736
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004737 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004738 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004739 /* Convert Python 2.x strings to unicode. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004740 str = PyUnicode_Decode(s, size, self->encoding, self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004741 if (str == NULL)
4742 return -1;
4743
4744 PDATA_PUSH(self->stack, str, -1);
4745 return 0;
4746}
4747
4748static int
4749load_unicode(UnpicklerObject *self)
4750{
4751 PyObject *str;
4752 Py_ssize_t len;
4753 char *s;
4754
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004755 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004756 return -1;
4757 if (len < 1)
4758 return bad_readline();
4759
4760 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4761 if (str == NULL)
4762 return -1;
4763
4764 PDATA_PUSH(self->stack, str, -1);
4765 return 0;
4766}
4767
4768static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004769load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004770{
4771 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004772 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004773 char *s;
4774
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004775 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004776 return -1;
4777
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004778 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004780 PyErr_Format(PyExc_OverflowError,
4781 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004782 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004783 return -1;
4784 }
4785
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004786 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004787 return -1;
4788
Victor Stinner485fb562010-04-13 11:07:24 +00004789 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004790 if (str == NULL)
4791 return -1;
4792
4793 PDATA_PUSH(self->stack, str, -1);
4794 return 0;
4795}
4796
4797static int
4798load_tuple(UnpicklerObject *self)
4799{
4800 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004801 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004802
4803 if ((i = marker(self)) < 0)
4804 return -1;
4805
4806 tuple = Pdata_poptuple(self->stack, i);
4807 if (tuple == NULL)
4808 return -1;
4809 PDATA_PUSH(self->stack, tuple, -1);
4810 return 0;
4811}
4812
4813static int
4814load_counted_tuple(UnpicklerObject *self, int len)
4815{
4816 PyObject *tuple;
4817
4818 tuple = PyTuple_New(len);
4819 if (tuple == NULL)
4820 return -1;
4821
4822 while (--len >= 0) {
4823 PyObject *item;
4824
4825 PDATA_POP(self->stack, item);
4826 if (item == NULL)
4827 return -1;
4828 PyTuple_SET_ITEM(tuple, len, item);
4829 }
4830 PDATA_PUSH(self->stack, tuple, -1);
4831 return 0;
4832}
4833
4834static int
4835load_empty_list(UnpicklerObject *self)
4836{
4837 PyObject *list;
4838
4839 if ((list = PyList_New(0)) == NULL)
4840 return -1;
4841 PDATA_PUSH(self->stack, list, -1);
4842 return 0;
4843}
4844
4845static int
4846load_empty_dict(UnpicklerObject *self)
4847{
4848 PyObject *dict;
4849
4850 if ((dict = PyDict_New()) == NULL)
4851 return -1;
4852 PDATA_PUSH(self->stack, dict, -1);
4853 return 0;
4854}
4855
4856static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004857load_empty_set(UnpicklerObject *self)
4858{
4859 PyObject *set;
4860
4861 if ((set = PySet_New(NULL)) == NULL)
4862 return -1;
4863 PDATA_PUSH(self->stack, set, -1);
4864 return 0;
4865}
4866
4867static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004868load_list(UnpicklerObject *self)
4869{
4870 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004871 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872
4873 if ((i = marker(self)) < 0)
4874 return -1;
4875
4876 list = Pdata_poplist(self->stack, i);
4877 if (list == NULL)
4878 return -1;
4879 PDATA_PUSH(self->stack, list, -1);
4880 return 0;
4881}
4882
4883static int
4884load_dict(UnpicklerObject *self)
4885{
4886 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004887 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004888
4889 if ((i = marker(self)) < 0)
4890 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004891 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004892
4893 if ((dict = PyDict_New()) == NULL)
4894 return -1;
4895
4896 for (k = i + 1; k < j; k += 2) {
4897 key = self->stack->data[k - 1];
4898 value = self->stack->data[k];
4899 if (PyDict_SetItem(dict, key, value) < 0) {
4900 Py_DECREF(dict);
4901 return -1;
4902 }
4903 }
4904 Pdata_clear(self->stack, i);
4905 PDATA_PUSH(self->stack, dict, -1);
4906 return 0;
4907}
4908
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004909static int
4910load_frozenset(UnpicklerObject *self)
4911{
4912 PyObject *items;
4913 PyObject *frozenset;
4914 Py_ssize_t i;
4915
4916 if ((i = marker(self)) < 0)
4917 return -1;
4918
4919 items = Pdata_poptuple(self->stack, i);
4920 if (items == NULL)
4921 return -1;
4922
4923 frozenset = PyFrozenSet_New(items);
4924 Py_DECREF(items);
4925 if (frozenset == NULL)
4926 return -1;
4927
4928 PDATA_PUSH(self->stack, frozenset, -1);
4929 return 0;
4930}
4931
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004932static PyObject *
4933instantiate(PyObject *cls, PyObject *args)
4934{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004935 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004936 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004937 /* Caller must assure args are a tuple. Normally, args come from
4938 Pdata_poptuple which packs objects from the top of the stack
4939 into a newly created tuple. */
4940 assert(PyTuple_Check(args));
4941 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004942 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004943 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004945 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004946 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004947
4948 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004949 }
4950 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004951}
4952
4953static int
4954load_obj(UnpicklerObject *self)
4955{
4956 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004957 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004958
4959 if ((i = marker(self)) < 0)
4960 return -1;
4961
4962 args = Pdata_poptuple(self->stack, i + 1);
4963 if (args == NULL)
4964 return -1;
4965
4966 PDATA_POP(self->stack, cls);
4967 if (cls) {
4968 obj = instantiate(cls, args);
4969 Py_DECREF(cls);
4970 }
4971 Py_DECREF(args);
4972 if (obj == NULL)
4973 return -1;
4974
4975 PDATA_PUSH(self->stack, obj, -1);
4976 return 0;
4977}
4978
4979static int
4980load_inst(UnpicklerObject *self)
4981{
4982 PyObject *cls = NULL;
4983 PyObject *args = NULL;
4984 PyObject *obj = NULL;
4985 PyObject *module_name;
4986 PyObject *class_name;
4987 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004988 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004989 char *s;
4990
4991 if ((i = marker(self)) < 0)
4992 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004993 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004994 return -1;
4995 if (len < 2)
4996 return bad_readline();
4997
4998 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4999 identifiers are permitted in Python 3.0, since the INST opcode is only
5000 supported by older protocols on Python 2.x. */
5001 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
5002 if (module_name == NULL)
5003 return -1;
5004
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005005 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005006 if (len < 2)
5007 return bad_readline();
5008 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005009 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005010 cls = find_class(self, module_name, class_name);
5011 Py_DECREF(class_name);
5012 }
5013 }
5014 Py_DECREF(module_name);
5015
5016 if (cls == NULL)
5017 return -1;
5018
5019 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
5020 obj = instantiate(cls, args);
5021 Py_DECREF(args);
5022 }
5023 Py_DECREF(cls);
5024
5025 if (obj == NULL)
5026 return -1;
5027
5028 PDATA_PUSH(self->stack, obj, -1);
5029 return 0;
5030}
5031
5032static int
5033load_newobj(UnpicklerObject *self)
5034{
5035 PyObject *args = NULL;
5036 PyObject *clsraw = NULL;
5037 PyTypeObject *cls; /* clsraw cast to its true type */
5038 PyObject *obj;
5039
5040 /* Stack is ... cls argtuple, and we want to call
5041 * cls.__new__(cls, *argtuple).
5042 */
5043 PDATA_POP(self->stack, args);
5044 if (args == NULL)
5045 goto error;
5046 if (!PyTuple_Check(args)) {
5047 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
5048 goto error;
5049 }
5050
5051 PDATA_POP(self->stack, clsraw);
5052 cls = (PyTypeObject *)clsraw;
5053 if (cls == NULL)
5054 goto error;
5055 if (!PyType_Check(cls)) {
5056 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
5057 "isn't a type object");
5058 goto error;
5059 }
5060 if (cls->tp_new == NULL) {
5061 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
5062 "has NULL tp_new");
5063 goto error;
5064 }
5065
5066 /* Call __new__. */
5067 obj = cls->tp_new(cls, args, NULL);
5068 if (obj == NULL)
5069 goto error;
5070
5071 Py_DECREF(args);
5072 Py_DECREF(clsraw);
5073 PDATA_PUSH(self->stack, obj, -1);
5074 return 0;
5075
5076 error:
5077 Py_XDECREF(args);
5078 Py_XDECREF(clsraw);
5079 return -1;
5080}
5081
5082static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005083load_newobj_ex(UnpicklerObject *self)
5084{
5085 PyObject *cls, *args, *kwargs;
5086 PyObject *obj;
5087
5088 PDATA_POP(self->stack, kwargs);
5089 if (kwargs == NULL) {
5090 return -1;
5091 }
5092 PDATA_POP(self->stack, args);
5093 if (args == NULL) {
5094 Py_DECREF(kwargs);
5095 return -1;
5096 }
5097 PDATA_POP(self->stack, cls);
5098 if (cls == NULL) {
5099 Py_DECREF(kwargs);
5100 Py_DECREF(args);
5101 return -1;
5102 }
5103
5104 if (!PyType_Check(cls)) {
5105 Py_DECREF(kwargs);
5106 Py_DECREF(args);
5107 Py_DECREF(cls);
5108 PyErr_Format(UnpicklingError,
5109 "NEWOBJ_EX class argument must be a type, not %.200s",
5110 Py_TYPE(cls)->tp_name);
5111 return -1;
5112 }
5113
5114 if (((PyTypeObject *)cls)->tp_new == NULL) {
5115 Py_DECREF(kwargs);
5116 Py_DECREF(args);
5117 Py_DECREF(cls);
5118 PyErr_SetString(UnpicklingError,
5119 "NEWOBJ_EX class argument doesn't have __new__");
5120 return -1;
5121 }
5122 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5123 Py_DECREF(kwargs);
5124 Py_DECREF(args);
5125 Py_DECREF(cls);
5126 if (obj == NULL) {
5127 return -1;
5128 }
5129 PDATA_PUSH(self->stack, obj, -1);
5130 return 0;
5131}
5132
5133static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005134load_global(UnpicklerObject *self)
5135{
5136 PyObject *global = NULL;
5137 PyObject *module_name;
5138 PyObject *global_name;
5139 Py_ssize_t len;
5140 char *s;
5141
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005142 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005143 return -1;
5144 if (len < 2)
5145 return bad_readline();
5146 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5147 if (!module_name)
5148 return -1;
5149
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005150 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005151 if (len < 2) {
5152 Py_DECREF(module_name);
5153 return bad_readline();
5154 }
5155 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5156 if (global_name) {
5157 global = find_class(self, module_name, global_name);
5158 Py_DECREF(global_name);
5159 }
5160 }
5161 Py_DECREF(module_name);
5162
5163 if (global == NULL)
5164 return -1;
5165 PDATA_PUSH(self->stack, global, -1);
5166 return 0;
5167}
5168
5169static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005170load_stack_global(UnpicklerObject *self)
5171{
5172 PyObject *global;
5173 PyObject *module_name;
5174 PyObject *global_name;
5175
5176 PDATA_POP(self->stack, global_name);
5177 PDATA_POP(self->stack, module_name);
5178 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5179 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5180 PyErr_SetString(UnpicklingError, "STACK_GLOBAL requires str");
5181 Py_XDECREF(global_name);
5182 Py_XDECREF(module_name);
5183 return -1;
5184 }
5185 global = find_class(self, module_name, global_name);
5186 Py_DECREF(global_name);
5187 Py_DECREF(module_name);
5188 if (global == NULL)
5189 return -1;
5190 PDATA_PUSH(self->stack, global, -1);
5191 return 0;
5192}
5193
5194static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005195load_persid(UnpicklerObject *self)
5196{
5197 PyObject *pid;
5198 Py_ssize_t len;
5199 char *s;
5200
5201 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005202 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005203 return -1;
5204 if (len < 2)
5205 return bad_readline();
5206
5207 pid = PyBytes_FromStringAndSize(s, len - 1);
5208 if (pid == NULL)
5209 return -1;
5210
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08005211 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
5212 reference to pid first. */
5213 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005214 if (pid == NULL)
5215 return -1;
5216
5217 PDATA_PUSH(self->stack, pid, -1);
5218 return 0;
5219 }
5220 else {
5221 PyErr_SetString(UnpicklingError,
5222 "A load persistent id instruction was encountered,\n"
5223 "but no persistent_load function was specified.");
5224 return -1;
5225 }
5226}
5227
5228static int
5229load_binpersid(UnpicklerObject *self)
5230{
5231 PyObject *pid;
5232
5233 if (self->pers_func) {
5234 PDATA_POP(self->stack, pid);
5235 if (pid == NULL)
5236 return -1;
5237
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08005238 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005239 reference to pid first. */
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08005240 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005241 if (pid == NULL)
5242 return -1;
5243
5244 PDATA_PUSH(self->stack, pid, -1);
5245 return 0;
5246 }
5247 else {
5248 PyErr_SetString(UnpicklingError,
5249 "A load persistent id instruction was encountered,\n"
5250 "but no persistent_load function was specified.");
5251 return -1;
5252 }
5253}
5254
5255static int
5256load_pop(UnpicklerObject *self)
5257{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005258 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005259
5260 /* Note that we split the (pickle.py) stack into two stacks,
5261 * an object stack and a mark stack. We have to be clever and
5262 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00005263 * mark stack first, and only signalling a stack underflow if
5264 * the object stack is empty and the mark stack doesn't match
5265 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005266 */
Collin Winter8ca69de2009-05-26 16:53:41 +00005267 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005268 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00005269 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005270 len--;
5271 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005272 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00005273 } else {
5274 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005275 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005276 return 0;
5277}
5278
5279static int
5280load_pop_mark(UnpicklerObject *self)
5281{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005282 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005283
5284 if ((i = marker(self)) < 0)
5285 return -1;
5286
5287 Pdata_clear(self->stack, i);
5288
5289 return 0;
5290}
5291
5292static int
5293load_dup(UnpicklerObject *self)
5294{
5295 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005296 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005297
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005298 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005299 return stack_underflow();
5300 last = self->stack->data[len - 1];
5301 PDATA_APPEND(self->stack, last, -1);
5302 return 0;
5303}
5304
5305static int
5306load_get(UnpicklerObject *self)
5307{
5308 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005309 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005310 Py_ssize_t len;
5311 char *s;
5312
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005313 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005314 return -1;
5315 if (len < 2)
5316 return bad_readline();
5317
5318 key = PyLong_FromString(s, NULL, 10);
5319 if (key == NULL)
5320 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005321 idx = PyLong_AsSsize_t(key);
5322 if (idx == -1 && PyErr_Occurred()) {
5323 Py_DECREF(key);
5324 return -1;
5325 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005327 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005328 if (value == NULL) {
5329 if (!PyErr_Occurred())
5330 PyErr_SetObject(PyExc_KeyError, key);
5331 Py_DECREF(key);
5332 return -1;
5333 }
5334 Py_DECREF(key);
5335
5336 PDATA_APPEND(self->stack, value, -1);
5337 return 0;
5338}
5339
5340static int
5341load_binget(UnpicklerObject *self)
5342{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005343 PyObject *value;
5344 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005345 char *s;
5346
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005347 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005348 return -1;
5349
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005350 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005351
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005352 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005353 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005354 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005355 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005356 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005357 Py_DECREF(key);
5358 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005359 return -1;
5360 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005361
5362 PDATA_APPEND(self->stack, value, -1);
5363 return 0;
5364}
5365
5366static int
5367load_long_binget(UnpicklerObject *self)
5368{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005369 PyObject *value;
5370 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005371 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005372
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005373 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005374 return -1;
5375
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005376 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005377
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005378 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005379 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005380 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005381 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005382 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005383 Py_DECREF(key);
5384 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005385 return -1;
5386 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005387
5388 PDATA_APPEND(self->stack, value, -1);
5389 return 0;
5390}
5391
5392/* Push an object from the extension registry (EXT[124]). nbytes is
5393 * the number of bytes following the opcode, holding the index (code) value.
5394 */
5395static int
5396load_extension(UnpicklerObject *self, int nbytes)
5397{
5398 char *codebytes; /* the nbytes bytes after the opcode */
5399 long code; /* calc_binint returns long */
5400 PyObject *py_code; /* code as a Python int */
5401 PyObject *obj; /* the object to push */
5402 PyObject *pair; /* (module_name, class_name) */
5403 PyObject *module_name, *class_name;
5404
5405 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005406 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005407 return -1;
5408 code = calc_binint(codebytes, nbytes);
5409 if (code <= 0) { /* note that 0 is forbidden */
5410 /* Corrupt or hostile pickle. */
5411 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
5412 return -1;
5413 }
5414
5415 /* Look for the code in the cache. */
5416 py_code = PyLong_FromLong(code);
5417 if (py_code == NULL)
5418 return -1;
5419 obj = PyDict_GetItem(extension_cache, py_code);
5420 if (obj != NULL) {
5421 /* Bingo. */
5422 Py_DECREF(py_code);
5423 PDATA_APPEND(self->stack, obj, -1);
5424 return 0;
5425 }
5426
5427 /* Look up the (module_name, class_name) pair. */
5428 pair = PyDict_GetItem(inverted_registry, py_code);
5429 if (pair == NULL) {
5430 Py_DECREF(py_code);
5431 PyErr_Format(PyExc_ValueError, "unregistered extension "
5432 "code %ld", code);
5433 return -1;
5434 }
5435 /* Since the extension registry is manipulable via Python code,
5436 * confirm that pair is really a 2-tuple of strings.
5437 */
5438 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5439 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5440 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5441 Py_DECREF(py_code);
5442 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5443 "isn't a 2-tuple of strings", code);
5444 return -1;
5445 }
5446 /* Load the object. */
5447 obj = find_class(self, module_name, class_name);
5448 if (obj == NULL) {
5449 Py_DECREF(py_code);
5450 return -1;
5451 }
5452 /* Cache code -> obj. */
5453 code = PyDict_SetItem(extension_cache, py_code, obj);
5454 Py_DECREF(py_code);
5455 if (code < 0) {
5456 Py_DECREF(obj);
5457 return -1;
5458 }
5459 PDATA_PUSH(self->stack, obj, -1);
5460 return 0;
5461}
5462
5463static int
5464load_put(UnpicklerObject *self)
5465{
5466 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005467 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005468 Py_ssize_t len;
5469 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005470
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005471 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005472 return -1;
5473 if (len < 2)
5474 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005475 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005476 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005477 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005478
5479 key = PyLong_FromString(s, NULL, 10);
5480 if (key == NULL)
5481 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005482 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005483 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005484 if (idx < 0) {
5485 if (!PyErr_Occurred())
5486 PyErr_SetString(PyExc_ValueError,
5487 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005488 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005489 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005490
5491 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005492}
5493
5494static int
5495load_binput(UnpicklerObject *self)
5496{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005497 PyObject *value;
5498 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005499 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005500
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005501 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005502 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005503
5504 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005505 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005506 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005507
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005508 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005510 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005511}
5512
5513static int
5514load_long_binput(UnpicklerObject *self)
5515{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005516 PyObject *value;
5517 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005518 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005519
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005520 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005521 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005522
5523 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005524 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005525 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005526
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005527 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005528 if (idx < 0) {
5529 PyErr_SetString(PyExc_ValueError,
5530 "negative LONG_BINPUT argument");
5531 return -1;
5532 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005533
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005534 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005535}
5536
5537static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005538load_memoize(UnpicklerObject *self)
5539{
5540 PyObject *value;
5541
5542 if (Py_SIZE(self->stack) <= 0)
5543 return stack_underflow();
5544 value = self->stack->data[Py_SIZE(self->stack) - 1];
5545
5546 return _Unpickler_MemoPut(self, self->memo_len, value);
5547}
5548
5549static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005550do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005551{
5552 PyObject *value;
5553 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005554 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005555
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005556 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005557 if (x > len || x <= 0)
5558 return stack_underflow();
5559 if (len == x) /* nothing to do */
5560 return 0;
5561
5562 list = self->stack->data[x - 1];
5563
5564 if (PyList_Check(list)) {
5565 PyObject *slice;
5566 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005567 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005568
5569 slice = Pdata_poplist(self->stack, x);
5570 if (!slice)
5571 return -1;
5572 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005573 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005574 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005575 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005576 }
5577 else {
5578 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005579 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005580
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005581 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005582 if (append_func == NULL)
5583 return -1;
5584 for (i = x; i < len; i++) {
5585 PyObject *result;
5586
5587 value = self->stack->data[i];
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08005588 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005589 if (result == NULL) {
5590 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005591 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005592 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005593 return -1;
5594 }
5595 Py_DECREF(result);
5596 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005597 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005598 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005599 }
5600
5601 return 0;
5602}
5603
5604static int
5605load_append(UnpicklerObject *self)
5606{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005607 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005608}
5609
5610static int
5611load_appends(UnpicklerObject *self)
5612{
5613 return do_append(self, marker(self));
5614}
5615
5616static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005617do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005618{
5619 PyObject *value, *key;
5620 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005621 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005622 int status = 0;
5623
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005624 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005625 if (x > len || x <= 0)
5626 return stack_underflow();
5627 if (len == x) /* nothing to do */
5628 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005629 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005630 /* Currupt or hostile pickle -- we never write one like this. */
5631 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5632 return -1;
5633 }
5634
5635 /* Here, dict does not actually need to be a PyDict; it could be anything
5636 that supports the __setitem__ attribute. */
5637 dict = self->stack->data[x - 1];
5638
5639 for (i = x + 1; i < len; i += 2) {
5640 key = self->stack->data[i - 1];
5641 value = self->stack->data[i];
5642 if (PyObject_SetItem(dict, key, value) < 0) {
5643 status = -1;
5644 break;
5645 }
5646 }
5647
5648 Pdata_clear(self->stack, x);
5649 return status;
5650}
5651
5652static int
5653load_setitem(UnpicklerObject *self)
5654{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005655 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005656}
5657
5658static int
5659load_setitems(UnpicklerObject *self)
5660{
5661 return do_setitems(self, marker(self));
5662}
5663
5664static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005665load_additems(UnpicklerObject *self)
5666{
5667 PyObject *set;
5668 Py_ssize_t mark, len, i;
5669
5670 mark = marker(self);
5671 len = Py_SIZE(self->stack);
5672 if (mark > len || mark <= 0)
5673 return stack_underflow();
5674 if (len == mark) /* nothing to do */
5675 return 0;
5676
5677 set = self->stack->data[mark - 1];
5678
5679 if (PySet_Check(set)) {
5680 PyObject *items;
5681 int status;
5682
5683 items = Pdata_poptuple(self->stack, mark);
5684 if (items == NULL)
5685 return -1;
5686
5687 status = _PySet_Update(set, items);
5688 Py_DECREF(items);
5689 return status;
5690 }
5691 else {
5692 PyObject *add_func;
5693 _Py_IDENTIFIER(add);
5694
5695 add_func = _PyObject_GetAttrId(set, &PyId_add);
5696 if (add_func == NULL)
5697 return -1;
5698 for (i = mark; i < len; i++) {
5699 PyObject *result;
5700 PyObject *item;
5701
5702 item = self->stack->data[i];
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08005703 result = _Unpickler_FastCall(self, add_func, item);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005704 if (result == NULL) {
5705 Pdata_clear(self->stack, i + 1);
5706 Py_SIZE(self->stack) = mark;
5707 return -1;
5708 }
5709 Py_DECREF(result);
5710 }
5711 Py_SIZE(self->stack) = mark;
5712 }
5713
5714 return 0;
5715}
5716
5717static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005718load_build(UnpicklerObject *self)
5719{
5720 PyObject *state, *inst, *slotstate;
5721 PyObject *setstate;
5722 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005723 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005724
5725 /* Stack is ... instance, state. We want to leave instance at
5726 * the stack top, possibly mutated via instance.__setstate__(state).
5727 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005728 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005729 return stack_underflow();
5730
5731 PDATA_POP(self->stack, state);
5732 if (state == NULL)
5733 return -1;
5734
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005735 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005736
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005737 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005738 if (setstate == NULL) {
5739 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5740 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005741 else {
5742 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005743 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005744 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005745 }
5746 else {
5747 PyObject *result;
5748
5749 /* The explicit __setstate__ is responsible for everything. */
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08005750 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
5751 reference to state first. */
5752 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005753 Py_DECREF(setstate);
5754 if (result == NULL)
5755 return -1;
5756 Py_DECREF(result);
5757 return 0;
5758 }
5759
5760 /* A default __setstate__. First see whether state embeds a
5761 * slot state dict too (a proto 2 addition).
5762 */
5763 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5764 PyObject *tmp = state;
5765
5766 state = PyTuple_GET_ITEM(tmp, 0);
5767 slotstate = PyTuple_GET_ITEM(tmp, 1);
5768 Py_INCREF(state);
5769 Py_INCREF(slotstate);
5770 Py_DECREF(tmp);
5771 }
5772 else
5773 slotstate = NULL;
5774
5775 /* Set inst.__dict__ from the state dict (if any). */
5776 if (state != Py_None) {
5777 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005778 PyObject *d_key, *d_value;
5779 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005780 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005781
5782 if (!PyDict_Check(state)) {
5783 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5784 goto error;
5785 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005786 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005787 if (dict == NULL)
5788 goto error;
5789
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005790 i = 0;
5791 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5792 /* normally the keys for instance attributes are
5793 interned. we should try to do that here. */
5794 Py_INCREF(d_key);
5795 if (PyUnicode_CheckExact(d_key))
5796 PyUnicode_InternInPlace(&d_key);
5797 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5798 Py_DECREF(d_key);
5799 goto error;
5800 }
5801 Py_DECREF(d_key);
5802 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005803 Py_DECREF(dict);
5804 }
5805
5806 /* Also set instance attributes from the slotstate dict (if any). */
5807 if (slotstate != NULL) {
5808 PyObject *d_key, *d_value;
5809 Py_ssize_t i;
5810
5811 if (!PyDict_Check(slotstate)) {
5812 PyErr_SetString(UnpicklingError,
5813 "slot state is not a dictionary");
5814 goto error;
5815 }
5816 i = 0;
5817 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5818 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5819 goto error;
5820 }
5821 }
5822
5823 if (0) {
5824 error:
5825 status = -1;
5826 }
5827
5828 Py_DECREF(state);
5829 Py_XDECREF(slotstate);
5830 return status;
5831}
5832
5833static int
5834load_mark(UnpicklerObject *self)
5835{
5836
5837 /* Note that we split the (pickle.py) stack into two stacks, an
5838 * object stack and a mark stack. Here we push a mark onto the
5839 * mark stack.
5840 */
5841
5842 if ((self->num_marks + 1) >= self->marks_size) {
5843 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005844 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005845
5846 /* Use the size_t type to check for overflow. */
5847 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005848 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005849 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005850 PyErr_NoMemory();
5851 return -1;
5852 }
5853
5854 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005855 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005856 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005857 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5858 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005859 if (marks == NULL) {
5860 PyErr_NoMemory();
5861 return -1;
5862 }
5863 self->marks = marks;
5864 self->marks_size = (Py_ssize_t)alloc;
5865 }
5866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005867 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005868
5869 return 0;
5870}
5871
5872static int
5873load_reduce(UnpicklerObject *self)
5874{
5875 PyObject *callable = NULL;
5876 PyObject *argtup = NULL;
5877 PyObject *obj = NULL;
5878
5879 PDATA_POP(self->stack, argtup);
5880 if (argtup == NULL)
5881 return -1;
5882 PDATA_POP(self->stack, callable);
5883 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005884 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005885 Py_DECREF(callable);
5886 }
5887 Py_DECREF(argtup);
5888
5889 if (obj == NULL)
5890 return -1;
5891
5892 PDATA_PUSH(self->stack, obj, -1);
5893 return 0;
5894}
5895
5896/* Just raises an error if we don't know the protocol specified. PROTO
5897 * is the first opcode for protocols >= 2.
5898 */
5899static int
5900load_proto(UnpicklerObject *self)
5901{
5902 char *s;
5903 int i;
5904
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005905 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005906 return -1;
5907
5908 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005909 if (i <= HIGHEST_PROTOCOL) {
5910 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005911 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005912 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005913
5914 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5915 return -1;
5916}
5917
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08005918static int
5919load_frame(UnpicklerObject *self)
5920{
5921 char *s;
5922 Py_ssize_t frame_len;
5923
5924 if (_Unpickler_Read(self, &s, 8) < 0)
5925 return -1;
5926
5927 frame_len = calc_binsize(s, 8);
5928 if (frame_len < 0) {
5929 PyErr_Format(PyExc_OverflowError,
5930 "FRAME length exceeds system's maximum of %zd bytes",
5931 PY_SSIZE_T_MAX);
5932 return -1;
5933 }
5934
5935 if (_Unpickler_Read(self, &s, frame_len) < 0)
5936 return -1;
5937
5938 /* Rewind to start of frame */
5939 self->next_read_idx -= frame_len;
5940 return 0;
5941}
5942
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005943static PyObject *
5944load(UnpicklerObject *self)
5945{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005946 PyObject *value = NULL;
5947 char *s;
5948
5949 self->num_marks = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005950 self->proto = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005951 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005952 Pdata_clear(self->stack, 0);
5953
5954 /* Convenient macros for the dispatch while-switch loop just below. */
5955#define OP(opcode, load_func) \
5956 case opcode: if (load_func(self) < 0) break; continue;
5957
5958#define OP_ARG(opcode, load_func, arg) \
5959 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5960
5961 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005962 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005963 break;
5964
5965 switch ((enum opcode)s[0]) {
5966 OP(NONE, load_none)
5967 OP(BININT, load_binint)
5968 OP(BININT1, load_binint1)
5969 OP(BININT2, load_binint2)
5970 OP(INT, load_int)
5971 OP(LONG, load_long)
5972 OP_ARG(LONG1, load_counted_long, 1)
5973 OP_ARG(LONG4, load_counted_long, 4)
5974 OP(FLOAT, load_float)
5975 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005976 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
5977 OP_ARG(BINBYTES, load_counted_binbytes, 4)
5978 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
5979 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
5980 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005981 OP(STRING, load_string)
5982 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005983 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
5984 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
5985 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005986 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5987 OP_ARG(TUPLE1, load_counted_tuple, 1)
5988 OP_ARG(TUPLE2, load_counted_tuple, 2)
5989 OP_ARG(TUPLE3, load_counted_tuple, 3)
5990 OP(TUPLE, load_tuple)
5991 OP(EMPTY_LIST, load_empty_list)
5992 OP(LIST, load_list)
5993 OP(EMPTY_DICT, load_empty_dict)
5994 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005995 OP(EMPTY_SET, load_empty_set)
5996 OP(ADDITEMS, load_additems)
5997 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005998 OP(OBJ, load_obj)
5999 OP(INST, load_inst)
6000 OP(NEWOBJ, load_newobj)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006001 OP(NEWOBJ_EX, load_newobj_ex)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006002 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006003 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006004 OP(APPEND, load_append)
6005 OP(APPENDS, load_appends)
6006 OP(BUILD, load_build)
6007 OP(DUP, load_dup)
6008 OP(BINGET, load_binget)
6009 OP(LONG_BINGET, load_long_binget)
6010 OP(GET, load_get)
6011 OP(MARK, load_mark)
6012 OP(BINPUT, load_binput)
6013 OP(LONG_BINPUT, load_long_binput)
6014 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006015 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006016 OP(POP, load_pop)
6017 OP(POP_MARK, load_pop_mark)
6018 OP(SETITEM, load_setitem)
6019 OP(SETITEMS, load_setitems)
6020 OP(PERSID, load_persid)
6021 OP(BINPERSID, load_binpersid)
6022 OP(REDUCE, load_reduce)
6023 OP(PROTO, load_proto)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006024 OP(FRAME, load_frame)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006025 OP_ARG(EXT1, load_extension, 1)
6026 OP_ARG(EXT2, load_extension, 2)
6027 OP_ARG(EXT4, load_extension, 4)
6028 OP_ARG(NEWTRUE, load_bool, Py_True)
6029 OP_ARG(NEWFALSE, load_bool, Py_False)
6030
6031 case STOP:
6032 break;
6033
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006034 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04006035 if (s[0] == '\0')
6036 PyErr_SetNone(PyExc_EOFError);
6037 else
6038 PyErr_Format(UnpicklingError,
6039 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006040 return NULL;
6041 }
6042
6043 break; /* and we are done! */
6044 }
6045
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006046 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006047 return NULL;
6048 }
6049
Victor Stinner2ae57e32013-10-31 13:39:23 +01006050 if (_Unpickler_SkipConsumed(self) < 0)
6051 return NULL;
6052
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006053 PDATA_POP(self->stack, value);
6054 return value;
6055}
6056
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006057/*[clinic]
6058
6059_pickle.Unpickler.load
6060
6061Load a pickle.
6062
6063Read a pickled object representation from the open file object given in
6064the constructor, and return the reconstituted object hierarchy specified
6065therein.
6066[clinic]*/
6067
6068PyDoc_STRVAR(_pickle_Unpickler_load__doc__,
6069"load()\n"
6070"Load a pickle.\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006071"\n"
6072"Read a pickled object representation from the open file object given in\n"
6073"the constructor, and return the reconstituted object hierarchy specified\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006074"therein.");
6075
6076#define _PICKLE_UNPICKLER_LOAD_METHODDEF \
6077 {"load", (PyCFunction)_pickle_Unpickler_load, METH_NOARGS, _pickle_Unpickler_load__doc__},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006078
6079static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006080_pickle_Unpickler_load(PyObject *self)
6081/*[clinic checksum: 9a30ba4e4d9221d4dcd705e1471ab11b2c9e3ac6]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006082{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006083 UnpicklerObject *unpickler = (UnpicklerObject*)self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006084 /* Check whether the Unpickler was initialized correctly. This prevents
6085 segfaulting if a subclass overridden __init__ with a function that does
6086 not call Unpickler.__init__(). Here, we simply ensure that self->read
6087 is not NULL. */
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006088 if (unpickler->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02006089 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006090 "Unpickler.__init__() was not called by %s.__init__()",
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006091 Py_TYPE(unpickler)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006092 return NULL;
6093 }
6094
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006095 return load(unpickler);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006096}
6097
6098/* The name of find_class() is misleading. In newer pickle protocols, this
6099 function is used for loading any global (i.e., functions), not just
6100 classes. The name is kept only for backward compatibility. */
6101
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006102/*[clinic]
6103
6104_pickle.Unpickler.find_class
6105
6106 self: UnpicklerObject
6107 module_name: object
6108 global_name: object
6109 /
6110
6111Return an object from a specified module.
6112
6113If necessary, the module will be imported. Subclasses may override this
6114method (e.g. to restrict unpickling of arbitrary classes and functions).
6115
6116This method is called whenever a class or a function object is
6117needed. Both arguments passed are str objects.
6118[clinic]*/
6119
6120PyDoc_STRVAR(_pickle_Unpickler_find_class__doc__,
6121"find_class(module_name, global_name)\n"
6122"Return an object from a specified module.\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006123"\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006124"If necessary, the module will be imported. Subclasses may override this\n"
6125"method (e.g. to restrict unpickling of arbitrary classes and functions).\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006126"\n"
6127"This method is called whenever a class or a function object is\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006128"needed. Both arguments passed are str objects.");
6129
6130#define _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF \
6131 {"find_class", (PyCFunction)_pickle_Unpickler_find_class, METH_VARARGS, _pickle_Unpickler_find_class__doc__},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006132
6133static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006134_pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name, PyObject *global_name);
6135
6136static PyObject *
6137_pickle_Unpickler_find_class(PyObject *self, PyObject *args)
6138{
6139 PyObject *return_value = NULL;
6140 PyObject *module_name;
6141 PyObject *global_name;
6142
6143 if (!PyArg_ParseTuple(args,
6144 "OO:find_class",
6145 &module_name, &global_name))
6146 goto exit;
6147 return_value = _pickle_Unpickler_find_class_impl((UnpicklerObject *)self, module_name, global_name);
6148
6149exit:
6150 return return_value;
6151}
6152
6153static PyObject *
6154_pickle_Unpickler_find_class_impl(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
6155/*[clinic checksum: b7d05d4dd8adc698e5780c1ac2be0f5062d33915]*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006156{
6157 PyObject *global;
6158 PyObject *modules_dict;
6159 PyObject *module;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006160 _Py_IDENTIFIER(modules);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006161
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006162 /* Try to map the old names used in Python 2.x to the new ones used in
6163 Python 3.x. We do this only with old pickle protocols and when the
6164 user has not disabled the feature. */
6165 if (self->proto < 3 && self->fix_imports) {
6166 PyObject *key;
6167 PyObject *item;
6168
6169 /* Check if the global (i.e., a function or a class) was renamed
6170 or moved to another module. */
6171 key = PyTuple_Pack(2, module_name, global_name);
6172 if (key == NULL)
6173 return NULL;
6174 item = PyDict_GetItemWithError(name_mapping_2to3, key);
6175 Py_DECREF(key);
6176 if (item) {
6177 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6178 PyErr_Format(PyExc_RuntimeError,
6179 "_compat_pickle.NAME_MAPPING values should be "
6180 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6181 return NULL;
6182 }
6183 module_name = PyTuple_GET_ITEM(item, 0);
6184 global_name = PyTuple_GET_ITEM(item, 1);
6185 if (!PyUnicode_Check(module_name) ||
6186 !PyUnicode_Check(global_name)) {
6187 PyErr_Format(PyExc_RuntimeError,
6188 "_compat_pickle.NAME_MAPPING values should be "
6189 "pairs of str, not (%.200s, %.200s)",
6190 Py_TYPE(module_name)->tp_name,
6191 Py_TYPE(global_name)->tp_name);
6192 return NULL;
6193 }
6194 }
6195 else if (PyErr_Occurred()) {
6196 return NULL;
6197 }
6198
6199 /* Check if the module was renamed. */
6200 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
6201 if (item) {
6202 if (!PyUnicode_Check(item)) {
6203 PyErr_Format(PyExc_RuntimeError,
6204 "_compat_pickle.IMPORT_MAPPING values should be "
6205 "strings, not %.200s", Py_TYPE(item)->tp_name);
6206 return NULL;
6207 }
6208 module_name = item;
6209 }
6210 else if (PyErr_Occurred()) {
6211 return NULL;
6212 }
6213 }
6214
Victor Stinnerbb520202013-11-06 22:40:41 +01006215 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02006216 if (modules_dict == NULL) {
6217 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006218 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02006219 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006220
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006221 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006222 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006223 if (PyErr_Occurred())
6224 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006225 module = PyImport_Import(module_name);
6226 if (module == NULL)
6227 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006228 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006229 Py_DECREF(module);
6230 }
Victor Stinner121aab42011-09-29 23:40:53 +02006231 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006232 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006233 }
6234 return global;
6235}
6236
/* Method table for Unpickler objects.  Each entry is supplied by the
   *_METHODDEF macro defined next to the corresponding method above. */
static struct PyMethodDef Unpickler_methods[] = {
    _PICKLE_UNPICKLER_LOAD_METHODDEF
    _PICKLE_UNPICKLER_FIND_CLASS_METHODDEF
    {NULL, NULL}                /* sentinel */
};
6242
6243static void
6244Unpickler_dealloc(UnpicklerObject *self)
6245{
6246 PyObject_GC_UnTrack((PyObject *)self);
6247 Py_XDECREF(self->readline);
6248 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006249 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006250 Py_XDECREF(self->stack);
6251 Py_XDECREF(self->pers_func);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08006252 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006253 if (self->buffer.buf != NULL) {
6254 PyBuffer_Release(&self->buffer);
6255 self->buffer.buf = NULL;
6256 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006258 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006259 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006260 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006261 PyMem_Free(self->encoding);
6262 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006263
6264 Py_TYPE(self)->tp_free((PyObject *)self);
6265}
6266
6267static int
6268Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6269{
6270 Py_VISIT(self->readline);
6271 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006272 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006273 Py_VISIT(self->stack);
6274 Py_VISIT(self->pers_func);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08006275 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006276 return 0;
6277}
6278
6279static int
6280Unpickler_clear(UnpicklerObject *self)
6281{
6282 Py_CLEAR(self->readline);
6283 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006284 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006285 Py_CLEAR(self->stack);
6286 Py_CLEAR(self->pers_func);
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08006287 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006288 if (self->buffer.buf != NULL) {
6289 PyBuffer_Release(&self->buffer);
6290 self->buffer.buf = NULL;
6291 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006293 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006294 PyMem_Free(self->marks);
6295 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006296 PyMem_Free(self->input_line);
6297 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006298 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006299 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006300 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006301 self->errors = NULL;
6302
6303 return 0;
6304}
6305
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006306/*[clinic]
6307
6308_pickle.Unpickler.__init__
6309
6310 self: UnpicklerObject
6311 file: object
6312 *
6313 fix_imports: bool = True
6314 encoding: str = 'ASCII'
6315 errors: str = 'strict'
6316
6317This takes a binary file for reading a pickle data stream.
6318
6319The protocol version of the pickle is detected automatically, so no
6320proto argument is needed.
6321
6322The file-like object must have two methods, a read() method
6323that takes an integer argument, and a readline() method that
6324requires no arguments. Both methods should return bytes.
6325Thus file-like object can be a binary file object opened for
6326reading, a BytesIO object, or any other custom object that
6327meets this interface.
6328
6329Optional keyword arguments are *fix_imports*, *encoding* and *errors*,
which are used to control compatibility support for pickle stream
6331generated by Python 2.x. If *fix_imports* is True, pickle will try to
6332map the old Python 2.x names to the new names used in Python 3.x. The
6333*encoding* and *errors* tell pickle how to decode 8-bit string
6334instances pickled by Python 2.x; these default to 'ASCII' and
6335'strict', respectively.
6336
6337[clinic]*/
6338
6339PyDoc_STRVAR(_pickle_Unpickler___init____doc__,
6340"__init__(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006341"This takes a binary file for reading a pickle data stream.\n"
6342"\n"
6343"The protocol version of the pickle is detected automatically, so no\n"
6344"proto argument is needed.\n"
6345"\n"
6346"The file-like object must have two methods, a read() method\n"
6347"that takes an integer argument, and a readline() method that\n"
6348"requires no arguments. Both methods should return bytes.\n"
6349"Thus file-like object can be a binary file object opened for\n"
6350"reading, a BytesIO object, or any other custom object that\n"
6351"meets this interface.\n"
6352"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006353"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
6354"which are used to control compatiblity support for pickle stream\n"
6355"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
6356"map the old Python 2.x names to the new names used in Python 3.x. The\n"
6357"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006358"instances pickled by Python 2.x; these default to \'ASCII\' and\n"
6359"\'strict\', respectively.");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006360
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006361#define _PICKLE_UNPICKLER___INIT___METHODDEF \
6362 {"__init__", (PyCFunction)_pickle_Unpickler___init__, METH_VARARGS|METH_KEYWORDS, _pickle_Unpickler___init____doc__},
6363
6364static PyObject *
6365_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, int fix_imports, const char *encoding, const char *errors);
6366
6367static PyObject *
6368_pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006369{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006370 PyObject *return_value = NULL;
6371 static char *_keywords[] = {"file", "fix_imports", "encoding", "errors", NULL};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006372 PyObject *file;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006373 int fix_imports = 1;
6374 const char *encoding = "ASCII";
6375 const char *errors = "strict";
6376
6377 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
6378 "O|$pss:__init__", _keywords,
6379 &file, &fix_imports, &encoding, &errors))
6380 goto exit;
6381 return_value = _pickle_Unpickler___init___impl((UnpicklerObject *)self, file, fix_imports, encoding, errors);
6382
6383exit:
6384 return return_value;
6385}
6386
6387static PyObject *
6388_pickle_Unpickler___init___impl(UnpicklerObject *self, PyObject *file, int fix_imports, const char *encoding, const char *errors)
6389/*[clinic checksum: bed0d8bbe1c647960ccc6f997b33bf33935fa56f]*/
6390{
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006391 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006392
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006393 /* In case of multiple __init__() calls, clear previous content. */
6394 if (self->read != NULL)
6395 (void)Unpickler_clear(self);
6396
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006397 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006398 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006399
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006400 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006401 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006402
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006403 self->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006404 if (self->fix_imports == -1)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006405 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006406
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006407 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006408 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
6409 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006410 if (self->pers_func == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006411 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006412 }
6413 else {
6414 self->pers_func = NULL;
6415 }
6416
6417 self->stack = (Pdata *)Pdata_New();
6418 if (self->stack == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006419 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006420
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006421 self->memo_size = 32;
6422 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006423 if (self->memo == NULL)
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006424 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006425
Alexandre Vassalottib4a04fb2013-11-25 13:25:12 -08006426 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006427 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00006428
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006429 return Py_None;
6430}
6431
6432/* XXX Slight hack to slot a Clinic generated signature in tp_init. */
6433static int
6434Unpickler_init(PyObject *self, PyObject *args, PyObject *kwargs)
6435{
6436 if (_pickle_Unpickler___init__(self, args, kwargs) == NULL) {
6437 return -1;
6438 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006439 return 0;
6440}
6441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006442/* Define a proxy object for the Unpickler's internal memo object. This is to
6443 * avoid breaking code like:
6444 * unpickler.memo.clear()
6445 * and
6446 * unpickler.memo = saved_memo
6447 * Is this a good idea? Not really, but we don't want to break code that uses
6448 * it. Note that we don't implement the entire mapping API here. This is
6449 * intentional, as these should be treated as black-box implementation details.
6450 *
6451 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02006452 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006453 */
6454
typedef struct {
    PyObject_HEAD
    /* The Unpickler whose memo is exposed through this proxy.  The proxy
       owns a reference to it: it is taken in UnpicklerMemoProxy_New() and
       released when the proxy is cleared or deallocated. */
    UnpicklerObject *unpickler;
} UnpicklerMemoProxyObject;
6459
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006460/*[clinic]
6461_pickle.UnpicklerMemoProxy.clear
6462
6463 self: UnpicklerMemoProxyObject
6464
6465Remove all items from memo.
6466[clinic]*/
6467
6468PyDoc_STRVAR(_pickle_UnpicklerMemoProxy_clear__doc__,
6469"clear()\n"
6470"Remove all items from memo.");
6471
6472#define _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF \
6473 {"clear", (PyCFunction)_pickle_UnpicklerMemoProxy_clear, METH_NOARGS, _pickle_UnpicklerMemoProxy_clear__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006474
6475static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006476_pickle_UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
6477/*[clinic checksum: 46fecf4e33c0c873124f845edf6cc3a2e9864bd5]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006478{
6479 _Unpickler_MemoCleanup(self->unpickler);
6480 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6481 if (self->unpickler->memo == NULL)
6482 return NULL;
6483 Py_RETURN_NONE;
6484}
6485
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006486/*[clinic]
6487_pickle.UnpicklerMemoProxy.copy
6488
6489 self: UnpicklerMemoProxyObject
6490
6491Copy the memo to a new object.
6492[clinic]*/
6493
6494PyDoc_STRVAR(_pickle_UnpicklerMemoProxy_copy__doc__,
6495"copy()\n"
6496"Copy the memo to a new object.");
6497
6498#define _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF \
6499 {"copy", (PyCFunction)_pickle_UnpicklerMemoProxy_copy, METH_NOARGS, _pickle_UnpicklerMemoProxy_copy__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006500
6501static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006502_pickle_UnpicklerMemoProxy_copy(UnpicklerMemoProxyObject *self)
6503/*[clinic checksum: f8856c4e8a33540886dfbb245f286af3008fa0ad]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006504{
6505 Py_ssize_t i;
6506 PyObject *new_memo = PyDict_New();
6507 if (new_memo == NULL)
6508 return NULL;
6509
6510 for (i = 0; i < self->unpickler->memo_size; i++) {
6511 int status;
6512 PyObject *key, *value;
6513
6514 value = self->unpickler->memo[i];
6515 if (value == NULL)
6516 continue;
6517
6518 key = PyLong_FromSsize_t(i);
6519 if (key == NULL)
6520 goto error;
6521 status = PyDict_SetItem(new_memo, key, value);
6522 Py_DECREF(key);
6523 if (status < 0)
6524 goto error;
6525 }
6526 return new_memo;
6527
6528error:
6529 Py_DECREF(new_memo);
6530 return NULL;
6531}
6532
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006533/*[clinic]
6534_pickle.UnpicklerMemoProxy.__reduce__
6535
6536 self: UnpicklerMemoProxyObject
6537
6538Implement pickling support.
6539[clinic]*/
6540
6541PyDoc_STRVAR(_pickle_UnpicklerMemoProxy___reduce____doc__,
6542"__reduce__()\n"
6543"Implement pickling support.");
6544
6545#define _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF \
6546 {"__reduce__", (PyCFunction)_pickle_UnpicklerMemoProxy___reduce__, METH_NOARGS, _pickle_UnpicklerMemoProxy___reduce____doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006547
6548static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006549_pickle_UnpicklerMemoProxy___reduce__(UnpicklerMemoProxyObject *self)
6550/*[clinic checksum: ab5516a77659144e1191c7dd70a0c6c7455660bc]*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006551{
6552 PyObject *reduce_value;
6553 PyObject *constructor_args;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006554 PyObject *contents = _pickle_UnpicklerMemoProxy_copy(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006555 if (contents == NULL)
6556 return NULL;
6557
6558 reduce_value = PyTuple_New(2);
6559 if (reduce_value == NULL) {
6560 Py_DECREF(contents);
6561 return NULL;
6562 }
6563 constructor_args = PyTuple_New(1);
6564 if (constructor_args == NULL) {
6565 Py_DECREF(contents);
6566 Py_DECREF(reduce_value);
6567 return NULL;
6568 }
6569 PyTuple_SET_ITEM(constructor_args, 0, contents);
6570 Py_INCREF((PyObject *)&PyDict_Type);
6571 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6572 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6573 return reduce_value;
6574}
6575
/* Method table for UnpicklerMemoProxy objects.  Each entry is supplied by
   the *_METHODDEF macro defined next to the corresponding method above. */
static PyMethodDef unpicklerproxy_methods[] = {
    _PICKLE_UNPICKLERMEMOPROXY_CLEAR_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY_COPY_METHODDEF
    _PICKLE_UNPICKLERMEMOPROXY___REDUCE___METHODDEF
    {NULL, NULL}    /* sentinel */
};
6582
6583static void
6584UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
6585{
6586 PyObject_GC_UnTrack(self);
6587 Py_XDECREF(self->unpickler);
6588 PyObject_GC_Del((PyObject *)self);
6589}
6590
6591static int
6592UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
6593 visitproc visit, void *arg)
6594{
6595 Py_VISIT(self->unpickler);
6596 return 0;
6597}
6598
6599static int
6600UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
6601{
6602 Py_CLEAR(self->unpickler);
6603 return 0;
6604}
6605
/* Type object for the proxy returned by the Unpickler "memo" attribute.
   Instances are GC-tracked and unhashable; the remaining slots are left
   zero. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /*tp_name*/
    sizeof(UnpicklerMemoProxyObject),           /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
6636
6637static PyObject *
6638UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6639{
6640 UnpicklerMemoProxyObject *self;
6641
6642 self = PyObject_GC_New(UnpicklerMemoProxyObject,
6643 &UnpicklerMemoProxyType);
6644 if (self == NULL)
6645 return NULL;
6646 Py_INCREF(unpickler);
6647 self->unpickler = unpickler;
6648 PyObject_GC_Track(self);
6649 return (PyObject *)self;
6650}
6651
6652/*****************************************************************************/
6653
6654
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006655static PyObject *
6656Unpickler_get_memo(UnpicklerObject *self)
6657{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006658 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006659}
6660
6661static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006662Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006663{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006664 PyObject **new_memo;
6665 Py_ssize_t new_memo_size = 0;
6666 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006667
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006668 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006669 PyErr_SetString(PyExc_TypeError,
6670 "attribute deletion is not supported");
6671 return -1;
6672 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006673
6674 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
6675 UnpicklerObject *unpickler =
6676 ((UnpicklerMemoProxyObject *)obj)->unpickler;
6677
6678 new_memo_size = unpickler->memo_size;
6679 new_memo = _Unpickler_NewMemo(new_memo_size);
6680 if (new_memo == NULL)
6681 return -1;
6682
6683 for (i = 0; i < new_memo_size; i++) {
6684 Py_XINCREF(unpickler->memo[i]);
6685 new_memo[i] = unpickler->memo[i];
6686 }
6687 }
6688 else if (PyDict_Check(obj)) {
6689 Py_ssize_t i = 0;
6690 PyObject *key, *value;
6691
6692 new_memo_size = PyDict_Size(obj);
6693 new_memo = _Unpickler_NewMemo(new_memo_size);
6694 if (new_memo == NULL)
6695 return -1;
6696
6697 while (PyDict_Next(obj, &i, &key, &value)) {
6698 Py_ssize_t idx;
6699 if (!PyLong_Check(key)) {
6700 PyErr_SetString(PyExc_TypeError,
6701 "memo key must be integers");
6702 goto error;
6703 }
6704 idx = PyLong_AsSsize_t(key);
6705 if (idx == -1 && PyErr_Occurred())
6706 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02006707 if (idx < 0) {
6708 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02006709 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02006710 goto error;
6711 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006712 if (_Unpickler_MemoPut(self, idx, value) < 0)
6713 goto error;
6714 }
6715 }
6716 else {
6717 PyErr_Format(PyExc_TypeError,
6718 "'memo' attribute must be an UnpicklerMemoProxy object"
6719 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006720 return -1;
6721 }
6722
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006723 _Unpickler_MemoCleanup(self);
6724 self->memo_size = new_memo_size;
6725 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006726
6727 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006728
6729 error:
6730 if (new_memo_size) {
6731 i = new_memo_size;
6732 while (--i >= 0) {
6733 Py_XDECREF(new_memo[i]);
6734 }
6735 PyMem_FREE(new_memo);
6736 }
6737 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006738}
6739
6740static PyObject *
6741Unpickler_get_persload(UnpicklerObject *self)
6742{
6743 if (self->pers_func == NULL)
6744 PyErr_SetString(PyExc_AttributeError, "persistent_load");
6745 else
6746 Py_INCREF(self->pers_func);
6747 return self->pers_func;
6748}
6749
6750static int
6751Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6752{
6753 PyObject *tmp;
6754
6755 if (value == NULL) {
6756 PyErr_SetString(PyExc_TypeError,
6757 "attribute deletion is not supported");
6758 return -1;
6759 }
6760 if (!PyCallable_Check(value)) {
6761 PyErr_SetString(PyExc_TypeError,
6762 "persistent_load must be a callable taking "
6763 "one argument");
6764 return -1;
6765 }
6766
6767 tmp = self->pers_func;
6768 Py_INCREF(value);
6769 self->pers_func = value;
6770 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6771
6772 return 0;
6773}
6774
6775static PyGetSetDef Unpickler_getsets[] = {
6776 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6777 {"persistent_load", (getter)Unpickler_get_persload,
6778 (setter)Unpickler_set_persload},
6779 {NULL}
6780};
6781
6782static PyTypeObject Unpickler_Type = {
6783 PyVarObject_HEAD_INIT(NULL, 0)
6784 "_pickle.Unpickler", /*tp_name*/
6785 sizeof(UnpicklerObject), /*tp_basicsize*/
6786 0, /*tp_itemsize*/
6787 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6788 0, /*tp_print*/
6789 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006790 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006791 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006792 0, /*tp_repr*/
6793 0, /*tp_as_number*/
6794 0, /*tp_as_sequence*/
6795 0, /*tp_as_mapping*/
6796 0, /*tp_hash*/
6797 0, /*tp_call*/
6798 0, /*tp_str*/
6799 0, /*tp_getattro*/
6800 0, /*tp_setattro*/
6801 0, /*tp_as_buffer*/
6802 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006803 _pickle_Unpickler___init____doc__, /*tp_doc*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006804 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6805 (inquiry)Unpickler_clear, /*tp_clear*/
6806 0, /*tp_richcompare*/
6807 0, /*tp_weaklistoffset*/
6808 0, /*tp_iter*/
6809 0, /*tp_iternext*/
6810 Unpickler_methods, /*tp_methods*/
6811 0, /*tp_members*/
6812 Unpickler_getsets, /*tp_getset*/
6813 0, /*tp_base*/
6814 0, /*tp_dict*/
6815 0, /*tp_descr_get*/
6816 0, /*tp_descr_set*/
6817 0, /*tp_dictoffset*/
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006818 Unpickler_init, /*tp_init*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006819 PyType_GenericAlloc, /*tp_alloc*/
6820 PyType_GenericNew, /*tp_new*/
6821 PyObject_GC_Del, /*tp_free*/
6822 0, /*tp_is_gc*/
6823};
6824
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006825/*[clinic]
6826
6827_pickle.dump
6828
6829 obj: object
6830 file: object
6831 protocol: object = NULL
6832 *
6833 fix_imports: bool = True
6834
6835Write a pickled representation of obj to the open file object file.
6836
6837This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more
6838efficient.
6839
The optional protocol argument tells the pickler to use the given protocol;
supported protocols are 0, 1, 2, 3 and 4. The default protocol is 3; a
backward-incompatible protocol designed for Python 3.0.
6843
6844Specifying a negative protocol version selects the highest protocol version
6845supported. The higher the protocol used, the more recent the version of
6846Python needed to read the pickle produced.
6847
6848The file argument must have a write() method that accepts a single bytes
6849argument. It can thus be a file object opened for binary writing, a
6850io.BytesIO instance, or any other custom object that meets this interface.
6851
6852If fix_imports is True and protocol is less than 3, pickle will try to
6853map the new Python 3.x names to the old module names used in Python 2.x,
6854so that the pickle data stream is readable with Python 2.x.
6855[clinic]*/
6856
6857PyDoc_STRVAR(_pickle_dump__doc__,
6858"dump(obj, file, protocol=None, *, fix_imports=True)\n"
6859"Write a pickled representation of obj to the open file object file.\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006860"\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006861"This is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006862"efficient.\n"
6863"\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006864"The optional protocol argument tells the pickler to use the given protocol\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006865"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6866"backward-incompatible protocol designed for Python 3.0.\n"
6867"\n"
6868"Specifying a negative protocol version selects the highest protocol version\n"
6869"supported. The higher the protocol used, the more recent the version of\n"
6870"Python needed to read the pickle produced.\n"
6871"\n"
6872"The file argument must have a write() method that accepts a single bytes\n"
6873"argument. It can thus be a file object opened for binary writing, a\n"
6874"io.BytesIO instance, or any other custom object that meets this interface.\n"
6875"\n"
6876"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6877"map the new Python 3.x names to the old module names used in Python 2.x,\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006878"so that the pickle data stream is readable with Python 2.x.");
6879
6880#define _PICKLE_DUMP_METHODDEF \
6881 {"dump", (PyCFunction)_pickle_dump, METH_VARARGS|METH_KEYWORDS, _pickle_dump__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006882
6883static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006884_pickle_dump_impl(PyModuleDef *module, PyObject *obj, PyObject *file, PyObject *protocol, int fix_imports);
6885
6886static PyObject *
6887_pickle_dump(PyModuleDef *module, PyObject *args, PyObject *kwargs)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006888{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006889 PyObject *return_value = NULL;
6890 static char *_keywords[] = {"obj", "file", "protocol", "fix_imports", NULL};
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006891 PyObject *obj;
6892 PyObject *file;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006893 PyObject *protocol = NULL;
6894 int fix_imports = 1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006895
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006896 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
6897 "OO|O$p:dump", _keywords,
6898 &obj, &file, &protocol, &fix_imports))
6899 goto exit;
6900 return_value = _pickle_dump_impl(module, obj, file, protocol, fix_imports);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006901
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006902exit:
6903 return return_value;
6904}
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006905
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006906static PyObject *
6907_pickle_dump_impl(PyModuleDef *module, PyObject *obj, PyObject *file, PyObject *protocol, int fix_imports)
6908/*[clinic checksum: e442721b16052d921b5e3fbd146d0a62e94a459e]*/
6909{
6910 PicklerObject *pickler = _Pickler_New();
6911
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006912 if (pickler == NULL)
6913 return NULL;
6914
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006915 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006916 goto error;
6917
6918 if (_Pickler_SetOutputStream(pickler, file) < 0)
6919 goto error;
6920
6921 if (dump(pickler, obj) < 0)
6922 goto error;
6923
6924 if (_Pickler_FlushToFile(pickler) < 0)
6925 goto error;
6926
6927 Py_DECREF(pickler);
6928 Py_RETURN_NONE;
6929
6930 error:
6931 Py_XDECREF(pickler);
6932 return NULL;
6933}
6934
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006935/*[clinic]
6936
6937_pickle.dumps
6938
6939 obj: object
6940 protocol: object = NULL
6941 *
6942 fix_imports: bool = True
6943
6944Return the pickled representation of the object as a bytes object.
6945
The optional protocol argument tells the pickler to use the given protocol;
supported protocols are 0, 1, 2, 3 and 4. The default protocol is 3; a
backward-incompatible protocol designed for Python 3.0.
6949
6950Specifying a negative protocol version selects the highest protocol version
6951supported. The higher the protocol used, the more recent the version of
6952Python needed to read the pickle produced.
6953
6954If fix_imports is True and *protocol* is less than 3, pickle will try to
6955map the new Python 3.x names to the old module names used in Python 2.x,
6956so that the pickle data stream is readable with Python 2.x.
6957[clinic]*/
6958
6959PyDoc_STRVAR(_pickle_dumps__doc__,
6960"dumps(obj, protocol=None, *, fix_imports=True)\n"
6961"Return the pickled representation of the object as a bytes object.\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006962"\n"
6963"The optional protocol argument tells the pickler to use the given protocol;\n"
6964"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6965"backward-incompatible protocol designed for Python 3.0.\n"
6966"\n"
6967"Specifying a negative protocol version selects the highest protocol version\n"
6968"supported. The higher the protocol used, the more recent the version of\n"
6969"Python needed to read the pickle produced.\n"
6970"\n"
6971"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6972"map the new Python 3.x names to the old module names used in Python 2.x,\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006973"so that the pickle data stream is readable with Python 2.x.");
6974
6975#define _PICKLE_DUMPS_METHODDEF \
6976 {"dumps", (PyCFunction)_pickle_dumps, METH_VARARGS|METH_KEYWORDS, _pickle_dumps__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006977
6978static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006979_pickle_dumps_impl(PyModuleDef *module, PyObject *obj, PyObject *protocol, int fix_imports);
6980
6981static PyObject *
6982_pickle_dumps(PyModuleDef *module, PyObject *args, PyObject *kwargs)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006983{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006984 PyObject *return_value = NULL;
6985 static char *_keywords[] = {"obj", "protocol", "fix_imports", NULL};
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006986 PyObject *obj;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08006987 PyObject *protocol = NULL;
6988 int fix_imports = 1;
6989
6990 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
6991 "O|O$p:dumps", _keywords,
6992 &obj, &protocol, &fix_imports))
6993 goto exit;
6994 return_value = _pickle_dumps_impl(module, obj, protocol, fix_imports);
6995
6996exit:
6997 return return_value;
6998}
6999
7000static PyObject *
7001_pickle_dumps_impl(PyModuleDef *module, PyObject *obj, PyObject *protocol, int fix_imports)
7002/*[clinic checksum: df6262c4c487f537f47aec8a1709318204c1e174]*/
7003{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007004 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007005 PicklerObject *pickler = _Pickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007006
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007007 if (pickler == NULL)
7008 return NULL;
7009
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007010 if (_Pickler_SetProtocol(pickler, protocol, fix_imports) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007011 goto error;
7012
7013 if (dump(pickler, obj) < 0)
7014 goto error;
7015
7016 result = _Pickler_GetString(pickler);
7017 Py_DECREF(pickler);
7018 return result;
7019
7020 error:
7021 Py_XDECREF(pickler);
7022 return NULL;
7023}
7024
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007025/*[clinic]
7026
7027_pickle.load
7028
7029 file: object
7030 *
7031 fix_imports: bool = True
7032 encoding: str = 'ASCII'
7033 errors: str = 'strict'
7034
7035Return a reconstituted object from the pickle data stored in a file.
7036
7037This is equivalent to ``Unpickler(file).load()``, but may be more efficient.
7038
7039The protocol version of the pickle is detected automatically, so no protocol
7040argument is needed. Bytes past the pickled object's representation are
7041ignored.
7042
7043The argument file must have two methods, a read() method that takes an
7044integer argument, and a readline() method that requires no arguments. Both
7045methods should return bytes. Thus *file* can be a binary file object opened
7046for reading, a BytesIO object, or any other custom object that meets this
7047interface.
7048
7049Optional keyword arguments are fix_imports, encoding and errors,
which are used to control compatibility support for pickle stream generated
7051by Python 2.x. If fix_imports is True, pickle will try to map the old
7052Python 2.x names to the new names used in Python 3.x. The encoding and
7053errors tell pickle how to decode 8-bit string instances pickled by Python
70542.x; these default to 'ASCII' and 'strict', respectively.
7055[clinic]*/
7056
7057PyDoc_STRVAR(_pickle_load__doc__,
7058"load(file, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
7059"Return a reconstituted object from the pickle data stored in a file.\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007060"\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007061"This is equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007062"\n"
7063"The protocol version of the pickle is detected automatically, so no protocol\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007064"argument is needed. Bytes past the pickled object\'s representation are\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007065"ignored.\n"
7066"\n"
7067"The argument file must have two methods, a read() method that takes an\n"
7068"integer argument, and a readline() method that requires no arguments. Both\n"
7069"methods should return bytes. Thus *file* can be a binary file object opened\n"
7070"for reading, a BytesIO object, or any other custom object that meets this\n"
7071"interface.\n"
7072"\n"
7073"Optional keyword arguments are fix_imports, encoding and errors,\n"
7074"which are used to control compatiblity support for pickle stream generated\n"
7075"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
7076"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
7077"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007078"2.x; these default to \'ASCII\' and \'strict\', respectively.");
7079
7080#define _PICKLE_LOAD_METHODDEF \
7081 {"load", (PyCFunction)_pickle_load, METH_VARARGS|METH_KEYWORDS, _pickle_load__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007082
7083static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007084_pickle_load_impl(PyModuleDef *module, PyObject *file, int fix_imports, const char *encoding, const char *errors);
7085
7086static PyObject *
7087_pickle_load(PyModuleDef *module, PyObject *args, PyObject *kwargs)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007088{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007089 PyObject *return_value = NULL;
7090 static char *_keywords[] = {"file", "fix_imports", "encoding", "errors", NULL};
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007091 PyObject *file;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007092 int fix_imports = 1;
7093 const char *encoding = "ASCII";
7094 const char *errors = "strict";
7095
7096 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
7097 "O|$pss:load", _keywords,
7098 &file, &fix_imports, &encoding, &errors))
7099 goto exit;
7100 return_value = _pickle_load_impl(module, file, fix_imports, encoding, errors);
7101
7102exit:
7103 return return_value;
7104}
7105
7106static PyObject *
7107_pickle_load_impl(PyModuleDef *module, PyObject *file, int fix_imports, const char *encoding, const char *errors)
7108/*[clinic checksum: e10796f6765b22ce48dca6940f11b3933853ca35]*/
7109{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007110 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007111 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007112
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007113 if (unpickler == NULL)
7114 return NULL;
7115
7116 if (_Unpickler_SetInputStream(unpickler, file) < 0)
7117 goto error;
7118
7119 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7120 goto error;
7121
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007122 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007123
7124 result = load(unpickler);
7125 Py_DECREF(unpickler);
7126 return result;
7127
7128 error:
7129 Py_XDECREF(unpickler);
7130 return NULL;
7131}
7132
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007133/*[clinic]
7134
7135_pickle.loads
7136
7137 data: object
7138 *
7139 fix_imports: bool = True
7140 encoding: str = 'ASCII'
7141 errors: str = 'strict'
7142
7143Return a reconstituted object from the given pickle data.
7144
7145The protocol version of the pickle is detected automatically, so no protocol
7146argument is needed. Bytes past the pickled object's representation are
7147ignored.
7148
7149Optional keyword arguments are fix_imports, encoding and errors, which
are used to control compatibility support for pickle stream generated
7151by Python 2.x. If fix_imports is True, pickle will try to map the old
7152Python 2.x names to the new names used in Python 3.x. The encoding and
7153errors tell pickle how to decode 8-bit string instances pickled by Python
71542.x; these default to 'ASCII' and 'strict', respectively.
7155[clinic]*/
7156
7157PyDoc_STRVAR(_pickle_loads__doc__,
7158"loads(data, *, fix_imports=True, encoding=\'ASCII\', errors=\'strict\')\n"
7159"Return a reconstituted object from the given pickle data.\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007160"\n"
7161"The protocol version of the pickle is detected automatically, so no protocol\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007162"argument is needed. Bytes past the pickled object\'s representation are\n"
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007163"ignored.\n"
7164"\n"
7165"Optional keyword arguments are fix_imports, encoding and errors, which\n"
7166"are used to control compatiblity support for pickle stream generated\n"
7167"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
7168"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
7169"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007170"2.x; these default to \'ASCII\' and \'strict\', respectively.");
7171
7172#define _PICKLE_LOADS_METHODDEF \
7173 {"loads", (PyCFunction)_pickle_loads, METH_VARARGS|METH_KEYWORDS, _pickle_loads__doc__},
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007174
7175static PyObject *
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007176_pickle_loads_impl(PyModuleDef *module, PyObject *data, int fix_imports, const char *encoding, const char *errors);
7177
7178static PyObject *
7179_pickle_loads(PyModuleDef *module, PyObject *args, PyObject *kwargs)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007180{
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007181 PyObject *return_value = NULL;
7182 static char *_keywords[] = {"data", "fix_imports", "encoding", "errors", NULL};
7183 PyObject *data;
7184 int fix_imports = 1;
7185 const char *encoding = "ASCII";
7186 const char *errors = "strict";
7187
7188 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
7189 "O|$pss:loads", _keywords,
7190 &data, &fix_imports, &encoding, &errors))
7191 goto exit;
7192 return_value = _pickle_loads_impl(module, data, fix_imports, encoding, errors);
7193
7194exit:
7195 return return_value;
7196}
7197
7198static PyObject *
7199_pickle_loads_impl(PyModuleDef *module, PyObject *data, int fix_imports, const char *encoding, const char *errors)
7200/*[clinic checksum: 29ee725efcbf51a3533c19cb8261a8e267b7080a]*/
7201{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007202 PyObject *result;
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007203 UnpicklerObject *unpickler = _Unpickler_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007205 if (unpickler == NULL)
7206 return NULL;
7207
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007208 if (_Unpickler_SetStringInput(unpickler, data) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007209 goto error;
7210
7211 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
7212 goto error;
7213
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007214 unpickler->fix_imports = fix_imports;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007215
7216 result = load(unpickler);
7217 Py_DECREF(unpickler);
7218 return result;
7219
7220 error:
7221 Py_XDECREF(unpickler);
7222 return NULL;
7223}
7224
7225
7226static struct PyMethodDef pickle_methods[] = {
Alexandre Vassalottied8c9062013-11-24 12:25:48 -08007227 _PICKLE_DUMP_METHODDEF
7228 _PICKLE_DUMPS_METHODDEF
7229 _PICKLE_LOAD_METHODDEF
7230 _PICKLE_LOADS_METHODDEF
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007231 {NULL, NULL} /* sentinel */
7232};
7233
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007234static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007235initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007236{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007237 PyObject *copyreg = NULL;
7238 PyObject *compat_pickle = NULL;
7239
7240 /* XXX: We should ensure that the types of the dictionaries imported are
7241 exactly PyDict objects. Otherwise, it is possible to crash the pickle
7242 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007243
7244 copyreg = PyImport_ImportModule("copyreg");
7245 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007246 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007247 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
7248 if (!dispatch_table)
7249 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007250 extension_registry = \
7251 PyObject_GetAttrString(copyreg, "_extension_registry");
7252 if (!extension_registry)
7253 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007254 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
7255 if (!inverted_registry)
7256 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007257 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
7258 if (!extension_cache)
7259 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007260 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007261
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007262 /* Load the 2.x -> 3.x stdlib module mapping tables */
7263 compat_pickle = PyImport_ImportModule("_compat_pickle");
7264 if (!compat_pickle)
7265 goto error;
7266 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
7267 if (!name_mapping_2to3)
7268 goto error;
7269 if (!PyDict_CheckExact(name_mapping_2to3)) {
7270 PyErr_Format(PyExc_RuntimeError,
7271 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
7272 Py_TYPE(name_mapping_2to3)->tp_name);
7273 goto error;
7274 }
7275 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
7276 "IMPORT_MAPPING");
7277 if (!import_mapping_2to3)
7278 goto error;
7279 if (!PyDict_CheckExact(import_mapping_2to3)) {
7280 PyErr_Format(PyExc_RuntimeError,
7281 "_compat_pickle.IMPORT_MAPPING should be a dict, "
7282 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
7283 goto error;
7284 }
7285 /* ... and the 3.x -> 2.x mapping tables */
7286 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
7287 "REVERSE_NAME_MAPPING");
7288 if (!name_mapping_3to2)
7289 goto error;
7290 if (!PyDict_CheckExact(name_mapping_3to2)) {
7291 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02007292 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007293 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
7294 goto error;
7295 }
7296 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
7297 "REVERSE_IMPORT_MAPPING");
7298 if (!import_mapping_3to2)
7299 goto error;
7300 if (!PyDict_CheckExact(import_mapping_3to2)) {
7301 PyErr_Format(PyExc_RuntimeError,
7302 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
7303 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
7304 goto error;
7305 }
7306 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007307
7308 empty_tuple = PyTuple_New(0);
7309 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007310 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007311 two_tuple = PyTuple_New(2);
7312 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007313 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007314 /* We use this temp container with no regard to refcounts, or to
7315 * keeping containees alive. Exempt from GC, because we don't
7316 * want anything looking at two_tuple() by magic.
7317 */
7318 PyObject_GC_UnTrack(two_tuple);
7319
7320 return 0;
7321
7322 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007323 Py_CLEAR(copyreg);
7324 Py_CLEAR(dispatch_table);
7325 Py_CLEAR(extension_registry);
7326 Py_CLEAR(inverted_registry);
7327 Py_CLEAR(extension_cache);
7328 Py_CLEAR(compat_pickle);
7329 Py_CLEAR(name_mapping_2to3);
7330 Py_CLEAR(import_mapping_2to3);
7331 Py_CLEAR(name_mapping_3to2);
7332 Py_CLEAR(import_mapping_3to2);
7333 Py_CLEAR(empty_tuple);
7334 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007335 return -1;
7336}
7337
7338static struct PyModuleDef _picklemodule = {
7339 PyModuleDef_HEAD_INIT,
7340 "_pickle",
7341 pickle_module_doc,
7342 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007343 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007344 NULL,
7345 NULL,
7346 NULL,
7347 NULL
7348};
7349
7350PyMODINIT_FUNC
7351PyInit__pickle(void)
7352{
7353 PyObject *m;
7354
7355 if (PyType_Ready(&Unpickler_Type) < 0)
7356 return NULL;
7357 if (PyType_Ready(&Pickler_Type) < 0)
7358 return NULL;
7359 if (PyType_Ready(&Pdata_Type) < 0)
7360 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007361 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7362 return NULL;
7363 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7364 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007365
7366 /* Create the module and add the functions. */
7367 m = PyModule_Create(&_picklemodule);
7368 if (m == NULL)
7369 return NULL;
7370
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007371 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007372 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7373 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007374 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007375 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7376 return NULL;
7377
7378 /* Initialize the exceptions. */
7379 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7380 if (PickleError == NULL)
7381 return NULL;
7382 PicklingError = \
7383 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
7384 if (PicklingError == NULL)
7385 return NULL;
7386 UnpicklingError = \
7387 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
7388 if (UnpicklingError == NULL)
7389 return NULL;
7390
7391 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
7392 return NULL;
7393 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
7394 return NULL;
7395 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
7396 return NULL;
7397
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007398 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007399 return NULL;
7400
7401 return m;
7402}