blob: 741cb8a7a2e93b3b3973359b0bf4985dea6fd5fd [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol. */
enum {
    /* Newest protocol supported; also selected when a negative protocol
       number is requested (see _Pickler_SetProtocol()). */
    HIGHEST_PROTOCOL = 4,
    /* Protocol used when the caller passes no protocol (NULL or None). */
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes. These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is a single character constant.  The opcodes introduced by
   protocol 2 and protocol 4 use byte values from '\x80' upwards. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',

    /* Protocol 4 */
    SHORT_BINUNICODE = '\x8c',
    BINUNICODE8      = '\x8d',
    BINBYTES8        = '\x8e',
    EMPTY_SET        = '\x8f',
    ADDITEMS         = '\x90',
    FROZENSET        = '\x91',
    NEWOBJ_EX        = '\x92',
    STACK_GLOBAL     = '\x93',
    MEMOIZE          = '\x94',
    FRAME            = '\x95'
};
88
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each string is the INT opcode ('I') followed by a two-digit textual value
 * and a newline.  Any TRUE/FALSE macro defined earlier is dropped first so
 * that the names can be reused here.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
98
/* Tuning constants shared by the pickler and the unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16,

    /* Frame payload size at which _Pickler_OpcodeBoundary() closes the
       frame that is currently open. */
    FRAME_SIZE_TARGET = 64 * 1024,

    /* Bytes reserved for a frame header: the FRAME opcode followed by an
       8-byte length (see _Pickler_WriteFrameHeader()). */
    FRAME_HEADER_SIZE = 9
};
120
/* Exception classes for pickle. These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used.
   All of the module-level object pointers below start out as NULL. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000150
/* Raise UnpicklingError("unpickling stack underflow").
   Always returns -1 so that callers can write `return stack_underflow();`. */
static int
stack_underflow(void)
{
    PyErr_SetString(UnpicklingError, "unpickling stack underflow");
    return -1;
}
157
/* Internal data type used as the unpickling stack.
   The number of items currently on the stack is kept in the variable-size
   header and accessed through Py_SIZE(). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;        /* array of item pointers, bottom of stack first */
    Py_ssize_t allocated;   /* number of slots in data allocated */
} Pdata;
164
165static void
166Pdata_dealloc(Pdata *self)
167{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200168 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000169 while (--i >= 0) {
170 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000171 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000172 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000173 PyObject_Del(self);
174}
175
/* Type object for Pdata.  Only the name, the instance size and the
   destructor are filled in; all remaining slots are left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
183
184static PyObject *
185Pdata_New(void)
186{
187 Pdata *self;
188
189 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
190 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000191 Py_SIZE(self) = 0;
192 self->allocated = 8;
193 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194 if (self->data)
195 return (PyObject *)self;
196 Py_DECREF(self);
197 return PyErr_NoMemory();
198}
199
200
201/* Retain only the initial clearto items. If clearto >= the current
202 * number of items, this is a (non-erroneous) NOP.
203 */
204static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200205Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200207 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000208
209 if (clearto < 0)
210 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000211 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000212 return 0;
213
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000214 while (--i >= clearto) {
215 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000216 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000217 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000218 return 0;
219}
220
221static int
222Pdata_grow(Pdata *self)
223{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000224 PyObject **data = self->data;
225 Py_ssize_t allocated = self->allocated;
226 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000227
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000228 new_allocated = (allocated >> 3) + 6;
229 /* check for integer overflow */
230 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000231 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000232 new_allocated += allocated;
233 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000234 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000235 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
236 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000237 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000238
239 self->data = data;
240 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000241 return 0;
242
243 nomemory:
244 PyErr_NoMemory();
245 return -1;
246}
247
248/* D is a Pdata*. Pop the topmost element and store it into V, which
249 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
250 * is raised and V is set to NULL.
251 */
252static PyObject *
253Pdata_pop(Pdata *self)
254{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000255 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000256 PyErr_SetString(UnpicklingError, "bad pickle data");
257 return NULL;
258 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000259 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000260}
261#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
262
263static int
264Pdata_push(Pdata *self, PyObject *obj)
265{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000266 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000267 return -1;
268 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000269 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000270 return 0;
271}
272
/* Push an object on stack, transferring its ownership to the stack.
   When the push fails the stack never stored the pointer, and because the
   macro returns ER from the *calling* function the caller gets no chance to
   clean up; the reference is therefore released here.  O is evaluated
   exactly once. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_XDECREF(pdata_push_obj_);                        \
            return (ER);                                        \
        }                                                       \
    } while(0)

/* Push an object on stack, adding a new reference to the object.
   The extra reference is given back if the push fails, before ER is
   returned from the calling function.  O is evaluated exactly once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while(0)
281
282static PyObject *
283Pdata_poptuple(Pdata *self, Py_ssize_t start)
284{
285 PyObject *tuple;
286 Py_ssize_t len, i, j;
287
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000288 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000289 tuple = PyTuple_New(len);
290 if (tuple == NULL)
291 return NULL;
292 for (i = start, j = 0; j < len; i++, j++)
293 PyTuple_SET_ITEM(tuple, j, self->data[i]);
294
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000295 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000296 return tuple;
297}
298
299static PyObject *
300Pdata_poplist(Pdata *self, Py_ssize_t start)
301{
302 PyObject *list;
303 Py_ssize_t len, i, j;
304
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000305 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000306 list = PyList_New(len);
307 if (list == NULL)
308 return NULL;
309 for (i = start, j = 0; j < len; i++, j++)
310 PyList_SET_ITEM(list, j, self->data[i]);
311
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000312 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000313 return list;
314}
315
/* One slot of the pickler memo hashtable. */
typedef struct {
    PyObject *me_key;       /* memoized object; NULL marks an unused slot.
                               The table owns a reference to it. */
    Py_ssize_t me_value;    /* integer stored for that object */
} PyMemoEntry;
320
/* Open-addressing hashtable keyed by object identity (see the
   PyMemoTable_* functions). */
typedef struct {
    Py_ssize_t mt_mask;         /* mt_allocated - 1; masks a probe value
                                   down to a slot index */
    Py_ssize_t mt_used;         /* number of slots holding a key */
    Py_ssize_t mt_allocated;    /* number of slots in mt_table (a power of two) */
    PyMemoEntry *mt_table;      /* the slot array */
} PyMemoTable;
327
/* State of one Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *dispatch_table;   /* private dispatch_table, can be NULL */
    PyObject *arg;              /* cached 1-tuple reused by
                                   _Pickler_FastCall(), can be NULL */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytes buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int framing;                /* True when framing is enabled, proto >= 4 */
    Py_ssize_t frame_start;     /* Position in output_buffer where the
                                   current frame begins. -1 if there
                                   is no frame currently open. */

    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* NOTE(review): presumably the current
                                   nesting depth in fast mode (compare
                                   FAST_NESTING_LIMIT) -- confirm */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;        /* NOTE(review): presumably the objects seen
                                   while in fast mode -- confirm; can be NULL */
} PicklerObject;
361
/* State of one Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    Py_ssize_t memo_size;       /* Capacity of the memo array */
    Py_ssize_t memo_len;        /* Number of objects in the memo */

    PyObject *arg;              /* NOTE(review): presumably a cached argument
                                   tuple like PicklerObject.arg -- confirm */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */

    /* Input state.  NOTE(review): the exact roles of buffer, input_buffer,
       input_line, input_len and next_read_idx are not visible in this part
       of the file -- confirm against the reading helpers. */
    Py_buffer buffer;
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */

    PyObject *read;             /* read() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use when
                                   decoding strings. The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
400
/* Forward declarations: these are referenced before their definitions
   appear later in the file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
406
407
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000408/*************************************************************************
Serhiy Storchaka95949422013-08-27 19:40:23 +0300409 A custom hashtable mapping void* to Python ints. This is used by the pickler
410 for memoization. Using a custom hashtable rather than PyDict allows us to skip
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000411 a bunch of unnecessary object creation. This makes a huge performance
412 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000413
/* Smallest (and initial) number of slots in a memo table. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by after each
   probe in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
416
417
418static PyMemoTable *
419PyMemoTable_New(void)
420{
421 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
422 if (memo == NULL) {
423 PyErr_NoMemory();
424 return NULL;
425 }
426
427 memo->mt_used = 0;
428 memo->mt_allocated = MT_MINSIZE;
429 memo->mt_mask = MT_MINSIZE - 1;
430 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
431 if (memo->mt_table == NULL) {
432 PyMem_FREE(memo);
433 PyErr_NoMemory();
434 return NULL;
435 }
436 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
437
438 return memo;
439}
440
441static PyMemoTable *
442PyMemoTable_Copy(PyMemoTable *self)
443{
444 Py_ssize_t i;
445 PyMemoTable *new = PyMemoTable_New();
446 if (new == NULL)
447 return NULL;
448
449 new->mt_used = self->mt_used;
450 new->mt_allocated = self->mt_allocated;
451 new->mt_mask = self->mt_mask;
452 /* The table we get from _New() is probably smaller than we wanted.
453 Free it and allocate one that's the right size. */
454 PyMem_FREE(new->mt_table);
455 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
456 if (new->mt_table == NULL) {
457 PyMem_FREE(new);
Victor Stinner42024562013-07-12 00:53:57 +0200458 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000459 return NULL;
460 }
461 for (i = 0; i < self->mt_allocated; i++) {
462 Py_XINCREF(self->mt_table[i].me_key);
463 }
464 memcpy(new->mt_table, self->mt_table,
465 sizeof(PyMemoEntry) * self->mt_allocated);
466
467 return new;
468}
469
/* Return the number of entries currently stored in the table. */
static Py_ssize_t
PyMemoTable_Size(PyMemoTable *self)
{
    return self->mt_used;
}
475
476static int
477PyMemoTable_Clear(PyMemoTable *self)
478{
479 Py_ssize_t i = self->mt_allocated;
480
481 while (--i >= 0) {
482 Py_XDECREF(self->mt_table[i].me_key);
483 }
484 self->mt_used = 0;
485 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
486 return 0;
487}
488
489static void
490PyMemoTable_Del(PyMemoTable *self)
491{
492 if (self == NULL)
493 return;
494 PyMemoTable_Clear(self);
495
496 PyMem_FREE(self->mt_table);
497 PyMem_FREE(self);
498}
499
500/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
501 can be considerably simpler than dictobject.c's lookdict(). */
502static PyMemoEntry *
503_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
504{
505 size_t i;
506 size_t perturb;
507 size_t mask = (size_t)self->mt_mask;
508 PyMemoEntry *table = self->mt_table;
509 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000510 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000511
512 i = hash & mask;
513 entry = &table[i];
514 if (entry->me_key == NULL || entry->me_key == key)
515 return entry;
516
517 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
518 i = (i << 2) + i + perturb + 1;
519 entry = &table[i & mask];
520 if (entry->me_key == NULL || entry->me_key == key)
521 return entry;
522 }
523 assert(0); /* Never reached */
524 return NULL;
525}
526
527/* Returns -1 on failure, 0 on success. */
528static int
529_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
530{
531 PyMemoEntry *oldtable = NULL;
532 PyMemoEntry *oldentry, *newentry;
533 Py_ssize_t new_size = MT_MINSIZE;
534 Py_ssize_t to_process;
535
536 assert(min_size > 0);
537
538 /* Find the smallest valid table size >= min_size. */
539 while (new_size < min_size && new_size > 0)
540 new_size <<= 1;
541 if (new_size <= 0) {
542 PyErr_NoMemory();
543 return -1;
544 }
545 /* new_size needs to be a power of two. */
546 assert((new_size & (new_size - 1)) == 0);
547
548 /* Allocate new table. */
549 oldtable = self->mt_table;
550 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
551 if (self->mt_table == NULL) {
Victor Stinner8ca72e22013-07-12 00:53:26 +0200552 self->mt_table = oldtable;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000553 PyErr_NoMemory();
554 return -1;
555 }
556 self->mt_allocated = new_size;
557 self->mt_mask = new_size - 1;
558 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
559
560 /* Copy entries from the old table. */
561 to_process = self->mt_used;
562 for (oldentry = oldtable; to_process > 0; oldentry++) {
563 if (oldentry->me_key != NULL) {
564 to_process--;
565 /* newentry is a pointer to a chunk of the new
566 mt_table, so we're setting the key:value pair
567 in-place. */
568 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
569 newentry->me_key = oldentry->me_key;
570 newentry->me_value = oldentry->me_value;
571 }
572 }
573
574 /* Deallocate the old table. */
575 PyMem_FREE(oldtable);
576 return 0;
577}
578
579/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200580static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000581PyMemoTable_Get(PyMemoTable *self, PyObject *key)
582{
583 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
584 if (entry->me_key == NULL)
585 return NULL;
586 return &entry->me_value;
587}
588
589/* Returns -1 on failure, 0 on success. */
590static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200591PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000592{
593 PyMemoEntry *entry;
594
595 assert(key != NULL);
596
597 entry = _PyMemoTable_Lookup(self, key);
598 if (entry->me_key != NULL) {
599 entry->me_value = value;
600 return 0;
601 }
602 Py_INCREF(key);
603 entry->me_key = key;
604 entry->me_value = value;
605 self->mt_used++;
606
607 /* If we added a key, we can safely resize. Otherwise just return!
608 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
609 *
610 * Quadrupling the size improves average table sparseness
611 * (reducing collisions) at the cost of some memory. It also halves
612 * the number of expensive resize operations in a growing memo table.
613 *
614 * Very large memo tables (over 50K items) use doubling instead.
615 * This may help applications with severe memory constraints.
616 */
617 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
618 return 0;
619 return _PyMemoTable_ResizeTable(self,
620 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
621}
622
/* The hashtable tuning macros are private to the memo table code above. */
#undef MT_MINSIZE
#undef PERTURB_SHIFT
625
626/*************************************************************************/
627
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* ARG_TUP(self, obj): store obj as the only item of the cached 1-tuple
   self->arg, creating the tuple on first use.  The reference to obj is
   consumed in every case: it is placed into the tuple without an INCREF
   (after releasing the item stored there previously), or it is released if
   the tuple could not be created.  In the latter case self->arg stays
   NULL. */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* FREE_ARG_TUP(self): give up the cached tuple when something else holds a
   reference to it as well (reference count above 1), so that the next
   ARG_TUP() starts from a fresh tuple.  self->arg must not be NULL. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
647
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefits?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e, call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time).  Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead.  So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions.  But, that is probably more related to the function call
   overhead, than the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000670static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000671_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000672{
673 PyObject *result = NULL;
674
675 ARG_TUP(self, arg);
676 if (self->arg) {
677 result = PyObject_Call(func, self->arg, NULL);
678 FREE_ARG_TUP(self);
679 }
680 return result;
681}
682
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000683static int
684_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000685{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000686 Py_CLEAR(self->output_buffer);
687 self->output_buffer =
688 PyBytes_FromStringAndSize(NULL, self->max_output_len);
689 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000690 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000691 self->output_len = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100692 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000693 return 0;
694}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000695
/* Store `value` into out[0..7] as a 64-bit little-endian integer.
   When size_t is narrower than 64 bits only the low four bytes carry data
   and the upper four are zero; this also keeps every shift count below the
   width of size_t. */
static void
_write_size64(char *out, size_t value)
{
#if SIZEOF_SIZE_T >= 8
    const int significant = 8;
#else
    const int significant = 4;
#endif
    int pos;

    for (pos = 0; pos < significant; pos++) {
        out[pos] = (unsigned char)((value >> (8 * pos)) & 0xff);
    }
    for (; pos < 8; pos++) {
        out[pos] = 0;
    }
}
712
/* Write a frame header at qdata: the FRAME opcode followed by frame_len as
   an 8-byte little-endian integer (FRAME_HEADER_SIZE bytes in total).
   `self` is not used. */
static void
_Pickler_WriteFrameHeader(PicklerObject *self, char *qdata, size_t frame_len)
{
    qdata[0] = FRAME;
    _write_size64(qdata + 1, frame_len);
}
719
720static int
721_Pickler_CommitFrame(PicklerObject *self)
722{
723 size_t frame_len;
724 char *qdata;
725
726 if (!self->framing || self->frame_start == -1)
727 return 0;
728 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
729 qdata = PyBytes_AS_STRING(self->output_buffer) + self->frame_start;
730 _Pickler_WriteFrameHeader(self, qdata, frame_len);
731 self->frame_start = -1;
732 return 0;
733}
734
735static int
736_Pickler_OpcodeBoundary(PicklerObject *self)
737{
738 Py_ssize_t frame_len;
739
740 if (!self->framing || self->frame_start == -1)
741 return 0;
742 frame_len = self->output_len - self->frame_start - FRAME_HEADER_SIZE;
743 if (frame_len >= FRAME_SIZE_TARGET)
744 return _Pickler_CommitFrame(self);
745 else
746 return 0;
747}
748
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000749static PyObject *
750_Pickler_GetString(PicklerObject *self)
751{
752 PyObject *output_buffer = self->output_buffer;
753
754 assert(self->output_buffer != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100755
756 if (_Pickler_CommitFrame(self))
757 return NULL;
758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000759 self->output_buffer = NULL;
760 /* Resize down to exact size */
761 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
762 return NULL;
763 return output_buffer;
764}
765
766static int
767_Pickler_FlushToFile(PicklerObject *self)
768{
769 PyObject *output, *result;
770
771 assert(self->write != NULL);
772
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100773 /* This will commit the frame first */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000774 output = _Pickler_GetString(self);
775 if (output == NULL)
776 return -1;
777
778 result = _Pickler_FastCall(self, self->write, output);
779 Py_XDECREF(result);
780 return (result == NULL) ? -1 : 0;
781}
782
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200783static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100784_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t data_len)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000785{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100786 Py_ssize_t i, n, required;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000787 char *buffer;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100788 int need_new_frame;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000789
790 assert(s != NULL);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100791 need_new_frame = (self->framing && self->frame_start == -1);
792
793 if (need_new_frame)
794 n = data_len + FRAME_HEADER_SIZE;
795 else
796 n = data_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000797
798 required = self->output_len + n;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100799 if (required > self->max_output_len) {
800 /* Make place in buffer for the pickle chunk */
801 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
802 PyErr_NoMemory();
803 return -1;
804 }
805 self->max_output_len = (self->output_len + n) / 2 * 3;
806 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
807 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000808 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000809 buffer = PyBytes_AS_STRING(self->output_buffer);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100810 if (need_new_frame) {
811 /* Setup new frame */
812 Py_ssize_t frame_start = self->output_len;
813 self->frame_start = frame_start;
814 for (i = 0; i < FRAME_HEADER_SIZE; i++) {
815 /* Write an invalid value, for debugging */
816 buffer[frame_start + i] = 0xFE;
817 }
818 self->output_len += FRAME_HEADER_SIZE;
819 }
820 if (data_len < 8) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 /* This is faster than memcpy when the string is short. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100822 for (i = 0; i < data_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000823 buffer[self->output_len + i] = s[i];
824 }
825 }
826 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100827 memcpy(buffer + self->output_len, s, data_len);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000828 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100829 self->output_len += data_len;
830 return data_len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000831}
832
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000833static PicklerObject *
834_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000835{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000836 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000837
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000838 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
839 if (self == NULL)
840 return NULL;
841
842 self->pers_func = NULL;
Antoine Pitrou8d3c2902012-03-04 18:31:48 +0100843 self->dispatch_table = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000844 self->arg = NULL;
845 self->write = NULL;
846 self->proto = 0;
847 self->bin = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +0100848 self->framing = 0;
849 self->frame_start = -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000850 self->fast = 0;
851 self->fast_nesting = 0;
852 self->fix_imports = 0;
853 self->fast_memo = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000854 self->max_output_len = WRITE_BUF_SIZE;
855 self->output_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +0200856
857 self->memo = PyMemoTable_New();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000858 self->output_buffer = PyBytes_FromStringAndSize(NULL,
859 self->max_output_len);
Victor Stinner68c8ea22013-07-11 22:56:25 +0200860
861 if (self->memo == NULL || self->output_buffer == NULL) {
Victor Stinnerc31df042013-07-12 00:08:59 +0200862 Py_DECREF(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000863 return NULL;
864 }
865 return self;
866}
867
868static int
869_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
870 PyObject *fix_imports_obj)
871{
872 long proto = 0;
873 int fix_imports;
874
875 if (proto_obj == NULL || proto_obj == Py_None)
876 proto = DEFAULT_PROTOCOL;
877 else {
878 proto = PyLong_AsLong(proto_obj);
879 if (proto == -1 && PyErr_Occurred())
880 return -1;
881 }
882 if (proto < 0)
883 proto = HIGHEST_PROTOCOL;
884 if (proto > HIGHEST_PROTOCOL) {
885 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
886 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000887 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000888 }
889 fix_imports = PyObject_IsTrue(fix_imports_obj);
890 if (fix_imports == -1)
891 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200892
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000893 self->proto = proto;
894 self->bin = proto > 0;
895 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000896
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000897 return 0;
898}
899
900/* Returns -1 (with an exception set) on failure, 0 on success. This may
901 be called once on a freshly created Pickler. */
902static int
903_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
904{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +0200905 _Py_IDENTIFIER(write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000906 assert(file != NULL);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +0200907 self->write = _PyObject_GetAttrId(file, &PyId_write);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000908 if (self->write == NULL) {
909 if (PyErr_ExceptionMatches(PyExc_AttributeError))
910 PyErr_SetString(PyExc_TypeError,
911 "file must have a 'write' attribute");
912 return -1;
913 }
914
915 return 0;
916}
917
918/* See documentation for _Pickler_FastCall(). */
919static PyObject *
920_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
921{
922 PyObject *result = NULL;
923
924 ARG_TUP(self, arg);
925 if (self->arg) {
926 result = PyObject_Call(func, self->arg, NULL);
927 FREE_ARG_TUP(self);
928 }
929 return result;
930}
931
932/* Returns the size of the input on success, -1 on failure. This takes its
933 own reference to `input`. */
934static Py_ssize_t
935_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
936{
937 if (self->buffer.buf != NULL)
938 PyBuffer_Release(&self->buffer);
939 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
940 return -1;
941 self->input_buffer = self->buffer.buf;
942 self->input_len = self->buffer.len;
943 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000944 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000945 return self->input_len;
946}
947
Antoine Pitrou04248a82010-10-12 20:51:21 +0000948static int
949_Unpickler_SkipConsumed(UnpicklerObject *self)
950{
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100951 Py_ssize_t consumed;
952 PyObject *r;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000953
Victor Stinnerb43ad1d2013-10-31 13:38:42 +0100954 consumed = self->next_read_idx - self->prefetched_idx;
955 if (consumed <= 0)
956 return 0;
957
958 assert(self->peek); /* otherwise we did something wrong */
959 /* This makes an useless copy... */
960 r = PyObject_CallFunction(self->read, "n", consumed);
961 if (r == NULL)
962 return -1;
963 Py_DECREF(r);
964
965 self->prefetched_idx = self->next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000966 return 0;
967}
968
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000969static const Py_ssize_t READ_WHOLE_LINE = -1;
970
971/* If reading from a file, we need to only pull the bytes we need, since there
972 may be multiple pickle objects arranged contiguously in the same input
973 buffer.
974
975 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
976 bytes from the input stream/buffer.
977
978 Update the unpickler's input buffer with the newly-read data. Returns -1 on
979 failure; on success, returns the number of bytes read from the file.
980
981 On success, self->input_len will be 0; this is intentional so that when
982 unpickling from a file, the "we've run out of data" code paths will trigger,
983 causing the Unpickler to go back to the file for more data. Use the returned
984 size to tell you how much data you can process. */
985static Py_ssize_t
986_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
987{
988 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000990
991 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200992
Antoine Pitrou04248a82010-10-12 20:51:21 +0000993 if (_Unpickler_SkipConsumed(self) < 0)
994 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000995
996 if (n == READ_WHOLE_LINE)
997 data = PyObject_Call(self->readline, empty_tuple, NULL);
998 else {
999 PyObject *len = PyLong_FromSsize_t(n);
1000 if (len == NULL)
1001 return -1;
1002 data = _Unpickler_FastCall(self, self->read, len);
1003 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004 if (data == NULL)
1005 return -1;
1006
Antoine Pitrou04248a82010-10-12 20:51:21 +00001007 /* Prefetch some data without advancing the file pointer, if possible */
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08001008 if (self->peek) {
Antoine Pitrou04248a82010-10-12 20:51:21 +00001009 PyObject *len, *prefetched;
1010 len = PyLong_FromSsize_t(PREFETCH);
1011 if (len == NULL) {
1012 Py_DECREF(data);
1013 return -1;
1014 }
1015 prefetched = _Unpickler_FastCall(self, self->peek, len);
1016 if (prefetched == NULL) {
1017 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
1018 /* peek() is probably not supported by the given file object */
1019 PyErr_Clear();
1020 Py_CLEAR(self->peek);
1021 }
1022 else {
1023 Py_DECREF(data);
1024 return -1;
1025 }
1026 }
1027 else {
1028 assert(PyBytes_Check(prefetched));
1029 prefetched_size = PyBytes_GET_SIZE(prefetched);
1030 PyBytes_ConcatAndDel(&data, prefetched);
1031 if (data == NULL)
1032 return -1;
1033 }
1034 }
1035
1036 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001037 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001038 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 return read_size;
1040}
1041
1042/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
1043
1044 This should be used for all data reads, rather than accessing the unpickler's
1045 input buffer directly. This method deals correctly with reading from input
1046 streams, which the input buffer doesn't deal with.
1047
1048 Note that when reading from a file-like object, self->next_read_idx won't
1049 be updated (it should remain at 0 for the entire unpickling process). You
1050 should use this function's return value to know how many bytes you can
1051 consume.
1052
1053 Returns -1 (with an exception set) on failure. On success, return the
1054 number of chars read. */
1055static Py_ssize_t
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08001056_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001057{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001058 Py_ssize_t num_read;
1059
Antoine Pitrou04248a82010-10-12 20:51:21 +00001060 if (self->next_read_idx + n <= self->input_len) {
1061 *s = self->input_buffer + self->next_read_idx;
1062 self->next_read_idx += n;
1063 return n;
1064 }
1065 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001066 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +00001067 return -1;
1068 }
Antoine Pitrou04248a82010-10-12 20:51:21 +00001069 num_read = _Unpickler_ReadFromFile(self, n);
1070 if (num_read < 0)
1071 return -1;
1072 if (num_read < n) {
1073 PyErr_Format(PyExc_EOFError, "Ran out of input");
1074 return -1;
1075 }
1076 *s = self->input_buffer;
1077 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001078 return n;
1079}
1080
1081static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001082_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1083 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001084{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001085 char *input_line = PyMem_Realloc(self->input_line, len + 1);
Victor Stinner42024562013-07-12 00:53:57 +02001086 if (input_line == NULL) {
1087 PyErr_NoMemory();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001088 return -1;
Victor Stinner42024562013-07-12 00:53:57 +02001089 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001090
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001091 memcpy(input_line, line, len);
1092 input_line[len] = '\0';
1093 self->input_line = input_line;
1094 *result = self->input_line;
1095 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001096}
1097
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001098/* Read a line from the input stream/buffer. If we run off the end of the input
1099 before hitting \n, return the data we found.
1100
1101 Returns the number of chars read, or -1 on failure. */
1102static Py_ssize_t
1103_Unpickler_Readline(UnpicklerObject *self, char **result)
1104{
1105 Py_ssize_t i, num_read;
1106
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001107 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001108 if (self->input_buffer[i] == '\n') {
1109 char *line_start = self->input_buffer + self->next_read_idx;
1110 num_read = i - self->next_read_idx + 1;
1111 self->next_read_idx = i + 1;
1112 return _Unpickler_CopyLine(self, line_start, num_read, result);
1113 }
1114 }
1115 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001116 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1117 if (num_read < 0)
1118 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001119 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001120 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001121 }
Victor Stinner121aab42011-09-29 23:40:53 +02001122
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001123 /* If we get here, we've run off the end of the input string. Return the
1124 remaining string and let the caller figure it out. */
1125 *result = self->input_buffer + self->next_read_idx;
1126 num_read = i - self->next_read_idx;
1127 self->next_read_idx = i;
1128 return num_read;
1129}
1130
1131/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1132 will be modified in place. */
1133static int
1134_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1135{
1136 Py_ssize_t i;
1137 PyObject **memo;
1138
1139 assert(new_size > self->memo_size);
1140
1141 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1142 if (memo == NULL) {
1143 PyErr_NoMemory();
1144 return -1;
1145 }
1146 self->memo = memo;
1147 for (i = self->memo_size; i < new_size; i++)
1148 self->memo[i] = NULL;
1149 self->memo_size = new_size;
1150 return 0;
1151}
1152
1153/* Returns NULL if idx is out of bounds. */
1154static PyObject *
1155_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1156{
1157 if (idx < 0 || idx >= self->memo_size)
1158 return NULL;
1159
1160 return self->memo[idx];
1161}
1162
1163/* Returns -1 (with an exception set) on failure, 0 on success.
1164 This takes its own reference to `value`. */
1165static int
1166_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1167{
1168 PyObject *old_item;
1169
1170 if (idx >= self->memo_size) {
1171 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1172 return -1;
1173 assert(idx < self->memo_size);
1174 }
1175 Py_INCREF(value);
1176 old_item = self->memo[idx];
1177 self->memo[idx] = value;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001178 if (old_item != NULL) {
1179 Py_DECREF(old_item);
1180 }
1181 else {
1182 self->memo_len++;
1183 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001184 return 0;
1185}
1186
1187static PyObject **
1188_Unpickler_NewMemo(Py_ssize_t new_size)
1189{
1190 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
Victor Stinner42024562013-07-12 00:53:57 +02001191 if (memo == NULL) {
1192 PyErr_NoMemory();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001193 return NULL;
Victor Stinner42024562013-07-12 00:53:57 +02001194 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001195 memset(memo, 0, new_size * sizeof(PyObject *));
1196 return memo;
1197}
1198
1199/* Free the unpickler's memo, taking care to decref any items left in it. */
1200static void
1201_Unpickler_MemoCleanup(UnpicklerObject *self)
1202{
1203 Py_ssize_t i;
1204 PyObject **memo = self->memo;
1205
1206 if (self->memo == NULL)
1207 return;
1208 self->memo = NULL;
1209 i = self->memo_size;
1210 while (--i >= 0) {
1211 Py_XDECREF(memo[i]);
1212 }
1213 PyMem_FREE(memo);
1214}
1215
1216static UnpicklerObject *
1217_Unpickler_New(void)
1218{
1219 UnpicklerObject *self;
1220
1221 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1222 if (self == NULL)
1223 return NULL;
1224
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001225 self->arg = NULL;
1226 self->pers_func = NULL;
1227 self->input_buffer = NULL;
1228 self->input_line = NULL;
1229 self->input_len = 0;
1230 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001231 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001232 self->read = NULL;
1233 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001234 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001235 self->encoding = NULL;
1236 self->errors = NULL;
1237 self->marks = NULL;
1238 self->num_marks = 0;
1239 self->marks_size = 0;
1240 self->proto = 0;
1241 self->fix_imports = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001242 memset(&self->buffer, 0, sizeof(Py_buffer));
1243 self->memo_size = 32;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001244 self->memo_len = 0;
Victor Stinner68c8ea22013-07-11 22:56:25 +02001245 self->memo = _Unpickler_NewMemo(self->memo_size);
1246 self->stack = (Pdata *)Pdata_New();
1247
1248 if (self->memo == NULL || self->stack == NULL) {
1249 Py_DECREF(self);
1250 return NULL;
1251 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001252
1253 return self;
1254}
1255
1256/* Returns -1 (with an exception set) on failure, 0 on success. This may
1257 be called once on a freshly created Pickler. */
1258static int
1259_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1260{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001261 _Py_IDENTIFIER(peek);
1262 _Py_IDENTIFIER(read);
1263 _Py_IDENTIFIER(readline);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001264
1265 self->peek = _PyObject_GetAttrId(file, &PyId_peek);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001266 if (self->peek == NULL) {
1267 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1268 PyErr_Clear();
1269 else
1270 return -1;
1271 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001272 self->read = _PyObject_GetAttrId(file, &PyId_read);
1273 self->readline = _PyObject_GetAttrId(file, &PyId_readline);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001274 if (self->readline == NULL || self->read == NULL) {
1275 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1276 PyErr_SetString(PyExc_TypeError,
1277 "file must have 'read' and 'readline' attributes");
1278 Py_CLEAR(self->read);
1279 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001280 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001281 return -1;
1282 }
1283 return 0;
1284}
1285
1286/* Returns -1 (with an exception set) on failure, 0 on success. This may
1287 be called once on a freshly created Pickler. */
1288static int
1289_Unpickler_SetInputEncoding(UnpicklerObject *self,
1290 const char *encoding,
1291 const char *errors)
1292{
1293 if (encoding == NULL)
1294 encoding = "ASCII";
1295 if (errors == NULL)
1296 errors = "strict";
1297
Victor Stinner49fc8ec2013-07-07 23:30:24 +02001298 self->encoding = _PyMem_Strdup(encoding);
1299 self->errors = _PyMem_Strdup(errors);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001300 if (self->encoding == NULL || self->errors == NULL) {
1301 PyErr_NoMemory();
1302 return -1;
1303 }
1304 return 0;
1305}
1306
1307/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001308static int
1309memo_get(PicklerObject *self, PyObject *key)
1310{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001311 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001312 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001313 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001314
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001315 value = PyMemoTable_Get(self->memo, key);
1316 if (value == NULL) {
1317 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001318 return -1;
1319 }
1320
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001321 if (!self->bin) {
1322 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001323 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1324 "%" PY_FORMAT_SIZE_T "d\n", *value);
1325 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001326 }
1327 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001328 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001329 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001330 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001331 len = 2;
1332 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001333 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001334 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001335 pdata[1] = (unsigned char)(*value & 0xff);
1336 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1337 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1338 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001339 len = 5;
1340 }
1341 else { /* unlikely */
1342 PyErr_SetString(PicklingError,
1343 "memo id too large for LONG_BINGET");
1344 return -1;
1345 }
1346 }
1347
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001348 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001349 return -1;
1350
1351 return 0;
1352}
1353
1354/* Store an object in the memo, assign it a new unique ID based on the number
1355 of objects currently stored in the memo and generate a PUT opcode. */
1356static int
1357memo_put(PicklerObject *self, PyObject *obj)
1358{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001359 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001360 Py_ssize_t len;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001361 Py_ssize_t idx;
1362
1363 const char memoize_op = MEMOIZE;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001364
1365 if (self->fast)
1366 return 0;
1367
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001368 idx = PyMemoTable_Size(self->memo);
1369 if (PyMemoTable_Set(self->memo, obj, idx) < 0)
1370 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001371
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001372 if (self->proto >= 4) {
1373 if (_Pickler_Write(self, &memoize_op, 1) < 0)
1374 return -1;
1375 return 0;
1376 }
1377 else if (!self->bin) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001378 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001379 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001380 "%" PY_FORMAT_SIZE_T "d\n", idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001381 len = strlen(pdata);
1382 }
1383 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001384 if (idx < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001385 pdata[0] = BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001386 pdata[1] = (unsigned char)idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001387 len = 2;
1388 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001389 else if (idx <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001390 pdata[0] = LONG_BINPUT;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001391 pdata[1] = (unsigned char)(idx & 0xff);
1392 pdata[2] = (unsigned char)((idx >> 8) & 0xff);
1393 pdata[3] = (unsigned char)((idx >> 16) & 0xff);
1394 pdata[4] = (unsigned char)((idx >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001395 len = 5;
1396 }
1397 else { /* unlikely */
1398 PyErr_SetString(PicklingError,
1399 "memo id too large for LONG_BINPUT");
1400 return -1;
1401 }
1402 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001403 if (_Pickler_Write(self, pdata, len) < 0)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001404 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001405
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001406 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001407}
1408
1409static PyObject *
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001410getattribute(PyObject *obj, PyObject *name, int allow_qualname) {
1411 PyObject *dotted_path;
1412 Py_ssize_t i;
1413 _Py_static_string(PyId_dot, ".");
1414 _Py_static_string(PyId_locals, "<locals>");
1415
1416 dotted_path = PyUnicode_Split(name, _PyUnicode_FromId(&PyId_dot), -1);
1417 if (dotted_path == NULL) {
1418 return NULL;
1419 }
1420 assert(Py_SIZE(dotted_path) >= 1);
1421 if (!allow_qualname && Py_SIZE(dotted_path) > 1) {
1422 PyErr_Format(PyExc_AttributeError,
1423 "Can't get qualified attribute %R on %R;"
1424 "use protocols >= 4 to enable support",
1425 name, obj);
1426 Py_DECREF(dotted_path);
1427 return NULL;
1428 }
1429 Py_INCREF(obj);
1430 for (i = 0; i < Py_SIZE(dotted_path); i++) {
1431 PyObject *subpath = PyList_GET_ITEM(dotted_path, i);
1432 PyObject *tmp;
1433 PyObject *result = PyUnicode_RichCompare(
1434 subpath, _PyUnicode_FromId(&PyId_locals), Py_EQ);
1435 int is_equal = (result == Py_True);
1436 assert(PyBool_Check(result));
1437 Py_DECREF(result);
1438 if (is_equal) {
1439 PyErr_Format(PyExc_AttributeError,
1440 "Can't get local attribute %R on %R", name, obj);
1441 Py_DECREF(dotted_path);
1442 Py_DECREF(obj);
1443 return NULL;
1444 }
1445 tmp = PyObject_GetAttr(obj, subpath);
1446 Py_DECREF(obj);
1447 if (tmp == NULL) {
1448 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
1449 PyErr_Clear();
1450 PyErr_Format(PyExc_AttributeError,
1451 "Can't get attribute %R on %R", name, obj);
1452 }
1453 Py_DECREF(dotted_path);
1454 return NULL;
1455 }
1456 obj = tmp;
1457 }
1458 Py_DECREF(dotted_path);
1459 return obj;
1460}
1461
1462static PyObject *
1463whichmodule(PyObject *global, PyObject *global_name, int allow_qualname)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001464{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001465 PyObject *module_name;
1466 PyObject *modules_dict;
1467 PyObject *module;
1468 PyObject *obj;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001469 Py_ssize_t i, j;
1470 _Py_IDENTIFIER(__module__);
1471 _Py_IDENTIFIER(modules);
1472 _Py_IDENTIFIER(__main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001473
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001474 module_name = _PyObject_GetAttrId(global, &PyId___module__);
1475
1476 if (module_name == NULL) {
1477 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001479 PyErr_Clear();
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001480 }
1481 else {
1482 /* In some rare cases (e.g., bound methods of extension types),
1483 __module__ can be None. If it is so, then search sys.modules for
1484 the module of global. */
1485 if (module_name != Py_None)
1486 return module_name;
1487 Py_CLEAR(module_name);
1488 }
1489 assert(module_name == NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001490
Victor Stinnerbb520202013-11-06 22:40:41 +01001491 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02001492 if (modules_dict == NULL) {
1493 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001494 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02001495 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001496
1497 i = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001498 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001499 PyObject *result = PyUnicode_RichCompare(
1500 module_name, _PyUnicode_FromId(&PyId___main__), Py_EQ);
1501 int is_equal = (result == Py_True);
1502 assert(PyBool_Check(result));
1503 Py_DECREF(result);
1504 if (is_equal)
1505 continue;
1506 if (module == Py_None)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001507 continue;
1508
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001509 obj = getattribute(module, global_name, allow_qualname);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001510 if (obj == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001511 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001512 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001513 PyErr_Clear();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001514 continue;
1515 }
1516
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001517 if (obj == global) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001518 Py_DECREF(obj);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001519 Py_INCREF(module_name);
1520 return module_name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001521 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001522 Py_DECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001523 }
1524
1525 /* If no module is found, use __main__. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001526 module_name = _PyUnicode_FromId(&PyId___main__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001527 Py_INCREF(module_name);
1528 return module_name;
1529}
1530
1531/* fast_save_enter() and fast_save_leave() are guards against recursive
1532 objects when Pickler is used with the "fast mode" (i.e., with object
1533 memoization disabled). If the nesting of a list or dict object exceed
1534 FAST_NESTING_LIMIT, these guards will start keeping an internal
1535 reference to the seen list or dict objects and check whether these objects
1536 are recursive. These are not strictly necessary, since save() has a
1537 hard-coded recursion limit, but they give a nicer error message than the
1538 typical RuntimeError. */
1539static int
1540fast_save_enter(PicklerObject *self, PyObject *obj)
1541{
1542 /* if fast_nesting < 0, we're doing an error exit. */
1543 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1544 PyObject *key = NULL;
1545 if (self->fast_memo == NULL) {
1546 self->fast_memo = PyDict_New();
1547 if (self->fast_memo == NULL) {
1548 self->fast_nesting = -1;
1549 return 0;
1550 }
1551 }
1552 key = PyLong_FromVoidPtr(obj);
1553 if (key == NULL)
1554 return 0;
1555 if (PyDict_GetItem(self->fast_memo, key)) {
1556 Py_DECREF(key);
1557 PyErr_Format(PyExc_ValueError,
1558 "fast mode: can't pickle cyclic objects "
1559 "including object type %.200s at %p",
1560 obj->ob_type->tp_name, obj);
1561 self->fast_nesting = -1;
1562 return 0;
1563 }
1564 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1565 Py_DECREF(key);
1566 self->fast_nesting = -1;
1567 return 0;
1568 }
1569 Py_DECREF(key);
1570 }
1571 return 1;
1572}
1573
1574static int
1575fast_save_leave(PicklerObject *self, PyObject *obj)
1576{
1577 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1578 PyObject *key = PyLong_FromVoidPtr(obj);
1579 if (key == NULL)
1580 return 0;
1581 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1582 Py_DECREF(key);
1583 return 0;
1584 }
1585 Py_DECREF(key);
1586 }
1587 return 1;
1588}
1589
1590static int
1591save_none(PicklerObject *self, PyObject *obj)
1592{
1593 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001594 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001595 return -1;
1596
1597 return 0;
1598}
1599
1600static int
1601save_bool(PicklerObject *self, PyObject *obj)
1602{
1603 static const char *buf[2] = { FALSE, TRUE };
1604 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1605 int p = (obj == Py_True);
1606
1607 if (self->proto >= 2) {
1608 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001609 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001610 return -1;
1611 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001612 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001613 return -1;
1614
1615 return 0;
1616}
1617
1618static int
1619save_int(PicklerObject *self, long x)
1620{
1621 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001622 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623
1624 if (!self->bin
1625#if SIZEOF_LONG > 4
1626 || x > 0x7fffffffL || x < -0x80000000L
1627#endif
1628 ) {
1629 /* Text-mode pickle, or long too big to fit in the 4-byte
1630 * signed BININT format: store as a string.
1631 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001632 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001634 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001635 return -1;
1636 }
1637 else {
1638 /* Binary pickle and x fits in a signed 4-byte int. */
1639 pdata[1] = (unsigned char)(x & 0xff);
1640 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1641 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1642 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1643
1644 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1645 if (pdata[2] == 0) {
1646 pdata[0] = BININT1;
1647 len = 2;
1648 }
1649 else {
1650 pdata[0] = BININT2;
1651 len = 3;
1652 }
1653 }
1654 else {
1655 pdata[0] = BININT;
1656 len = 5;
1657 }
1658
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001659 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001660 return -1;
1661 }
1662
1663 return 0;
1664}
1665
1666static int
1667save_long(PicklerObject *self, PyObject *obj)
1668{
1669 PyObject *repr = NULL;
1670 Py_ssize_t size;
1671 long val = PyLong_AsLong(obj);
1672 int status = 0;
1673
1674 const char long_op = LONG;
1675
1676 if (val == -1 && PyErr_Occurred()) {
1677 /* out of range for int pickling */
1678 PyErr_Clear();
1679 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001680 else
1681#if SIZEOF_LONG > 4
1682 if (val <= 0x7fffffffL && val >= -0x80000000L)
1683#endif
1684 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001685
1686 if (self->proto >= 2) {
1687 /* Linear-time pickling. */
1688 size_t nbits;
1689 size_t nbytes;
1690 unsigned char *pdata;
1691 char header[5];
1692 int i;
1693 int sign = _PyLong_Sign(obj);
1694
1695 if (sign == 0) {
1696 header[0] = LONG1;
1697 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001698 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001699 goto error;
1700 return 0;
1701 }
1702 nbits = _PyLong_NumBits(obj);
1703 if (nbits == (size_t)-1 && PyErr_Occurred())
1704 goto error;
1705 /* How many bytes do we need? There are nbits >> 3 full
1706 * bytes of data, and nbits & 7 leftover bits. If there
1707 * are any leftover bits, then we clearly need another
1708 * byte. Wnat's not so obvious is that we *probably*
1709 * need another byte even if there aren't any leftovers:
1710 * the most-significant bit of the most-significant byte
1711 * acts like a sign bit, and it's usually got a sense
Serhiy Storchaka95949422013-08-27 19:40:23 +03001712 * opposite of the one we need. The exception is ints
1713 * of the form -(2**(8*j-1)) for j > 0. Such an int is
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001714 * its own 256's-complement, so has the right sign bit
1715 * even without the extra byte. That's a pain to check
1716 * for in advance, though, so we always grab an extra
1717 * byte at the start, and cut it back later if possible.
1718 */
1719 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001720 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001721 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchaka95949422013-08-27 19:40:23 +03001722 "int too large to pickle");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001723 goto error;
1724 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001725 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001726 if (repr == NULL)
1727 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001728 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001729 i = _PyLong_AsByteArray((PyLongObject *)obj,
1730 pdata, nbytes,
1731 1 /* little endian */ , 1 /* signed */ );
1732 if (i < 0)
1733 goto error;
Serhiy Storchaka95949422013-08-27 19:40:23 +03001734 /* If the int is negative, this may be a byte more than
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001735 * needed. This is so iff the MSB is all redundant sign
1736 * bits.
1737 */
1738 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001739 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001740 pdata[nbytes - 1] == 0xff &&
1741 (pdata[nbytes - 2] & 0x80) != 0) {
1742 nbytes--;
1743 }
1744
1745 if (nbytes < 256) {
1746 header[0] = LONG1;
1747 header[1] = (unsigned char)nbytes;
1748 size = 2;
1749 }
1750 else {
1751 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001752 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 for (i = 1; i < 5; i++) {
1754 header[i] = (unsigned char)(size & 0xff);
1755 size >>= 8;
1756 }
1757 size = 5;
1758 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001759 if (_Pickler_Write(self, header, size) < 0 ||
1760 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001761 goto error;
1762 }
1763 else {
1764 char *string;
1765
Mark Dickinson8dd05142009-01-20 20:43:58 +00001766 /* proto < 2: write the repr and newline. This is quadratic-time (in
1767 the number of digits), in both directions. We add a trailing 'L'
1768 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001769
1770 repr = PyObject_Repr(obj);
1771 if (repr == NULL)
1772 goto error;
1773
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001774 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001775 if (string == NULL)
1776 goto error;
1777
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001778 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1779 _Pickler_Write(self, string, size) < 0 ||
1780 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001781 goto error;
1782 }
1783
1784 if (0) {
1785 error:
1786 status = -1;
1787 }
1788 Py_XDECREF(repr);
1789
1790 return status;
1791}
1792
1793static int
1794save_float(PicklerObject *self, PyObject *obj)
1795{
1796 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1797
1798 if (self->bin) {
1799 char pdata[9];
1800 pdata[0] = BINFLOAT;
1801 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1802 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001803 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001804 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001805 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001806 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001807 int result = -1;
1808 char *buf = NULL;
1809 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001810
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001811 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001812 goto done;
1813
Mark Dickinson3e09f432009-04-17 08:41:23 +00001814 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001815 if (!buf) {
1816 PyErr_NoMemory();
1817 goto done;
1818 }
1819
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001820 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001821 goto done;
1822
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001823 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001824 goto done;
1825
1826 result = 0;
1827done:
1828 PyMem_Free(buf);
1829 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001830 }
1831
1832 return 0;
1833}
1834
1835static int
1836save_bytes(PicklerObject *self, PyObject *obj)
1837{
1838 if (self->proto < 3) {
1839 /* Older pickle protocols do not have an opcode for pickling bytes
1840 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001841 the __reduce__ method) to permit bytes object unpickling.
1842
1843 Here we use a hack to be compatible with Python 2. Since in Python
1844 2 'bytes' is just an alias for 'str' (which has different
1845 parameters than the actual bytes object), we use codecs.encode
1846 to create the appropriate 'str' object when unpickled using
1847 Python 2 *and* the appropriate 'bytes' object when unpickled
1848 using Python 3. Again this is a hack and we don't need to do this
1849 with newer protocols. */
1850 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001851 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001852 int status;
1853
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001854 if (codecs_encode == NULL) {
1855 PyObject *codecs_module = PyImport_ImportModule("codecs");
1856 if (codecs_module == NULL) {
1857 return -1;
1858 }
1859 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1860 Py_DECREF(codecs_module);
1861 if (codecs_encode == NULL) {
1862 return -1;
1863 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001864 }
1865
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001866 if (PyBytes_GET_SIZE(obj) == 0) {
1867 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1868 }
1869 else {
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001870 PyObject *unicode_str =
1871 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1872 PyBytes_GET_SIZE(obj),
1873 "strict");
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001874 _Py_IDENTIFIER(latin1);
1875
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001876 if (unicode_str == NULL)
1877 return -1;
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001878 reduce_value = Py_BuildValue("(O(OO))",
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001879 codecs_encode, unicode_str,
1880 _PyUnicode_FromId(&PyId_latin1));
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001881 Py_DECREF(unicode_str);
1882 }
1883
1884 if (reduce_value == NULL)
1885 return -1;
1886
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001887 /* save_reduce() will memoize the object automatically. */
1888 status = save_reduce(self, reduce_value, obj);
1889 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001890 return status;
1891 }
1892 else {
1893 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001894 char header[9];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001895 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001896
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001897 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001898 if (size < 0)
1899 return -1;
1900
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001901 if (size <= 0xff) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902 header[0] = SHORT_BINBYTES;
1903 header[1] = (unsigned char)size;
1904 len = 2;
1905 }
1906 else if (size <= 0xffffffffL) {
1907 header[0] = BINBYTES;
1908 header[1] = (unsigned char)(size & 0xff);
1909 header[2] = (unsigned char)((size >> 8) & 0xff);
1910 header[3] = (unsigned char)((size >> 16) & 0xff);
1911 header[4] = (unsigned char)((size >> 24) & 0xff);
1912 len = 5;
1913 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001914 else if (self->proto >= 4) {
1915 int i;
1916 header[0] = BINBYTES8;
1917 for (i = 0; i < 8; i++) {
Antoine Pitrou8f2ee6e2013-11-23 21:05:08 +01001918 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01001919 }
1920 len = 8;
1921 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001922 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001923 PyErr_SetString(PyExc_OverflowError,
Serhiy Storchakaf8def282013-02-16 17:29:56 +02001924 "cannot serialize a bytes object larger than 4 GiB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001925 return -1; /* string too large */
1926 }
1927
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001928 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001929 return -1;
1930
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001931 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001932 return -1;
1933
1934 if (memo_put(self, obj) < 0)
1935 return -1;
1936
1937 return 0;
1938 }
1939}
1940
1941/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1942 backslash and newline characters to \uXXXX escapes. */
1943static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001944raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001945{
1946 PyObject *repr, *result;
1947 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001948 Py_ssize_t i, size, expandsize;
1949 void *data;
1950 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001951
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001952 if (PyUnicode_READY(obj))
1953 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001954
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001955 size = PyUnicode_GET_LENGTH(obj);
1956 data = PyUnicode_DATA(obj);
1957 kind = PyUnicode_KIND(obj);
1958 if (kind == PyUnicode_4BYTE_KIND)
1959 expandsize = 10;
1960 else
1961 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001962
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001963 if (size > PY_SSIZE_T_MAX / expandsize)
1964 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001965 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001966 if (repr == NULL)
1967 return NULL;
1968 if (size == 0)
1969 goto done;
1970
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001971 p = PyByteArray_AS_STRING(repr);
1972 for (i=0; i < size; i++) {
1973 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974 /* Map 32-bit characters to '\Uxxxxxxxx' */
1975 if (ch >= 0x10000) {
1976 *p++ = '\\';
1977 *p++ = 'U';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001978 *p++ = Py_hexdigits[(ch >> 28) & 0xf];
1979 *p++ = Py_hexdigits[(ch >> 24) & 0xf];
1980 *p++ = Py_hexdigits[(ch >> 20) & 0xf];
1981 *p++ = Py_hexdigits[(ch >> 16) & 0xf];
1982 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1983 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1984 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1985 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001986 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001987 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001988 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001989 *p++ = '\\';
1990 *p++ = 'u';
Victor Stinnerf5cff562011-10-14 02:13:11 +02001991 *p++ = Py_hexdigits[(ch >> 12) & 0xf];
1992 *p++ = Py_hexdigits[(ch >> 8) & 0xf];
1993 *p++ = Py_hexdigits[(ch >> 4) & 0xf];
1994 *p++ = Py_hexdigits[ch & 15];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001995 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001996 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001997 else
1998 *p++ = (char) ch;
1999 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002000 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002001
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002002done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00002003 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004 Py_DECREF(repr);
2005 return result;
2006}
2007
2008static int
Antoine Pitrou299978d2013-04-07 17:38:11 +02002009write_utf8(PicklerObject *self, char *data, Py_ssize_t size)
2010{
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002011 char header[9];
2012 Py_ssize_t len;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002013
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002014 if (size <= 0xff && self->proto >= 4) {
2015 header[0] = SHORT_BINUNICODE;
2016 header[1] = (unsigned char)(size & 0xff);
2017 len = 2;
2018 }
2019 else if (size <= 0xffffffffUL) {
2020 header[0] = BINUNICODE;
2021 header[1] = (unsigned char)(size & 0xff);
2022 header[2] = (unsigned char)((size >> 8) & 0xff);
2023 header[3] = (unsigned char)((size >> 16) & 0xff);
2024 header[4] = (unsigned char)((size >> 24) & 0xff);
2025 len = 5;
2026 }
2027 else if (self->proto >= 4) {
2028 int i;
2029
2030 header[0] = BINUNICODE8;
2031 for (i = 0; i < 8; i++) {
Antoine Pitrou8f2ee6e2013-11-23 21:05:08 +01002032 _write_size64(header + 1, size);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002033 }
2034 len = 9;
2035 }
2036 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002037 PyErr_SetString(PyExc_OverflowError,
Antoine Pitrou4b7b0f02013-04-07 23:46:52 +02002038 "cannot serialize a string larger than 4GiB");
Antoine Pitrou299978d2013-04-07 17:38:11 +02002039 return -1;
2040 }
Antoine Pitrou299978d2013-04-07 17:38:11 +02002041
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002042 if (_Pickler_Write(self, header, len) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002043 return -1;
Antoine Pitrou299978d2013-04-07 17:38:11 +02002044 if (_Pickler_Write(self, data, size) < 0)
2045 return -1;
2046
2047 return 0;
2048}
2049
2050static int
2051write_unicode_binary(PicklerObject *self, PyObject *obj)
2052{
2053 PyObject *encoded = NULL;
2054 Py_ssize_t size;
2055 char *data;
2056 int r;
2057
2058 if (PyUnicode_READY(obj))
2059 return -1;
2060
2061 data = PyUnicode_AsUTF8AndSize(obj, &size);
2062 if (data != NULL)
2063 return write_utf8(self, data, size);
2064
2065 /* Issue #8383: for strings with lone surrogates, fallback on the
2066 "surrogatepass" error handler. */
2067 PyErr_Clear();
2068 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
2069 if (encoded == NULL)
2070 return -1;
2071
2072 r = write_utf8(self, PyBytes_AS_STRING(encoded),
2073 PyBytes_GET_SIZE(encoded));
2074 Py_DECREF(encoded);
2075 return r;
2076}
2077
2078static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002079save_unicode(PicklerObject *self, PyObject *obj)
2080{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002081 if (self->bin) {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002082 if (write_unicode_binary(self, obj) < 0)
2083 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002084 }
2085 else {
Antoine Pitrou299978d2013-04-07 17:38:11 +02002086 PyObject *encoded;
2087 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002088 const char unicode_op = UNICODE;
2089
Victor Stinnerc806fdc2011-09-29 23:50:23 +02002090 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002091 if (encoded == NULL)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002092 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002093
Antoine Pitrou299978d2013-04-07 17:38:11 +02002094 if (_Pickler_Write(self, &unicode_op, 1) < 0) {
2095 Py_DECREF(encoded);
2096 return -1;
2097 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002098
2099 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou299978d2013-04-07 17:38:11 +02002100 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0) {
2101 Py_DECREF(encoded);
2102 return -1;
2103 }
2104 Py_DECREF(encoded);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002105
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002106 if (_Pickler_Write(self, "\n", 1) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002107 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002108 }
2109 if (memo_put(self, obj) < 0)
Antoine Pitrou299978d2013-04-07 17:38:11 +02002110 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002111
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002112 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002113}
2114
2115/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
2116static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002117store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002118{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002119 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002120
2121 assert(PyTuple_Size(t) == len);
2122
2123 for (i = 0; i < len; i++) {
2124 PyObject *element = PyTuple_GET_ITEM(t, i);
2125
2126 if (element == NULL)
2127 return -1;
2128 if (save(self, element, 0) < 0)
2129 return -1;
2130 }
2131
2132 return 0;
2133}
2134
2135/* Tuples are ubiquitous in the pickle protocols, so many techniques are
2136 * used across protocols to minimize the space needed to pickle them.
2137 * Tuples are also the only builtin immutable type that can be recursive
2138 * (a tuple can be reached from itself), and that requires some subtle
2139 * magic so that it works in all cases. IOW, this is a long routine.
2140 */
2141static int
2142save_tuple(PicklerObject *self, PyObject *obj)
2143{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002144 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002145
2146 const char mark_op = MARK;
2147 const char tuple_op = TUPLE;
2148 const char pop_op = POP;
2149 const char pop_mark_op = POP_MARK;
2150 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
2151
2152 if ((len = PyTuple_Size(obj)) < 0)
2153 return -1;
2154
2155 if (len == 0) {
2156 char pdata[2];
2157
2158 if (self->proto) {
2159 pdata[0] = EMPTY_TUPLE;
2160 len = 1;
2161 }
2162 else {
2163 pdata[0] = MARK;
2164 pdata[1] = TUPLE;
2165 len = 2;
2166 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002167 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002168 return -1;
2169 return 0;
2170 }
2171
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002172 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002173 * saving the tuple elements, the tuple must be recursive, in
2174 * which case we'll pop everything we put on the stack, and fetch
2175 * its value from the memo.
2176 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002177 if (len <= 3 && self->proto >= 2) {
2178 /* Use TUPLE{1,2,3} opcodes. */
2179 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002180 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002181
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002182 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002183 /* pop the len elements */
2184 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002185 if (_Pickler_Write(self, &pop_op, 1) < 0)
2186 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002187 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002188 if (memo_get(self, obj) < 0)
2189 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002190
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002191 return 0;
2192 }
2193 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002194 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2195 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002196 }
2197 goto memoize;
2198 }
2199
2200 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2201 * Generate MARK e1 e2 ... TUPLE
2202 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002203 if (_Pickler_Write(self, &mark_op, 1) < 0)
2204 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002205
2206 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002207 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002208
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002209 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002210 /* pop the stack stuff we pushed */
2211 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002212 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2213 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002214 }
2215 else {
2216 /* Note that we pop one more than len, to remove
2217 * the MARK too.
2218 */
2219 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002220 if (_Pickler_Write(self, &pop_op, 1) < 0)
2221 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002222 }
2223 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002224 if (memo_get(self, obj) < 0)
2225 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002226
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002227 return 0;
2228 }
2229 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002230 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2231 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002232 }
2233
2234 memoize:
2235 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002236 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002237
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002238 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002239}
2240
2241/* iter is an iterator giving items, and we batch up chunks of
2242 * MARK item item ... item APPENDS
2243 * opcode sequences. Calling code should have arranged to first create an
2244 * empty list, or list-like object, for the APPENDS to operate on.
2245 * Returns 0 on success, <0 on error.
2246 */
2247static int
2248batch_list(PicklerObject *self, PyObject *iter)
2249{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002250 PyObject *obj = NULL;
2251 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002252 int i, n;
2253
2254 const char mark_op = MARK;
2255 const char append_op = APPEND;
2256 const char appends_op = APPENDS;
2257
2258 assert(iter != NULL);
2259
2260 /* XXX: I think this function could be made faster by avoiding the
2261 iterator interface and fetching objects directly from list using
2262 PyList_GET_ITEM.
2263 */
2264
2265 if (self->proto == 0) {
2266 /* APPENDS isn't available; do one at a time. */
2267 for (;;) {
2268 obj = PyIter_Next(iter);
2269 if (obj == NULL) {
2270 if (PyErr_Occurred())
2271 return -1;
2272 break;
2273 }
2274 i = save(self, obj, 0);
2275 Py_DECREF(obj);
2276 if (i < 0)
2277 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002278 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002279 return -1;
2280 }
2281 return 0;
2282 }
2283
2284 /* proto > 0: write in batches of BATCHSIZE. */
2285 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002286 /* Get first item */
2287 firstitem = PyIter_Next(iter);
2288 if (firstitem == NULL) {
2289 if (PyErr_Occurred())
2290 goto error;
2291
2292 /* nothing more to add */
2293 break;
2294 }
2295
2296 /* Try to get a second item */
2297 obj = PyIter_Next(iter);
2298 if (obj == NULL) {
2299 if (PyErr_Occurred())
2300 goto error;
2301
2302 /* Only one item to write */
2303 if (save(self, firstitem, 0) < 0)
2304 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002305 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002306 goto error;
2307 Py_CLEAR(firstitem);
2308 break;
2309 }
2310
2311 /* More than one item to write */
2312
2313 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002314 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002315 goto error;
2316
2317 if (save(self, firstitem, 0) < 0)
2318 goto error;
2319 Py_CLEAR(firstitem);
2320 n = 1;
2321
2322 /* Fetch and save up to BATCHSIZE items */
2323 while (obj) {
2324 if (save(self, obj, 0) < 0)
2325 goto error;
2326 Py_CLEAR(obj);
2327 n += 1;
2328
2329 if (n == BATCHSIZE)
2330 break;
2331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002332 obj = PyIter_Next(iter);
2333 if (obj == NULL) {
2334 if (PyErr_Occurred())
2335 goto error;
2336 break;
2337 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002338 }
2339
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002340 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002341 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002342
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002343 } while (n == BATCHSIZE);
2344 return 0;
2345
2346 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002347 Py_XDECREF(firstitem);
2348 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002349 return -1;
2350}
2351
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002352/* This is a variant of batch_list() above, specialized for lists (with no
2353 * support for list subclasses). Like batch_list(), we batch up chunks of
2354 * MARK item item ... item APPENDS
2355 * opcode sequences. Calling code should have arranged to first create an
2356 * empty list, or list-like object, for the APPENDS to operate on.
2357 * Returns 0 on success, -1 on error.
2358 *
2359 * This version is considerably faster than batch_list(), if less general.
2360 *
2361 * Note that this only works for protocols > 0.
2362 */
2363static int
2364batch_list_exact(PicklerObject *self, PyObject *obj)
2365{
2366 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002367 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002368
2369 const char append_op = APPEND;
2370 const char appends_op = APPENDS;
2371 const char mark_op = MARK;
2372
2373 assert(obj != NULL);
2374 assert(self->proto > 0);
2375 assert(PyList_CheckExact(obj));
2376
2377 if (PyList_GET_SIZE(obj) == 1) {
2378 item = PyList_GET_ITEM(obj, 0);
2379 if (save(self, item, 0) < 0)
2380 return -1;
2381 if (_Pickler_Write(self, &append_op, 1) < 0)
2382 return -1;
2383 return 0;
2384 }
2385
2386 /* Write in batches of BATCHSIZE. */
2387 total = 0;
2388 do {
2389 this_batch = 0;
2390 if (_Pickler_Write(self, &mark_op, 1) < 0)
2391 return -1;
2392 while (total < PyList_GET_SIZE(obj)) {
2393 item = PyList_GET_ITEM(obj, total);
2394 if (save(self, item, 0) < 0)
2395 return -1;
2396 total++;
2397 if (++this_batch == BATCHSIZE)
2398 break;
2399 }
2400 if (_Pickler_Write(self, &appends_op, 1) < 0)
2401 return -1;
2402
2403 } while (total < PyList_GET_SIZE(obj));
2404
2405 return 0;
2406}
2407
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002408static int
2409save_list(PicklerObject *self, PyObject *obj)
2410{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002411 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002412 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002413 int status = 0;
2414
2415 if (self->fast && !fast_save_enter(self, obj))
2416 goto error;
2417
2418 /* Create an empty list. */
2419 if (self->bin) {
2420 header[0] = EMPTY_LIST;
2421 len = 1;
2422 }
2423 else {
2424 header[0] = MARK;
2425 header[1] = LIST;
2426 len = 2;
2427 }
2428
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002429 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002430 goto error;
2431
2432 /* Get list length, and bow out early if empty. */
2433 if ((len = PyList_Size(obj)) < 0)
2434 goto error;
2435
2436 if (memo_put(self, obj) < 0)
2437 goto error;
2438
2439 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002440 /* Materialize the list elements. */
2441 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002442 if (Py_EnterRecursiveCall(" while pickling an object"))
2443 goto error;
2444 status = batch_list_exact(self, obj);
2445 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002446 } else {
2447 PyObject *iter = PyObject_GetIter(obj);
2448 if (iter == NULL)
2449 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002450
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002451 if (Py_EnterRecursiveCall(" while pickling an object")) {
2452 Py_DECREF(iter);
2453 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002454 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002455 status = batch_list(self, iter);
2456 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002457 Py_DECREF(iter);
2458 }
2459 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002460 if (0) {
2461 error:
2462 status = -1;
2463 }
2464
2465 if (self->fast && !fast_save_leave(self, obj))
2466 status = -1;
2467
2468 return status;
2469}
2470
2471/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2472 * MARK key value ... key value SETITEMS
2473 * opcode sequences. Calling code should have arranged to first create an
2474 * empty dict, or dict-like object, for the SETITEMS to operate on.
2475 * Returns 0 on success, <0 on error.
2476 *
2477 * This is very much like batch_list(). The difference between saving
2478 * elements directly, and picking apart two-tuples, is so long-winded at
2479 * the C level, though, that attempts to combine these routines were too
2480 * ugly to bear.
2481 */
2482static int
2483batch_dict(PicklerObject *self, PyObject *iter)
2484{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002485 PyObject *obj = NULL;
2486 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002487 int i, n;
2488
2489 const char mark_op = MARK;
2490 const char setitem_op = SETITEM;
2491 const char setitems_op = SETITEMS;
2492
2493 assert(iter != NULL);
2494
2495 if (self->proto == 0) {
2496 /* SETITEMS isn't available; do one at a time. */
2497 for (;;) {
2498 obj = PyIter_Next(iter);
2499 if (obj == NULL) {
2500 if (PyErr_Occurred())
2501 return -1;
2502 break;
2503 }
2504 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2505 PyErr_SetString(PyExc_TypeError, "dict items "
2506 "iterator must return 2-tuples");
2507 return -1;
2508 }
2509 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2510 if (i >= 0)
2511 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2512 Py_DECREF(obj);
2513 if (i < 0)
2514 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002515 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002516 return -1;
2517 }
2518 return 0;
2519 }
2520
2521 /* proto > 0: write in batches of BATCHSIZE. */
2522 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002523 /* Get first item */
2524 firstitem = PyIter_Next(iter);
2525 if (firstitem == NULL) {
2526 if (PyErr_Occurred())
2527 goto error;
2528
2529 /* nothing more to add */
2530 break;
2531 }
2532 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2533 PyErr_SetString(PyExc_TypeError, "dict items "
2534 "iterator must return 2-tuples");
2535 goto error;
2536 }
2537
2538 /* Try to get a second item */
2539 obj = PyIter_Next(iter);
2540 if (obj == NULL) {
2541 if (PyErr_Occurred())
2542 goto error;
2543
2544 /* Only one item to write */
2545 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2546 goto error;
2547 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2548 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002549 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002550 goto error;
2551 Py_CLEAR(firstitem);
2552 break;
2553 }
2554
2555 /* More than one item to write */
2556
2557 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002558 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002559 goto error;
2560
2561 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2562 goto error;
2563 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2564 goto error;
2565 Py_CLEAR(firstitem);
2566 n = 1;
2567
2568 /* Fetch and save up to BATCHSIZE items */
2569 while (obj) {
2570 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2571 PyErr_SetString(PyExc_TypeError, "dict items "
2572 "iterator must return 2-tuples");
2573 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002574 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002575 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2576 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2577 goto error;
2578 Py_CLEAR(obj);
2579 n += 1;
2580
2581 if (n == BATCHSIZE)
2582 break;
2583
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002584 obj = PyIter_Next(iter);
2585 if (obj == NULL) {
2586 if (PyErr_Occurred())
2587 goto error;
2588 break;
2589 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002590 }
2591
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002592 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002593 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002594
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002595 } while (n == BATCHSIZE);
2596 return 0;
2597
2598 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002599 Py_XDECREF(firstitem);
2600 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002601 return -1;
2602}
2603
Collin Winter5c9b02d2009-05-25 05:43:30 +00002604/* This is a variant of batch_dict() above that specializes for dicts, with no
2605 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2606 * MARK key value ... key value SETITEMS
2607 * opcode sequences. Calling code should have arranged to first create an
2608 * empty dict, or dict-like object, for the SETITEMS to operate on.
2609 * Returns 0 on success, -1 on error.
2610 *
2611 * Note that this currently doesn't work for protocol 0.
2612 */
2613static int
2614batch_dict_exact(PicklerObject *self, PyObject *obj)
2615{
2616 PyObject *key = NULL, *value = NULL;
2617 int i;
2618 Py_ssize_t dict_size, ppos = 0;
2619
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002620 const char mark_op = MARK;
2621 const char setitem_op = SETITEM;
2622 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002623
2624 assert(obj != NULL);
2625 assert(self->proto > 0);
2626
2627 dict_size = PyDict_Size(obj);
2628
2629 /* Special-case len(d) == 1 to save space. */
2630 if (dict_size == 1) {
2631 PyDict_Next(obj, &ppos, &key, &value);
2632 if (save(self, key, 0) < 0)
2633 return -1;
2634 if (save(self, value, 0) < 0)
2635 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002636 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002637 return -1;
2638 return 0;
2639 }
2640
2641 /* Write in batches of BATCHSIZE. */
2642 do {
2643 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002644 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002645 return -1;
2646 while (PyDict_Next(obj, &ppos, &key, &value)) {
2647 if (save(self, key, 0) < 0)
2648 return -1;
2649 if (save(self, value, 0) < 0)
2650 return -1;
2651 if (++i == BATCHSIZE)
2652 break;
2653 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002654 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002655 return -1;
2656 if (PyDict_Size(obj) != dict_size) {
2657 PyErr_Format(
2658 PyExc_RuntimeError,
2659 "dictionary changed size during iteration");
2660 return -1;
2661 }
2662
2663 } while (i == BATCHSIZE);
2664 return 0;
2665}
2666
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002667static int
2668save_dict(PicklerObject *self, PyObject *obj)
2669{
2670 PyObject *items, *iter;
2671 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002672 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002673 int status = 0;
2674
2675 if (self->fast && !fast_save_enter(self, obj))
2676 goto error;
2677
2678 /* Create an empty dict. */
2679 if (self->bin) {
2680 header[0] = EMPTY_DICT;
2681 len = 1;
2682 }
2683 else {
2684 header[0] = MARK;
2685 header[1] = DICT;
2686 len = 2;
2687 }
2688
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002689 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002690 goto error;
2691
2692 /* Get dict size, and bow out early if empty. */
2693 if ((len = PyDict_Size(obj)) < 0)
2694 goto error;
2695
2696 if (memo_put(self, obj) < 0)
2697 goto error;
2698
2699 if (len != 0) {
2700 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002701 if (PyDict_CheckExact(obj) && self->proto > 0) {
2702 /* We can take certain shortcuts if we know this is a dict and
2703 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002704 if (Py_EnterRecursiveCall(" while pickling an object"))
2705 goto error;
2706 status = batch_dict_exact(self, obj);
2707 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002708 } else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02002709 _Py_IDENTIFIER(items);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002710
2711 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002712 if (items == NULL)
2713 goto error;
2714 iter = PyObject_GetIter(items);
2715 Py_DECREF(items);
2716 if (iter == NULL)
2717 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002718 if (Py_EnterRecursiveCall(" while pickling an object")) {
2719 Py_DECREF(iter);
2720 goto error;
2721 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002722 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002723 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002724 Py_DECREF(iter);
2725 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002726 }
2727
2728 if (0) {
2729 error:
2730 status = -1;
2731 }
2732
2733 if (self->fast && !fast_save_leave(self, obj))
2734 status = -1;
2735
2736 return status;
2737}
2738
2739static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002740save_set(PicklerObject *self, PyObject *obj)
2741{
2742 PyObject *item;
2743 int i;
2744 Py_ssize_t set_size, ppos = 0;
2745 Py_hash_t hash;
2746
2747 const char empty_set_op = EMPTY_SET;
2748 const char mark_op = MARK;
2749 const char additems_op = ADDITEMS;
2750
2751 if (self->proto < 4) {
2752 PyObject *items;
2753 PyObject *reduce_value;
2754 int status;
2755
2756 items = PySequence_List(obj);
2757 if (items == NULL) {
2758 return -1;
2759 }
2760 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PySet_Type, items);
2761 Py_DECREF(items);
2762 if (reduce_value == NULL) {
2763 return -1;
2764 }
2765 /* save_reduce() will memoize the object automatically. */
2766 status = save_reduce(self, reduce_value, obj);
2767 Py_DECREF(reduce_value);
2768 return status;
2769 }
2770
2771 if (_Pickler_Write(self, &empty_set_op, 1) < 0)
2772 return -1;
2773
2774 if (memo_put(self, obj) < 0)
2775 return -1;
2776
2777 set_size = PySet_GET_SIZE(obj);
2778 if (set_size == 0)
2779 return 0; /* nothing to do */
2780
2781 /* Write in batches of BATCHSIZE. */
2782 do {
2783 i = 0;
2784 if (_Pickler_Write(self, &mark_op, 1) < 0)
2785 return -1;
2786 while (_PySet_NextEntry(obj, &ppos, &item, &hash)) {
2787 if (save(self, item, 0) < 0)
2788 return -1;
2789 if (++i == BATCHSIZE)
2790 break;
2791 }
2792 if (_Pickler_Write(self, &additems_op, 1) < 0)
2793 return -1;
2794 if (PySet_GET_SIZE(obj) != set_size) {
2795 PyErr_Format(
2796 PyExc_RuntimeError,
2797 "set changed size during iteration");
2798 return -1;
2799 }
2800 } while (i == BATCHSIZE);
2801
2802 return 0;
2803}
2804
2805static int
2806save_frozenset(PicklerObject *self, PyObject *obj)
2807{
2808 PyObject *iter;
2809
2810 const char mark_op = MARK;
2811 const char frozenset_op = FROZENSET;
2812
2813 if (self->fast && !fast_save_enter(self, obj))
2814 return -1;
2815
2816 if (self->proto < 4) {
2817 PyObject *items;
2818 PyObject *reduce_value;
2819 int status;
2820
2821 items = PySequence_List(obj);
2822 if (items == NULL) {
2823 return -1;
2824 }
2825 reduce_value = Py_BuildValue("(O(O))", (PyObject*)&PyFrozenSet_Type,
2826 items);
2827 Py_DECREF(items);
2828 if (reduce_value == NULL) {
2829 return -1;
2830 }
2831 /* save_reduce() will memoize the object automatically. */
2832 status = save_reduce(self, reduce_value, obj);
2833 Py_DECREF(reduce_value);
2834 return status;
2835 }
2836
2837 if (_Pickler_Write(self, &mark_op, 1) < 0)
2838 return -1;
2839
2840 iter = PyObject_GetIter(obj);
Christian Heimesb3d3ee42013-11-23 21:01:40 +01002841 if (iter == NULL) {
Christian Heimes74d8d632013-11-23 21:05:31 +01002842 return -1;
Christian Heimesb3d3ee42013-11-23 21:01:40 +01002843 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002844 for (;;) {
2845 PyObject *item;
2846
2847 item = PyIter_Next(iter);
2848 if (item == NULL) {
2849 if (PyErr_Occurred()) {
2850 Py_DECREF(iter);
2851 return -1;
2852 }
2853 break;
2854 }
2855 if (save(self, item, 0) < 0) {
2856 Py_DECREF(item);
2857 Py_DECREF(iter);
2858 return -1;
2859 }
2860 Py_DECREF(item);
2861 }
2862 Py_DECREF(iter);
2863
2864 /* If the object is already in the memo, this means it is
2865 recursive. In this case, throw away everything we put on the
2866 stack, and fetch the object back from the memo. */
2867 if (PyMemoTable_Get(self->memo, obj)) {
2868 const char pop_mark_op = POP_MARK;
2869
2870 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2871 return -1;
2872 if (memo_get(self, obj) < 0)
2873 return -1;
2874 return 0;
2875 }
2876
2877 if (_Pickler_Write(self, &frozenset_op, 1) < 0)
2878 return -1;
2879 if (memo_put(self, obj) < 0)
2880 return -1;
2881
2882 return 0;
2883}
2884
2885static int
2886fix_imports(PyObject **module_name, PyObject **global_name)
2887{
2888 PyObject *key;
2889 PyObject *item;
2890
2891 key = PyTuple_Pack(2, *module_name, *global_name);
2892 if (key == NULL)
2893 return -1;
2894 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2895 Py_DECREF(key);
2896 if (item) {
2897 PyObject *fixed_module_name;
2898 PyObject *fixed_global_name;
2899
2900 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2901 PyErr_Format(PyExc_RuntimeError,
2902 "_compat_pickle.REVERSE_NAME_MAPPING values "
2903 "should be 2-tuples, not %.200s",
2904 Py_TYPE(item)->tp_name);
2905 return -1;
2906 }
2907 fixed_module_name = PyTuple_GET_ITEM(item, 0);
2908 fixed_global_name = PyTuple_GET_ITEM(item, 1);
2909 if (!PyUnicode_Check(fixed_module_name) ||
2910 !PyUnicode_Check(fixed_global_name)) {
2911 PyErr_Format(PyExc_RuntimeError,
2912 "_compat_pickle.REVERSE_NAME_MAPPING values "
2913 "should be pairs of str, not (%.200s, %.200s)",
2914 Py_TYPE(fixed_module_name)->tp_name,
2915 Py_TYPE(fixed_global_name)->tp_name);
2916 return -1;
2917 }
2918
2919 Py_CLEAR(*module_name);
2920 Py_CLEAR(*global_name);
2921 Py_INCREF(fixed_module_name);
2922 Py_INCREF(fixed_global_name);
2923 *module_name = fixed_module_name;
2924 *global_name = fixed_global_name;
2925 }
2926 else if (PyErr_Occurred()) {
2927 return -1;
2928 }
2929
2930 item = PyDict_GetItemWithError(import_mapping_3to2, *module_name);
2931 if (item) {
2932 if (!PyUnicode_Check(item)) {
2933 PyErr_Format(PyExc_RuntimeError,
2934 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2935 "should be strings, not %.200s",
2936 Py_TYPE(item)->tp_name);
2937 return -1;
2938 }
2939 Py_CLEAR(*module_name);
2940 Py_INCREF(item);
2941 *module_name = item;
2942 }
2943 else if (PyErr_Occurred()) {
2944 return -1;
2945 }
2946
2947 return 0;
2948}
2949
2950static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002951save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2952{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002953 PyObject *global_name = NULL;
2954 PyObject *module_name = NULL;
2955 PyObject *module = NULL;
2956 PyObject *cls;
2957 int status = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002958 _Py_IDENTIFIER(__name__);
2959 _Py_IDENTIFIER(__qualname__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960
2961 const char global_op = GLOBAL;
2962
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002963 if (name) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002964 Py_INCREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002965 global_name = name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002966 }
2967 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002968 if (self->proto >= 4) {
2969 global_name = _PyObject_GetAttrId(obj, &PyId___qualname__);
2970 if (global_name == NULL) {
2971 if (!PyErr_ExceptionMatches(PyExc_AttributeError))
2972 goto error;
2973 PyErr_Clear();
2974 }
2975 }
2976 if (global_name == NULL) {
2977 global_name = _PyObject_GetAttrId(obj, &PyId___name__);
2978 if (global_name == NULL)
2979 goto error;
2980 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002981 }
2982
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01002983 module_name = whichmodule(obj, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002984 if (module_name == NULL)
2985 goto error;
2986
2987 /* XXX: Change to use the import C API directly with level=0 to disallow
2988 relative imports.
2989
2990 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2991 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2992 custom import functions (IMHO, this would be a nice security
2993 feature). The import C API would need to be extended to support the
2994 extra parameters of __import__ to fix that. */
2995 module = PyImport_Import(module_name);
2996 if (module == NULL) {
2997 PyErr_Format(PicklingError,
2998 "Can't pickle %R: import of module %R failed",
2999 obj, module_name);
3000 goto error;
3001 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003002 cls = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003003 if (cls == NULL) {
3004 PyErr_Format(PicklingError,
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003005 "Can't pickle %R: attribute lookup %S on %S failed",
3006 obj, global_name, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003007 goto error;
3008 }
3009 if (cls != obj) {
3010 Py_DECREF(cls);
3011 PyErr_Format(PicklingError,
3012 "Can't pickle %R: it's not the same object as %S.%S",
3013 obj, module_name, global_name);
3014 goto error;
3015 }
3016 Py_DECREF(cls);
3017
3018 if (self->proto >= 2) {
3019 /* See whether this is in the extension registry, and if
3020 * so generate an EXT opcode.
3021 */
3022 PyObject *code_obj; /* extension code as Python object */
3023 long code; /* extension code as C value */
3024 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003025 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003026
3027 PyTuple_SET_ITEM(two_tuple, 0, module_name);
3028 PyTuple_SET_ITEM(two_tuple, 1, global_name);
3029 code_obj = PyDict_GetItem(extension_registry, two_tuple);
3030 /* The object is not registered in the extension registry.
3031 This is the most likely code path. */
3032 if (code_obj == NULL)
3033 goto gen_global;
3034
3035 /* XXX: pickle.py doesn't check neither the type, nor the range
3036 of the value returned by the extension_registry. It should for
3037 consistency. */
3038
3039 /* Verify code_obj has the right type and value. */
3040 if (!PyLong_Check(code_obj)) {
3041 PyErr_Format(PicklingError,
3042 "Can't pickle %R: extension code %R isn't an integer",
3043 obj, code_obj);
3044 goto error;
3045 }
3046 code = PyLong_AS_LONG(code_obj);
3047 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003048 if (!PyErr_Occurred())
3049 PyErr_Format(PicklingError,
3050 "Can't pickle %R: extension code %ld is out of range",
3051 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003052 goto error;
3053 }
3054
3055 /* Generate an EXT opcode. */
3056 if (code <= 0xff) {
3057 pdata[0] = EXT1;
3058 pdata[1] = (unsigned char)code;
3059 n = 2;
3060 }
3061 else if (code <= 0xffff) {
3062 pdata[0] = EXT2;
3063 pdata[1] = (unsigned char)(code & 0xff);
3064 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3065 n = 3;
3066 }
3067 else {
3068 pdata[0] = EXT4;
3069 pdata[1] = (unsigned char)(code & 0xff);
3070 pdata[2] = (unsigned char)((code >> 8) & 0xff);
3071 pdata[3] = (unsigned char)((code >> 16) & 0xff);
3072 pdata[4] = (unsigned char)((code >> 24) & 0xff);
3073 n = 5;
3074 }
3075
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003076 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003077 goto error;
3078 }
3079 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003080 gen_global:
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003081 if (self->proto >= 4) {
3082 const char stack_global_op = STACK_GLOBAL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003083
Christian Heimese8b1ba12013-11-23 21:13:39 +01003084 if (save(self, module_name, 0) < 0)
3085 goto error;
3086 if (save(self, global_name, 0) < 0)
3087 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003088
3089 if (_Pickler_Write(self, &stack_global_op, 1) < 0)
3090 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003091 }
3092 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003093 /* Generate a normal global opcode if we are using a pickle
3094 protocol < 4, or if the object is not registered in the
3095 extension registry. */
3096 PyObject *encoded;
3097 PyObject *(*unicode_encoder)(PyObject *);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003098
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003099 if (_Pickler_Write(self, &global_op, 1) < 0)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003100 goto error;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003101
3102 /* For protocol < 3 and if the user didn't request against doing
3103 so, we convert module names to the old 2.x module names. */
3104 if (self->proto < 3 && self->fix_imports) {
3105 if (fix_imports(&module_name, &global_name) < 0) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003106 goto error;
3107 }
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003108 }
3109
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003110 /* Since Python 3.0 now supports non-ASCII identifiers, we encode
3111 both the module name and the global name using UTF-8. We do so
3112 only when we are using the pickle protocol newer than version
3113 3. This is to ensure compatibility with older Unpickler running
3114 on Python 2.x. */
3115 if (self->proto == 3) {
3116 unicode_encoder = PyUnicode_AsUTF8String;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003117 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003118 else {
3119 unicode_encoder = PyUnicode_AsASCIIString;
3120 }
3121 encoded = unicode_encoder(module_name);
3122 if (encoded == NULL) {
3123 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3124 PyErr_Format(PicklingError,
3125 "can't pickle module identifier '%S' using "
3126 "pickle protocol %i",
3127 module_name, self->proto);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003128 goto error;
3129 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003130 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3131 PyBytes_GET_SIZE(encoded)) < 0) {
3132 Py_DECREF(encoded);
3133 goto error;
3134 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003135 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003136 if(_Pickler_Write(self, "\n", 1) < 0)
3137 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003138
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003139 /* Save the name of the module. */
3140 encoded = unicode_encoder(global_name);
3141 if (encoded == NULL) {
3142 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
3143 PyErr_Format(PicklingError,
3144 "can't pickle global identifier '%S' using "
3145 "pickle protocol %i",
3146 global_name, self->proto);
3147 goto error;
3148 }
3149 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
3150 PyBytes_GET_SIZE(encoded)) < 0) {
3151 Py_DECREF(encoded);
3152 goto error;
3153 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003154 Py_DECREF(encoded);
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003155 if (_Pickler_Write(self, "\n", 1) < 0)
3156 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003157 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003158 /* Memoize the object. */
3159 if (memo_put(self, obj) < 0)
3160 goto error;
3161 }
3162
3163 if (0) {
3164 error:
3165 status = -1;
3166 }
3167 Py_XDECREF(module_name);
3168 Py_XDECREF(global_name);
3169 Py_XDECREF(module);
3170
3171 return status;
3172}
3173
3174static int
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003175save_ellipsis(PicklerObject *self, PyObject *obj)
3176{
Łukasz Langadbd78252012-03-12 22:59:11 +01003177 PyObject *str = PyUnicode_FromString("Ellipsis");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003178 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01003179 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01003180 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003181 res = save_global(self, Py_Ellipsis, str);
3182 Py_DECREF(str);
3183 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003184}
3185
3186static int
3187save_notimplemented(PicklerObject *self, PyObject *obj)
3188{
Łukasz Langadbd78252012-03-12 22:59:11 +01003189 PyObject *str = PyUnicode_FromString("NotImplemented");
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003190 int res;
Łukasz Langadbd78252012-03-12 22:59:11 +01003191 if (str == NULL)
Łukasz Langacad1a072012-03-12 23:41:07 +01003192 return -1;
Benjamin Petersone80b29b2012-03-16 18:45:31 -05003193 res = save_global(self, Py_NotImplemented, str);
3194 Py_DECREF(str);
3195 return res;
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003196}
3197
3198static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003199save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
3200{
3201 PyObject *pid = NULL;
3202 int status = 0;
3203
3204 const char persid_op = PERSID;
3205 const char binpersid_op = BINPERSID;
3206
3207 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003208 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003209 if (pid == NULL)
3210 return -1;
3211
3212 if (pid != Py_None) {
3213 if (self->bin) {
3214 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003215 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003216 goto error;
3217 }
3218 else {
3219 PyObject *pid_str = NULL;
3220 char *pid_ascii_bytes;
3221 Py_ssize_t size;
3222
3223 pid_str = PyObject_Str(pid);
3224 if (pid_str == NULL)
3225 goto error;
3226
3227 /* XXX: Should it check whether the persistent id only contains
3228 ASCII characters? And what if the pid contains embedded
3229 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00003230 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003231 Py_DECREF(pid_str);
3232 if (pid_ascii_bytes == NULL)
3233 goto error;
3234
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003235 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
3236 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
3237 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003238 goto error;
3239 }
3240 status = 1;
3241 }
3242
3243 if (0) {
3244 error:
3245 status = -1;
3246 }
3247 Py_XDECREF(pid);
3248
3249 return status;
3250}
3251
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003252static PyObject *
3253get_class(PyObject *obj)
3254{
3255 PyObject *cls;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003256 _Py_IDENTIFIER(__class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003257
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003258 cls = _PyObject_GetAttrId(obj, &PyId___class__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003259 if (cls == NULL) {
3260 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
3261 PyErr_Clear();
3262 cls = (PyObject *) Py_TYPE(obj);
3263 Py_INCREF(cls);
3264 }
3265 }
3266 return cls;
3267}
3268
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003269/* We're saving obj, and args is the 2-thru-5 tuple returned by the
3270 * appropriate __reduce__ method for obj.
3271 */
3272static int
3273save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
3274{
3275 PyObject *callable;
3276 PyObject *argtup;
3277 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003278 PyObject *listitems = Py_None;
3279 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003280 Py_ssize_t size;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003281 int use_newobj = 0, use_newobj_ex = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003282
3283 const char reduce_op = REDUCE;
3284 const char build_op = BUILD;
3285 const char newobj_op = NEWOBJ;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003286 const char newobj_ex_op = NEWOBJ_EX;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003287
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00003288 size = PyTuple_Size(args);
3289 if (size < 2 || size > 5) {
3290 PyErr_SetString(PicklingError, "tuple returned by "
3291 "__reduce__ must contain 2 through 5 elements");
3292 return -1;
3293 }
3294
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003295 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
3296 &callable, &argtup, &state, &listitems, &dictitems))
3297 return -1;
3298
3299 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003300 PyErr_SetString(PicklingError, "first item of the tuple "
3301 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003302 return -1;
3303 }
3304 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003305 PyErr_SetString(PicklingError, "second item of the tuple "
3306 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003307 return -1;
3308 }
3309
3310 if (state == Py_None)
3311 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003312
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003313 if (listitems == Py_None)
3314 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003315 else if (!PyIter_Check(listitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003316 PyErr_Format(PicklingError, "fourth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003317 "returned by __reduce__ must be an iterator, not %s",
3318 Py_TYPE(listitems)->tp_name);
3319 return -1;
3320 }
3321
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003322 if (dictitems == Py_None)
3323 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003324 else if (!PyIter_Check(dictitems)) {
Alexandre Vassalotti00d83f22013-04-14 01:28:01 -07003325 PyErr_Format(PicklingError, "fifth element of the tuple "
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00003326 "returned by __reduce__ must be an iterator, not %s",
3327 Py_TYPE(dictitems)->tp_name);
3328 return -1;
3329 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003330
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003331 if (self->proto >= 2) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003332 PyObject *name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003333 _Py_IDENTIFIER(__name__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003334
Victor Stinner804e05e2013-11-14 01:26:17 +01003335 name = _PyObject_GetAttrId(callable, &PyId___name__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003336 if (name == NULL) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003337 if (!PyErr_ExceptionMatches(PyExc_AttributeError)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003338 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003339 }
3340 PyErr_Clear();
3341 }
3342 else if (self->proto >= 4) {
3343 _Py_IDENTIFIER(__newobj_ex__);
3344 use_newobj_ex = PyUnicode_Check(name) &&
3345 PyUnicode_Compare(
3346 name, _PyUnicode_FromId(&PyId___newobj_ex__)) == 0;
3347 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003348 }
3349 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003350 _Py_IDENTIFIER(__newobj__);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003351 use_newobj = PyUnicode_Check(name) &&
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003352 PyUnicode_Compare(
3353 name, _PyUnicode_FromId(&PyId___newobj__)) == 0;
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003354 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003355 }
3356 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003357
3358 if (use_newobj_ex) {
3359 PyObject *cls;
3360 PyObject *args;
3361 PyObject *kwargs;
3362
3363 if (Py_SIZE(argtup) != 3) {
3364 PyErr_Format(PicklingError,
3365 "length of the NEWOBJ_EX argument tuple must be "
3366 "exactly 3, not %zd", Py_SIZE(argtup));
3367 return -1;
3368 }
3369
3370 cls = PyTuple_GET_ITEM(argtup, 0);
3371 if (!PyType_Check(cls)) {
3372 PyErr_Format(PicklingError,
3373 "first item from NEWOBJ_EX argument tuple must "
3374 "be a class, not %.200s", Py_TYPE(cls)->tp_name);
3375 return -1;
3376 }
3377 args = PyTuple_GET_ITEM(argtup, 1);
3378 if (!PyTuple_Check(args)) {
3379 PyErr_Format(PicklingError,
3380 "second item from NEWOBJ_EX argument tuple must "
3381 "be a tuple, not %.200s", Py_TYPE(args)->tp_name);
3382 return -1;
3383 }
3384 kwargs = PyTuple_GET_ITEM(argtup, 2);
3385 if (!PyDict_Check(kwargs)) {
3386 PyErr_Format(PicklingError,
3387 "third item from NEWOBJ_EX argument tuple must "
3388 "be a dict, not %.200s", Py_TYPE(kwargs)->tp_name);
3389 return -1;
3390 }
3391
3392 if (save(self, cls, 0) < 0 ||
3393 save(self, args, 0) < 0 ||
3394 save(self, kwargs, 0) < 0 ||
3395 _Pickler_Write(self, &newobj_ex_op, 1) < 0) {
3396 return -1;
3397 }
3398 }
3399 else if (use_newobj) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003400 PyObject *cls;
3401 PyObject *newargtup;
3402 PyObject *obj_class;
3403 int p;
3404
3405 /* Sanity checks. */
3406 if (Py_SIZE(argtup) < 1) {
3407 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
3408 return -1;
3409 }
3410
3411 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003412 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003413 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003414 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003415 return -1;
3416 }
3417
3418 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01003419 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003420 p = obj_class != cls; /* true iff a problem */
3421 Py_DECREF(obj_class);
3422 if (p) {
3423 PyErr_SetString(PicklingError, "args[0] from "
3424 "__newobj__ args has the wrong class");
3425 return -1;
3426 }
3427 }
3428 /* XXX: These calls save() are prone to infinite recursion. Imagine
3429 what happen if the value returned by the __reduce__() method of
3430 some extension type contains another object of the same type. Ouch!
3431
3432 Here is a quick example, that I ran into, to illustrate what I
3433 mean:
3434
3435 >>> import pickle, copyreg
3436 >>> copyreg.dispatch_table.pop(complex)
3437 >>> pickle.dumps(1+2j)
3438 Traceback (most recent call last):
3439 ...
3440 RuntimeError: maximum recursion depth exceeded
3441
3442 Removing the complex class from copyreg.dispatch_table made the
3443 __reduce_ex__() method emit another complex object:
3444
3445 >>> (1+1j).__reduce_ex__(2)
3446 (<function __newobj__ at 0xb7b71c3c>,
3447 (<class 'complex'>, (1+1j)), None, None, None)
3448
3449 Thus when save() was called on newargstup (the 2nd item) recursion
3450 ensued. Of course, the bug was in the complex class which had a
3451 broken __getnewargs__() that emitted another complex object. But,
3452 the point, here, is it is quite easy to end up with a broken reduce
3453 function. */
3454
3455 /* Save the class and its __new__ arguments. */
3456 if (save(self, cls, 0) < 0)
3457 return -1;
3458
3459 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3460 if (newargtup == NULL)
3461 return -1;
3462
3463 p = save(self, newargtup, 0);
3464 Py_DECREF(newargtup);
3465 if (p < 0)
3466 return -1;
3467
3468 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003469 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003470 return -1;
3471 }
3472 else { /* Not using NEWOBJ. */
3473 if (save(self, callable, 0) < 0 ||
3474 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003475 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003476 return -1;
3477 }
3478
3479 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3480 the caller do not want to memoize the object. Not particularly useful,
3481 but that is to mimic the behavior save_reduce() in pickle.py when
3482 obj is None. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003483 if (obj != NULL) {
3484 /* If the object is already in the memo, this means it is
3485 recursive. In this case, throw away everything we put on the
3486 stack, and fetch the object back from the memo. */
3487 if (PyMemoTable_Get(self->memo, obj)) {
3488 const char pop_op = POP;
3489
3490 if (_Pickler_Write(self, &pop_op, 1) < 0)
3491 return -1;
3492 if (memo_get(self, obj) < 0)
3493 return -1;
3494
3495 return 0;
3496 }
3497 else if (memo_put(self, obj) < 0)
3498 return -1;
3499 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003500
3501 if (listitems && batch_list(self, listitems) < 0)
3502 return -1;
3503
3504 if (dictitems && batch_dict(self, dictitems) < 0)
3505 return -1;
3506
3507 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003508 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003509 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003510 return -1;
3511 }
3512
3513 return 0;
3514}
3515
3516static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003517save_method(PicklerObject *self, PyObject *obj)
3518{
3519 PyObject *method_self = PyCFunction_GET_SELF(obj);
3520
3521 if (method_self == NULL || PyModule_Check(method_self)) {
3522 return save_global(self, obj, NULL);
3523 }
3524 else {
3525 PyObject *builtins;
3526 PyObject *getattr;
3527 PyObject *reduce_value;
3528 int status = -1;
3529 _Py_IDENTIFIER(getattr);
3530
3531 builtins = PyEval_GetBuiltins();
3532 getattr = _PyDict_GetItemId(builtins, &PyId_getattr);
3533 reduce_value = \
3534 Py_BuildValue("O(Os)", getattr, method_self,
3535 ((PyCFunctionObject *)obj)->m_ml->ml_name);
3536 if (reduce_value != NULL) {
3537 status = save_reduce(self, reduce_value, obj);
3538 Py_DECREF(reduce_value);
3539 }
3540 return status;
3541 }
3542}
3543
3544static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003545save(PicklerObject *self, PyObject *obj, int pers_save)
3546{
3547 PyTypeObject *type;
3548 PyObject *reduce_func = NULL;
3549 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003550 int status = 0;
3551
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08003552 if (_Pickler_OpcodeBoundary(self) < 0)
3553 return -1;
3554
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003555 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003556 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003557
3558 /* The extra pers_save argument is necessary to avoid calling save_pers()
3559 on its returned object. */
3560 if (!pers_save && self->pers_func) {
3561 /* save_pers() returns:
3562 -1 to signal an error;
3563 0 if it did nothing successfully;
3564 1 if a persistent id was saved.
3565 */
3566 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3567 goto done;
3568 }
3569
3570 type = Py_TYPE(obj);
3571
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003572 /* The old cPickle had an optimization that used switch-case statement
3573 dispatching on the first letter of the type name. This has was removed
3574 since benchmarks shown that this optimization was actually slowing
3575 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003576
3577 /* Atom types; these aren't memoized, so don't check the memo. */
3578
3579 if (obj == Py_None) {
3580 status = save_none(self, obj);
3581 goto done;
3582 }
Łukasz Langaf3078fb2012-03-12 19:46:12 +01003583 else if (obj == Py_Ellipsis) {
3584 status = save_ellipsis(self, obj);
3585 goto done;
3586 }
3587 else if (obj == Py_NotImplemented) {
3588 status = save_notimplemented(self, obj);
3589 goto done;
3590 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003591 else if (obj == Py_False || obj == Py_True) {
3592 status = save_bool(self, obj);
3593 goto done;
3594 }
3595 else if (type == &PyLong_Type) {
3596 status = save_long(self, obj);
3597 goto done;
3598 }
3599 else if (type == &PyFloat_Type) {
3600 status = save_float(self, obj);
3601 goto done;
3602 }
3603
3604 /* Check the memo to see if it has the object. If so, generate
3605 a GET (or BINGET) opcode, instead of pickling the object
3606 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003607 if (PyMemoTable_Get(self->memo, obj)) {
3608 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003609 goto error;
3610 goto done;
3611 }
3612
3613 if (type == &PyBytes_Type) {
3614 status = save_bytes(self, obj);
3615 goto done;
3616 }
3617 else if (type == &PyUnicode_Type) {
3618 status = save_unicode(self, obj);
3619 goto done;
3620 }
3621 else if (type == &PyDict_Type) {
3622 status = save_dict(self, obj);
3623 goto done;
3624 }
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003625 else if (type == &PySet_Type) {
3626 status = save_set(self, obj);
3627 goto done;
3628 }
3629 else if (type == &PyFrozenSet_Type) {
3630 status = save_frozenset(self, obj);
3631 goto done;
3632 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003633 else if (type == &PyList_Type) {
3634 status = save_list(self, obj);
3635 goto done;
3636 }
3637 else if (type == &PyTuple_Type) {
3638 status = save_tuple(self, obj);
3639 goto done;
3640 }
3641 else if (type == &PyType_Type) {
3642 status = save_global(self, obj, NULL);
3643 goto done;
3644 }
3645 else if (type == &PyFunction_Type) {
3646 status = save_global(self, obj, NULL);
3647 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3648 /* fall back to reduce */
3649 PyErr_Clear();
3650 }
3651 else {
3652 goto done;
3653 }
3654 }
3655 else if (type == &PyCFunction_Type) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003656 status = save_method(self, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003657 goto done;
3658 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003659
3660 /* XXX: This part needs some unit tests. */
3661
3662 /* Get a reduction callable, and call it. This may come from
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003663 * self.dispatch_table, copyreg.dispatch_table, the object's
3664 * __reduce_ex__ method, or the object's __reduce__ method.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003665 */
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003666 if (self->dispatch_table == NULL) {
3667 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3668 /* PyDict_GetItem() unlike PyObject_GetItem() and
3669 PyObject_GetAttr() returns a borrowed ref */
3670 Py_XINCREF(reduce_func);
3671 } else {
3672 reduce_func = PyObject_GetItem(self->dispatch_table, (PyObject *)type);
3673 if (reduce_func == NULL) {
3674 if (PyErr_ExceptionMatches(PyExc_KeyError))
3675 PyErr_Clear();
3676 else
3677 goto error;
3678 }
3679 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003680 if (reduce_func != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003681 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003682 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003683 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003684 else if (PyType_IsSubtype(type, &PyType_Type)) {
3685 status = save_global(self, obj, NULL);
3686 goto done;
3687 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003688 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003689 _Py_IDENTIFIER(__reduce__);
3690 _Py_IDENTIFIER(__reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003691
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003692
3693 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3694 automatically defined as __reduce__. While this is convenient, this
3695 make it impossible to know which method was actually called. Of
3696 course, this is not a big deal. But still, it would be nice to let
3697 the user know which method was called when something go
3698 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3699 don't actually have to check for a __reduce__ method. */
3700
3701 /* Check for a __reduce_ex__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003702 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce_ex__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003703 if (reduce_func != NULL) {
3704 PyObject *proto;
3705 proto = PyLong_FromLong(self->proto);
3706 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003707 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003708 }
3709 }
3710 else {
3711 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3712 PyErr_Clear();
3713 else
3714 goto error;
3715 /* Check for a __reduce__ method. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003716 reduce_func = _PyObject_GetAttrId(obj, &PyId___reduce__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003717 if (reduce_func != NULL) {
3718 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3719 }
3720 else {
3721 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3722 type->tp_name, obj);
3723 goto error;
3724 }
3725 }
3726 }
3727
3728 if (reduce_value == NULL)
3729 goto error;
3730
3731 if (PyUnicode_Check(reduce_value)) {
3732 status = save_global(self, obj, reduce_value);
3733 goto done;
3734 }
3735
3736 if (!PyTuple_Check(reduce_value)) {
3737 PyErr_SetString(PicklingError,
3738 "__reduce__ must return a string or tuple");
3739 goto error;
3740 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003741
3742 status = save_reduce(self, reduce_value, obj);
3743
3744 if (0) {
3745 error:
3746 status = -1;
3747 }
3748 done:
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08003749
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003750 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003751 Py_XDECREF(reduce_func);
3752 Py_XDECREF(reduce_value);
3753
3754 return status;
3755}
3756
3757static int
3758dump(PicklerObject *self, PyObject *obj)
3759{
3760 const char stop_op = STOP;
3761
3762 if (self->proto >= 2) {
3763 char header[2];
3764
3765 header[0] = PROTO;
3766 assert(self->proto >= 0 && self->proto < 256);
3767 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003768 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003769 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003770 if (self->proto >= 4)
3771 self->framing = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003772 }
3773
3774 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003775 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003776 return -1;
3777
3778 return 0;
3779}
3780
3781PyDoc_STRVAR(Pickler_clear_memo_doc,
3782"clear_memo() -> None. Clears the pickler's \"memo\"."
3783"\n"
3784"The memo is the data structure that remembers which objects the\n"
3785"pickler has already seen, so that shared or recursive objects are\n"
3786"pickled by reference and not by value. This method is useful when\n"
3787"re-using picklers.");
3788
3789static PyObject *
3790Pickler_clear_memo(PicklerObject *self)
3791{
3792 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003793 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003794
3795 Py_RETURN_NONE;
3796}
3797
3798PyDoc_STRVAR(Pickler_dump_doc,
3799"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3800
3801static PyObject *
3802Pickler_dump(PicklerObject *self, PyObject *args)
3803{
3804 PyObject *obj;
3805
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003806 /* Check whether the Pickler was initialized correctly (issue3664).
3807 Developers often forget to call __init__() in their subclasses, which
3808 would trigger a segfault without this check. */
3809 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003810 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003811 "Pickler.__init__() was not called by %s.__init__()",
3812 Py_TYPE(self)->tp_name);
3813 return NULL;
3814 }
3815
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003816 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3817 return NULL;
3818
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003819 if (_Pickler_ClearBuffer(self) < 0)
3820 return NULL;
3821
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003822 if (dump(self, obj) < 0)
3823 return NULL;
3824
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003825 if (_Pickler_FlushToFile(self) < 0)
3826 return NULL;
3827
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003828 Py_RETURN_NONE;
3829}
3830
3831static struct PyMethodDef Pickler_methods[] = {
3832 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3833 Pickler_dump_doc},
3834 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3835 Pickler_clear_memo_doc},
3836 {NULL, NULL} /* sentinel */
3837};
3838
3839static void
3840Pickler_dealloc(PicklerObject *self)
3841{
3842 PyObject_GC_UnTrack(self);
3843
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003844 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003845 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003846 Py_XDECREF(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003847 Py_XDECREF(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003848 Py_XDECREF(self->arg);
3849 Py_XDECREF(self->fast_memo);
3850
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003851 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003852
3853 Py_TYPE(self)->tp_free((PyObject *)self);
3854}
3855
3856static int
3857Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3858{
3859 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003860 Py_VISIT(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003861 Py_VISIT(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003862 Py_VISIT(self->arg);
3863 Py_VISIT(self->fast_memo);
3864 return 0;
3865}
3866
3867static int
3868Pickler_clear(PicklerObject *self)
3869{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003870 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003871 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003872 Py_CLEAR(self->pers_func);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003873 Py_CLEAR(self->dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003874 Py_CLEAR(self->arg);
3875 Py_CLEAR(self->fast_memo);
3876
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003877 if (self->memo != NULL) {
3878 PyMemoTable *memo = self->memo;
3879 self->memo = NULL;
3880 PyMemoTable_Del(memo);
3881 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003882 return 0;
3883}
3884
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003885
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003886PyDoc_STRVAR(Pickler_doc,
3887"Pickler(file, protocol=None)"
3888"\n"
3889"This takes a binary file for writing a pickle data stream.\n"
3890"\n"
3891"The optional protocol argument tells the pickler to use the\n"
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003892"given protocol; supported protocols are 0, 1, 2, 3 and 4. The\n"
3893"default protocol is 3; a backward-incompatible protocol designed for\n"
3894"Python 3.\n"
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003895"\n"
3896"Specifying a negative protocol version selects the highest\n"
3897"protocol version supported. The higher the protocol used, the\n"
3898"more recent the version of Python needed to read the pickle\n"
3899"produced.\n"
3900"\n"
3901"The file argument must have a write() method that accepts a single\n"
3902"bytes argument. It can thus be a file object opened for binary\n"
3903"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003904"meets this interface.\n"
3905"\n"
3906"If fix_imports is True and protocol is less than 3, pickle will try to\n"
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01003907"map the new Python 3 names to the old module names used in Python 2,\n"
3908"so that the pickle data stream is readable with Python 2.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003909
3910static int
3911Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3912{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003913 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003914 PyObject *file;
3915 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003916 PyObject *fix_imports = Py_True;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02003917 _Py_IDENTIFIER(persistent_id);
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003918 _Py_IDENTIFIER(dispatch_table);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003919
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003920 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003921 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003922 return -1;
3923
3924 /* In case of multiple __init__() calls, clear previous content. */
3925 if (self->write != NULL)
3926 (void)Pickler_clear(self);
3927
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003928 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3929 return -1;
3930
3931 if (_Pickler_SetOutputStream(self, file) < 0)
3932 return -1;
3933
3934 /* memo and output_buffer may have already been created in _Pickler_New */
3935 if (self->memo == NULL) {
3936 self->memo = PyMemoTable_New();
3937 if (self->memo == NULL)
3938 return -1;
3939 }
3940 self->output_len = 0;
3941 if (self->output_buffer == NULL) {
3942 self->max_output_len = WRITE_BUF_SIZE;
3943 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3944 self->max_output_len);
3945 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003946 return -1;
3947 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003948
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003949 self->arg = NULL;
3950 self->fast = 0;
3951 self->fast_nesting = 0;
3952 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003953 self->pers_func = NULL;
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02003954 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_id)) {
3955 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
3956 &PyId_persistent_id);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003957 if (self->pers_func == NULL)
3958 return -1;
3959 }
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01003960 self->dispatch_table = NULL;
3961 if (_PyObject_HasAttrId((PyObject *)self, &PyId_dispatch_table)) {
3962 self->dispatch_table = _PyObject_GetAttrId((PyObject *)self,
3963 &PyId_dispatch_table);
3964 if (self->dispatch_table == NULL)
3965 return -1;
3966 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003967 return 0;
3968}
3969
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003970/* Define a proxy object for the Pickler's internal memo object. This is to
3971 * avoid breaking code like:
3972 * pickler.memo.clear()
3973 * and
3974 * pickler.memo = saved_memo
3975 * Is this a good idea? Not really, but we don't want to break code that uses
3976 * it. Note that we don't implement the entire mapping API here. This is
3977 * intentional, as these should be treated as black-box implementation details.
3978 */
3979
3980typedef struct {
3981 PyObject_HEAD
3982 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3983} PicklerMemoProxyObject;
3984
3985PyDoc_STRVAR(pmp_clear_doc,
3986"memo.clear() -> None. Remove all items from memo.");
3987
3988static PyObject *
3989pmp_clear(PicklerMemoProxyObject *self)
3990{
3991 if (self->pickler->memo)
3992 PyMemoTable_Clear(self->pickler->memo);
3993 Py_RETURN_NONE;
3994}
3995
3996PyDoc_STRVAR(pmp_copy_doc,
3997"memo.copy() -> new_memo. Copy the memo to a new object.");
3998
3999static PyObject *
4000pmp_copy(PicklerMemoProxyObject *self)
4001{
4002 Py_ssize_t i;
4003 PyMemoTable *memo;
4004 PyObject *new_memo = PyDict_New();
4005 if (new_memo == NULL)
4006 return NULL;
4007
4008 memo = self->pickler->memo;
4009 for (i = 0; i < memo->mt_allocated; ++i) {
4010 PyMemoEntry entry = memo->mt_table[i];
4011 if (entry.me_key != NULL) {
4012 int status;
4013 PyObject *key, *value;
4014
4015 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004016 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004017
4018 if (key == NULL || value == NULL) {
4019 Py_XDECREF(key);
4020 Py_XDECREF(value);
4021 goto error;
4022 }
4023 status = PyDict_SetItem(new_memo, key, value);
4024 Py_DECREF(key);
4025 Py_DECREF(value);
4026 if (status < 0)
4027 goto error;
4028 }
4029 }
4030 return new_memo;
4031
4032 error:
4033 Py_XDECREF(new_memo);
4034 return NULL;
4035}
4036
4037PyDoc_STRVAR(pmp_reduce_doc,
4038"memo.__reduce__(). Pickling support.");
4039
4040static PyObject *
4041pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
4042{
4043 PyObject *reduce_value, *dict_args;
4044 PyObject *contents = pmp_copy(self);
4045 if (contents == NULL)
4046 return NULL;
4047
4048 reduce_value = PyTuple_New(2);
4049 if (reduce_value == NULL) {
4050 Py_DECREF(contents);
4051 return NULL;
4052 }
4053 dict_args = PyTuple_New(1);
4054 if (dict_args == NULL) {
4055 Py_DECREF(contents);
4056 Py_DECREF(reduce_value);
4057 return NULL;
4058 }
4059 PyTuple_SET_ITEM(dict_args, 0, contents);
4060 Py_INCREF((PyObject *)&PyDict_Type);
4061 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
4062 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
4063 return reduce_value;
4064}
4065
4066static PyMethodDef picklerproxy_methods[] = {
4067 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
4068 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
4069 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
4070 {NULL, NULL} /* sentinel */
4071};
4072
4073static void
4074PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
4075{
4076 PyObject_GC_UnTrack(self);
4077 Py_XDECREF(self->pickler);
4078 PyObject_GC_Del((PyObject *)self);
4079}
4080
4081static int
4082PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
4083 visitproc visit, void *arg)
4084{
4085 Py_VISIT(self->pickler);
4086 return 0;
4087}
4088
4089static int
4090PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
4091{
4092 Py_CLEAR(self->pickler);
4093 return 0;
4094}
4095
4096static PyTypeObject PicklerMemoProxyType = {
4097 PyVarObject_HEAD_INIT(NULL, 0)
4098 "_pickle.PicklerMemoProxy", /*tp_name*/
4099 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
4100 0,
4101 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
4102 0, /* tp_print */
4103 0, /* tp_getattr */
4104 0, /* tp_setattr */
4105 0, /* tp_compare */
4106 0, /* tp_repr */
4107 0, /* tp_as_number */
4108 0, /* tp_as_sequence */
4109 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00004110 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004111 0, /* tp_call */
4112 0, /* tp_str */
4113 PyObject_GenericGetAttr, /* tp_getattro */
4114 PyObject_GenericSetAttr, /* tp_setattro */
4115 0, /* tp_as_buffer */
4116 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4117 0, /* tp_doc */
4118 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
4119 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
4120 0, /* tp_richcompare */
4121 0, /* tp_weaklistoffset */
4122 0, /* tp_iter */
4123 0, /* tp_iternext */
4124 picklerproxy_methods, /* tp_methods */
4125};
4126
4127static PyObject *
4128PicklerMemoProxy_New(PicklerObject *pickler)
4129{
4130 PicklerMemoProxyObject *self;
4131
4132 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
4133 if (self == NULL)
4134 return NULL;
4135 Py_INCREF(pickler);
4136 self->pickler = pickler;
4137 PyObject_GC_Track(self);
4138 return (PyObject *)self;
4139}
4140
4141/*****************************************************************************/
4142
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004143static PyObject *
4144Pickler_get_memo(PicklerObject *self)
4145{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004146 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004147}
4148
4149static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004150Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004151{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004152 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004153
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004154 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004155 PyErr_SetString(PyExc_TypeError,
4156 "attribute deletion is not supported");
4157 return -1;
4158 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004159
4160 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
4161 PicklerObject *pickler =
4162 ((PicklerMemoProxyObject *)obj)->pickler;
4163
4164 new_memo = PyMemoTable_Copy(pickler->memo);
4165 if (new_memo == NULL)
4166 return -1;
4167 }
4168 else if (PyDict_Check(obj)) {
4169 Py_ssize_t i = 0;
4170 PyObject *key, *value;
4171
4172 new_memo = PyMemoTable_New();
4173 if (new_memo == NULL)
4174 return -1;
4175
4176 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004177 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004178 PyObject *memo_obj;
4179
4180 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
4181 PyErr_SetString(PyExc_TypeError,
4182 "'memo' values must be 2-item tuples");
4183 goto error;
4184 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004185 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004186 if (memo_id == -1 && PyErr_Occurred())
4187 goto error;
4188 memo_obj = PyTuple_GET_ITEM(value, 1);
4189 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
4190 goto error;
4191 }
4192 }
4193 else {
4194 PyErr_Format(PyExc_TypeError,
4195 "'memo' attribute must be an PicklerMemoProxy object"
4196 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004197 return -1;
4198 }
4199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004200 PyMemoTable_Del(self->memo);
4201 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004202
4203 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004204
4205 error:
4206 if (new_memo)
4207 PyMemoTable_Del(new_memo);
4208 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004209}
4210
4211static PyObject *
4212Pickler_get_persid(PicklerObject *self)
4213{
4214 if (self->pers_func == NULL)
4215 PyErr_SetString(PyExc_AttributeError, "persistent_id");
4216 else
4217 Py_INCREF(self->pers_func);
4218 return self->pers_func;
4219}
4220
4221static int
4222Pickler_set_persid(PicklerObject *self, PyObject *value)
4223{
4224 PyObject *tmp;
4225
4226 if (value == NULL) {
4227 PyErr_SetString(PyExc_TypeError,
4228 "attribute deletion is not supported");
4229 return -1;
4230 }
4231 if (!PyCallable_Check(value)) {
4232 PyErr_SetString(PyExc_TypeError,
4233 "persistent_id must be a callable taking one argument");
4234 return -1;
4235 }
4236
4237 tmp = self->pers_func;
4238 Py_INCREF(value);
4239 self->pers_func = value;
4240 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4241
4242 return 0;
4243}
4244
4245static PyMemberDef Pickler_members[] = {
4246 {"bin", T_INT, offsetof(PicklerObject, bin)},
4247 {"fast", T_INT, offsetof(PicklerObject, fast)},
Antoine Pitrou8d3c2902012-03-04 18:31:48 +01004248 {"dispatch_table", T_OBJECT_EX, offsetof(PicklerObject, dispatch_table)},
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004249 {NULL}
4250};
4251
4252static PyGetSetDef Pickler_getsets[] = {
4253 {"memo", (getter)Pickler_get_memo,
4254 (setter)Pickler_set_memo},
4255 {"persistent_id", (getter)Pickler_get_persid,
4256 (setter)Pickler_set_persid},
4257 {NULL}
4258};
4259
4260static PyTypeObject Pickler_Type = {
4261 PyVarObject_HEAD_INIT(NULL, 0)
4262 "_pickle.Pickler" , /*tp_name*/
4263 sizeof(PicklerObject), /*tp_basicsize*/
4264 0, /*tp_itemsize*/
4265 (destructor)Pickler_dealloc, /*tp_dealloc*/
4266 0, /*tp_print*/
4267 0, /*tp_getattr*/
4268 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00004269 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004270 0, /*tp_repr*/
4271 0, /*tp_as_number*/
4272 0, /*tp_as_sequence*/
4273 0, /*tp_as_mapping*/
4274 0, /*tp_hash*/
4275 0, /*tp_call*/
4276 0, /*tp_str*/
4277 0, /*tp_getattro*/
4278 0, /*tp_setattro*/
4279 0, /*tp_as_buffer*/
4280 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4281 Pickler_doc, /*tp_doc*/
4282 (traverseproc)Pickler_traverse, /*tp_traverse*/
4283 (inquiry)Pickler_clear, /*tp_clear*/
4284 0, /*tp_richcompare*/
4285 0, /*tp_weaklistoffset*/
4286 0, /*tp_iter*/
4287 0, /*tp_iternext*/
4288 Pickler_methods, /*tp_methods*/
4289 Pickler_members, /*tp_members*/
4290 Pickler_getsets, /*tp_getset*/
4291 0, /*tp_base*/
4292 0, /*tp_dict*/
4293 0, /*tp_descr_get*/
4294 0, /*tp_descr_set*/
4295 0, /*tp_dictoffset*/
4296 (initproc)Pickler_init, /*tp_init*/
4297 PyType_GenericAlloc, /*tp_alloc*/
4298 PyType_GenericNew, /*tp_new*/
4299 PyObject_GC_Del, /*tp_free*/
4300 0, /*tp_is_gc*/
4301};
4302
Victor Stinner121aab42011-09-29 23:40:53 +02004303/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004304
4305 XXX: It would be nice to able to avoid Python function call overhead, by
4306 using directly the C version of find_class(), when find_class() is not
4307 overridden by a subclass. Although, this could become rather hackish. A
4308 simpler optimization would be to call the C function when self is not a
4309 subclass instance. */
4310static PyObject *
4311find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
4312{
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004313 _Py_IDENTIFIER(find_class);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004314
4315 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
4316 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004317}
4318
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004319static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004320marker(UnpicklerObject *self)
4321{
4322 if (self->num_marks < 1) {
4323 PyErr_SetString(UnpicklingError, "could not find MARK");
4324 return -1;
4325 }
4326
4327 return self->marks[--self->num_marks];
4328}
4329
4330static int
4331load_none(UnpicklerObject *self)
4332{
4333 PDATA_APPEND(self->stack, Py_None, -1);
4334 return 0;
4335}
4336
4337static int
4338bad_readline(void)
4339{
4340 PyErr_SetString(UnpicklingError, "pickle data was truncated");
4341 return -1;
4342}
4343
4344static int
4345load_int(UnpicklerObject *self)
4346{
4347 PyObject *value;
4348 char *endptr, *s;
4349 Py_ssize_t len;
4350 long x;
4351
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004352 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004353 return -1;
4354 if (len < 2)
4355 return bad_readline();
4356
4357 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004358 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004359 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004360 x = strtol(s, &endptr, 0);
4361
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004362 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004363 /* Hm, maybe we've got something long. Let's try reading
Serhiy Storchaka95949422013-08-27 19:40:23 +03004364 * it as a Python int object. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004365 errno = 0;
4366 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004367 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004368 if (value == NULL) {
4369 PyErr_SetString(PyExc_ValueError,
4370 "could not convert string to int");
4371 return -1;
4372 }
4373 }
4374 else {
4375 if (len == 3 && (x == 0 || x == 1)) {
4376 if ((value = PyBool_FromLong(x)) == NULL)
4377 return -1;
4378 }
4379 else {
4380 if ((value = PyLong_FromLong(x)) == NULL)
4381 return -1;
4382 }
4383 }
4384
4385 PDATA_PUSH(self->stack, value, -1);
4386 return 0;
4387}
4388
4389static int
4390load_bool(UnpicklerObject *self, PyObject *boolean)
4391{
4392 assert(boolean == Py_True || boolean == Py_False);
4393 PDATA_APPEND(self->stack, boolean, -1);
4394 return 0;
4395}
4396
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004397/* s contains x bytes of an unsigned little-endian integer. Return its value
4398 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
4399 */
4400static Py_ssize_t
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004401calc_binsize(char *bytes, int nbytes)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004402{
4403 unsigned char *s = (unsigned char *)bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004404 int i;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004405 size_t x = 0;
4406
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08004407 for (i = 0; i < nbytes && i < sizeof(size_t); i++) {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004408 x |= (size_t) s[i] << (8 * i);
4409 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004410
4411 if (x > PY_SSIZE_T_MAX)
4412 return -1;
4413 else
4414 return (Py_ssize_t) x;
4415}
4416
/* bytes contains nbytes bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when nbytes is 1 or 2, this is an unsigned
 * little-endian int, but when nbytes is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (undefined behaviour when
 * long is 32 bits wide); the sign of a 4-byte value is applied afterwards
 * with ordinary arithmetic that cannot overflow.
 */
static long
calc_binint(char *bytes, int nbytes)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < nbytes; i++) {
        x |= (unsigned long)s[i] << (8 * i);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: bit 31 set means the value is negative, whatever the
     * width of long on this box.
     */
    if (nbytes == 4 && (x & 0x80000000UL)) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1;
    }

    return (long)x;
}
4443
4444static int
4445load_binintx(UnpicklerObject *self, char *s, int size)
4446{
4447 PyObject *value;
4448 long x;
4449
4450 x = calc_binint(s, size);
4451
4452 if ((value = PyLong_FromLong(x)) == NULL)
4453 return -1;
4454
4455 PDATA_PUSH(self->stack, value, -1);
4456 return 0;
4457}
4458
4459static int
4460load_binint(UnpicklerObject *self)
4461{
4462 char *s;
4463
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004464 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004465 return -1;
4466
4467 return load_binintx(self, s, 4);
4468}
4469
4470static int
4471load_binint1(UnpicklerObject *self)
4472{
4473 char *s;
4474
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004475 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004476 return -1;
4477
4478 return load_binintx(self, s, 1);
4479}
4480
4481static int
4482load_binint2(UnpicklerObject *self)
4483{
4484 char *s;
4485
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004486 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004487 return -1;
4488
4489 return load_binintx(self, s, 2);
4490}
4491
4492static int
4493load_long(UnpicklerObject *self)
4494{
4495 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004496 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004497 Py_ssize_t len;
4498
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004499 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004500 return -1;
4501 if (len < 2)
4502 return bad_readline();
4503
Mark Dickinson8dd05142009-01-20 20:43:58 +00004504 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4505 the 'L' before calling PyLong_FromString. In order to maintain
4506 compatibility with Python 3.0.0, we don't actually *require*
4507 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004508 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004509 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004510 /* XXX: Should the base argument explicitly set to 10? */
4511 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004512 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004513 return -1;
4514
4515 PDATA_PUSH(self->stack, value, -1);
4516 return 0;
4517}
4518
4519/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4520 * data following.
4521 */
4522static int
4523load_counted_long(UnpicklerObject *self, int size)
4524{
4525 PyObject *value;
4526 char *nbytes;
4527 char *pdata;
4528
4529 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004530 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004531 return -1;
4532
4533 size = calc_binint(nbytes, size);
4534 if (size < 0) {
4535 /* Corrupt or hostile pickle -- we never write one like this */
4536 PyErr_SetString(UnpicklingError,
4537 "LONG pickle has negative byte count");
4538 return -1;
4539 }
4540
4541 if (size == 0)
4542 value = PyLong_FromLong(0L);
4543 else {
4544 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004545 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004546 return -1;
4547 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4548 1 /* little endian */ , 1 /* signed */ );
4549 }
4550 if (value == NULL)
4551 return -1;
4552 PDATA_PUSH(self->stack, value, -1);
4553 return 0;
4554}
4555
4556static int
4557load_float(UnpicklerObject *self)
4558{
4559 PyObject *value;
4560 char *endptr, *s;
4561 Py_ssize_t len;
4562 double d;
4563
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004564 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004565 return -1;
4566 if (len < 2)
4567 return bad_readline();
4568
4569 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004570 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4571 if (d == -1.0 && PyErr_Occurred())
4572 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004573 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004574 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4575 return -1;
4576 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004577 value = PyFloat_FromDouble(d);
4578 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004579 return -1;
4580
4581 PDATA_PUSH(self->stack, value, -1);
4582 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004583}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004584
4585static int
4586load_binfloat(UnpicklerObject *self)
4587{
4588 PyObject *value;
4589 double x;
4590 char *s;
4591
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004592 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004593 return -1;
4594
4595 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4596 if (x == -1.0 && PyErr_Occurred())
4597 return -1;
4598
4599 if ((value = PyFloat_FromDouble(x)) == NULL)
4600 return -1;
4601
4602 PDATA_PUSH(self->stack, value, -1);
4603 return 0;
4604}
4605
4606static int
4607load_string(UnpicklerObject *self)
4608{
4609 PyObject *bytes;
4610 PyObject *str = NULL;
4611 Py_ssize_t len;
4612 char *s, *p;
4613
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004614 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004615 return -1;
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004616 /* Strip the newline */
4617 len--;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004618 /* Strip outermost quotes */
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004619 if (len >= 2 && s[0] == s[len - 1] && (s[0] == '\'' || s[0] == '"')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004620 p = s + 1;
4621 len -= 2;
4622 }
4623 else {
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004624 PyErr_SetString(UnpicklingError,
4625 "the STRING opcode argument must be quoted");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004626 return -1;
4627 }
Alexandre Vassalotti7c5e0942013-04-15 23:14:55 -07004628 assert(len >= 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004629
4630 /* Use the PyBytes API to decode the string, since that is what is used
4631 to encode, and then coerce the result to Unicode. */
4632 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004633 if (bytes == NULL)
4634 return -1;
4635 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4636 Py_DECREF(bytes);
4637 if (str == NULL)
4638 return -1;
4639
4640 PDATA_PUSH(self->stack, str, -1);
4641 return 0;
4642}
4643
4644static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004645load_counted_binbytes(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004646{
4647 PyObject *bytes;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004648 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004649 char *s;
4650
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004651 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004652 return -1;
4653
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004654 size = calc_binsize(s, nbytes);
4655 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004656 PyErr_Format(PyExc_OverflowError,
4657 "BINBYTES exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004658 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004659 return -1;
4660 }
4661
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004662 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004663 return -1;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004664
4665 bytes = PyBytes_FromStringAndSize(s, size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666 if (bytes == NULL)
4667 return -1;
4668
4669 PDATA_PUSH(self->stack, bytes, -1);
4670 return 0;
4671}
4672
4673static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004674load_counted_binstring(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004675{
4676 PyObject *str;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004677 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004678 char *s;
4679
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004680 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004681 return -1;
4682
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004683 size = calc_binsize(s, nbytes);
4684 if (size < 0) {
4685 PyErr_Format(UnpicklingError,
4686 "BINSTRING exceeds system's maximum size of %zd bytes",
4687 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004688 return -1;
4689 }
4690
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004691 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004692 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004693 /* Convert Python 2.x strings to unicode. */
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004694 str = PyUnicode_Decode(s, size, self->encoding, self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004695 if (str == NULL)
4696 return -1;
4697
4698 PDATA_PUSH(self->stack, str, -1);
4699 return 0;
4700}
4701
4702static int
4703load_unicode(UnpicklerObject *self)
4704{
4705 PyObject *str;
4706 Py_ssize_t len;
4707 char *s;
4708
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004709 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004710 return -1;
4711 if (len < 1)
4712 return bad_readline();
4713
4714 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4715 if (str == NULL)
4716 return -1;
4717
4718 PDATA_PUSH(self->stack, str, -1);
4719 return 0;
4720}
4721
4722static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004723load_counted_binunicode(UnpicklerObject *self, int nbytes)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004724{
4725 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004726 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727 char *s;
4728
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004729 if (_Unpickler_Read(self, &s, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004730 return -1;
4731
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004732 size = calc_binsize(s, nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004733 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004734 PyErr_Format(PyExc_OverflowError,
4735 "BINUNICODE exceeds system's maximum size of %zd bytes",
Alexandre Vassalotticc757172013-04-14 02:25:10 -07004736 PY_SSIZE_T_MAX);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004737 return -1;
4738 }
4739
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004740 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004741 return -1;
4742
Victor Stinner485fb562010-04-13 11:07:24 +00004743 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744 if (str == NULL)
4745 return -1;
4746
4747 PDATA_PUSH(self->stack, str, -1);
4748 return 0;
4749}
4750
4751static int
4752load_tuple(UnpicklerObject *self)
4753{
4754 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004755 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004756
4757 if ((i = marker(self)) < 0)
4758 return -1;
4759
4760 tuple = Pdata_poptuple(self->stack, i);
4761 if (tuple == NULL)
4762 return -1;
4763 PDATA_PUSH(self->stack, tuple, -1);
4764 return 0;
4765}
4766
4767static int
4768load_counted_tuple(UnpicklerObject *self, int len)
4769{
4770 PyObject *tuple;
4771
4772 tuple = PyTuple_New(len);
4773 if (tuple == NULL)
4774 return -1;
4775
4776 while (--len >= 0) {
4777 PyObject *item;
4778
4779 PDATA_POP(self->stack, item);
4780 if (item == NULL)
4781 return -1;
4782 PyTuple_SET_ITEM(tuple, len, item);
4783 }
4784 PDATA_PUSH(self->stack, tuple, -1);
4785 return 0;
4786}
4787
4788static int
4789load_empty_list(UnpicklerObject *self)
4790{
4791 PyObject *list;
4792
4793 if ((list = PyList_New(0)) == NULL)
4794 return -1;
4795 PDATA_PUSH(self->stack, list, -1);
4796 return 0;
4797}
4798
4799static int
4800load_empty_dict(UnpicklerObject *self)
4801{
4802 PyObject *dict;
4803
4804 if ((dict = PyDict_New()) == NULL)
4805 return -1;
4806 PDATA_PUSH(self->stack, dict, -1);
4807 return 0;
4808}
4809
4810static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004811load_empty_set(UnpicklerObject *self)
4812{
4813 PyObject *set;
4814
4815 if ((set = PySet_New(NULL)) == NULL)
4816 return -1;
4817 PDATA_PUSH(self->stack, set, -1);
4818 return 0;
4819}
4820
4821static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004822load_list(UnpicklerObject *self)
4823{
4824 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004825 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004826
4827 if ((i = marker(self)) < 0)
4828 return -1;
4829
4830 list = Pdata_poplist(self->stack, i);
4831 if (list == NULL)
4832 return -1;
4833 PDATA_PUSH(self->stack, list, -1);
4834 return 0;
4835}
4836
4837static int
4838load_dict(UnpicklerObject *self)
4839{
4840 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004841 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004842
4843 if ((i = marker(self)) < 0)
4844 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004845 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846
4847 if ((dict = PyDict_New()) == NULL)
4848 return -1;
4849
4850 for (k = i + 1; k < j; k += 2) {
4851 key = self->stack->data[k - 1];
4852 value = self->stack->data[k];
4853 if (PyDict_SetItem(dict, key, value) < 0) {
4854 Py_DECREF(dict);
4855 return -1;
4856 }
4857 }
4858 Pdata_clear(self->stack, i);
4859 PDATA_PUSH(self->stack, dict, -1);
4860 return 0;
4861}
4862
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01004863static int
4864load_frozenset(UnpicklerObject *self)
4865{
4866 PyObject *items;
4867 PyObject *frozenset;
4868 Py_ssize_t i;
4869
4870 if ((i = marker(self)) < 0)
4871 return -1;
4872
4873 items = Pdata_poptuple(self->stack, i);
4874 if (items == NULL)
4875 return -1;
4876
4877 frozenset = PyFrozenSet_New(items);
4878 Py_DECREF(items);
4879 if (frozenset == NULL)
4880 return -1;
4881
4882 PDATA_PUSH(self->stack, frozenset, -1);
4883 return 0;
4884}
4885
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004886static PyObject *
4887instantiate(PyObject *cls, PyObject *args)
4888{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004889 PyObject *result = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004890 _Py_IDENTIFIER(__getinitargs__);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004891 /* Caller must assure args are a tuple. Normally, args come from
4892 Pdata_poptuple which packs objects from the top of the stack
4893 into a newly created tuple. */
4894 assert(PyTuple_Check(args));
4895 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02004896 _PyObject_HasAttrId(cls, &PyId___getinitargs__)) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004897 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004898 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004899 else {
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02004900 _Py_IDENTIFIER(__new__);
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004901
4902 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004903 }
4904 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004905}
4906
4907static int
4908load_obj(UnpicklerObject *self)
4909{
4910 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004911 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004912
4913 if ((i = marker(self)) < 0)
4914 return -1;
4915
4916 args = Pdata_poptuple(self->stack, i + 1);
4917 if (args == NULL)
4918 return -1;
4919
4920 PDATA_POP(self->stack, cls);
4921 if (cls) {
4922 obj = instantiate(cls, args);
4923 Py_DECREF(cls);
4924 }
4925 Py_DECREF(args);
4926 if (obj == NULL)
4927 return -1;
4928
4929 PDATA_PUSH(self->stack, obj, -1);
4930 return 0;
4931}
4932
4933static int
4934load_inst(UnpicklerObject *self)
4935{
4936 PyObject *cls = NULL;
4937 PyObject *args = NULL;
4938 PyObject *obj = NULL;
4939 PyObject *module_name;
4940 PyObject *class_name;
4941 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004942 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004943 char *s;
4944
4945 if ((i = marker(self)) < 0)
4946 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004947 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004948 return -1;
4949 if (len < 2)
4950 return bad_readline();
4951
4952 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4953 identifiers are permitted in Python 3.0, since the INST opcode is only
4954 supported by older protocols on Python 2.x. */
4955 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4956 if (module_name == NULL)
4957 return -1;
4958
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004959 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960 if (len < 2)
4961 return bad_readline();
4962 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004963 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004964 cls = find_class(self, module_name, class_name);
4965 Py_DECREF(class_name);
4966 }
4967 }
4968 Py_DECREF(module_name);
4969
4970 if (cls == NULL)
4971 return -1;
4972
4973 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4974 obj = instantiate(cls, args);
4975 Py_DECREF(args);
4976 }
4977 Py_DECREF(cls);
4978
4979 if (obj == NULL)
4980 return -1;
4981
4982 PDATA_PUSH(self->stack, obj, -1);
4983 return 0;
4984}
4985
4986static int
4987load_newobj(UnpicklerObject *self)
4988{
4989 PyObject *args = NULL;
4990 PyObject *clsraw = NULL;
4991 PyTypeObject *cls; /* clsraw cast to its true type */
4992 PyObject *obj;
4993
4994 /* Stack is ... cls argtuple, and we want to call
4995 * cls.__new__(cls, *argtuple).
4996 */
4997 PDATA_POP(self->stack, args);
4998 if (args == NULL)
4999 goto error;
5000 if (!PyTuple_Check(args)) {
5001 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
5002 goto error;
5003 }
5004
5005 PDATA_POP(self->stack, clsraw);
5006 cls = (PyTypeObject *)clsraw;
5007 if (cls == NULL)
5008 goto error;
5009 if (!PyType_Check(cls)) {
5010 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
5011 "isn't a type object");
5012 goto error;
5013 }
5014 if (cls->tp_new == NULL) {
5015 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
5016 "has NULL tp_new");
5017 goto error;
5018 }
5019
5020 /* Call __new__. */
5021 obj = cls->tp_new(cls, args, NULL);
5022 if (obj == NULL)
5023 goto error;
5024
5025 Py_DECREF(args);
5026 Py_DECREF(clsraw);
5027 PDATA_PUSH(self->stack, obj, -1);
5028 return 0;
5029
5030 error:
5031 Py_XDECREF(args);
5032 Py_XDECREF(clsraw);
5033 return -1;
5034}
5035
5036static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005037load_newobj_ex(UnpicklerObject *self)
5038{
5039 PyObject *cls, *args, *kwargs;
5040 PyObject *obj;
5041
5042 PDATA_POP(self->stack, kwargs);
5043 if (kwargs == NULL) {
5044 return -1;
5045 }
5046 PDATA_POP(self->stack, args);
5047 if (args == NULL) {
5048 Py_DECREF(kwargs);
5049 return -1;
5050 }
5051 PDATA_POP(self->stack, cls);
5052 if (cls == NULL) {
5053 Py_DECREF(kwargs);
5054 Py_DECREF(args);
5055 return -1;
5056 }
5057
5058 if (!PyType_Check(cls)) {
5059 Py_DECREF(kwargs);
5060 Py_DECREF(args);
5061 Py_DECREF(cls);
5062 PyErr_Format(UnpicklingError,
5063 "NEWOBJ_EX class argument must be a type, not %.200s",
5064 Py_TYPE(cls)->tp_name);
5065 return -1;
5066 }
5067
5068 if (((PyTypeObject *)cls)->tp_new == NULL) {
5069 Py_DECREF(kwargs);
5070 Py_DECREF(args);
5071 Py_DECREF(cls);
5072 PyErr_SetString(UnpicklingError,
5073 "NEWOBJ_EX class argument doesn't have __new__");
5074 return -1;
5075 }
5076 obj = ((PyTypeObject *)cls)->tp_new((PyTypeObject *)cls, args, kwargs);
5077 Py_DECREF(kwargs);
5078 Py_DECREF(args);
5079 Py_DECREF(cls);
5080 if (obj == NULL) {
5081 return -1;
5082 }
5083 PDATA_PUSH(self->stack, obj, -1);
5084 return 0;
5085}
5086
5087static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005088load_global(UnpicklerObject *self)
5089{
5090 PyObject *global = NULL;
5091 PyObject *module_name;
5092 PyObject *global_name;
5093 Py_ssize_t len;
5094 char *s;
5095
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005096 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005097 return -1;
5098 if (len < 2)
5099 return bad_readline();
5100 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5101 if (!module_name)
5102 return -1;
5103
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005104 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005105 if (len < 2) {
5106 Py_DECREF(module_name);
5107 return bad_readline();
5108 }
5109 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
5110 if (global_name) {
5111 global = find_class(self, module_name, global_name);
5112 Py_DECREF(global_name);
5113 }
5114 }
5115 Py_DECREF(module_name);
5116
5117 if (global == NULL)
5118 return -1;
5119 PDATA_PUSH(self->stack, global, -1);
5120 return 0;
5121}
5122
5123static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005124load_stack_global(UnpicklerObject *self)
5125{
5126 PyObject *global;
5127 PyObject *module_name;
5128 PyObject *global_name;
5129
5130 PDATA_POP(self->stack, global_name);
5131 PDATA_POP(self->stack, module_name);
5132 if (module_name == NULL || !PyUnicode_CheckExact(module_name) ||
5133 global_name == NULL || !PyUnicode_CheckExact(global_name)) {
5134 PyErr_SetString(UnpicklingError, "STACK_GLOBAL requires str");
5135 Py_XDECREF(global_name);
5136 Py_XDECREF(module_name);
5137 return -1;
5138 }
5139 global = find_class(self, module_name, global_name);
5140 Py_DECREF(global_name);
5141 Py_DECREF(module_name);
5142 if (global == NULL)
5143 return -1;
5144 PDATA_PUSH(self->stack, global, -1);
5145 return 0;
5146}
5147
5148static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005149load_persid(UnpicklerObject *self)
5150{
5151 PyObject *pid;
5152 Py_ssize_t len;
5153 char *s;
5154
5155 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005156 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005157 return -1;
5158 if (len < 2)
5159 return bad_readline();
5160
5161 pid = PyBytes_FromStringAndSize(s, len - 1);
5162 if (pid == NULL)
5163 return -1;
5164
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005165 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005166 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005167 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005168 if (pid == NULL)
5169 return -1;
5170
5171 PDATA_PUSH(self->stack, pid, -1);
5172 return 0;
5173 }
5174 else {
5175 PyErr_SetString(UnpicklingError,
5176 "A load persistent id instruction was encountered,\n"
5177 "but no persistent_load function was specified.");
5178 return -1;
5179 }
5180}
5181
5182static int
5183load_binpersid(UnpicklerObject *self)
5184{
5185 PyObject *pid;
5186
5187 if (self->pers_func) {
5188 PDATA_POP(self->stack, pid);
5189 if (pid == NULL)
5190 return -1;
5191
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005192 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005193 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005194 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005195 if (pid == NULL)
5196 return -1;
5197
5198 PDATA_PUSH(self->stack, pid, -1);
5199 return 0;
5200 }
5201 else {
5202 PyErr_SetString(UnpicklingError,
5203 "A load persistent id instruction was encountered,\n"
5204 "but no persistent_load function was specified.");
5205 return -1;
5206 }
5207}
5208
5209static int
5210load_pop(UnpicklerObject *self)
5211{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005212 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005213
5214 /* Note that we split the (pickle.py) stack into two stacks,
5215 * an object stack and a mark stack. We have to be clever and
5216 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00005217 * mark stack first, and only signalling a stack underflow if
5218 * the object stack is empty and the mark stack doesn't match
5219 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005220 */
Collin Winter8ca69de2009-05-26 16:53:41 +00005221 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005222 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00005223 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005224 len--;
5225 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005226 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00005227 } else {
5228 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005229 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005230 return 0;
5231}
5232
5233static int
5234load_pop_mark(UnpicklerObject *self)
5235{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005236 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005237
5238 if ((i = marker(self)) < 0)
5239 return -1;
5240
5241 Pdata_clear(self->stack, i);
5242
5243 return 0;
5244}
5245
5246static int
5247load_dup(UnpicklerObject *self)
5248{
5249 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005250 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005251
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005252 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005253 return stack_underflow();
5254 last = self->stack->data[len - 1];
5255 PDATA_APPEND(self->stack, last, -1);
5256 return 0;
5257}
5258
5259static int
5260load_get(UnpicklerObject *self)
5261{
5262 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005263 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005264 Py_ssize_t len;
5265 char *s;
5266
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005267 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005268 return -1;
5269 if (len < 2)
5270 return bad_readline();
5271
5272 key = PyLong_FromString(s, NULL, 10);
5273 if (key == NULL)
5274 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005275 idx = PyLong_AsSsize_t(key);
5276 if (idx == -1 && PyErr_Occurred()) {
5277 Py_DECREF(key);
5278 return -1;
5279 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005280
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005281 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005282 if (value == NULL) {
5283 if (!PyErr_Occurred())
5284 PyErr_SetObject(PyExc_KeyError, key);
5285 Py_DECREF(key);
5286 return -1;
5287 }
5288 Py_DECREF(key);
5289
5290 PDATA_APPEND(self->stack, value, -1);
5291 return 0;
5292}
5293
5294static int
5295load_binget(UnpicklerObject *self)
5296{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005297 PyObject *value;
5298 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005299 char *s;
5300
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005301 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005302 return -1;
5303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005304 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005305
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005306 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005307 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005308 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005309 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005310 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005311 Py_DECREF(key);
5312 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005313 return -1;
5314 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005315
5316 PDATA_APPEND(self->stack, value, -1);
5317 return 0;
5318}
5319
5320static int
5321load_long_binget(UnpicklerObject *self)
5322{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005323 PyObject *value;
5324 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005325 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005327 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005328 return -1;
5329
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005330 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005331
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005332 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005333 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005334 PyObject *key = PyLong_FromSsize_t(idx);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005335 if (key != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005336 PyErr_SetObject(PyExc_KeyError, key);
Christian Heimes9ee5c372013-07-26 22:45:00 +02005337 Py_DECREF(key);
5338 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005339 return -1;
5340 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005341
5342 PDATA_APPEND(self->stack, value, -1);
5343 return 0;
5344}
5345
5346/* Push an object from the extension registry (EXT[124]). nbytes is
5347 * the number of bytes following the opcode, holding the index (code) value.
5348 */
5349static int
5350load_extension(UnpicklerObject *self, int nbytes)
5351{
5352 char *codebytes; /* the nbytes bytes after the opcode */
5353 long code; /* calc_binint returns long */
5354 PyObject *py_code; /* code as a Python int */
5355 PyObject *obj; /* the object to push */
5356 PyObject *pair; /* (module_name, class_name) */
5357 PyObject *module_name, *class_name;
5358
5359 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005360 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005361 return -1;
5362 code = calc_binint(codebytes, nbytes);
5363 if (code <= 0) { /* note that 0 is forbidden */
5364 /* Corrupt or hostile pickle. */
5365 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
5366 return -1;
5367 }
5368
5369 /* Look for the code in the cache. */
5370 py_code = PyLong_FromLong(code);
5371 if (py_code == NULL)
5372 return -1;
5373 obj = PyDict_GetItem(extension_cache, py_code);
5374 if (obj != NULL) {
5375 /* Bingo. */
5376 Py_DECREF(py_code);
5377 PDATA_APPEND(self->stack, obj, -1);
5378 return 0;
5379 }
5380
5381 /* Look up the (module_name, class_name) pair. */
5382 pair = PyDict_GetItem(inverted_registry, py_code);
5383 if (pair == NULL) {
5384 Py_DECREF(py_code);
5385 PyErr_Format(PyExc_ValueError, "unregistered extension "
5386 "code %ld", code);
5387 return -1;
5388 }
5389 /* Since the extension registry is manipulable via Python code,
5390 * confirm that pair is really a 2-tuple of strings.
5391 */
5392 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
5393 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
5394 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
5395 Py_DECREF(py_code);
5396 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
5397 "isn't a 2-tuple of strings", code);
5398 return -1;
5399 }
5400 /* Load the object. */
5401 obj = find_class(self, module_name, class_name);
5402 if (obj == NULL) {
5403 Py_DECREF(py_code);
5404 return -1;
5405 }
5406 /* Cache code -> obj. */
5407 code = PyDict_SetItem(extension_cache, py_code, obj);
5408 Py_DECREF(py_code);
5409 if (code < 0) {
5410 Py_DECREF(obj);
5411 return -1;
5412 }
5413 PDATA_PUSH(self->stack, obj, -1);
5414 return 0;
5415}
5416
5417static int
5418load_put(UnpicklerObject *self)
5419{
5420 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005421 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005422 Py_ssize_t len;
5423 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005424
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005425 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005426 return -1;
5427 if (len < 2)
5428 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005429 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005430 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005431 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005432
5433 key = PyLong_FromString(s, NULL, 10);
5434 if (key == NULL)
5435 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005436 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005437 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005438 if (idx < 0) {
5439 if (!PyErr_Occurred())
5440 PyErr_SetString(PyExc_ValueError,
5441 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005442 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005443 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005444
5445 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005446}
5447
5448static int
5449load_binput(UnpicklerObject *self)
5450{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005451 PyObject *value;
5452 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005453 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005455 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005456 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005457
5458 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005459 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005460 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005461
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005462 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005463
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005464 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005465}
5466
5467static int
5468load_long_binput(UnpicklerObject *self)
5469{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005470 PyObject *value;
5471 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005472 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005473
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005474 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005475 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005476
5477 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005478 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005479 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005480
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005481 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02005482 if (idx < 0) {
5483 PyErr_SetString(PyExc_ValueError,
5484 "negative LONG_BINPUT argument");
5485 return -1;
5486 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005487
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005488 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005489}
5490
5491static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005492load_memoize(UnpicklerObject *self)
5493{
5494 PyObject *value;
5495
5496 if (Py_SIZE(self->stack) <= 0)
5497 return stack_underflow();
5498 value = self->stack->data[Py_SIZE(self->stack) - 1];
5499
5500 return _Unpickler_MemoPut(self, self->memo_len, value);
5501}
5502
5503static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005504do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005505{
5506 PyObject *value;
5507 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005508 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005510 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005511 if (x > len || x <= 0)
5512 return stack_underflow();
5513 if (len == x) /* nothing to do */
5514 return 0;
5515
5516 list = self->stack->data[x - 1];
5517
5518 if (PyList_Check(list)) {
5519 PyObject *slice;
5520 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005521 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005522
5523 slice = Pdata_poplist(self->stack, x);
5524 if (!slice)
5525 return -1;
5526 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005527 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005528 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005529 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005530 }
5531 else {
5532 PyObject *append_func;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005533 _Py_IDENTIFIER(append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005534
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005535 append_func = _PyObject_GetAttrId(list, &PyId_append);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005536 if (append_func == NULL)
5537 return -1;
5538 for (i = x; i < len; i++) {
5539 PyObject *result;
5540
5541 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005542 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005543 if (result == NULL) {
5544 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005545 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005546 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005547 return -1;
5548 }
5549 Py_DECREF(result);
5550 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005551 Py_SIZE(self->stack) = x;
Alexandre Vassalotti637c7c42013-04-20 21:28:21 -07005552 Py_DECREF(append_func);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005553 }
5554
5555 return 0;
5556}
5557
5558static int
5559load_append(UnpicklerObject *self)
5560{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005561 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005562}
5563
5564static int
5565load_appends(UnpicklerObject *self)
5566{
5567 return do_append(self, marker(self));
5568}
5569
5570static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005571do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005572{
5573 PyObject *value, *key;
5574 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005575 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005576 int status = 0;
5577
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005578 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005579 if (x > len || x <= 0)
5580 return stack_underflow();
5581 if (len == x) /* nothing to do */
5582 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02005583 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005584 /* Currupt or hostile pickle -- we never write one like this. */
5585 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5586 return -1;
5587 }
5588
5589 /* Here, dict does not actually need to be a PyDict; it could be anything
5590 that supports the __setitem__ attribute. */
5591 dict = self->stack->data[x - 1];
5592
5593 for (i = x + 1; i < len; i += 2) {
5594 key = self->stack->data[i - 1];
5595 value = self->stack->data[i];
5596 if (PyObject_SetItem(dict, key, value) < 0) {
5597 status = -1;
5598 break;
5599 }
5600 }
5601
5602 Pdata_clear(self->stack, x);
5603 return status;
5604}
5605
5606static int
5607load_setitem(UnpicklerObject *self)
5608{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005609 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005610}
5611
5612static int
5613load_setitems(UnpicklerObject *self)
5614{
5615 return do_setitems(self, marker(self));
5616}
5617
5618static int
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005619load_additems(UnpicklerObject *self)
5620{
5621 PyObject *set;
5622 Py_ssize_t mark, len, i;
5623
5624 mark = marker(self);
5625 len = Py_SIZE(self->stack);
5626 if (mark > len || mark <= 0)
5627 return stack_underflow();
5628 if (len == mark) /* nothing to do */
5629 return 0;
5630
5631 set = self->stack->data[mark - 1];
5632
5633 if (PySet_Check(set)) {
5634 PyObject *items;
5635 int status;
5636
5637 items = Pdata_poptuple(self->stack, mark);
5638 if (items == NULL)
5639 return -1;
5640
5641 status = _PySet_Update(set, items);
5642 Py_DECREF(items);
5643 return status;
5644 }
5645 else {
5646 PyObject *add_func;
5647 _Py_IDENTIFIER(add);
5648
5649 add_func = _PyObject_GetAttrId(set, &PyId_add);
5650 if (add_func == NULL)
5651 return -1;
5652 for (i = mark; i < len; i++) {
5653 PyObject *result;
5654 PyObject *item;
5655
5656 item = self->stack->data[i];
5657 result = _Unpickler_FastCall(self, add_func, item);
5658 if (result == NULL) {
5659 Pdata_clear(self->stack, i + 1);
5660 Py_SIZE(self->stack) = mark;
5661 return -1;
5662 }
5663 Py_DECREF(result);
5664 }
5665 Py_SIZE(self->stack) = mark;
5666 }
5667
5668 return 0;
5669}
5670
5671static int
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005672load_build(UnpicklerObject *self)
5673{
5674 PyObject *state, *inst, *slotstate;
5675 PyObject *setstate;
5676 int status = 0;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005677 _Py_IDENTIFIER(__setstate__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005678
5679 /* Stack is ... instance, state. We want to leave instance at
5680 * the stack top, possibly mutated via instance.__setstate__(state).
5681 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005682 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005683 return stack_underflow();
5684
5685 PDATA_POP(self->stack, state);
5686 if (state == NULL)
5687 return -1;
5688
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005689 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005690
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005691 setstate = _PyObject_GetAttrId(inst, &PyId___setstate__);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005692 if (setstate == NULL) {
5693 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5694 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005695 else {
5696 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005697 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005698 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005699 }
5700 else {
5701 PyObject *result;
5702
5703 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005704 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005705 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005706 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005707 Py_DECREF(setstate);
5708 if (result == NULL)
5709 return -1;
5710 Py_DECREF(result);
5711 return 0;
5712 }
5713
5714 /* A default __setstate__. First see whether state embeds a
5715 * slot state dict too (a proto 2 addition).
5716 */
5717 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5718 PyObject *tmp = state;
5719
5720 state = PyTuple_GET_ITEM(tmp, 0);
5721 slotstate = PyTuple_GET_ITEM(tmp, 1);
5722 Py_INCREF(state);
5723 Py_INCREF(slotstate);
5724 Py_DECREF(tmp);
5725 }
5726 else
5727 slotstate = NULL;
5728
5729 /* Set inst.__dict__ from the state dict (if any). */
5730 if (state != Py_None) {
5731 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005732 PyObject *d_key, *d_value;
5733 Py_ssize_t i;
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02005734 _Py_IDENTIFIER(__dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005735
5736 if (!PyDict_Check(state)) {
5737 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5738 goto error;
5739 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02005740 dict = _PyObject_GetAttrId(inst, &PyId___dict__);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005741 if (dict == NULL)
5742 goto error;
5743
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005744 i = 0;
5745 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5746 /* normally the keys for instance attributes are
5747 interned. we should try to do that here. */
5748 Py_INCREF(d_key);
5749 if (PyUnicode_CheckExact(d_key))
5750 PyUnicode_InternInPlace(&d_key);
5751 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5752 Py_DECREF(d_key);
5753 goto error;
5754 }
5755 Py_DECREF(d_key);
5756 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005757 Py_DECREF(dict);
5758 }
5759
5760 /* Also set instance attributes from the slotstate dict (if any). */
5761 if (slotstate != NULL) {
5762 PyObject *d_key, *d_value;
5763 Py_ssize_t i;
5764
5765 if (!PyDict_Check(slotstate)) {
5766 PyErr_SetString(UnpicklingError,
5767 "slot state is not a dictionary");
5768 goto error;
5769 }
5770 i = 0;
5771 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5772 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5773 goto error;
5774 }
5775 }
5776
5777 if (0) {
5778 error:
5779 status = -1;
5780 }
5781
5782 Py_DECREF(state);
5783 Py_XDECREF(slotstate);
5784 return status;
5785}
5786
5787static int
5788load_mark(UnpicklerObject *self)
5789{
5790
5791 /* Note that we split the (pickle.py) stack into two stacks, an
5792 * object stack and a mark stack. Here we push a mark onto the
5793 * mark stack.
5794 */
5795
5796 if ((self->num_marks + 1) >= self->marks_size) {
5797 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005798 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005799
5800 /* Use the size_t type to check for overflow. */
5801 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005802 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005803 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005804 PyErr_NoMemory();
5805 return -1;
5806 }
5807
5808 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005809 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005810 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005811 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5812 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005813 if (marks == NULL) {
5814 PyErr_NoMemory();
5815 return -1;
5816 }
5817 self->marks = marks;
5818 self->marks_size = (Py_ssize_t)alloc;
5819 }
5820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005821 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005822
5823 return 0;
5824}
5825
5826static int
5827load_reduce(UnpicklerObject *self)
5828{
5829 PyObject *callable = NULL;
5830 PyObject *argtup = NULL;
5831 PyObject *obj = NULL;
5832
5833 PDATA_POP(self->stack, argtup);
5834 if (argtup == NULL)
5835 return -1;
5836 PDATA_POP(self->stack, callable);
5837 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005838 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005839 Py_DECREF(callable);
5840 }
5841 Py_DECREF(argtup);
5842
5843 if (obj == NULL)
5844 return -1;
5845
5846 PDATA_PUSH(self->stack, obj, -1);
5847 return 0;
5848}
5849
5850/* Just raises an error if we don't know the protocol specified. PROTO
5851 * is the first opcode for protocols >= 2.
5852 */
5853static int
5854load_proto(UnpicklerObject *self)
5855{
5856 char *s;
5857 int i;
5858
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005859 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005860 return -1;
5861
5862 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005863 if (i <= HIGHEST_PROTOCOL) {
5864 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005865 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005866 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005867
5868 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5869 return -1;
5870}
5871
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08005872static int
5873load_frame(UnpicklerObject *self)
5874{
5875 char *s;
5876 Py_ssize_t frame_len;
5877
5878 if (_Unpickler_Read(self, &s, 8) < 0)
5879 return -1;
5880
5881 frame_len = calc_binsize(s, 8);
5882 if (frame_len < 0) {
5883 PyErr_Format(PyExc_OverflowError,
5884 "FRAME length exceeds system's maximum of %zd bytes",
5885 PY_SSIZE_T_MAX);
5886 return -1;
5887 }
5888
5889 if (_Unpickler_Read(self, &s, frame_len) < 0)
5890 return -1;
5891
5892 /* Rewind to start of frame */
5893 self->next_read_idx -= frame_len;
5894 return 0;
5895}
5896
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005897static PyObject *
5898load(UnpicklerObject *self)
5899{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005900 PyObject *value = NULL;
5901 char *s;
5902
5903 self->num_marks = 0;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005904 self->proto = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005905 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005906 Pdata_clear(self->stack, 0);
5907
5908 /* Convenient macros for the dispatch while-switch loop just below. */
5909#define OP(opcode, load_func) \
5910 case opcode: if (load_func(self) < 0) break; continue;
5911
5912#define OP_ARG(opcode, load_func, arg) \
5913 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5914
5915 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005916 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005917 break;
5918
5919 switch ((enum opcode)s[0]) {
5920 OP(NONE, load_none)
5921 OP(BININT, load_binint)
5922 OP(BININT1, load_binint1)
5923 OP(BININT2, load_binint2)
5924 OP(INT, load_int)
5925 OP(LONG, load_long)
5926 OP_ARG(LONG1, load_counted_long, 1)
5927 OP_ARG(LONG4, load_counted_long, 4)
5928 OP(FLOAT, load_float)
5929 OP(BINFLOAT, load_binfloat)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005930 OP_ARG(SHORT_BINBYTES, load_counted_binbytes, 1)
5931 OP_ARG(BINBYTES, load_counted_binbytes, 4)
5932 OP_ARG(BINBYTES8, load_counted_binbytes, 8)
5933 OP_ARG(SHORT_BINSTRING, load_counted_binstring, 1)
5934 OP_ARG(BINSTRING, load_counted_binstring, 4)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005935 OP(STRING, load_string)
5936 OP(UNICODE, load_unicode)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005937 OP_ARG(SHORT_BINUNICODE, load_counted_binunicode, 1)
5938 OP_ARG(BINUNICODE, load_counted_binunicode, 4)
5939 OP_ARG(BINUNICODE8, load_counted_binunicode, 8)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005940 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5941 OP_ARG(TUPLE1, load_counted_tuple, 1)
5942 OP_ARG(TUPLE2, load_counted_tuple, 2)
5943 OP_ARG(TUPLE3, load_counted_tuple, 3)
5944 OP(TUPLE, load_tuple)
5945 OP(EMPTY_LIST, load_empty_list)
5946 OP(LIST, load_list)
5947 OP(EMPTY_DICT, load_empty_dict)
5948 OP(DICT, load_dict)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005949 OP(EMPTY_SET, load_empty_set)
5950 OP(ADDITEMS, load_additems)
5951 OP(FROZENSET, load_frozenset)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005952 OP(OBJ, load_obj)
5953 OP(INST, load_inst)
5954 OP(NEWOBJ, load_newobj)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005955 OP(NEWOBJ_EX, load_newobj_ex)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005956 OP(GLOBAL, load_global)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005957 OP(STACK_GLOBAL, load_stack_global)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005958 OP(APPEND, load_append)
5959 OP(APPENDS, load_appends)
5960 OP(BUILD, load_build)
5961 OP(DUP, load_dup)
5962 OP(BINGET, load_binget)
5963 OP(LONG_BINGET, load_long_binget)
5964 OP(GET, load_get)
5965 OP(MARK, load_mark)
5966 OP(BINPUT, load_binput)
5967 OP(LONG_BINPUT, load_long_binput)
5968 OP(PUT, load_put)
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01005969 OP(MEMOIZE, load_memoize)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005970 OP(POP, load_pop)
5971 OP(POP_MARK, load_pop_mark)
5972 OP(SETITEM, load_setitem)
5973 OP(SETITEMS, load_setitems)
5974 OP(PERSID, load_persid)
5975 OP(BINPERSID, load_binpersid)
5976 OP(REDUCE, load_reduce)
5977 OP(PROTO, load_proto)
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08005978 OP(FRAME, load_frame)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005979 OP_ARG(EXT1, load_extension, 1)
5980 OP_ARG(EXT2, load_extension, 2)
5981 OP_ARG(EXT4, load_extension, 4)
5982 OP_ARG(NEWTRUE, load_bool, Py_True)
5983 OP_ARG(NEWFALSE, load_bool, Py_False)
5984
5985 case STOP:
5986 break;
5987
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005988 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005989 if (s[0] == '\0')
5990 PyErr_SetNone(PyExc_EOFError);
5991 else
5992 PyErr_Format(UnpicklingError,
5993 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005994 return NULL;
5995 }
5996
5997 break; /* and we are done! */
5998 }
5999
Alexandre Vassalottib6a2f2a2013-11-23 20:30:03 -08006000 if (PyErr_Occurred()) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006001 return NULL;
6002 }
6003
Victor Stinner2ae57e32013-10-31 13:39:23 +01006004 if (_Unpickler_SkipConsumed(self) < 0)
6005 return NULL;
6006
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006007 PDATA_POP(self->stack, value);
6008 return value;
6009}
6010
6011PyDoc_STRVAR(Unpickler_load_doc,
6012"load() -> object. Load a pickle."
6013"\n"
6014"Read a pickled object representation from the open file object given in\n"
6015"the constructor, and return the reconstituted object hierarchy specified\n"
6016"therein.\n");
6017
6018static PyObject *
6019Unpickler_load(UnpicklerObject *self)
6020{
6021 /* Check whether the Unpickler was initialized correctly. This prevents
6022 segfaulting if a subclass overridden __init__ with a function that does
6023 not call Unpickler.__init__(). Here, we simply ensure that self->read
6024 is not NULL. */
6025 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02006026 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006027 "Unpickler.__init__() was not called by %s.__init__()",
6028 Py_TYPE(self)->tp_name);
6029 return NULL;
6030 }
6031
6032 return load(self);
6033}
6034
6035/* The name of find_class() is misleading. In newer pickle protocols, this
6036 function is used for loading any global (i.e., functions), not just
6037 classes. The name is kept only for backward compatibility. */
6038
6039PyDoc_STRVAR(Unpickler_find_class_doc,
6040"find_class(module_name, global_name) -> object.\n"
6041"\n"
6042"Return an object from a specified module, importing the module if\n"
6043"necessary. Subclasses may override this method (e.g. to restrict\n"
6044"unpickling of arbitrary classes and functions).\n"
6045"\n"
6046"This method is called whenever a class or a function object is\n"
6047"needed. Both arguments passed are str objects.\n");
6048
6049static PyObject *
6050Unpickler_find_class(UnpicklerObject *self, PyObject *args)
6051{
6052 PyObject *global;
6053 PyObject *modules_dict;
6054 PyObject *module;
6055 PyObject *module_name, *global_name;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006056 _Py_IDENTIFIER(modules);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006057
6058 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
6059 &module_name, &global_name))
6060 return NULL;
6061
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006062 /* Try to map the old names used in Python 2.x to the new ones used in
6063 Python 3.x. We do this only with old pickle protocols and when the
6064 user has not disabled the feature. */
6065 if (self->proto < 3 && self->fix_imports) {
6066 PyObject *key;
6067 PyObject *item;
6068
6069 /* Check if the global (i.e., a function or a class) was renamed
6070 or moved to another module. */
6071 key = PyTuple_Pack(2, module_name, global_name);
6072 if (key == NULL)
6073 return NULL;
6074 item = PyDict_GetItemWithError(name_mapping_2to3, key);
6075 Py_DECREF(key);
6076 if (item) {
6077 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
6078 PyErr_Format(PyExc_RuntimeError,
6079 "_compat_pickle.NAME_MAPPING values should be "
6080 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
6081 return NULL;
6082 }
6083 module_name = PyTuple_GET_ITEM(item, 0);
6084 global_name = PyTuple_GET_ITEM(item, 1);
6085 if (!PyUnicode_Check(module_name) ||
6086 !PyUnicode_Check(global_name)) {
6087 PyErr_Format(PyExc_RuntimeError,
6088 "_compat_pickle.NAME_MAPPING values should be "
6089 "pairs of str, not (%.200s, %.200s)",
6090 Py_TYPE(module_name)->tp_name,
6091 Py_TYPE(global_name)->tp_name);
6092 return NULL;
6093 }
6094 }
6095 else if (PyErr_Occurred()) {
6096 return NULL;
6097 }
6098
6099 /* Check if the module was renamed. */
6100 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
6101 if (item) {
6102 if (!PyUnicode_Check(item)) {
6103 PyErr_Format(PyExc_RuntimeError,
6104 "_compat_pickle.IMPORT_MAPPING values should be "
6105 "strings, not %.200s", Py_TYPE(item)->tp_name);
6106 return NULL;
6107 }
6108 module_name = item;
6109 }
6110 else if (PyErr_Occurred()) {
6111 return NULL;
6112 }
6113 }
6114
Victor Stinnerbb520202013-11-06 22:40:41 +01006115 modules_dict = _PySys_GetObjectId(&PyId_modules);
Victor Stinner1e53bba2013-07-16 22:26:05 +02006116 if (modules_dict == NULL) {
6117 PyErr_SetString(PyExc_RuntimeError, "unable to get sys.modules");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006118 return NULL;
Victor Stinner1e53bba2013-07-16 22:26:05 +02006119 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006120
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006121 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006122 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006123 if (PyErr_Occurred())
6124 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006125 module = PyImport_Import(module_name);
6126 if (module == NULL)
6127 return NULL;
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006128 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006129 Py_DECREF(module);
6130 }
Victor Stinner121aab42011-09-29 23:40:53 +02006131 else {
Antoine Pitrouc9dc4a22013-11-23 18:59:12 +01006132 global = getattribute(module, global_name, self->proto >= 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006133 }
6134 return global;
6135}
6136
6137static struct PyMethodDef Unpickler_methods[] = {
6138 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
6139 Unpickler_load_doc},
6140 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
6141 Unpickler_find_class_doc},
6142 {NULL, NULL} /* sentinel */
6143};
6144
6145static void
6146Unpickler_dealloc(UnpicklerObject *self)
6147{
6148 PyObject_GC_UnTrack((PyObject *)self);
6149 Py_XDECREF(self->readline);
6150 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006151 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006152 Py_XDECREF(self->stack);
6153 Py_XDECREF(self->pers_func);
6154 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006155 if (self->buffer.buf != NULL) {
6156 PyBuffer_Release(&self->buffer);
6157 self->buffer.buf = NULL;
6158 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006159
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006160 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006161 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006162 PyMem_Free(self->input_line);
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006163 PyMem_Free(self->encoding);
6164 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006165
6166 Py_TYPE(self)->tp_free((PyObject *)self);
6167}
6168
6169static int
6170Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
6171{
6172 Py_VISIT(self->readline);
6173 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006174 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006175 Py_VISIT(self->stack);
6176 Py_VISIT(self->pers_func);
6177 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006178 return 0;
6179}
6180
6181static int
6182Unpickler_clear(UnpicklerObject *self)
6183{
6184 Py_CLEAR(self->readline);
6185 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00006186 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006187 Py_CLEAR(self->stack);
6188 Py_CLEAR(self->pers_func);
6189 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006190 if (self->buffer.buf != NULL) {
6191 PyBuffer_Release(&self->buffer);
6192 self->buffer.buf = NULL;
6193 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006194
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006195 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006196 PyMem_Free(self->marks);
6197 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006198 PyMem_Free(self->input_line);
6199 self->input_line = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006200 PyMem_Free(self->encoding);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006201 self->encoding = NULL;
Victor Stinner49fc8ec2013-07-07 23:30:24 +02006202 PyMem_Free(self->errors);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006203 self->errors = NULL;
6204
6205 return 0;
6206}
6207
6208PyDoc_STRVAR(Unpickler_doc,
6209"Unpickler(file, *, encoding='ASCII', errors='strict')"
6210"\n"
6211"This takes a binary file for reading a pickle data stream.\n"
6212"\n"
6213"The protocol version of the pickle is detected automatically, so no\n"
6214"proto argument is needed.\n"
6215"\n"
6216"The file-like object must have two methods, a read() method\n"
6217"that takes an integer argument, and a readline() method that\n"
6218"requires no arguments. Both methods should return bytes.\n"
6219"Thus file-like object can be a binary file object opened for\n"
6220"reading, a BytesIO object, or any other custom object that\n"
6221"meets this interface.\n"
6222"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006223"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
6224"which are used to control compatiblity support for pickle stream\n"
6225"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
6226"map the old Python 2.x names to the new names used in Python 3.x. The\n"
6227"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
6228"instances pickled by Python 2.x; these default to 'ASCII' and\n"
6229"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006230
6231static int
6232Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
6233{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006234 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006235 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006236 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006237 char *encoding = NULL;
6238 char *errors = NULL;
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006239 _Py_IDENTIFIER(persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006240
6241 /* XXX: That is an horrible error message. But, I don't know how to do
6242 better... */
6243 if (Py_SIZE(args) != 1) {
6244 PyErr_Format(PyExc_TypeError,
6245 "%s takes exactly one positional argument (%zd given)",
6246 Py_TYPE(self)->tp_name, Py_SIZE(args));
6247 return -1;
6248 }
6249
6250 /* Arguments parsing needs to be done in the __init__() method to allow
6251 subclasses to define their own __init__() method, which may (or may
6252 not) support Unpickler arguments. However, this means we need to be
6253 extra careful in the other Unpickler methods, since a subclass could
6254 forget to call Unpickler.__init__() thus breaking our internal
6255 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006256 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006257 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006258 return -1;
6259
6260 /* In case of multiple __init__() calls, clear previous content. */
6261 if (self->read != NULL)
6262 (void)Unpickler_clear(self);
6263
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006264 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006265 return -1;
6266
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006267 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006268 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006269
6270 self->fix_imports = PyObject_IsTrue(fix_imports);
6271 if (self->fix_imports == -1)
6272 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006273
Martin v. Löwis1c67dd92011-10-14 15:16:45 +02006274 if (_PyObject_HasAttrId((PyObject *)self, &PyId_persistent_load)) {
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02006275 self->pers_func = _PyObject_GetAttrId((PyObject *)self,
6276 &PyId_persistent_load);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006277 if (self->pers_func == NULL)
6278 return -1;
6279 }
6280 else {
6281 self->pers_func = NULL;
6282 }
6283
6284 self->stack = (Pdata *)Pdata_New();
6285 if (self->stack == NULL)
6286 return -1;
6287
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006288 self->memo_size = 32;
6289 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006290 if (self->memo == NULL)
6291 return -1;
6292
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00006293 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006294 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00006295
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006296 return 0;
6297}
6298
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006299/* Define a proxy object for the Unpickler's internal memo object. This is to
6300 * avoid breaking code like:
6301 * unpickler.memo.clear()
6302 * and
6303 * unpickler.memo = saved_memo
6304 * Is this a good idea? Not really, but we don't want to break code that uses
6305 * it. Note that we don't implement the entire mapping API here. This is
6306 * intentional, as these should be treated as black-box implementation details.
6307 *
6308 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02006309 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006310 */
6311
6312typedef struct {
6313 PyObject_HEAD
6314 UnpicklerObject *unpickler;
6315} UnpicklerMemoProxyObject;
6316
6317PyDoc_STRVAR(ump_clear_doc,
6318"memo.clear() -> None. Remove all items from memo.");
6319
6320static PyObject *
6321ump_clear(UnpicklerMemoProxyObject *self)
6322{
6323 _Unpickler_MemoCleanup(self->unpickler);
6324 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
6325 if (self->unpickler->memo == NULL)
6326 return NULL;
6327 Py_RETURN_NONE;
6328}
6329
6330PyDoc_STRVAR(ump_copy_doc,
6331"memo.copy() -> new_memo. Copy the memo to a new object.");
6332
6333static PyObject *
6334ump_copy(UnpicklerMemoProxyObject *self)
6335{
6336 Py_ssize_t i;
6337 PyObject *new_memo = PyDict_New();
6338 if (new_memo == NULL)
6339 return NULL;
6340
6341 for (i = 0; i < self->unpickler->memo_size; i++) {
6342 int status;
6343 PyObject *key, *value;
6344
6345 value = self->unpickler->memo[i];
6346 if (value == NULL)
6347 continue;
6348
6349 key = PyLong_FromSsize_t(i);
6350 if (key == NULL)
6351 goto error;
6352 status = PyDict_SetItem(new_memo, key, value);
6353 Py_DECREF(key);
6354 if (status < 0)
6355 goto error;
6356 }
6357 return new_memo;
6358
6359error:
6360 Py_DECREF(new_memo);
6361 return NULL;
6362}
6363
6364PyDoc_STRVAR(ump_reduce_doc,
6365"memo.__reduce__(). Pickling support.");
6366
6367static PyObject *
6368ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
6369{
6370 PyObject *reduce_value;
6371 PyObject *constructor_args;
6372 PyObject *contents = ump_copy(self);
6373 if (contents == NULL)
6374 return NULL;
6375
6376 reduce_value = PyTuple_New(2);
6377 if (reduce_value == NULL) {
6378 Py_DECREF(contents);
6379 return NULL;
6380 }
6381 constructor_args = PyTuple_New(1);
6382 if (constructor_args == NULL) {
6383 Py_DECREF(contents);
6384 Py_DECREF(reduce_value);
6385 return NULL;
6386 }
6387 PyTuple_SET_ITEM(constructor_args, 0, contents);
6388 Py_INCREF((PyObject *)&PyDict_Type);
6389 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
6390 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
6391 return reduce_value;
6392}
6393
6394static PyMethodDef unpicklerproxy_methods[] = {
6395 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
6396 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
6397 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
6398 {NULL, NULL} /* sentinel */
6399};
6400
6401static void
6402UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
6403{
6404 PyObject_GC_UnTrack(self);
6405 Py_XDECREF(self->unpickler);
6406 PyObject_GC_Del((PyObject *)self);
6407}
6408
6409static int
6410UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
6411 visitproc visit, void *arg)
6412{
6413 Py_VISIT(self->unpickler);
6414 return 0;
6415}
6416
6417static int
6418UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
6419{
6420 Py_CLEAR(self->unpickler);
6421 return 0;
6422}
6423
6424static PyTypeObject UnpicklerMemoProxyType = {
6425 PyVarObject_HEAD_INIT(NULL, 0)
6426 "_pickle.UnpicklerMemoProxy", /*tp_name*/
6427 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
6428 0,
6429 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
6430 0, /* tp_print */
6431 0, /* tp_getattr */
6432 0, /* tp_setattr */
6433 0, /* tp_compare */
6434 0, /* tp_repr */
6435 0, /* tp_as_number */
6436 0, /* tp_as_sequence */
6437 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00006438 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006439 0, /* tp_call */
6440 0, /* tp_str */
6441 PyObject_GenericGetAttr, /* tp_getattro */
6442 PyObject_GenericSetAttr, /* tp_setattro */
6443 0, /* tp_as_buffer */
6444 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6445 0, /* tp_doc */
6446 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
6447 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
6448 0, /* tp_richcompare */
6449 0, /* tp_weaklistoffset */
6450 0, /* tp_iter */
6451 0, /* tp_iternext */
6452 unpicklerproxy_methods, /* tp_methods */
6453};
6454
6455static PyObject *
6456UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
6457{
6458 UnpicklerMemoProxyObject *self;
6459
6460 self = PyObject_GC_New(UnpicklerMemoProxyObject,
6461 &UnpicklerMemoProxyType);
6462 if (self == NULL)
6463 return NULL;
6464 Py_INCREF(unpickler);
6465 self->unpickler = unpickler;
6466 PyObject_GC_Track(self);
6467 return (PyObject *)self;
6468}
6469
6470/*****************************************************************************/
6471
6472
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006473static PyObject *
6474Unpickler_get_memo(UnpicklerObject *self)
6475{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006476 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006477}
6478
6479static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006480Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006481{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006482 PyObject **new_memo;
6483 Py_ssize_t new_memo_size = 0;
6484 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006485
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006486 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006487 PyErr_SetString(PyExc_TypeError,
6488 "attribute deletion is not supported");
6489 return -1;
6490 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006491
6492 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
6493 UnpicklerObject *unpickler =
6494 ((UnpicklerMemoProxyObject *)obj)->unpickler;
6495
6496 new_memo_size = unpickler->memo_size;
6497 new_memo = _Unpickler_NewMemo(new_memo_size);
6498 if (new_memo == NULL)
6499 return -1;
6500
6501 for (i = 0; i < new_memo_size; i++) {
6502 Py_XINCREF(unpickler->memo[i]);
6503 new_memo[i] = unpickler->memo[i];
6504 }
6505 }
6506 else if (PyDict_Check(obj)) {
6507 Py_ssize_t i = 0;
6508 PyObject *key, *value;
6509
6510 new_memo_size = PyDict_Size(obj);
6511 new_memo = _Unpickler_NewMemo(new_memo_size);
6512 if (new_memo == NULL)
6513 return -1;
6514
6515 while (PyDict_Next(obj, &i, &key, &value)) {
6516 Py_ssize_t idx;
6517 if (!PyLong_Check(key)) {
6518 PyErr_SetString(PyExc_TypeError,
6519 "memo key must be integers");
6520 goto error;
6521 }
6522 idx = PyLong_AsSsize_t(key);
6523 if (idx == -1 && PyErr_Occurred())
6524 goto error;
Christian Heimesa24b4d22013-07-01 15:17:45 +02006525 if (idx < 0) {
6526 PyErr_SetString(PyExc_ValueError,
Christian Heimes80878792013-07-01 15:23:39 +02006527 "memo key must be positive integers.");
Christian Heimesa24b4d22013-07-01 15:17:45 +02006528 goto error;
6529 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006530 if (_Unpickler_MemoPut(self, idx, value) < 0)
6531 goto error;
6532 }
6533 }
6534 else {
6535 PyErr_Format(PyExc_TypeError,
6536 "'memo' attribute must be an UnpicklerMemoProxy object"
6537 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006538 return -1;
6539 }
6540
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006541 _Unpickler_MemoCleanup(self);
6542 self->memo_size = new_memo_size;
6543 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006544
6545 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006546
6547 error:
6548 if (new_memo_size) {
6549 i = new_memo_size;
6550 while (--i >= 0) {
6551 Py_XDECREF(new_memo[i]);
6552 }
6553 PyMem_FREE(new_memo);
6554 }
6555 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006556}
6557
6558static PyObject *
6559Unpickler_get_persload(UnpicklerObject *self)
6560{
6561 if (self->pers_func == NULL)
6562 PyErr_SetString(PyExc_AttributeError, "persistent_load");
6563 else
6564 Py_INCREF(self->pers_func);
6565 return self->pers_func;
6566}
6567
6568static int
6569Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
6570{
6571 PyObject *tmp;
6572
6573 if (value == NULL) {
6574 PyErr_SetString(PyExc_TypeError,
6575 "attribute deletion is not supported");
6576 return -1;
6577 }
6578 if (!PyCallable_Check(value)) {
6579 PyErr_SetString(PyExc_TypeError,
6580 "persistent_load must be a callable taking "
6581 "one argument");
6582 return -1;
6583 }
6584
6585 tmp = self->pers_func;
6586 Py_INCREF(value);
6587 self->pers_func = value;
6588 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
6589
6590 return 0;
6591}
6592
6593static PyGetSetDef Unpickler_getsets[] = {
6594 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
6595 {"persistent_load", (getter)Unpickler_get_persload,
6596 (setter)Unpickler_set_persload},
6597 {NULL}
6598};
6599
6600static PyTypeObject Unpickler_Type = {
6601 PyVarObject_HEAD_INIT(NULL, 0)
6602 "_pickle.Unpickler", /*tp_name*/
6603 sizeof(UnpicklerObject), /*tp_basicsize*/
6604 0, /*tp_itemsize*/
6605 (destructor)Unpickler_dealloc, /*tp_dealloc*/
6606 0, /*tp_print*/
6607 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006608 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00006609 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006610 0, /*tp_repr*/
6611 0, /*tp_as_number*/
6612 0, /*tp_as_sequence*/
6613 0, /*tp_as_mapping*/
6614 0, /*tp_hash*/
6615 0, /*tp_call*/
6616 0, /*tp_str*/
6617 0, /*tp_getattro*/
6618 0, /*tp_setattro*/
6619 0, /*tp_as_buffer*/
6620 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
6621 Unpickler_doc, /*tp_doc*/
6622 (traverseproc)Unpickler_traverse, /*tp_traverse*/
6623 (inquiry)Unpickler_clear, /*tp_clear*/
6624 0, /*tp_richcompare*/
6625 0, /*tp_weaklistoffset*/
6626 0, /*tp_iter*/
6627 0, /*tp_iternext*/
6628 Unpickler_methods, /*tp_methods*/
6629 0, /*tp_members*/
6630 Unpickler_getsets, /*tp_getset*/
6631 0, /*tp_base*/
6632 0, /*tp_dict*/
6633 0, /*tp_descr_get*/
6634 0, /*tp_descr_set*/
6635 0, /*tp_dictoffset*/
6636 (initproc)Unpickler_init, /*tp_init*/
6637 PyType_GenericAlloc, /*tp_alloc*/
6638 PyType_GenericNew, /*tp_new*/
6639 PyObject_GC_Del, /*tp_free*/
6640 0, /*tp_is_gc*/
6641};
6642
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006643PyDoc_STRVAR(pickle_dump_doc,
6644"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
6645"\n"
6646"Write a pickled representation of obj to the open file object file. This\n"
6647"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
6648"efficient.\n"
6649"\n"
6650"The optional protocol argument tells the pickler to use the given protocol;\n"
6651"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6652"backward-incompatible protocol designed for Python 3.0.\n"
6653"\n"
6654"Specifying a negative protocol version selects the highest protocol version\n"
6655"supported. The higher the protocol used, the more recent the version of\n"
6656"Python needed to read the pickle produced.\n"
6657"\n"
6658"The file argument must have a write() method that accepts a single bytes\n"
6659"argument. It can thus be a file object opened for binary writing, a\n"
6660"io.BytesIO instance, or any other custom object that meets this interface.\n"
6661"\n"
6662"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6663"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6664"so that the pickle data stream is readable with Python 2.x.\n");
6665
6666static PyObject *
6667pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6668{
6669 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6670 PyObject *obj;
6671 PyObject *file;
6672 PyObject *proto = NULL;
6673 PyObject *fix_imports = Py_True;
6674 PicklerObject *pickler;
6675
6676 /* fix_imports is a keyword-only argument. */
6677 if (Py_SIZE(args) > 3) {
6678 PyErr_Format(PyExc_TypeError,
6679 "pickle.dump() takes at most 3 positional "
6680 "argument (%zd given)", Py_SIZE(args));
6681 return NULL;
6682 }
6683
6684 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6685 &obj, &file, &proto, &fix_imports))
6686 return NULL;
6687
6688 pickler = _Pickler_New();
6689 if (pickler == NULL)
6690 return NULL;
6691
6692 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6693 goto error;
6694
6695 if (_Pickler_SetOutputStream(pickler, file) < 0)
6696 goto error;
6697
6698 if (dump(pickler, obj) < 0)
6699 goto error;
6700
6701 if (_Pickler_FlushToFile(pickler) < 0)
6702 goto error;
6703
6704 Py_DECREF(pickler);
6705 Py_RETURN_NONE;
6706
6707 error:
6708 Py_XDECREF(pickler);
6709 return NULL;
6710}
6711
6712PyDoc_STRVAR(pickle_dumps_doc,
6713"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6714"\n"
6715"Return the pickled representation of the object as a bytes\n"
6716"object, instead of writing it to a file.\n"
6717"\n"
6718"The optional protocol argument tells the pickler to use the given protocol;\n"
6719"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6720"backward-incompatible protocol designed for Python 3.0.\n"
6721"\n"
6722"Specifying a negative protocol version selects the highest protocol version\n"
6723"supported. The higher the protocol used, the more recent the version of\n"
6724"Python needed to read the pickle produced.\n"
6725"\n"
6726"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6727"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6728"so that the pickle data stream is readable with Python 2.x.\n");
6729
6730static PyObject *
6731pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6732{
6733 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6734 PyObject *obj;
6735 PyObject *proto = NULL;
6736 PyObject *result;
6737 PyObject *fix_imports = Py_True;
6738 PicklerObject *pickler;
6739
6740 /* fix_imports is a keyword-only argument. */
6741 if (Py_SIZE(args) > 2) {
6742 PyErr_Format(PyExc_TypeError,
6743 "pickle.dumps() takes at most 2 positional "
6744 "argument (%zd given)", Py_SIZE(args));
6745 return NULL;
6746 }
6747
6748 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6749 &obj, &proto, &fix_imports))
6750 return NULL;
6751
6752 pickler = _Pickler_New();
6753 if (pickler == NULL)
6754 return NULL;
6755
6756 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6757 goto error;
6758
6759 if (dump(pickler, obj) < 0)
6760 goto error;
6761
6762 result = _Pickler_GetString(pickler);
6763 Py_DECREF(pickler);
6764 return result;
6765
6766 error:
6767 Py_XDECREF(pickler);
6768 return NULL;
6769}
6770
6771PyDoc_STRVAR(pickle_load_doc,
6772"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6773"\n"
6774"Read a pickled object representation from the open file object file and\n"
6775"return the reconstituted object hierarchy specified therein. This is\n"
6776"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6777"\n"
6778"The protocol version of the pickle is detected automatically, so no protocol\n"
6779"argument is needed. Bytes past the pickled object's representation are\n"
6780"ignored.\n"
6781"\n"
6782"The argument file must have two methods, a read() method that takes an\n"
6783"integer argument, and a readline() method that requires no arguments. Both\n"
6784"methods should return bytes. Thus *file* can be a binary file object opened\n"
6785"for reading, a BytesIO object, or any other custom object that meets this\n"
6786"interface.\n"
6787"\n"
6788"Optional keyword arguments are fix_imports, encoding and errors,\n"
6789"which are used to control compatiblity support for pickle stream generated\n"
6790"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6791"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6792"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6793"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6794
6795static PyObject *
6796pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6797{
6798 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6799 PyObject *file;
6800 PyObject *fix_imports = Py_True;
6801 PyObject *result;
6802 char *encoding = NULL;
6803 char *errors = NULL;
6804 UnpicklerObject *unpickler;
6805
6806 /* fix_imports, encoding and errors are a keyword-only argument. */
6807 if (Py_SIZE(args) != 1) {
6808 PyErr_Format(PyExc_TypeError,
6809 "pickle.load() takes exactly one positional "
6810 "argument (%zd given)", Py_SIZE(args));
6811 return NULL;
6812 }
6813
6814 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6815 &file, &fix_imports, &encoding, &errors))
6816 return NULL;
6817
6818 unpickler = _Unpickler_New();
6819 if (unpickler == NULL)
6820 return NULL;
6821
6822 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6823 goto error;
6824
6825 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6826 goto error;
6827
6828 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6829 if (unpickler->fix_imports == -1)
6830 goto error;
6831
6832 result = load(unpickler);
6833 Py_DECREF(unpickler);
6834 return result;
6835
6836 error:
6837 Py_XDECREF(unpickler);
6838 return NULL;
6839}
6840
6841PyDoc_STRVAR(pickle_loads_doc,
6842"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6843"\n"
6844"Read a pickled object hierarchy from a bytes object and return the\n"
6845"reconstituted object hierarchy specified therein\n"
6846"\n"
6847"The protocol version of the pickle is detected automatically, so no protocol\n"
6848"argument is needed. Bytes past the pickled object's representation are\n"
6849"ignored.\n"
6850"\n"
6851"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6852"are used to control compatiblity support for pickle stream generated\n"
6853"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6854"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6855"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6856"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6857
6858static PyObject *
6859pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6860{
6861 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6862 PyObject *input;
6863 PyObject *fix_imports = Py_True;
6864 PyObject *result;
6865 char *encoding = NULL;
6866 char *errors = NULL;
6867 UnpicklerObject *unpickler;
6868
6869 /* fix_imports, encoding and errors are a keyword-only argument. */
6870 if (Py_SIZE(args) != 1) {
6871 PyErr_Format(PyExc_TypeError,
6872 "pickle.loads() takes exactly one positional "
6873 "argument (%zd given)", Py_SIZE(args));
6874 return NULL;
6875 }
6876
6877 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6878 &input, &fix_imports, &encoding, &errors))
6879 return NULL;
6880
6881 unpickler = _Unpickler_New();
6882 if (unpickler == NULL)
6883 return NULL;
6884
6885 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6886 goto error;
6887
6888 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6889 goto error;
6890
6891 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6892 if (unpickler->fix_imports == -1)
6893 goto error;
6894
6895 result = load(unpickler);
6896 Py_DECREF(unpickler);
6897 return result;
6898
6899 error:
6900 Py_XDECREF(unpickler);
6901 return NULL;
6902}
6903
6904
6905static struct PyMethodDef pickle_methods[] = {
6906 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6907 pickle_dump_doc},
6908 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6909 pickle_dumps_doc},
6910 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6911 pickle_load_doc},
6912 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6913 pickle_loads_doc},
6914 {NULL, NULL} /* sentinel */
6915};
6916
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006917static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006918initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006919{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006920 PyObject *copyreg = NULL;
6921 PyObject *compat_pickle = NULL;
6922
6923 /* XXX: We should ensure that the types of the dictionaries imported are
6924 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6925 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006926
6927 copyreg = PyImport_ImportModule("copyreg");
6928 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006929 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006930 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6931 if (!dispatch_table)
6932 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006933 extension_registry = \
6934 PyObject_GetAttrString(copyreg, "_extension_registry");
6935 if (!extension_registry)
6936 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006937 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6938 if (!inverted_registry)
6939 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006940 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6941 if (!extension_cache)
6942 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006943 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006944
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006945 /* Load the 2.x -> 3.x stdlib module mapping tables */
6946 compat_pickle = PyImport_ImportModule("_compat_pickle");
6947 if (!compat_pickle)
6948 goto error;
6949 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6950 if (!name_mapping_2to3)
6951 goto error;
6952 if (!PyDict_CheckExact(name_mapping_2to3)) {
6953 PyErr_Format(PyExc_RuntimeError,
6954 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6955 Py_TYPE(name_mapping_2to3)->tp_name);
6956 goto error;
6957 }
6958 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6959 "IMPORT_MAPPING");
6960 if (!import_mapping_2to3)
6961 goto error;
6962 if (!PyDict_CheckExact(import_mapping_2to3)) {
6963 PyErr_Format(PyExc_RuntimeError,
6964 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6965 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6966 goto error;
6967 }
6968 /* ... and the 3.x -> 2.x mapping tables */
6969 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6970 "REVERSE_NAME_MAPPING");
6971 if (!name_mapping_3to2)
6972 goto error;
6973 if (!PyDict_CheckExact(name_mapping_3to2)) {
6974 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006975 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006976 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6977 goto error;
6978 }
6979 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6980 "REVERSE_IMPORT_MAPPING");
6981 if (!import_mapping_3to2)
6982 goto error;
6983 if (!PyDict_CheckExact(import_mapping_3to2)) {
6984 PyErr_Format(PyExc_RuntimeError,
6985 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6986 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6987 goto error;
6988 }
6989 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006990
6991 empty_tuple = PyTuple_New(0);
6992 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006993 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006994 two_tuple = PyTuple_New(2);
6995 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006996 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006997 /* We use this temp container with no regard to refcounts, or to
6998 * keeping containees alive. Exempt from GC, because we don't
6999 * want anything looking at two_tuple() by magic.
7000 */
7001 PyObject_GC_UnTrack(two_tuple);
7002
7003 return 0;
7004
7005 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007006 Py_CLEAR(copyreg);
7007 Py_CLEAR(dispatch_table);
7008 Py_CLEAR(extension_registry);
7009 Py_CLEAR(inverted_registry);
7010 Py_CLEAR(extension_cache);
7011 Py_CLEAR(compat_pickle);
7012 Py_CLEAR(name_mapping_2to3);
7013 Py_CLEAR(import_mapping_2to3);
7014 Py_CLEAR(name_mapping_3to2);
7015 Py_CLEAR(import_mapping_3to2);
7016 Py_CLEAR(empty_tuple);
7017 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007018 return -1;
7019}
7020
7021static struct PyModuleDef _picklemodule = {
7022 PyModuleDef_HEAD_INIT,
7023 "_pickle",
7024 pickle_module_doc,
7025 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007026 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007027 NULL,
7028 NULL,
7029 NULL,
7030 NULL
7031};
7032
7033PyMODINIT_FUNC
7034PyInit__pickle(void)
7035{
7036 PyObject *m;
7037
7038 if (PyType_Ready(&Unpickler_Type) < 0)
7039 return NULL;
7040 if (PyType_Ready(&Pickler_Type) < 0)
7041 return NULL;
7042 if (PyType_Ready(&Pdata_Type) < 0)
7043 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00007044 if (PyType_Ready(&PicklerMemoProxyType) < 0)
7045 return NULL;
7046 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
7047 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007048
7049 /* Create the module and add the functions. */
7050 m = PyModule_Create(&_picklemodule);
7051 if (m == NULL)
7052 return NULL;
7053
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007054 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007055 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
7056 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02007057 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007058 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
7059 return NULL;
7060
7061 /* Initialize the exceptions. */
7062 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
7063 if (PickleError == NULL)
7064 return NULL;
7065 PicklingError = \
7066 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
7067 if (PicklingError == NULL)
7068 return NULL;
7069 UnpicklingError = \
7070 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
7071 if (UnpicklingError == NULL)
7072 return NULL;
7073
7074 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
7075 return NULL;
7076 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
7077 return NULL;
7078 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
7079 return NULL;
7080
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00007081 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00007082 return NULL;
7083
7084 return m;
7085}