blob: e53abc888e12a4e9e99c96fef8e6811d2b148de5 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Protocol version limits.  Bump these when new opcodes are added to the
   pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   These must be kept updated with pickle.py; extensive docs are in
   pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the textual forms used to pickle bools before
 * protocol 2.  Unpicklers written before bools were introduced read them
 * as ints, while later unpicklers can recognize that bools were intended.
 * Protocol 2 added direct ways to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE  "I01\n"
#define FALSE "I00\n"
86
/* Tunable size constants used by the Pickler and Unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many
       elements batch_list/dict() pumps out before doing APPENDS/SETITEMS.
       Nothing will break if this gets out of synch with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER)                                    \
    do {                                                        \
        if (Pdata_push((D), (O)) < 0) return (ER);              \
    } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER)                                  \
    do {                                                        \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER);              \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values.  This is
 used by the pickler for memoization.  Using a custom hashtable rather than
 PyDict allows us to skip a bunch of unnecessary object creation.  This
 makes a huge performance difference. */

/* Initial number of slots in a memo table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on every probe. */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP stores `obj` into the cached 1-tuple (self)->arg, creating the
   tuple if needed and releasing any previous item.  The reference to `obj`
   is consumed either way: it is stolen by the tuple, or dropped when the
   tuple cannot be created (in which case (self)->arg stays NULL).

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
#define ARG_TUP(self, obj)                                      \
    do {                                                        \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {      \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Drop the cached tuple if anything else still holds a reference to it
   after the call, so that a tuple someone kept is never reused. */
#define FREE_ARG_TUP(self)                                      \
    do {                                                        \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200912
Antoine Pitrou04248a82010-10-12 20:51:21 +0000913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
Victor Stinner121aab42011-09-29 23:40:53 +02001040
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001222 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001224 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1235 "%" PY_FORMAT_SIZE_T "d\n", *value);
1236 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001270 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001271 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1285 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 len = strlen(pdata);
1287 }
1288 else {
1289 if (x < 256) {
1290 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001291 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001292 len = 2;
1293 }
1294 else if (x <= 0xffffffffL) {
1295 pdata[0] = LONG_BINPUT;
1296 pdata[1] = (unsigned char)(x & 0xff);
1297 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1298 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1299 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1300 len = 5;
1301 }
1302 else { /* unlikely */
1303 PyErr_SetString(PicklingError,
1304 "memo id too large for LONG_BINPUT");
1305 return -1;
1306 }
1307 }
1308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 goto error;
1311
1312 if (0) {
1313 error:
1314 status = -1;
1315 }
1316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 return status;
1318}
1319
1320static PyObject *
1321whichmodule(PyObject *global, PyObject *global_name)
1322{
1323 Py_ssize_t i, j;
1324 static PyObject *module_str = NULL;
1325 static PyObject *main_str = NULL;
1326 PyObject *module_name;
1327 PyObject *modules_dict;
1328 PyObject *module;
1329 PyObject *obj;
1330
1331 if (module_str == NULL) {
1332 module_str = PyUnicode_InternFromString("__module__");
1333 if (module_str == NULL)
1334 return NULL;
1335 main_str = PyUnicode_InternFromString("__main__");
1336 if (main_str == NULL)
1337 return NULL;
1338 }
1339
1340 module_name = PyObject_GetAttr(global, module_str);
1341
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001342 /* In some rare cases (e.g., bound methods of extension types),
1343 __module__ can be None. If it is so, then search sys.modules
1344 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 if (module_name == Py_None) {
1346 Py_DECREF(module_name);
1347 goto search;
1348 }
1349
1350 if (module_name) {
1351 return module_name;
1352 }
1353 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1354 PyErr_Clear();
1355 else
1356 return NULL;
1357
1358 search:
1359 modules_dict = PySys_GetObject("modules");
1360 if (modules_dict == NULL)
1361 return NULL;
1362
1363 i = 0;
1364 module_name = NULL;
1365 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001366 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001367 continue;
1368
1369 obj = PyObject_GetAttr(module, global_name);
1370 if (obj == NULL) {
1371 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1372 PyErr_Clear();
1373 else
1374 return NULL;
1375 continue;
1376 }
1377
1378 if (obj != global) {
1379 Py_DECREF(obj);
1380 continue;
1381 }
1382
1383 Py_DECREF(obj);
1384 break;
1385 }
1386
1387 /* If no module is found, use __main__. */
1388 if (!j) {
1389 module_name = main_str;
1390 }
1391
1392 Py_INCREF(module_name);
1393 return module_name;
1394}
1395
1396/* fast_save_enter() and fast_save_leave() are guards against recursive
1397 objects when Pickler is used with the "fast mode" (i.e., with object
1398 memoization disabled). If the nesting of a list or dict object exceed
1399 FAST_NESTING_LIMIT, these guards will start keeping an internal
1400 reference to the seen list or dict objects and check whether these objects
1401 are recursive. These are not strictly necessary, since save() has a
1402 hard-coded recursion limit, but they give a nicer error message than the
1403 typical RuntimeError. */
1404static int
1405fast_save_enter(PicklerObject *self, PyObject *obj)
1406{
1407 /* if fast_nesting < 0, we're doing an error exit. */
1408 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1409 PyObject *key = NULL;
1410 if (self->fast_memo == NULL) {
1411 self->fast_memo = PyDict_New();
1412 if (self->fast_memo == NULL) {
1413 self->fast_nesting = -1;
1414 return 0;
1415 }
1416 }
1417 key = PyLong_FromVoidPtr(obj);
1418 if (key == NULL)
1419 return 0;
1420 if (PyDict_GetItem(self->fast_memo, key)) {
1421 Py_DECREF(key);
1422 PyErr_Format(PyExc_ValueError,
1423 "fast mode: can't pickle cyclic objects "
1424 "including object type %.200s at %p",
1425 obj->ob_type->tp_name, obj);
1426 self->fast_nesting = -1;
1427 return 0;
1428 }
1429 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1430 Py_DECREF(key);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 Py_DECREF(key);
1435 }
1436 return 1;
1437}
1438
1439static int
1440fast_save_leave(PicklerObject *self, PyObject *obj)
1441{
1442 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1443 PyObject *key = PyLong_FromVoidPtr(obj);
1444 if (key == NULL)
1445 return 0;
1446 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1447 Py_DECREF(key);
1448 return 0;
1449 }
1450 Py_DECREF(key);
1451 }
1452 return 1;
1453}
1454
1455static int
1456save_none(PicklerObject *self, PyObject *obj)
1457{
1458 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461
1462 return 0;
1463}
1464
1465static int
1466save_bool(PicklerObject *self, PyObject *obj)
1467{
1468 static const char *buf[2] = { FALSE, TRUE };
1469 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1470 int p = (obj == Py_True);
1471
1472 if (self->proto >= 2) {
1473 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001475 return -1;
1476 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479
1480 return 0;
1481}
1482
1483static int
1484save_int(PicklerObject *self, long x)
1485{
1486 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001487 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488
1489 if (!self->bin
1490#if SIZEOF_LONG > 4
1491 || x > 0x7fffffffL || x < -0x80000000L
1492#endif
1493 ) {
1494 /* Text-mode pickle, or long too big to fit in the 4-byte
1495 * signed BININT format: store as a string.
1496 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001497 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1498 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 return -1;
1501 }
1502 else {
1503 /* Binary pickle and x fits in a signed 4-byte int. */
1504 pdata[1] = (unsigned char)(x & 0xff);
1505 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1508
1509 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1510 if (pdata[2] == 0) {
1511 pdata[0] = BININT1;
1512 len = 2;
1513 }
1514 else {
1515 pdata[0] = BININT2;
1516 len = 3;
1517 }
1518 }
1519 else {
1520 pdata[0] = BININT;
1521 len = 5;
1522 }
1523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001524 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 return -1;
1526 }
1527
1528 return 0;
1529}
1530
1531static int
1532save_long(PicklerObject *self, PyObject *obj)
1533{
1534 PyObject *repr = NULL;
1535 Py_ssize_t size;
1536 long val = PyLong_AsLong(obj);
1537 int status = 0;
1538
1539 const char long_op = LONG;
1540
1541 if (val == -1 && PyErr_Occurred()) {
1542 /* out of range for int pickling */
1543 PyErr_Clear();
1544 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001545 else
1546#if SIZEOF_LONG > 4
1547 if (val <= 0x7fffffffL && val >= -0x80000000L)
1548#endif
1549 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001550
1551 if (self->proto >= 2) {
1552 /* Linear-time pickling. */
1553 size_t nbits;
1554 size_t nbytes;
1555 unsigned char *pdata;
1556 char header[5];
1557 int i;
1558 int sign = _PyLong_Sign(obj);
1559
1560 if (sign == 0) {
1561 header[0] = LONG1;
1562 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001563 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001564 goto error;
1565 return 0;
1566 }
1567 nbits = _PyLong_NumBits(obj);
1568 if (nbits == (size_t)-1 && PyErr_Occurred())
1569 goto error;
1570 /* How many bytes do we need? There are nbits >> 3 full
1571 * bytes of data, and nbits & 7 leftover bits. If there
1572 * are any leftover bits, then we clearly need another
1573 * byte. Wnat's not so obvious is that we *probably*
1574 * need another byte even if there aren't any leftovers:
1575 * the most-significant bit of the most-significant byte
1576 * acts like a sign bit, and it's usually got a sense
1577 * opposite of the one we need. The exception is longs
1578 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1579 * its own 256's-complement, so has the right sign bit
1580 * even without the extra byte. That's a pain to check
1581 * for in advance, though, so we always grab an extra
1582 * byte at the start, and cut it back later if possible.
1583 */
1584 nbytes = (nbits >> 3) + 1;
1585 if (nbytes > INT_MAX) {
1586 PyErr_SetString(PyExc_OverflowError,
1587 "long too large to pickle");
1588 goto error;
1589 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001590 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001591 if (repr == NULL)
1592 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001593 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001594 i = _PyLong_AsByteArray((PyLongObject *)obj,
1595 pdata, nbytes,
1596 1 /* little endian */ , 1 /* signed */ );
1597 if (i < 0)
1598 goto error;
1599 /* If the long is negative, this may be a byte more than
1600 * needed. This is so iff the MSB is all redundant sign
1601 * bits.
1602 */
1603 if (sign < 0 &&
Victor Stinner121aab42011-09-29 23:40:53 +02001604 nbytes > 1 &&
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001605 pdata[nbytes - 1] == 0xff &&
1606 (pdata[nbytes - 2] & 0x80) != 0) {
1607 nbytes--;
1608 }
1609
1610 if (nbytes < 256) {
1611 header[0] = LONG1;
1612 header[1] = (unsigned char)nbytes;
1613 size = 2;
1614 }
1615 else {
1616 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001617 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001618 for (i = 1; i < 5; i++) {
1619 header[i] = (unsigned char)(size & 0xff);
1620 size >>= 8;
1621 }
1622 size = 5;
1623 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001624 if (_Pickler_Write(self, header, size) < 0 ||
1625 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001626 goto error;
1627 }
1628 else {
1629 char *string;
1630
Mark Dickinson8dd05142009-01-20 20:43:58 +00001631 /* proto < 2: write the repr and newline. This is quadratic-time (in
1632 the number of digits), in both directions. We add a trailing 'L'
1633 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001634
1635 repr = PyObject_Repr(obj);
1636 if (repr == NULL)
1637 goto error;
1638
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001639 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001640 if (string == NULL)
1641 goto error;
1642
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001643 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1644 _Pickler_Write(self, string, size) < 0 ||
1645 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001646 goto error;
1647 }
1648
1649 if (0) {
1650 error:
1651 status = -1;
1652 }
1653 Py_XDECREF(repr);
1654
1655 return status;
1656}
1657
1658static int
1659save_float(PicklerObject *self, PyObject *obj)
1660{
1661 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1662
1663 if (self->bin) {
1664 char pdata[9];
1665 pdata[0] = BINFLOAT;
1666 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1667 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001668 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001669 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001670 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001672 int result = -1;
1673 char *buf = NULL;
1674 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001676 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 goto done;
1678
Mark Dickinson3e09f432009-04-17 08:41:23 +00001679 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001680 if (!buf) {
1681 PyErr_NoMemory();
1682 goto done;
1683 }
1684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001688 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 goto done;
1690
1691 result = 0;
1692done:
1693 PyMem_Free(buf);
1694 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001695 }
1696
1697 return 0;
1698}
1699
1700static int
1701save_bytes(PicklerObject *self, PyObject *obj)
1702{
1703 if (self->proto < 3) {
1704 /* Older pickle protocols do not have an opcode for pickling bytes
1705 objects. Therefore, we need to fake the copy protocol (i.e.,
1706 the __reduce__ method) to permit bytes object unpickling. */
1707 PyObject *reduce_value = NULL;
1708 PyObject *bytelist = NULL;
1709 int status;
1710
1711 bytelist = PySequence_List(obj);
1712 if (bytelist == NULL)
1713 return -1;
1714
1715 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1716 bytelist);
1717 if (reduce_value == NULL) {
1718 Py_DECREF(bytelist);
1719 return -1;
1720 }
1721
1722 /* save_reduce() will memoize the object automatically. */
1723 status = save_reduce(self, reduce_value, obj);
1724 Py_DECREF(reduce_value);
1725 Py_DECREF(bytelist);
1726 return status;
1727 }
1728 else {
1729 Py_ssize_t size;
1730 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001731 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001732
1733 size = PyBytes_Size(obj);
1734 if (size < 0)
1735 return -1;
1736
1737 if (size < 256) {
1738 header[0] = SHORT_BINBYTES;
1739 header[1] = (unsigned char)size;
1740 len = 2;
1741 }
1742 else if (size <= 0xffffffffL) {
1743 header[0] = BINBYTES;
1744 header[1] = (unsigned char)(size & 0xff);
1745 header[2] = (unsigned char)((size >> 8) & 0xff);
1746 header[3] = (unsigned char)((size >> 16) & 0xff);
1747 header[4] = (unsigned char)((size >> 24) & 0xff);
1748 len = 5;
1749 }
1750 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001751 PyErr_SetString(PyExc_OverflowError,
1752 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 return -1; /* string too large */
1754 }
1755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001756 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001757 return -1;
1758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001759 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760 return -1;
1761
1762 if (memo_put(self, obj) < 0)
1763 return -1;
1764
1765 return 0;
1766 }
1767}
1768
1769/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1770 backslash and newline characters to \uXXXX escapes. */
1771static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001772raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001773{
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001774 static const char *hexdigits = "0123456789abcdef";
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001775 PyObject *repr, *result;
1776 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001777 Py_ssize_t i, size, expandsize;
1778 void *data;
1779 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001781 if (PyUnicode_READY(obj))
1782 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001783
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001784 size = PyUnicode_GET_LENGTH(obj);
1785 data = PyUnicode_DATA(obj);
1786 kind = PyUnicode_KIND(obj);
1787 if (kind == PyUnicode_4BYTE_KIND)
1788 expandsize = 10;
1789 else
1790 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001791
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001792 if (size > PY_SSIZE_T_MAX / expandsize)
1793 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001794 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001795 if (repr == NULL)
1796 return NULL;
1797 if (size == 0)
1798 goto done;
1799
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001800 p = PyByteArray_AS_STRING(repr);
1801 for (i=0; i < size; i++) {
1802 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001803 /* Map 32-bit characters to '\Uxxxxxxxx' */
1804 if (ch >= 0x10000) {
1805 *p++ = '\\';
1806 *p++ = 'U';
1807 *p++ = hexdigits[(ch >> 28) & 0xf];
1808 *p++ = hexdigits[(ch >> 24) & 0xf];
1809 *p++ = hexdigits[(ch >> 20) & 0xf];
1810 *p++ = hexdigits[(ch >> 16) & 0xf];
1811 *p++ = hexdigits[(ch >> 12) & 0xf];
1812 *p++ = hexdigits[(ch >> 8) & 0xf];
1813 *p++ = hexdigits[(ch >> 4) & 0xf];
1814 *p++ = hexdigits[ch & 15];
1815 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001816 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001817 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001818 *p++ = '\\';
1819 *p++ = 'u';
1820 *p++ = hexdigits[(ch >> 12) & 0xf];
1821 *p++ = hexdigits[(ch >> 8) & 0xf];
1822 *p++ = hexdigits[(ch >> 4) & 0xf];
1823 *p++ = hexdigits[ch & 15];
1824 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001825 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001826 else
1827 *p++ = (char) ch;
1828 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001829 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001830
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001831done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001832 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001833 Py_DECREF(repr);
1834 return result;
1835}
1836
1837static int
1838save_unicode(PicklerObject *self, PyObject *obj)
1839{
1840 Py_ssize_t size;
1841 PyObject *encoded = NULL;
1842
1843 if (self->bin) {
1844 char pdata[5];
1845
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001846 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001847 if (encoded == NULL)
1848 goto error;
1849
1850 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001851 if (size > 0xffffffffL) {
1852 PyErr_SetString(PyExc_OverflowError,
1853 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001854 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001855 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001856
1857 pdata[0] = BINUNICODE;
1858 pdata[1] = (unsigned char)(size & 0xff);
1859 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1860 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1861 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001863 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001864 goto error;
1865
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001866 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001867 goto error;
1868 }
1869 else {
1870 const char unicode_op = UNICODE;
1871
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001872 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001873 if (encoded == NULL)
1874 goto error;
1875
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001876 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001877 goto error;
1878
1879 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001880 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001881 goto error;
1882
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001883 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001884 goto error;
1885 }
1886 if (memo_put(self, obj) < 0)
1887 goto error;
1888
1889 Py_DECREF(encoded);
1890 return 0;
1891
1892 error:
1893 Py_XDECREF(encoded);
1894 return -1;
1895}
1896
1897/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1898static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001899store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001900{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001901 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902
1903 assert(PyTuple_Size(t) == len);
1904
1905 for (i = 0; i < len; i++) {
1906 PyObject *element = PyTuple_GET_ITEM(t, i);
1907
1908 if (element == NULL)
1909 return -1;
1910 if (save(self, element, 0) < 0)
1911 return -1;
1912 }
1913
1914 return 0;
1915}
1916
1917/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1918 * used across protocols to minimize the space needed to pickle them.
1919 * Tuples are also the only builtin immutable type that can be recursive
1920 * (a tuple can be reached from itself), and that requires some subtle
1921 * magic so that it works in all cases. IOW, this is a long routine.
1922 */
1923static int
1924save_tuple(PicklerObject *self, PyObject *obj)
1925{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001926 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001927
1928 const char mark_op = MARK;
1929 const char tuple_op = TUPLE;
1930 const char pop_op = POP;
1931 const char pop_mark_op = POP_MARK;
1932 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1933
1934 if ((len = PyTuple_Size(obj)) < 0)
1935 return -1;
1936
1937 if (len == 0) {
1938 char pdata[2];
1939
1940 if (self->proto) {
1941 pdata[0] = EMPTY_TUPLE;
1942 len = 1;
1943 }
1944 else {
1945 pdata[0] = MARK;
1946 pdata[1] = TUPLE;
1947 len = 2;
1948 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001949 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001950 return -1;
1951 return 0;
1952 }
1953
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001954 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001955 * saving the tuple elements, the tuple must be recursive, in
1956 * which case we'll pop everything we put on the stack, and fetch
1957 * its value from the memo.
1958 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001959 if (len <= 3 && self->proto >= 2) {
1960 /* Use TUPLE{1,2,3} opcodes. */
1961 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001962 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001963
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001964 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001965 /* pop the len elements */
1966 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001967 if (_Pickler_Write(self, &pop_op, 1) < 0)
1968 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001970 if (memo_get(self, obj) < 0)
1971 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 return 0;
1974 }
1975 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001976 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1977 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978 }
1979 goto memoize;
1980 }
1981
1982 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1983 * Generate MARK e1 e2 ... TUPLE
1984 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001985 if (_Pickler_Write(self, &mark_op, 1) < 0)
1986 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001987
1988 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001989 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001991 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992 /* pop the stack stuff we pushed */
1993 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
1995 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996 }
1997 else {
1998 /* Note that we pop one more than len, to remove
1999 * the MARK too.
2000 */
2001 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002002 if (_Pickler_Write(self, &pop_op, 1) < 0)
2003 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004 }
2005 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002006 if (memo_get(self, obj) < 0)
2007 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002009 return 0;
2010 }
2011 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002012 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 }
2015
2016 memoize:
2017 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002020 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021}
2022
2023/* iter is an iterator giving items, and we batch up chunks of
2024 * MARK item item ... item APPENDS
2025 * opcode sequences. Calling code should have arranged to first create an
2026 * empty list, or list-like object, for the APPENDS to operate on.
2027 * Returns 0 on success, <0 on error.
2028 */
2029static int
2030batch_list(PicklerObject *self, PyObject *iter)
2031{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002032 PyObject *obj = NULL;
2033 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 int i, n;
2035
2036 const char mark_op = MARK;
2037 const char append_op = APPEND;
2038 const char appends_op = APPENDS;
2039
2040 assert(iter != NULL);
2041
2042 /* XXX: I think this function could be made faster by avoiding the
2043 iterator interface and fetching objects directly from list using
2044 PyList_GET_ITEM.
2045 */
2046
2047 if (self->proto == 0) {
2048 /* APPENDS isn't available; do one at a time. */
2049 for (;;) {
2050 obj = PyIter_Next(iter);
2051 if (obj == NULL) {
2052 if (PyErr_Occurred())
2053 return -1;
2054 break;
2055 }
2056 i = save(self, obj, 0);
2057 Py_DECREF(obj);
2058 if (i < 0)
2059 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002060 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002061 return -1;
2062 }
2063 return 0;
2064 }
2065
2066 /* proto > 0: write in batches of BATCHSIZE. */
2067 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002068 /* Get first item */
2069 firstitem = PyIter_Next(iter);
2070 if (firstitem == NULL) {
2071 if (PyErr_Occurred())
2072 goto error;
2073
2074 /* nothing more to add */
2075 break;
2076 }
2077
2078 /* Try to get a second item */
2079 obj = PyIter_Next(iter);
2080 if (obj == NULL) {
2081 if (PyErr_Occurred())
2082 goto error;
2083
2084 /* Only one item to write */
2085 if (save(self, firstitem, 0) < 0)
2086 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002087 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002088 goto error;
2089 Py_CLEAR(firstitem);
2090 break;
2091 }
2092
2093 /* More than one item to write */
2094
2095 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002096 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002097 goto error;
2098
2099 if (save(self, firstitem, 0) < 0)
2100 goto error;
2101 Py_CLEAR(firstitem);
2102 n = 1;
2103
2104 /* Fetch and save up to BATCHSIZE items */
2105 while (obj) {
2106 if (save(self, obj, 0) < 0)
2107 goto error;
2108 Py_CLEAR(obj);
2109 n += 1;
2110
2111 if (n == BATCHSIZE)
2112 break;
2113
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002114 obj = PyIter_Next(iter);
2115 if (obj == NULL) {
2116 if (PyErr_Occurred())
2117 goto error;
2118 break;
2119 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002120 }
2121
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002122 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002123 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002124
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002125 } while (n == BATCHSIZE);
2126 return 0;
2127
2128 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002129 Py_XDECREF(firstitem);
2130 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002131 return -1;
2132}
2133
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002134/* This is a variant of batch_list() above, specialized for lists (with no
2135 * support for list subclasses). Like batch_list(), we batch up chunks of
2136 * MARK item item ... item APPENDS
2137 * opcode sequences. Calling code should have arranged to first create an
2138 * empty list, or list-like object, for the APPENDS to operate on.
2139 * Returns 0 on success, -1 on error.
2140 *
2141 * This version is considerably faster than batch_list(), if less general.
2142 *
2143 * Note that this only works for protocols > 0.
2144 */
2145static int
2146batch_list_exact(PicklerObject *self, PyObject *obj)
2147{
2148 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002149 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002150
2151 const char append_op = APPEND;
2152 const char appends_op = APPENDS;
2153 const char mark_op = MARK;
2154
2155 assert(obj != NULL);
2156 assert(self->proto > 0);
2157 assert(PyList_CheckExact(obj));
2158
2159 if (PyList_GET_SIZE(obj) == 1) {
2160 item = PyList_GET_ITEM(obj, 0);
2161 if (save(self, item, 0) < 0)
2162 return -1;
2163 if (_Pickler_Write(self, &append_op, 1) < 0)
2164 return -1;
2165 return 0;
2166 }
2167
2168 /* Write in batches of BATCHSIZE. */
2169 total = 0;
2170 do {
2171 this_batch = 0;
2172 if (_Pickler_Write(self, &mark_op, 1) < 0)
2173 return -1;
2174 while (total < PyList_GET_SIZE(obj)) {
2175 item = PyList_GET_ITEM(obj, total);
2176 if (save(self, item, 0) < 0)
2177 return -1;
2178 total++;
2179 if (++this_batch == BATCHSIZE)
2180 break;
2181 }
2182 if (_Pickler_Write(self, &appends_op, 1) < 0)
2183 return -1;
2184
2185 } while (total < PyList_GET_SIZE(obj));
2186
2187 return 0;
2188}
2189
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002190static int
2191save_list(PicklerObject *self, PyObject *obj)
2192{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002193 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002194 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002195 int status = 0;
2196
2197 if (self->fast && !fast_save_enter(self, obj))
2198 goto error;
2199
2200 /* Create an empty list. */
2201 if (self->bin) {
2202 header[0] = EMPTY_LIST;
2203 len = 1;
2204 }
2205 else {
2206 header[0] = MARK;
2207 header[1] = LIST;
2208 len = 2;
2209 }
2210
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002211 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002212 goto error;
2213
2214 /* Get list length, and bow out early if empty. */
2215 if ((len = PyList_Size(obj)) < 0)
2216 goto error;
2217
2218 if (memo_put(self, obj) < 0)
2219 goto error;
2220
2221 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002222 /* Materialize the list elements. */
2223 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002224 if (Py_EnterRecursiveCall(" while pickling an object"))
2225 goto error;
2226 status = batch_list_exact(self, obj);
2227 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002228 } else {
2229 PyObject *iter = PyObject_GetIter(obj);
2230 if (iter == NULL)
2231 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002232
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002233 if (Py_EnterRecursiveCall(" while pickling an object")) {
2234 Py_DECREF(iter);
2235 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002236 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002237 status = batch_list(self, iter);
2238 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002239 Py_DECREF(iter);
2240 }
2241 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002242 if (0) {
2243 error:
2244 status = -1;
2245 }
2246
2247 if (self->fast && !fast_save_leave(self, obj))
2248 status = -1;
2249
2250 return status;
2251}
2252
2253/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2254 * MARK key value ... key value SETITEMS
2255 * opcode sequences. Calling code should have arranged to first create an
2256 * empty dict, or dict-like object, for the SETITEMS to operate on.
2257 * Returns 0 on success, <0 on error.
2258 *
2259 * This is very much like batch_list(). The difference between saving
2260 * elements directly, and picking apart two-tuples, is so long-winded at
2261 * the C level, though, that attempts to combine these routines were too
2262 * ugly to bear.
2263 */
2264static int
2265batch_dict(PicklerObject *self, PyObject *iter)
2266{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002267 PyObject *obj = NULL;
2268 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002269 int i, n;
2270
2271 const char mark_op = MARK;
2272 const char setitem_op = SETITEM;
2273 const char setitems_op = SETITEMS;
2274
2275 assert(iter != NULL);
2276
2277 if (self->proto == 0) {
2278 /* SETITEMS isn't available; do one at a time. */
2279 for (;;) {
2280 obj = PyIter_Next(iter);
2281 if (obj == NULL) {
2282 if (PyErr_Occurred())
2283 return -1;
2284 break;
2285 }
2286 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2287 PyErr_SetString(PyExc_TypeError, "dict items "
2288 "iterator must return 2-tuples");
2289 return -1;
2290 }
2291 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2292 if (i >= 0)
2293 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2294 Py_DECREF(obj);
2295 if (i < 0)
2296 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002297 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002298 return -1;
2299 }
2300 return 0;
2301 }
2302
2303 /* proto > 0: write in batches of BATCHSIZE. */
2304 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002305 /* Get first item */
2306 firstitem = PyIter_Next(iter);
2307 if (firstitem == NULL) {
2308 if (PyErr_Occurred())
2309 goto error;
2310
2311 /* nothing more to add */
2312 break;
2313 }
2314 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2315 PyErr_SetString(PyExc_TypeError, "dict items "
2316 "iterator must return 2-tuples");
2317 goto error;
2318 }
2319
2320 /* Try to get a second item */
2321 obj = PyIter_Next(iter);
2322 if (obj == NULL) {
2323 if (PyErr_Occurred())
2324 goto error;
2325
2326 /* Only one item to write */
2327 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2328 goto error;
2329 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2330 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002331 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002332 goto error;
2333 Py_CLEAR(firstitem);
2334 break;
2335 }
2336
2337 /* More than one item to write */
2338
2339 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002340 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002341 goto error;
2342
2343 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2344 goto error;
2345 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2346 goto error;
2347 Py_CLEAR(firstitem);
2348 n = 1;
2349
2350 /* Fetch and save up to BATCHSIZE items */
2351 while (obj) {
2352 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2353 PyErr_SetString(PyExc_TypeError, "dict items "
2354 "iterator must return 2-tuples");
2355 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002356 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002357 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2358 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2359 goto error;
2360 Py_CLEAR(obj);
2361 n += 1;
2362
2363 if (n == BATCHSIZE)
2364 break;
2365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002366 obj = PyIter_Next(iter);
2367 if (obj == NULL) {
2368 if (PyErr_Occurred())
2369 goto error;
2370 break;
2371 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002372 }
2373
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002374 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002375 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002376
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002377 } while (n == BATCHSIZE);
2378 return 0;
2379
2380 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002381 Py_XDECREF(firstitem);
2382 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002383 return -1;
2384}
2385
Collin Winter5c9b02d2009-05-25 05:43:30 +00002386/* This is a variant of batch_dict() above that specializes for dicts, with no
2387 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2388 * MARK key value ... key value SETITEMS
2389 * opcode sequences. Calling code should have arranged to first create an
2390 * empty dict, or dict-like object, for the SETITEMS to operate on.
2391 * Returns 0 on success, -1 on error.
2392 *
2393 * Note that this currently doesn't work for protocol 0.
2394 */
2395static int
2396batch_dict_exact(PicklerObject *self, PyObject *obj)
2397{
2398 PyObject *key = NULL, *value = NULL;
2399 int i;
2400 Py_ssize_t dict_size, ppos = 0;
2401
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002402 const char mark_op = MARK;
2403 const char setitem_op = SETITEM;
2404 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002405
2406 assert(obj != NULL);
2407 assert(self->proto > 0);
2408
2409 dict_size = PyDict_Size(obj);
2410
2411 /* Special-case len(d) == 1 to save space. */
2412 if (dict_size == 1) {
2413 PyDict_Next(obj, &ppos, &key, &value);
2414 if (save(self, key, 0) < 0)
2415 return -1;
2416 if (save(self, value, 0) < 0)
2417 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002418 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002419 return -1;
2420 return 0;
2421 }
2422
2423 /* Write in batches of BATCHSIZE. */
2424 do {
2425 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002426 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002427 return -1;
2428 while (PyDict_Next(obj, &ppos, &key, &value)) {
2429 if (save(self, key, 0) < 0)
2430 return -1;
2431 if (save(self, value, 0) < 0)
2432 return -1;
2433 if (++i == BATCHSIZE)
2434 break;
2435 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002436 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002437 return -1;
2438 if (PyDict_Size(obj) != dict_size) {
2439 PyErr_Format(
2440 PyExc_RuntimeError,
2441 "dictionary changed size during iteration");
2442 return -1;
2443 }
2444
2445 } while (i == BATCHSIZE);
2446 return 0;
2447}
2448
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002449static int
2450save_dict(PicklerObject *self, PyObject *obj)
2451{
2452 PyObject *items, *iter;
2453 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002454 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002455 int status = 0;
2456
2457 if (self->fast && !fast_save_enter(self, obj))
2458 goto error;
2459
2460 /* Create an empty dict. */
2461 if (self->bin) {
2462 header[0] = EMPTY_DICT;
2463 len = 1;
2464 }
2465 else {
2466 header[0] = MARK;
2467 header[1] = DICT;
2468 len = 2;
2469 }
2470
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002471 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002472 goto error;
2473
2474 /* Get dict size, and bow out early if empty. */
2475 if ((len = PyDict_Size(obj)) < 0)
2476 goto error;
2477
2478 if (memo_put(self, obj) < 0)
2479 goto error;
2480
2481 if (len != 0) {
2482 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002483 if (PyDict_CheckExact(obj) && self->proto > 0) {
2484 /* We can take certain shortcuts if we know this is a dict and
2485 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002486 if (Py_EnterRecursiveCall(" while pickling an object"))
2487 goto error;
2488 status = batch_dict_exact(self, obj);
2489 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002490 } else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02002491 _Py_identifier(items);
2492
2493 items = _PyObject_CallMethodId(obj, &PyId_items, "()");
Collin Winter5c9b02d2009-05-25 05:43:30 +00002494 if (items == NULL)
2495 goto error;
2496 iter = PyObject_GetIter(items);
2497 Py_DECREF(items);
2498 if (iter == NULL)
2499 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002500 if (Py_EnterRecursiveCall(" while pickling an object")) {
2501 Py_DECREF(iter);
2502 goto error;
2503 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002504 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002505 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002506 Py_DECREF(iter);
2507 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002508 }
2509
2510 if (0) {
2511 error:
2512 status = -1;
2513 }
2514
2515 if (self->fast && !fast_save_leave(self, obj))
2516 status = -1;
2517
2518 return status;
2519}
2520
2521static int
2522save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2523{
2524 static PyObject *name_str = NULL;
2525 PyObject *global_name = NULL;
2526 PyObject *module_name = NULL;
2527 PyObject *module = NULL;
2528 PyObject *cls;
2529 int status = 0;
2530
2531 const char global_op = GLOBAL;
2532
2533 if (name_str == NULL) {
2534 name_str = PyUnicode_InternFromString("__name__");
2535 if (name_str == NULL)
2536 goto error;
2537 }
2538
2539 if (name) {
2540 global_name = name;
2541 Py_INCREF(global_name);
2542 }
2543 else {
2544 global_name = PyObject_GetAttr(obj, name_str);
2545 if (global_name == NULL)
2546 goto error;
2547 }
2548
2549 module_name = whichmodule(obj, global_name);
2550 if (module_name == NULL)
2551 goto error;
2552
2553 /* XXX: Change to use the import C API directly with level=0 to disallow
2554 relative imports.
2555
2556 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2557 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2558 custom import functions (IMHO, this would be a nice security
2559 feature). The import C API would need to be extended to support the
2560 extra parameters of __import__ to fix that. */
2561 module = PyImport_Import(module_name);
2562 if (module == NULL) {
2563 PyErr_Format(PicklingError,
2564 "Can't pickle %R: import of module %R failed",
2565 obj, module_name);
2566 goto error;
2567 }
2568 cls = PyObject_GetAttr(module, global_name);
2569 if (cls == NULL) {
2570 PyErr_Format(PicklingError,
2571 "Can't pickle %R: attribute lookup %S.%S failed",
2572 obj, module_name, global_name);
2573 goto error;
2574 }
2575 if (cls != obj) {
2576 Py_DECREF(cls);
2577 PyErr_Format(PicklingError,
2578 "Can't pickle %R: it's not the same object as %S.%S",
2579 obj, module_name, global_name);
2580 goto error;
2581 }
2582 Py_DECREF(cls);
2583
2584 if (self->proto >= 2) {
2585 /* See whether this is in the extension registry, and if
2586 * so generate an EXT opcode.
2587 */
2588 PyObject *code_obj; /* extension code as Python object */
2589 long code; /* extension code as C value */
2590 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002591 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002592
2593 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2594 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2595 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2596 /* The object is not registered in the extension registry.
2597 This is the most likely code path. */
2598 if (code_obj == NULL)
2599 goto gen_global;
2600
2601 /* XXX: pickle.py doesn't check neither the type, nor the range
2602 of the value returned by the extension_registry. It should for
2603 consistency. */
2604
2605 /* Verify code_obj has the right type and value. */
2606 if (!PyLong_Check(code_obj)) {
2607 PyErr_Format(PicklingError,
2608 "Can't pickle %R: extension code %R isn't an integer",
2609 obj, code_obj);
2610 goto error;
2611 }
2612 code = PyLong_AS_LONG(code_obj);
2613 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002614 if (!PyErr_Occurred())
2615 PyErr_Format(PicklingError,
2616 "Can't pickle %R: extension code %ld is out of range",
2617 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002618 goto error;
2619 }
2620
2621 /* Generate an EXT opcode. */
2622 if (code <= 0xff) {
2623 pdata[0] = EXT1;
2624 pdata[1] = (unsigned char)code;
2625 n = 2;
2626 }
2627 else if (code <= 0xffff) {
2628 pdata[0] = EXT2;
2629 pdata[1] = (unsigned char)(code & 0xff);
2630 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2631 n = 3;
2632 }
2633 else {
2634 pdata[0] = EXT4;
2635 pdata[1] = (unsigned char)(code & 0xff);
2636 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2637 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2638 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2639 n = 5;
2640 }
2641
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002642 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002643 goto error;
2644 }
2645 else {
2646 /* Generate a normal global opcode if we are using a pickle
2647 protocol <= 2, or if the object is not registered in the
2648 extension registry. */
2649 PyObject *encoded;
2650 PyObject *(*unicode_encoder)(PyObject *);
2651
2652 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002653 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002654 goto error;
2655
2656 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2657 the module name and the global name using UTF-8. We do so only when
2658 we are using the pickle protocol newer than version 3. This is to
2659 ensure compatibility with older Unpickler running on Python 2.x. */
2660 if (self->proto >= 3) {
2661 unicode_encoder = PyUnicode_AsUTF8String;
2662 }
2663 else {
2664 unicode_encoder = PyUnicode_AsASCIIString;
2665 }
2666
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002667 /* For protocol < 3 and if the user didn't request against doing so,
2668 we convert module names to the old 2.x module names. */
2669 if (self->fix_imports) {
2670 PyObject *key;
2671 PyObject *item;
2672
2673 key = PyTuple_Pack(2, module_name, global_name);
2674 if (key == NULL)
2675 goto error;
2676 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2677 Py_DECREF(key);
2678 if (item) {
2679 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2680 PyErr_Format(PyExc_RuntimeError,
2681 "_compat_pickle.REVERSE_NAME_MAPPING values "
2682 "should be 2-tuples, not %.200s",
2683 Py_TYPE(item)->tp_name);
2684 goto error;
2685 }
2686 Py_CLEAR(module_name);
2687 Py_CLEAR(global_name);
2688 module_name = PyTuple_GET_ITEM(item, 0);
2689 global_name = PyTuple_GET_ITEM(item, 1);
2690 if (!PyUnicode_Check(module_name) ||
2691 !PyUnicode_Check(global_name)) {
2692 PyErr_Format(PyExc_RuntimeError,
2693 "_compat_pickle.REVERSE_NAME_MAPPING values "
2694 "should be pairs of str, not (%.200s, %.200s)",
2695 Py_TYPE(module_name)->tp_name,
2696 Py_TYPE(global_name)->tp_name);
2697 goto error;
2698 }
2699 Py_INCREF(module_name);
2700 Py_INCREF(global_name);
2701 }
2702 else if (PyErr_Occurred()) {
2703 goto error;
2704 }
2705
2706 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2707 if (item) {
2708 if (!PyUnicode_Check(item)) {
2709 PyErr_Format(PyExc_RuntimeError,
2710 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2711 "should be strings, not %.200s",
2712 Py_TYPE(item)->tp_name);
2713 goto error;
2714 }
2715 Py_CLEAR(module_name);
2716 module_name = item;
2717 Py_INCREF(module_name);
2718 }
2719 else if (PyErr_Occurred()) {
2720 goto error;
2721 }
2722 }
2723
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002724 /* Save the name of the module. */
2725 encoded = unicode_encoder(module_name);
2726 if (encoded == NULL) {
2727 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2728 PyErr_Format(PicklingError,
2729 "can't pickle module identifier '%S' using "
2730 "pickle protocol %i", module_name, self->proto);
2731 goto error;
2732 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002733 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002734 PyBytes_GET_SIZE(encoded)) < 0) {
2735 Py_DECREF(encoded);
2736 goto error;
2737 }
2738 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002739 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002740 goto error;
2741
2742 /* Save the name of the global. */
2743 encoded = unicode_encoder(global_name);
2744 if (encoded == NULL) {
2745 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2746 PyErr_Format(PicklingError,
2747 "can't pickle global identifier '%S' using "
2748 "pickle protocol %i", global_name, self->proto);
2749 goto error;
2750 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002751 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002752 PyBytes_GET_SIZE(encoded)) < 0) {
2753 Py_DECREF(encoded);
2754 goto error;
2755 }
2756 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002757 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002758 goto error;
2759
2760 /* Memoize the object. */
2761 if (memo_put(self, obj) < 0)
2762 goto error;
2763 }
2764
2765 if (0) {
2766 error:
2767 status = -1;
2768 }
2769 Py_XDECREF(module_name);
2770 Py_XDECREF(global_name);
2771 Py_XDECREF(module);
2772
2773 return status;
2774}
2775
2776static int
2777save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2778{
2779 PyObject *pid = NULL;
2780 int status = 0;
2781
2782 const char persid_op = PERSID;
2783 const char binpersid_op = BINPERSID;
2784
2785 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002786 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002787 if (pid == NULL)
2788 return -1;
2789
2790 if (pid != Py_None) {
2791 if (self->bin) {
2792 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002793 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002794 goto error;
2795 }
2796 else {
2797 PyObject *pid_str = NULL;
2798 char *pid_ascii_bytes;
2799 Py_ssize_t size;
2800
2801 pid_str = PyObject_Str(pid);
2802 if (pid_str == NULL)
2803 goto error;
2804
2805 /* XXX: Should it check whether the persistent id only contains
2806 ASCII characters? And what if the pid contains embedded
2807 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002808 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002809 Py_DECREF(pid_str);
2810 if (pid_ascii_bytes == NULL)
2811 goto error;
2812
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002813 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2814 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2815 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002816 goto error;
2817 }
2818 status = 1;
2819 }
2820
2821 if (0) {
2822 error:
2823 status = -1;
2824 }
2825 Py_XDECREF(pid);
2826
2827 return status;
2828}
2829
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002830static PyObject *
2831get_class(PyObject *obj)
2832{
2833 PyObject *cls;
2834 static PyObject *str_class;
2835
2836 if (str_class == NULL) {
2837 str_class = PyUnicode_InternFromString("__class__");
2838 if (str_class == NULL)
2839 return NULL;
2840 }
2841 cls = PyObject_GetAttr(obj, str_class);
2842 if (cls == NULL) {
2843 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2844 PyErr_Clear();
2845 cls = (PyObject *) Py_TYPE(obj);
2846 Py_INCREF(cls);
2847 }
2848 }
2849 return cls;
2850}
2851
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002852/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2853 * appropriate __reduce__ method for obj.
2854 */
2855static int
2856save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2857{
2858 PyObject *callable;
2859 PyObject *argtup;
2860 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002861 PyObject *listitems = Py_None;
2862 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002863 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002864
2865 int use_newobj = self->proto >= 2;
2866
2867 const char reduce_op = REDUCE;
2868 const char build_op = BUILD;
2869 const char newobj_op = NEWOBJ;
2870
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002871 size = PyTuple_Size(args);
2872 if (size < 2 || size > 5) {
2873 PyErr_SetString(PicklingError, "tuple returned by "
2874 "__reduce__ must contain 2 through 5 elements");
2875 return -1;
2876 }
2877
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002878 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2879 &callable, &argtup, &state, &listitems, &dictitems))
2880 return -1;
2881
2882 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002883 PyErr_SetString(PicklingError, "first item of the tuple "
2884 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 return -1;
2886 }
2887 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002888 PyErr_SetString(PicklingError, "second item of the tuple "
2889 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002890 return -1;
2891 }
2892
2893 if (state == Py_None)
2894 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002895
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002896 if (listitems == Py_None)
2897 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002898 else if (!PyIter_Check(listitems)) {
2899 PyErr_Format(PicklingError, "Fourth element of tuple"
2900 "returned by __reduce__ must be an iterator, not %s",
2901 Py_TYPE(listitems)->tp_name);
2902 return -1;
2903 }
2904
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002905 if (dictitems == Py_None)
2906 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002907 else if (!PyIter_Check(dictitems)) {
2908 PyErr_Format(PicklingError, "Fifth element of tuple"
2909 "returned by __reduce__ must be an iterator, not %s",
2910 Py_TYPE(dictitems)->tp_name);
2911 return -1;
2912 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002913
2914 /* Protocol 2 special case: if callable's name is __newobj__, use
2915 NEWOBJ. */
2916 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002917 static PyObject *newobj_str = NULL, *name_str = NULL;
2918 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002919
2920 if (newobj_str == NULL) {
2921 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002922 name_str = PyUnicode_InternFromString("__name__");
2923 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002924 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002925 }
2926
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002927 name = PyObject_GetAttr(callable, name_str);
2928 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002929 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2930 PyErr_Clear();
2931 else
2932 return -1;
2933 use_newobj = 0;
2934 }
2935 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002936 use_newobj = PyUnicode_Check(name) &&
2937 PyUnicode_Compare(name, newobj_str) == 0;
2938 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002939 }
2940 }
2941 if (use_newobj) {
2942 PyObject *cls;
2943 PyObject *newargtup;
2944 PyObject *obj_class;
2945 int p;
2946
2947 /* Sanity checks. */
2948 if (Py_SIZE(argtup) < 1) {
2949 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2950 return -1;
2951 }
2952
2953 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002954 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002955 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002956 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002957 return -1;
2958 }
2959
2960 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002961 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002962 p = obj_class != cls; /* true iff a problem */
2963 Py_DECREF(obj_class);
2964 if (p) {
2965 PyErr_SetString(PicklingError, "args[0] from "
2966 "__newobj__ args has the wrong class");
2967 return -1;
2968 }
2969 }
2970 /* XXX: These calls save() are prone to infinite recursion. Imagine
2971 what happen if the value returned by the __reduce__() method of
2972 some extension type contains another object of the same type. Ouch!
2973
2974 Here is a quick example, that I ran into, to illustrate what I
2975 mean:
2976
2977 >>> import pickle, copyreg
2978 >>> copyreg.dispatch_table.pop(complex)
2979 >>> pickle.dumps(1+2j)
2980 Traceback (most recent call last):
2981 ...
2982 RuntimeError: maximum recursion depth exceeded
2983
2984 Removing the complex class from copyreg.dispatch_table made the
2985 __reduce_ex__() method emit another complex object:
2986
2987 >>> (1+1j).__reduce_ex__(2)
2988 (<function __newobj__ at 0xb7b71c3c>,
2989 (<class 'complex'>, (1+1j)), None, None, None)
2990
2991 Thus when save() was called on newargstup (the 2nd item) recursion
2992 ensued. Of course, the bug was in the complex class which had a
2993 broken __getnewargs__() that emitted another complex object. But,
2994 the point, here, is it is quite easy to end up with a broken reduce
2995 function. */
2996
2997 /* Save the class and its __new__ arguments. */
2998 if (save(self, cls, 0) < 0)
2999 return -1;
3000
3001 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3002 if (newargtup == NULL)
3003 return -1;
3004
3005 p = save(self, newargtup, 0);
3006 Py_DECREF(newargtup);
3007 if (p < 0)
3008 return -1;
3009
3010 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003011 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003012 return -1;
3013 }
3014 else { /* Not using NEWOBJ. */
3015 if (save(self, callable, 0) < 0 ||
3016 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003017 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003018 return -1;
3019 }
3020
3021 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3022 the caller do not want to memoize the object. Not particularly useful,
3023 but that is to mimic the behavior save_reduce() in pickle.py when
3024 obj is None. */
3025 if (obj && memo_put(self, obj) < 0)
3026 return -1;
3027
3028 if (listitems && batch_list(self, listitems) < 0)
3029 return -1;
3030
3031 if (dictitems && batch_dict(self, dictitems) < 0)
3032 return -1;
3033
3034 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003035 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003036 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003037 return -1;
3038 }
3039
3040 return 0;
3041}
3042
3043static int
3044save(PicklerObject *self, PyObject *obj, int pers_save)
3045{
3046 PyTypeObject *type;
3047 PyObject *reduce_func = NULL;
3048 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003049 int status = 0;
3050
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003051 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003052 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003053
3054 /* The extra pers_save argument is necessary to avoid calling save_pers()
3055 on its returned object. */
3056 if (!pers_save && self->pers_func) {
3057 /* save_pers() returns:
3058 -1 to signal an error;
3059 0 if it did nothing successfully;
3060 1 if a persistent id was saved.
3061 */
3062 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3063 goto done;
3064 }
3065
3066 type = Py_TYPE(obj);
3067
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003068 /* The old cPickle had an optimization that used switch-case statement
3069 dispatching on the first letter of the type name. This has was removed
3070 since benchmarks shown that this optimization was actually slowing
3071 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003072
3073 /* Atom types; these aren't memoized, so don't check the memo. */
3074
3075 if (obj == Py_None) {
3076 status = save_none(self, obj);
3077 goto done;
3078 }
3079 else if (obj == Py_False || obj == Py_True) {
3080 status = save_bool(self, obj);
3081 goto done;
3082 }
3083 else if (type == &PyLong_Type) {
3084 status = save_long(self, obj);
3085 goto done;
3086 }
3087 else if (type == &PyFloat_Type) {
3088 status = save_float(self, obj);
3089 goto done;
3090 }
3091
3092 /* Check the memo to see if it has the object. If so, generate
3093 a GET (or BINGET) opcode, instead of pickling the object
3094 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003095 if (PyMemoTable_Get(self->memo, obj)) {
3096 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003097 goto error;
3098 goto done;
3099 }
3100
3101 if (type == &PyBytes_Type) {
3102 status = save_bytes(self, obj);
3103 goto done;
3104 }
3105 else if (type == &PyUnicode_Type) {
3106 status = save_unicode(self, obj);
3107 goto done;
3108 }
3109 else if (type == &PyDict_Type) {
3110 status = save_dict(self, obj);
3111 goto done;
3112 }
3113 else if (type == &PyList_Type) {
3114 status = save_list(self, obj);
3115 goto done;
3116 }
3117 else if (type == &PyTuple_Type) {
3118 status = save_tuple(self, obj);
3119 goto done;
3120 }
3121 else if (type == &PyType_Type) {
3122 status = save_global(self, obj, NULL);
3123 goto done;
3124 }
3125 else if (type == &PyFunction_Type) {
3126 status = save_global(self, obj, NULL);
3127 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3128 /* fall back to reduce */
3129 PyErr_Clear();
3130 }
3131 else {
3132 goto done;
3133 }
3134 }
3135 else if (type == &PyCFunction_Type) {
3136 status = save_global(self, obj, NULL);
3137 goto done;
3138 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003139
3140 /* XXX: This part needs some unit tests. */
3141
3142 /* Get a reduction callable, and call it. This may come from
3143 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3144 * or the object's __reduce__ method.
3145 */
3146 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3147 if (reduce_func != NULL) {
3148 /* Here, the reference count of the reduce_func object returned by
3149 PyDict_GetItem needs to be increased to be consistent with the one
3150 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3151 reduce_func at the end of the save() routine.
3152 */
3153 Py_INCREF(reduce_func);
3154 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003155 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003156 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003157 else if (PyType_IsSubtype(type, &PyType_Type)) {
3158 status = save_global(self, obj, NULL);
3159 goto done;
3160 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003161 else {
3162 static PyObject *reduce_str = NULL;
3163 static PyObject *reduce_ex_str = NULL;
3164
3165 /* Cache the name of the reduce methods. */
3166 if (reduce_str == NULL) {
3167 reduce_str = PyUnicode_InternFromString("__reduce__");
3168 if (reduce_str == NULL)
3169 goto error;
3170 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3171 if (reduce_ex_str == NULL)
3172 goto error;
3173 }
3174
3175 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3176 automatically defined as __reduce__. While this is convenient, this
3177 make it impossible to know which method was actually called. Of
3178 course, this is not a big deal. But still, it would be nice to let
3179 the user know which method was called when something go
3180 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3181 don't actually have to check for a __reduce__ method. */
3182
3183 /* Check for a __reduce_ex__ method. */
3184 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3185 if (reduce_func != NULL) {
3186 PyObject *proto;
3187 proto = PyLong_FromLong(self->proto);
3188 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003189 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003190 }
3191 }
3192 else {
3193 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3194 PyErr_Clear();
3195 else
3196 goto error;
3197 /* Check for a __reduce__ method. */
3198 reduce_func = PyObject_GetAttr(obj, reduce_str);
3199 if (reduce_func != NULL) {
3200 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3201 }
3202 else {
3203 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3204 type->tp_name, obj);
3205 goto error;
3206 }
3207 }
3208 }
3209
3210 if (reduce_value == NULL)
3211 goto error;
3212
3213 if (PyUnicode_Check(reduce_value)) {
3214 status = save_global(self, obj, reduce_value);
3215 goto done;
3216 }
3217
3218 if (!PyTuple_Check(reduce_value)) {
3219 PyErr_SetString(PicklingError,
3220 "__reduce__ must return a string or tuple");
3221 goto error;
3222 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003223
3224 status = save_reduce(self, reduce_value, obj);
3225
3226 if (0) {
3227 error:
3228 status = -1;
3229 }
3230 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003231 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003232 Py_XDECREF(reduce_func);
3233 Py_XDECREF(reduce_value);
3234
3235 return status;
3236}
3237
3238static int
3239dump(PicklerObject *self, PyObject *obj)
3240{
3241 const char stop_op = STOP;
3242
3243 if (self->proto >= 2) {
3244 char header[2];
3245
3246 header[0] = PROTO;
3247 assert(self->proto >= 0 && self->proto < 256);
3248 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003249 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003250 return -1;
3251 }
3252
3253 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003254 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003255 return -1;
3256
3257 return 0;
3258}
3259
3260PyDoc_STRVAR(Pickler_clear_memo_doc,
3261"clear_memo() -> None. Clears the pickler's \"memo\"."
3262"\n"
3263"The memo is the data structure that remembers which objects the\n"
3264"pickler has already seen, so that shared or recursive objects are\n"
3265"pickled by reference and not by value. This method is useful when\n"
3266"re-using picklers.");
3267
3268static PyObject *
3269Pickler_clear_memo(PicklerObject *self)
3270{
3271 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003272 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003273
3274 Py_RETURN_NONE;
3275}
3276
3277PyDoc_STRVAR(Pickler_dump_doc,
3278"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3279
3280static PyObject *
3281Pickler_dump(PicklerObject *self, PyObject *args)
3282{
3283 PyObject *obj;
3284
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003285 /* Check whether the Pickler was initialized correctly (issue3664).
3286 Developers often forget to call __init__() in their subclasses, which
3287 would trigger a segfault without this check. */
3288 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003289 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003290 "Pickler.__init__() was not called by %s.__init__()",
3291 Py_TYPE(self)->tp_name);
3292 return NULL;
3293 }
3294
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003295 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3296 return NULL;
3297
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003298 if (_Pickler_ClearBuffer(self) < 0)
3299 return NULL;
3300
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003301 if (dump(self, obj) < 0)
3302 return NULL;
3303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003304 if (_Pickler_FlushToFile(self) < 0)
3305 return NULL;
3306
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003307 Py_RETURN_NONE;
3308}
3309
3310static struct PyMethodDef Pickler_methods[] = {
3311 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3312 Pickler_dump_doc},
3313 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3314 Pickler_clear_memo_doc},
3315 {NULL, NULL} /* sentinel */
3316};
3317
3318static void
3319Pickler_dealloc(PicklerObject *self)
3320{
3321 PyObject_GC_UnTrack(self);
3322
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003323 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003324 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003325 Py_XDECREF(self->pers_func);
3326 Py_XDECREF(self->arg);
3327 Py_XDECREF(self->fast_memo);
3328
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003329 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003330
3331 Py_TYPE(self)->tp_free((PyObject *)self);
3332}
3333
3334static int
3335Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3336{
3337 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003338 Py_VISIT(self->pers_func);
3339 Py_VISIT(self->arg);
3340 Py_VISIT(self->fast_memo);
3341 return 0;
3342}
3343
3344static int
3345Pickler_clear(PicklerObject *self)
3346{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003347 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003348 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003349 Py_CLEAR(self->pers_func);
3350 Py_CLEAR(self->arg);
3351 Py_CLEAR(self->fast_memo);
3352
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003353 if (self->memo != NULL) {
3354 PyMemoTable *memo = self->memo;
3355 self->memo = NULL;
3356 PyMemoTable_Del(memo);
3357 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003358 return 0;
3359}
3360
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003361
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003362PyDoc_STRVAR(Pickler_doc,
3363"Pickler(file, protocol=None)"
3364"\n"
3365"This takes a binary file for writing a pickle data stream.\n"
3366"\n"
3367"The optional protocol argument tells the pickler to use the\n"
3368"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3369"protocol is 3; a backward-incompatible protocol designed for\n"
3370"Python 3.0.\n"
3371"\n"
3372"Specifying a negative protocol version selects the highest\n"
3373"protocol version supported. The higher the protocol used, the\n"
3374"more recent the version of Python needed to read the pickle\n"
3375"produced.\n"
3376"\n"
3377"The file argument must have a write() method that accepts a single\n"
3378"bytes argument. It can thus be a file object opened for binary\n"
3379"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003380"meets this interface.\n"
3381"\n"
3382"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3383"map the new Python 3.x names to the old module names used in Python\n"
3384"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003385
3386static int
3387Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3388{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003389 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003390 PyObject *file;
3391 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003392 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003394 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003395 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003396 return -1;
3397
3398 /* In case of multiple __init__() calls, clear previous content. */
3399 if (self->write != NULL)
3400 (void)Pickler_clear(self);
3401
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003402 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3403 return -1;
3404
3405 if (_Pickler_SetOutputStream(self, file) < 0)
3406 return -1;
3407
3408 /* memo and output_buffer may have already been created in _Pickler_New */
3409 if (self->memo == NULL) {
3410 self->memo = PyMemoTable_New();
3411 if (self->memo == NULL)
3412 return -1;
3413 }
3414 self->output_len = 0;
3415 if (self->output_buffer == NULL) {
3416 self->max_output_len = WRITE_BUF_SIZE;
3417 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3418 self->max_output_len);
3419 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003420 return -1;
3421 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003422
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003423 self->arg = NULL;
3424 self->fast = 0;
3425 self->fast_nesting = 0;
3426 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003427 self->pers_func = NULL;
3428 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3429 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3430 "persistent_id");
3431 if (self->pers_func == NULL)
3432 return -1;
3433 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003434 return 0;
3435}
3436
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003437/* Define a proxy object for the Pickler's internal memo object. This is to
3438 * avoid breaking code like:
3439 * pickler.memo.clear()
3440 * and
3441 * pickler.memo = saved_memo
3442 * Is this a good idea? Not really, but we don't want to break code that uses
3443 * it. Note that we don't implement the entire mapping API here. This is
3444 * intentional, as these should be treated as black-box implementation details.
3445 */
3446
3447typedef struct {
3448 PyObject_HEAD
3449 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3450} PicklerMemoProxyObject;
3451
3452PyDoc_STRVAR(pmp_clear_doc,
3453"memo.clear() -> None. Remove all items from memo.");
3454
3455static PyObject *
3456pmp_clear(PicklerMemoProxyObject *self)
3457{
3458 if (self->pickler->memo)
3459 PyMemoTable_Clear(self->pickler->memo);
3460 Py_RETURN_NONE;
3461}
3462
3463PyDoc_STRVAR(pmp_copy_doc,
3464"memo.copy() -> new_memo. Copy the memo to a new object.");
3465
3466static PyObject *
3467pmp_copy(PicklerMemoProxyObject *self)
3468{
3469 Py_ssize_t i;
3470 PyMemoTable *memo;
3471 PyObject *new_memo = PyDict_New();
3472 if (new_memo == NULL)
3473 return NULL;
3474
3475 memo = self->pickler->memo;
3476 for (i = 0; i < memo->mt_allocated; ++i) {
3477 PyMemoEntry entry = memo->mt_table[i];
3478 if (entry.me_key != NULL) {
3479 int status;
3480 PyObject *key, *value;
3481
3482 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003483 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003484
3485 if (key == NULL || value == NULL) {
3486 Py_XDECREF(key);
3487 Py_XDECREF(value);
3488 goto error;
3489 }
3490 status = PyDict_SetItem(new_memo, key, value);
3491 Py_DECREF(key);
3492 Py_DECREF(value);
3493 if (status < 0)
3494 goto error;
3495 }
3496 }
3497 return new_memo;
3498
3499 error:
3500 Py_XDECREF(new_memo);
3501 return NULL;
3502}
3503
3504PyDoc_STRVAR(pmp_reduce_doc,
3505"memo.__reduce__(). Pickling support.");
3506
3507static PyObject *
3508pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3509{
3510 PyObject *reduce_value, *dict_args;
3511 PyObject *contents = pmp_copy(self);
3512 if (contents == NULL)
3513 return NULL;
3514
3515 reduce_value = PyTuple_New(2);
3516 if (reduce_value == NULL) {
3517 Py_DECREF(contents);
3518 return NULL;
3519 }
3520 dict_args = PyTuple_New(1);
3521 if (dict_args == NULL) {
3522 Py_DECREF(contents);
3523 Py_DECREF(reduce_value);
3524 return NULL;
3525 }
3526 PyTuple_SET_ITEM(dict_args, 0, contents);
3527 Py_INCREF((PyObject *)&PyDict_Type);
3528 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3529 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3530 return reduce_value;
3531}
3532
3533static PyMethodDef picklerproxy_methods[] = {
3534 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3535 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3536 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3537 {NULL, NULL} /* sentinel */
3538};
3539
3540static void
3541PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3542{
3543 PyObject_GC_UnTrack(self);
3544 Py_XDECREF(self->pickler);
3545 PyObject_GC_Del((PyObject *)self);
3546}
3547
3548static int
3549PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3550 visitproc visit, void *arg)
3551{
3552 Py_VISIT(self->pickler);
3553 return 0;
3554}
3555
3556static int
3557PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3558{
3559 Py_CLEAR(self->pickler);
3560 return 0;
3561}
3562
3563static PyTypeObject PicklerMemoProxyType = {
3564 PyVarObject_HEAD_INIT(NULL, 0)
3565 "_pickle.PicklerMemoProxy", /*tp_name*/
3566 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3567 0,
3568 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3569 0, /* tp_print */
3570 0, /* tp_getattr */
3571 0, /* tp_setattr */
3572 0, /* tp_compare */
3573 0, /* tp_repr */
3574 0, /* tp_as_number */
3575 0, /* tp_as_sequence */
3576 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003577 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003578 0, /* tp_call */
3579 0, /* tp_str */
3580 PyObject_GenericGetAttr, /* tp_getattro */
3581 PyObject_GenericSetAttr, /* tp_setattro */
3582 0, /* tp_as_buffer */
3583 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3584 0, /* tp_doc */
3585 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3586 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3587 0, /* tp_richcompare */
3588 0, /* tp_weaklistoffset */
3589 0, /* tp_iter */
3590 0, /* tp_iternext */
3591 picklerproxy_methods, /* tp_methods */
3592};
3593
3594static PyObject *
3595PicklerMemoProxy_New(PicklerObject *pickler)
3596{
3597 PicklerMemoProxyObject *self;
3598
3599 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3600 if (self == NULL)
3601 return NULL;
3602 Py_INCREF(pickler);
3603 self->pickler = pickler;
3604 PyObject_GC_Track(self);
3605 return (PyObject *)self;
3606}
3607
3608/*****************************************************************************/
3609
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003610static PyObject *
3611Pickler_get_memo(PicklerObject *self)
3612{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003613 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003614}
3615
3616static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003617Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003618{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003619 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003620
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003621 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003622 PyErr_SetString(PyExc_TypeError,
3623 "attribute deletion is not supported");
3624 return -1;
3625 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003626
3627 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3628 PicklerObject *pickler =
3629 ((PicklerMemoProxyObject *)obj)->pickler;
3630
3631 new_memo = PyMemoTable_Copy(pickler->memo);
3632 if (new_memo == NULL)
3633 return -1;
3634 }
3635 else if (PyDict_Check(obj)) {
3636 Py_ssize_t i = 0;
3637 PyObject *key, *value;
3638
3639 new_memo = PyMemoTable_New();
3640 if (new_memo == NULL)
3641 return -1;
3642
3643 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003644 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003645 PyObject *memo_obj;
3646
3647 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3648 PyErr_SetString(PyExc_TypeError,
3649 "'memo' values must be 2-item tuples");
3650 goto error;
3651 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003652 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003653 if (memo_id == -1 && PyErr_Occurred())
3654 goto error;
3655 memo_obj = PyTuple_GET_ITEM(value, 1);
3656 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3657 goto error;
3658 }
3659 }
3660 else {
3661 PyErr_Format(PyExc_TypeError,
3662 "'memo' attribute must be an PicklerMemoProxy object"
3663 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003664 return -1;
3665 }
3666
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003667 PyMemoTable_Del(self->memo);
3668 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003669
3670 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003671
3672 error:
3673 if (new_memo)
3674 PyMemoTable_Del(new_memo);
3675 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003676}
3677
3678static PyObject *
3679Pickler_get_persid(PicklerObject *self)
3680{
3681 if (self->pers_func == NULL)
3682 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3683 else
3684 Py_INCREF(self->pers_func);
3685 return self->pers_func;
3686}
3687
3688static int
3689Pickler_set_persid(PicklerObject *self, PyObject *value)
3690{
3691 PyObject *tmp;
3692
3693 if (value == NULL) {
3694 PyErr_SetString(PyExc_TypeError,
3695 "attribute deletion is not supported");
3696 return -1;
3697 }
3698 if (!PyCallable_Check(value)) {
3699 PyErr_SetString(PyExc_TypeError,
3700 "persistent_id must be a callable taking one argument");
3701 return -1;
3702 }
3703
3704 tmp = self->pers_func;
3705 Py_INCREF(value);
3706 self->pers_func = value;
3707 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3708
3709 return 0;
3710}
3711
/* Attributes of Pickler objects that map directly onto int fields of
   PicklerObject ('bin' and 'fast'). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}  /* sentinel */
};
3717
/* Computed attributes of Pickler objects: 'memo' and 'persistent_id',
   each backed by a getter/setter pair defined above. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel */
};
3725
/* Type object for _pickle.Pickler.  The type is subclassable and
   GC-enabled; instances are set up by Pickler_init. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3768
Victor Stinner121aab42011-09-29 23:40:53 +02003769/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003770
3771 XXX: It would be nice to able to avoid Python function call overhead, by
3772 using directly the C version of find_class(), when find_class() is not
3773 overridden by a subclass. Although, this could become rather hackish. A
3774 simpler optimization would be to call the C function when self is not a
3775 subclass instance. */
3776static PyObject *
3777find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3778{
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02003779 _Py_identifier(find_class);
3780
3781 return _PyObject_CallMethodId((PyObject *)self, &PyId_find_class, "OO",
3782 module_name, global_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003783}
3784
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003785static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003786marker(UnpicklerObject *self)
3787{
3788 if (self->num_marks < 1) {
3789 PyErr_SetString(UnpicklingError, "could not find MARK");
3790 return -1;
3791 }
3792
3793 return self->marks[--self->num_marks];
3794}
3795
3796static int
3797load_none(UnpicklerObject *self)
3798{
3799 PDATA_APPEND(self->stack, Py_None, -1);
3800 return 0;
3801}
3802
3803static int
3804bad_readline(void)
3805{
3806 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3807 return -1;
3808}
3809
3810static int
3811load_int(UnpicklerObject *self)
3812{
3813 PyObject *value;
3814 char *endptr, *s;
3815 Py_ssize_t len;
3816 long x;
3817
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003818 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003819 return -1;
3820 if (len < 2)
3821 return bad_readline();
3822
3823 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003824 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003825 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003826 x = strtol(s, &endptr, 0);
3827
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003828 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003829 /* Hm, maybe we've got something long. Let's try reading
3830 * it as a Python long object. */
3831 errno = 0;
3832 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003833 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003834 if (value == NULL) {
3835 PyErr_SetString(PyExc_ValueError,
3836 "could not convert string to int");
3837 return -1;
3838 }
3839 }
3840 else {
3841 if (len == 3 && (x == 0 || x == 1)) {
3842 if ((value = PyBool_FromLong(x)) == NULL)
3843 return -1;
3844 }
3845 else {
3846 if ((value = PyLong_FromLong(x)) == NULL)
3847 return -1;
3848 }
3849 }
3850
3851 PDATA_PUSH(self->stack, value, -1);
3852 return 0;
3853}
3854
3855static int
3856load_bool(UnpicklerObject *self, PyObject *boolean)
3857{
3858 assert(boolean == Py_True || boolean == Py_False);
3859 PDATA_APPEND(self->stack, boolean, -1);
3860 return 0;
3861}
3862
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003863/* s contains x bytes of an unsigned little-endian integer. Return its value
3864 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3865 */
3866static Py_ssize_t
3867calc_binsize(char *bytes, int size)
3868{
3869 unsigned char *s = (unsigned char *)bytes;
3870 size_t x = 0;
3871
3872 assert(size == 4);
3873
3874 x = (size_t) s[0];
3875 x |= (size_t) s[1] << 8;
3876 x |= (size_t) s[2] << 16;
3877 x |= (size_t) s[3] << 24;
3878
3879 if (x > PY_SSIZE_T_MAX)
3880 return -1;
3881 else
3882 return (Py_ssize_t) x;
3883}
3884
/* Decode 'size' bytes of little-endian integer data at 'bytes' and return
 * the value as a C long.
 *
 * Obscure: when size is 1 or 2 the data is an unsigned integer, but when
 * size is 4 it is a signed (two's complement) one.  This is an historical
 * source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (undefined behaviour where
 * long is 32 bits wide), and the sign of a 4-byte value is applied
 * arithmetically, which gives the same result for any width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;
    int i;

    for (i = 0; i < size; i++) {
        acc |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed: a set bit 31 means the value is acc - 2**32.  Computed as
     * (low 31 bits) - 2**31 in two steps so nothing overflows a 32-bit
     * long.
     */
    if (size == 4 && (acc & 0x80000000UL) != 0) {
        return (long)(acc & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)acc;
}
3911
3912static int
3913load_binintx(UnpicklerObject *self, char *s, int size)
3914{
3915 PyObject *value;
3916 long x;
3917
3918 x = calc_binint(s, size);
3919
3920 if ((value = PyLong_FromLong(x)) == NULL)
3921 return -1;
3922
3923 PDATA_PUSH(self->stack, value, -1);
3924 return 0;
3925}
3926
3927static int
3928load_binint(UnpicklerObject *self)
3929{
3930 char *s;
3931
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003932 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003933 return -1;
3934
3935 return load_binintx(self, s, 4);
3936}
3937
3938static int
3939load_binint1(UnpicklerObject *self)
3940{
3941 char *s;
3942
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003943 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003944 return -1;
3945
3946 return load_binintx(self, s, 1);
3947}
3948
3949static int
3950load_binint2(UnpicklerObject *self)
3951{
3952 char *s;
3953
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003954 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003955 return -1;
3956
3957 return load_binintx(self, s, 2);
3958}
3959
3960static int
3961load_long(UnpicklerObject *self)
3962{
3963 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003964 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003965 Py_ssize_t len;
3966
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003967 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003968 return -1;
3969 if (len < 2)
3970 return bad_readline();
3971
Mark Dickinson8dd05142009-01-20 20:43:58 +00003972 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3973 the 'L' before calling PyLong_FromString. In order to maintain
3974 compatibility with Python 3.0.0, we don't actually *require*
3975 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003976 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003977 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003978 /* XXX: Should the base argument explicitly set to 10? */
3979 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003980 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003981 return -1;
3982
3983 PDATA_PUSH(self->stack, value, -1);
3984 return 0;
3985}
3986
3987/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3988 * data following.
3989 */
3990static int
3991load_counted_long(UnpicklerObject *self, int size)
3992{
3993 PyObject *value;
3994 char *nbytes;
3995 char *pdata;
3996
3997 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003998 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003999 return -1;
4000
4001 size = calc_binint(nbytes, size);
4002 if (size < 0) {
4003 /* Corrupt or hostile pickle -- we never write one like this */
4004 PyErr_SetString(UnpicklingError,
4005 "LONG pickle has negative byte count");
4006 return -1;
4007 }
4008
4009 if (size == 0)
4010 value = PyLong_FromLong(0L);
4011 else {
4012 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004013 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004014 return -1;
4015 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4016 1 /* little endian */ , 1 /* signed */ );
4017 }
4018 if (value == NULL)
4019 return -1;
4020 PDATA_PUSH(self->stack, value, -1);
4021 return 0;
4022}
4023
4024static int
4025load_float(UnpicklerObject *self)
4026{
4027 PyObject *value;
4028 char *endptr, *s;
4029 Py_ssize_t len;
4030 double d;
4031
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004032 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004033 return -1;
4034 if (len < 2)
4035 return bad_readline();
4036
4037 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004038 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4039 if (d == -1.0 && PyErr_Occurred())
4040 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004041 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004042 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4043 return -1;
4044 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004045 value = PyFloat_FromDouble(d);
4046 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004047 return -1;
4048
4049 PDATA_PUSH(self->stack, value, -1);
4050 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004051}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004052
4053static int
4054load_binfloat(UnpicklerObject *self)
4055{
4056 PyObject *value;
4057 double x;
4058 char *s;
4059
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004060 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004061 return -1;
4062
4063 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4064 if (x == -1.0 && PyErr_Occurred())
4065 return -1;
4066
4067 if ((value = PyFloat_FromDouble(x)) == NULL)
4068 return -1;
4069
4070 PDATA_PUSH(self->stack, value, -1);
4071 return 0;
4072}
4073
4074static int
4075load_string(UnpicklerObject *self)
4076{
4077 PyObject *bytes;
4078 PyObject *str = NULL;
4079 Py_ssize_t len;
4080 char *s, *p;
4081
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004082 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004083 return -1;
4084 if (len < 3)
4085 return bad_readline();
4086 if ((s = strdup(s)) == NULL) {
4087 PyErr_NoMemory();
4088 return -1;
4089 }
4090
4091 /* Strip outermost quotes */
4092 while (s[len - 1] <= ' ')
4093 len--;
4094 if (s[0] == '"' && s[len - 1] == '"') {
4095 s[len - 1] = '\0';
4096 p = s + 1;
4097 len -= 2;
4098 }
4099 else if (s[0] == '\'' && s[len - 1] == '\'') {
4100 s[len - 1] = '\0';
4101 p = s + 1;
4102 len -= 2;
4103 }
4104 else {
4105 free(s);
4106 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4107 return -1;
4108 }
4109
4110 /* Use the PyBytes API to decode the string, since that is what is used
4111 to encode, and then coerce the result to Unicode. */
4112 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4113 free(s);
4114 if (bytes == NULL)
4115 return -1;
4116 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4117 Py_DECREF(bytes);
4118 if (str == NULL)
4119 return -1;
4120
4121 PDATA_PUSH(self->stack, str, -1);
4122 return 0;
4123}
4124
4125static int
4126load_binbytes(UnpicklerObject *self)
4127{
4128 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004129 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004130 char *s;
4131
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004132 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004133 return -1;
4134
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004135 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004136 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004137 PyErr_Format(PyExc_OverflowError,
4138 "BINBYTES exceeds system's maximum size of %zd bytes",
4139 PY_SSIZE_T_MAX
4140 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004141 return -1;
4142 }
4143
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004144 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004145 return -1;
4146 bytes = PyBytes_FromStringAndSize(s, x);
4147 if (bytes == NULL)
4148 return -1;
4149
4150 PDATA_PUSH(self->stack, bytes, -1);
4151 return 0;
4152}
4153
4154static int
4155load_short_binbytes(UnpicklerObject *self)
4156{
4157 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004158 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004159 char *s;
4160
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004161 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004162 return -1;
4163
4164 x = (unsigned char)s[0];
4165
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004166 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004167 return -1;
4168
4169 bytes = PyBytes_FromStringAndSize(s, x);
4170 if (bytes == NULL)
4171 return -1;
4172
4173 PDATA_PUSH(self->stack, bytes, -1);
4174 return 0;
4175}
4176
4177static int
4178load_binstring(UnpicklerObject *self)
4179{
4180 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004181 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004182 char *s;
4183
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004184 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004185 return -1;
4186
4187 x = calc_binint(s, 4);
4188 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004189 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004190 "BINSTRING pickle has negative byte count");
4191 return -1;
4192 }
4193
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004194 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004195 return -1;
4196
4197 /* Convert Python 2.x strings to unicode. */
4198 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4199 if (str == NULL)
4200 return -1;
4201
4202 PDATA_PUSH(self->stack, str, -1);
4203 return 0;
4204}
4205
4206static int
4207load_short_binstring(UnpicklerObject *self)
4208{
4209 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004210 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004211 char *s;
4212
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004213 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004214 return -1;
4215
4216 x = (unsigned char)s[0];
4217
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004218 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004219 return -1;
4220
4221 /* Convert Python 2.x strings to unicode. */
4222 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4223 if (str == NULL)
4224 return -1;
4225
4226 PDATA_PUSH(self->stack, str, -1);
4227 return 0;
4228}
4229
4230static int
4231load_unicode(UnpicklerObject *self)
4232{
4233 PyObject *str;
4234 Py_ssize_t len;
4235 char *s;
4236
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004237 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004238 return -1;
4239 if (len < 1)
4240 return bad_readline();
4241
4242 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4243 if (str == NULL)
4244 return -1;
4245
4246 PDATA_PUSH(self->stack, str, -1);
4247 return 0;
4248}
4249
4250static int
4251load_binunicode(UnpicklerObject *self)
4252{
4253 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004254 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004255 char *s;
4256
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004257 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004258 return -1;
4259
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004260 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004261 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004262 PyErr_Format(PyExc_OverflowError,
4263 "BINUNICODE exceeds system's maximum size of %zd bytes",
4264 PY_SSIZE_T_MAX
4265 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004266 return -1;
4267 }
4268
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004269
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004270 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004271 return -1;
4272
Victor Stinner485fb562010-04-13 11:07:24 +00004273 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004274 if (str == NULL)
4275 return -1;
4276
4277 PDATA_PUSH(self->stack, str, -1);
4278 return 0;
4279}
4280
4281static int
4282load_tuple(UnpicklerObject *self)
4283{
4284 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004285 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004286
4287 if ((i = marker(self)) < 0)
4288 return -1;
4289
4290 tuple = Pdata_poptuple(self->stack, i);
4291 if (tuple == NULL)
4292 return -1;
4293 PDATA_PUSH(self->stack, tuple, -1);
4294 return 0;
4295}
4296
4297static int
4298load_counted_tuple(UnpicklerObject *self, int len)
4299{
4300 PyObject *tuple;
4301
4302 tuple = PyTuple_New(len);
4303 if (tuple == NULL)
4304 return -1;
4305
4306 while (--len >= 0) {
4307 PyObject *item;
4308
4309 PDATA_POP(self->stack, item);
4310 if (item == NULL)
4311 return -1;
4312 PyTuple_SET_ITEM(tuple, len, item);
4313 }
4314 PDATA_PUSH(self->stack, tuple, -1);
4315 return 0;
4316}
4317
4318static int
4319load_empty_list(UnpicklerObject *self)
4320{
4321 PyObject *list;
4322
4323 if ((list = PyList_New(0)) == NULL)
4324 return -1;
4325 PDATA_PUSH(self->stack, list, -1);
4326 return 0;
4327}
4328
4329static int
4330load_empty_dict(UnpicklerObject *self)
4331{
4332 PyObject *dict;
4333
4334 if ((dict = PyDict_New()) == NULL)
4335 return -1;
4336 PDATA_PUSH(self->stack, dict, -1);
4337 return 0;
4338}
4339
4340static int
4341load_list(UnpicklerObject *self)
4342{
4343 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004344 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004345
4346 if ((i = marker(self)) < 0)
4347 return -1;
4348
4349 list = Pdata_poplist(self->stack, i);
4350 if (list == NULL)
4351 return -1;
4352 PDATA_PUSH(self->stack, list, -1);
4353 return 0;
4354}
4355
4356static int
4357load_dict(UnpicklerObject *self)
4358{
4359 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004360 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004361
4362 if ((i = marker(self)) < 0)
4363 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004364 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004365
4366 if ((dict = PyDict_New()) == NULL)
4367 return -1;
4368
4369 for (k = i + 1; k < j; k += 2) {
4370 key = self->stack->data[k - 1];
4371 value = self->stack->data[k];
4372 if (PyDict_SetItem(dict, key, value) < 0) {
4373 Py_DECREF(dict);
4374 return -1;
4375 }
4376 }
4377 Pdata_clear(self->stack, i);
4378 PDATA_PUSH(self->stack, dict, -1);
4379 return 0;
4380}
4381
4382static PyObject *
4383instantiate(PyObject *cls, PyObject *args)
4384{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004385 PyObject *result = NULL;
4386 /* Caller must assure args are a tuple. Normally, args come from
4387 Pdata_poptuple which packs objects from the top of the stack
4388 into a newly created tuple. */
4389 assert(PyTuple_Check(args));
4390 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4391 PyObject_HasAttrString(cls, "__getinitargs__")) {
4392 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004393 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004394 else {
Martin v. Löwisafe55bb2011-10-09 10:38:36 +02004395 _Py_identifier(__new__);
4396
4397 result = _PyObject_CallMethodId(cls, &PyId___new__, "O", cls);
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004398 }
4399 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004400}
4401
4402static int
4403load_obj(UnpicklerObject *self)
4404{
4405 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004406 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004407
4408 if ((i = marker(self)) < 0)
4409 return -1;
4410
4411 args = Pdata_poptuple(self->stack, i + 1);
4412 if (args == NULL)
4413 return -1;
4414
4415 PDATA_POP(self->stack, cls);
4416 if (cls) {
4417 obj = instantiate(cls, args);
4418 Py_DECREF(cls);
4419 }
4420 Py_DECREF(args);
4421 if (obj == NULL)
4422 return -1;
4423
4424 PDATA_PUSH(self->stack, obj, -1);
4425 return 0;
4426}
4427
4428static int
4429load_inst(UnpicklerObject *self)
4430{
4431 PyObject *cls = NULL;
4432 PyObject *args = NULL;
4433 PyObject *obj = NULL;
4434 PyObject *module_name;
4435 PyObject *class_name;
4436 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004437 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004438 char *s;
4439
4440 if ((i = marker(self)) < 0)
4441 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004442 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004443 return -1;
4444 if (len < 2)
4445 return bad_readline();
4446
4447 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4448 identifiers are permitted in Python 3.0, since the INST opcode is only
4449 supported by older protocols on Python 2.x. */
4450 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4451 if (module_name == NULL)
4452 return -1;
4453
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004454 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004455 if (len < 2)
4456 return bad_readline();
4457 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004458 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004459 cls = find_class(self, module_name, class_name);
4460 Py_DECREF(class_name);
4461 }
4462 }
4463 Py_DECREF(module_name);
4464
4465 if (cls == NULL)
4466 return -1;
4467
4468 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4469 obj = instantiate(cls, args);
4470 Py_DECREF(args);
4471 }
4472 Py_DECREF(cls);
4473
4474 if (obj == NULL)
4475 return -1;
4476
4477 PDATA_PUSH(self->stack, obj, -1);
4478 return 0;
4479}
4480
4481static int
4482load_newobj(UnpicklerObject *self)
4483{
4484 PyObject *args = NULL;
4485 PyObject *clsraw = NULL;
4486 PyTypeObject *cls; /* clsraw cast to its true type */
4487 PyObject *obj;
4488
4489 /* Stack is ... cls argtuple, and we want to call
4490 * cls.__new__(cls, *argtuple).
4491 */
4492 PDATA_POP(self->stack, args);
4493 if (args == NULL)
4494 goto error;
4495 if (!PyTuple_Check(args)) {
4496 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4497 goto error;
4498 }
4499
4500 PDATA_POP(self->stack, clsraw);
4501 cls = (PyTypeObject *)clsraw;
4502 if (cls == NULL)
4503 goto error;
4504 if (!PyType_Check(cls)) {
4505 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4506 "isn't a type object");
4507 goto error;
4508 }
4509 if (cls->tp_new == NULL) {
4510 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4511 "has NULL tp_new");
4512 goto error;
4513 }
4514
4515 /* Call __new__. */
4516 obj = cls->tp_new(cls, args, NULL);
4517 if (obj == NULL)
4518 goto error;
4519
4520 Py_DECREF(args);
4521 Py_DECREF(clsraw);
4522 PDATA_PUSH(self->stack, obj, -1);
4523 return 0;
4524
4525 error:
4526 Py_XDECREF(args);
4527 Py_XDECREF(clsraw);
4528 return -1;
4529}
4530
4531static int
4532load_global(UnpicklerObject *self)
4533{
4534 PyObject *global = NULL;
4535 PyObject *module_name;
4536 PyObject *global_name;
4537 Py_ssize_t len;
4538 char *s;
4539
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004540 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004541 return -1;
4542 if (len < 2)
4543 return bad_readline();
4544 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4545 if (!module_name)
4546 return -1;
4547
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004548 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004549 if (len < 2) {
4550 Py_DECREF(module_name);
4551 return bad_readline();
4552 }
4553 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4554 if (global_name) {
4555 global = find_class(self, module_name, global_name);
4556 Py_DECREF(global_name);
4557 }
4558 }
4559 Py_DECREF(module_name);
4560
4561 if (global == NULL)
4562 return -1;
4563 PDATA_PUSH(self->stack, global, -1);
4564 return 0;
4565}
4566
4567static int
4568load_persid(UnpicklerObject *self)
4569{
4570 PyObject *pid;
4571 Py_ssize_t len;
4572 char *s;
4573
4574 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004575 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004576 return -1;
4577 if (len < 2)
4578 return bad_readline();
4579
4580 pid = PyBytes_FromStringAndSize(s, len - 1);
4581 if (pid == NULL)
4582 return -1;
4583
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004584 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004585 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004586 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004587 if (pid == NULL)
4588 return -1;
4589
4590 PDATA_PUSH(self->stack, pid, -1);
4591 return 0;
4592 }
4593 else {
4594 PyErr_SetString(UnpicklingError,
4595 "A load persistent id instruction was encountered,\n"
4596 "but no persistent_load function was specified.");
4597 return -1;
4598 }
4599}
4600
4601static int
4602load_binpersid(UnpicklerObject *self)
4603{
4604 PyObject *pid;
4605
4606 if (self->pers_func) {
4607 PDATA_POP(self->stack, pid);
4608 if (pid == NULL)
4609 return -1;
4610
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004611 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004612 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004613 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004614 if (pid == NULL)
4615 return -1;
4616
4617 PDATA_PUSH(self->stack, pid, -1);
4618 return 0;
4619 }
4620 else {
4621 PyErr_SetString(UnpicklingError,
4622 "A load persistent id instruction was encountered,\n"
4623 "but no persistent_load function was specified.");
4624 return -1;
4625 }
4626}
4627
4628static int
4629load_pop(UnpicklerObject *self)
4630{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004631 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004632
4633 /* Note that we split the (pickle.py) stack into two stacks,
4634 * an object stack and a mark stack. We have to be clever and
4635 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004636 * mark stack first, and only signalling a stack underflow if
4637 * the object stack is empty and the mark stack doesn't match
4638 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004639 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004640 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004641 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004642 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004643 len--;
4644 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004645 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004646 } else {
4647 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004648 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004649 return 0;
4650}
4651
4652static int
4653load_pop_mark(UnpicklerObject *self)
4654{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004655 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004656
4657 if ((i = marker(self)) < 0)
4658 return -1;
4659
4660 Pdata_clear(self->stack, i);
4661
4662 return 0;
4663}
4664
4665static int
4666load_dup(UnpicklerObject *self)
4667{
4668 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004669 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004670
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004671 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004672 return stack_underflow();
4673 last = self->stack->data[len - 1];
4674 PDATA_APPEND(self->stack, last, -1);
4675 return 0;
4676}
4677
4678static int
4679load_get(UnpicklerObject *self)
4680{
4681 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004682 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004683 Py_ssize_t len;
4684 char *s;
4685
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004686 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004687 return -1;
4688 if (len < 2)
4689 return bad_readline();
4690
4691 key = PyLong_FromString(s, NULL, 10);
4692 if (key == NULL)
4693 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004694 idx = PyLong_AsSsize_t(key);
4695 if (idx == -1 && PyErr_Occurred()) {
4696 Py_DECREF(key);
4697 return -1;
4698 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004699
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004700 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004701 if (value == NULL) {
4702 if (!PyErr_Occurred())
4703 PyErr_SetObject(PyExc_KeyError, key);
4704 Py_DECREF(key);
4705 return -1;
4706 }
4707 Py_DECREF(key);
4708
4709 PDATA_APPEND(self->stack, value, -1);
4710 return 0;
4711}
4712
4713static int
4714load_binget(UnpicklerObject *self)
4715{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004716 PyObject *value;
4717 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004718 char *s;
4719
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004720 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004721 return -1;
4722
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004723 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004724
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004725 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004726 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004727 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004728 if (!PyErr_Occurred())
4729 PyErr_SetObject(PyExc_KeyError, key);
4730 Py_DECREF(key);
4731 return -1;
4732 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004733
4734 PDATA_APPEND(self->stack, value, -1);
4735 return 0;
4736}
4737
4738static int
4739load_long_binget(UnpicklerObject *self)
4740{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004741 PyObject *value;
4742 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004743 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004745 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004746 return -1;
4747
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004748 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004749
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004750 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004751 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004752 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004753 if (!PyErr_Occurred())
4754 PyErr_SetObject(PyExc_KeyError, key);
4755 Py_DECREF(key);
4756 return -1;
4757 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004758
4759 PDATA_APPEND(self->stack, value, -1);
4760 return 0;
4761}
4762
4763/* Push an object from the extension registry (EXT[124]). nbytes is
4764 * the number of bytes following the opcode, holding the index (code) value.
4765 */
4766static int
4767load_extension(UnpicklerObject *self, int nbytes)
4768{
4769 char *codebytes; /* the nbytes bytes after the opcode */
4770 long code; /* calc_binint returns long */
4771 PyObject *py_code; /* code as a Python int */
4772 PyObject *obj; /* the object to push */
4773 PyObject *pair; /* (module_name, class_name) */
4774 PyObject *module_name, *class_name;
4775
4776 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004777 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004778 return -1;
4779 code = calc_binint(codebytes, nbytes);
4780 if (code <= 0) { /* note that 0 is forbidden */
4781 /* Corrupt or hostile pickle. */
4782 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4783 return -1;
4784 }
4785
4786 /* Look for the code in the cache. */
4787 py_code = PyLong_FromLong(code);
4788 if (py_code == NULL)
4789 return -1;
4790 obj = PyDict_GetItem(extension_cache, py_code);
4791 if (obj != NULL) {
4792 /* Bingo. */
4793 Py_DECREF(py_code);
4794 PDATA_APPEND(self->stack, obj, -1);
4795 return 0;
4796 }
4797
4798 /* Look up the (module_name, class_name) pair. */
4799 pair = PyDict_GetItem(inverted_registry, py_code);
4800 if (pair == NULL) {
4801 Py_DECREF(py_code);
4802 PyErr_Format(PyExc_ValueError, "unregistered extension "
4803 "code %ld", code);
4804 return -1;
4805 }
4806 /* Since the extension registry is manipulable via Python code,
4807 * confirm that pair is really a 2-tuple of strings.
4808 */
4809 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4810 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4811 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4812 Py_DECREF(py_code);
4813 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4814 "isn't a 2-tuple of strings", code);
4815 return -1;
4816 }
4817 /* Load the object. */
4818 obj = find_class(self, module_name, class_name);
4819 if (obj == NULL) {
4820 Py_DECREF(py_code);
4821 return -1;
4822 }
4823 /* Cache code -> obj. */
4824 code = PyDict_SetItem(extension_cache, py_code, obj);
4825 Py_DECREF(py_code);
4826 if (code < 0) {
4827 Py_DECREF(obj);
4828 return -1;
4829 }
4830 PDATA_PUSH(self->stack, obj, -1);
4831 return 0;
4832}
4833
4834static int
4835load_put(UnpicklerObject *self)
4836{
4837 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004838 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004839 Py_ssize_t len;
4840 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004841
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004842 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843 return -1;
4844 if (len < 2)
4845 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004846 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004847 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004848 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004849
4850 key = PyLong_FromString(s, NULL, 10);
4851 if (key == NULL)
4852 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004853 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004854 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004855 if (idx < 0) {
4856 if (!PyErr_Occurred())
4857 PyErr_SetString(PyExc_ValueError,
4858 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004860 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004861
4862 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004863}
4864
4865static int
4866load_binput(UnpicklerObject *self)
4867{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004868 PyObject *value;
4869 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004870 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004871
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004872 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004873 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004874
4875 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004876 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004877 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004878
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004879 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004880
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004881 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004882}
4883
4884static int
4885load_long_binput(UnpicklerObject *self)
4886{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004887 PyObject *value;
4888 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004889 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004890
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004891 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004892 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004893
4894 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004895 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004896 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004898 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004899 if (idx < 0) {
4900 PyErr_SetString(PyExc_ValueError,
4901 "negative LONG_BINPUT argument");
4902 return -1;
4903 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004904
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004905 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004906}
4907
4908static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004909do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910{
4911 PyObject *value;
4912 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004913 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004914
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004915 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004916 if (x > len || x <= 0)
4917 return stack_underflow();
4918 if (len == x) /* nothing to do */
4919 return 0;
4920
4921 list = self->stack->data[x - 1];
4922
4923 if (PyList_Check(list)) {
4924 PyObject *slice;
4925 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004926 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927
4928 slice = Pdata_poplist(self->stack, x);
4929 if (!slice)
4930 return -1;
4931 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004932 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004933 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004934 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004935 }
4936 else {
4937 PyObject *append_func;
4938
4939 append_func = PyObject_GetAttrString(list, "append");
4940 if (append_func == NULL)
4941 return -1;
4942 for (i = x; i < len; i++) {
4943 PyObject *result;
4944
4945 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004946 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004947 if (result == NULL) {
4948 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004949 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004950 return -1;
4951 }
4952 Py_DECREF(result);
4953 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004954 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004955 }
4956
4957 return 0;
4958}
4959
4960static int
4961load_append(UnpicklerObject *self)
4962{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004963 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004964}
4965
4966static int
4967load_appends(UnpicklerObject *self)
4968{
4969 return do_append(self, marker(self));
4970}
4971
4972static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004973do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004974{
4975 PyObject *value, *key;
4976 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004977 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004978 int status = 0;
4979
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004980 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004981 if (x > len || x <= 0)
4982 return stack_underflow();
4983 if (len == x) /* nothing to do */
4984 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004985 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004986 /* Currupt or hostile pickle -- we never write one like this. */
4987 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4988 return -1;
4989 }
4990
4991 /* Here, dict does not actually need to be a PyDict; it could be anything
4992 that supports the __setitem__ attribute. */
4993 dict = self->stack->data[x - 1];
4994
4995 for (i = x + 1; i < len; i += 2) {
4996 key = self->stack->data[i - 1];
4997 value = self->stack->data[i];
4998 if (PyObject_SetItem(dict, key, value) < 0) {
4999 status = -1;
5000 break;
5001 }
5002 }
5003
5004 Pdata_clear(self->stack, x);
5005 return status;
5006}
5007
5008static int
5009load_setitem(UnpicklerObject *self)
5010{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005011 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005012}
5013
5014static int
5015load_setitems(UnpicklerObject *self)
5016{
5017 return do_setitems(self, marker(self));
5018}
5019
5020static int
5021load_build(UnpicklerObject *self)
5022{
5023 PyObject *state, *inst, *slotstate;
5024 PyObject *setstate;
5025 int status = 0;
5026
5027 /* Stack is ... instance, state. We want to leave instance at
5028 * the stack top, possibly mutated via instance.__setstate__(state).
5029 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005030 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005031 return stack_underflow();
5032
5033 PDATA_POP(self->stack, state);
5034 if (state == NULL)
5035 return -1;
5036
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005037 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005038
5039 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005040 if (setstate == NULL) {
5041 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5042 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005043 else {
5044 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005045 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005046 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005047 }
5048 else {
5049 PyObject *result;
5050
5051 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005052 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005053 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005054 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005055 Py_DECREF(setstate);
5056 if (result == NULL)
5057 return -1;
5058 Py_DECREF(result);
5059 return 0;
5060 }
5061
5062 /* A default __setstate__. First see whether state embeds a
5063 * slot state dict too (a proto 2 addition).
5064 */
5065 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5066 PyObject *tmp = state;
5067
5068 state = PyTuple_GET_ITEM(tmp, 0);
5069 slotstate = PyTuple_GET_ITEM(tmp, 1);
5070 Py_INCREF(state);
5071 Py_INCREF(slotstate);
5072 Py_DECREF(tmp);
5073 }
5074 else
5075 slotstate = NULL;
5076
5077 /* Set inst.__dict__ from the state dict (if any). */
5078 if (state != Py_None) {
5079 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005080 PyObject *d_key, *d_value;
5081 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005082
5083 if (!PyDict_Check(state)) {
5084 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5085 goto error;
5086 }
5087 dict = PyObject_GetAttrString(inst, "__dict__");
5088 if (dict == NULL)
5089 goto error;
5090
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005091 i = 0;
5092 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5093 /* normally the keys for instance attributes are
5094 interned. we should try to do that here. */
5095 Py_INCREF(d_key);
5096 if (PyUnicode_CheckExact(d_key))
5097 PyUnicode_InternInPlace(&d_key);
5098 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5099 Py_DECREF(d_key);
5100 goto error;
5101 }
5102 Py_DECREF(d_key);
5103 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005104 Py_DECREF(dict);
5105 }
5106
5107 /* Also set instance attributes from the slotstate dict (if any). */
5108 if (slotstate != NULL) {
5109 PyObject *d_key, *d_value;
5110 Py_ssize_t i;
5111
5112 if (!PyDict_Check(slotstate)) {
5113 PyErr_SetString(UnpicklingError,
5114 "slot state is not a dictionary");
5115 goto error;
5116 }
5117 i = 0;
5118 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5119 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5120 goto error;
5121 }
5122 }
5123
5124 if (0) {
5125 error:
5126 status = -1;
5127 }
5128
5129 Py_DECREF(state);
5130 Py_XDECREF(slotstate);
5131 return status;
5132}
5133
5134static int
5135load_mark(UnpicklerObject *self)
5136{
5137
5138 /* Note that we split the (pickle.py) stack into two stacks, an
5139 * object stack and a mark stack. Here we push a mark onto the
5140 * mark stack.
5141 */
5142
5143 if ((self->num_marks + 1) >= self->marks_size) {
5144 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005145 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005146
5147 /* Use the size_t type to check for overflow. */
5148 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005149 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005150 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005151 PyErr_NoMemory();
5152 return -1;
5153 }
5154
5155 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005156 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005157 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005158 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5159 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005160 if (marks == NULL) {
5161 PyErr_NoMemory();
5162 return -1;
5163 }
5164 self->marks = marks;
5165 self->marks_size = (Py_ssize_t)alloc;
5166 }
5167
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005168 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005169
5170 return 0;
5171}
5172
5173static int
5174load_reduce(UnpicklerObject *self)
5175{
5176 PyObject *callable = NULL;
5177 PyObject *argtup = NULL;
5178 PyObject *obj = NULL;
5179
5180 PDATA_POP(self->stack, argtup);
5181 if (argtup == NULL)
5182 return -1;
5183 PDATA_POP(self->stack, callable);
5184 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005185 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005186 Py_DECREF(callable);
5187 }
5188 Py_DECREF(argtup);
5189
5190 if (obj == NULL)
5191 return -1;
5192
5193 PDATA_PUSH(self->stack, obj, -1);
5194 return 0;
5195}
5196
5197/* Just raises an error if we don't know the protocol specified. PROTO
5198 * is the first opcode for protocols >= 2.
5199 */
5200static int
5201load_proto(UnpicklerObject *self)
5202{
5203 char *s;
5204 int i;
5205
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005206 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005207 return -1;
5208
5209 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005210 if (i <= HIGHEST_PROTOCOL) {
5211 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005212 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005213 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005214
5215 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5216 return -1;
5217}
5218
/* Main unpickling loop.
 *
 * Resets the mark count and the object stack, then reads one opcode byte at
 * a time and dispatches to the matching load_*() handler until STOP is
 * seen or a handler fails.  On success the object popped from the top of
 * the stack is returned; on failure NULL is returned.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    /* Start from a clean state: no marks, empty object stack. */
    self->num_marks = 0;
    if (Py_SIZE(self->stack))
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       In each generated case, a failing handler executes "break", which
       leaves the switch and reaches the "break" that ends the while loop;
       a succeeding handler executes "continue" and the next opcode is
       read. */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (_Unpickler_Read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* End of the pickle: leave the switch, then the loop. */
        case STOP:
            break;

        /* Unknown opcode byte: a NUL byte is reported as EOFError, anything
           else as UnpicklingError.  Note that this path returns directly
           and so skips the _Unpickler_SkipConsumed() call below. */
        default:
            if (s[0] == '\0')
                PyErr_SetNone(PyExc_EOFError);
            else
                PyErr_Format(UnpicklingError,
                             "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;      /* and we are done! */
    }

    if (_Unpickler_SkipConsumed(self) < 0)
        return NULL;

    /* XXX: It is not clear what this is actually for. */
    /* NOTE(review): when the pending exception type is EOFError it is
       raised again with no value, which discards any message it carried --
       confirm whether that is the intent. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    /* Success: the result is whatever is on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
5326
5327PyDoc_STRVAR(Unpickler_load_doc,
5328"load() -> object. Load a pickle."
5329"\n"
5330"Read a pickled object representation from the open file object given in\n"
5331"the constructor, and return the reconstituted object hierarchy specified\n"
5332"therein.\n");
5333
5334static PyObject *
5335Unpickler_load(UnpicklerObject *self)
5336{
5337 /* Check whether the Unpickler was initialized correctly. This prevents
5338 segfaulting if a subclass overridden __init__ with a function that does
5339 not call Unpickler.__init__(). Here, we simply ensure that self->read
5340 is not NULL. */
5341 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005342 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005343 "Unpickler.__init__() was not called by %s.__init__()",
5344 Py_TYPE(self)->tp_name);
5345 return NULL;
5346 }
5347
5348 return load(self);
5349}
5350
5351/* The name of find_class() is misleading. In newer pickle protocols, this
5352 function is used for loading any global (i.e., functions), not just
5353 classes. The name is kept only for backward compatibility. */
5354
5355PyDoc_STRVAR(Unpickler_find_class_doc,
5356"find_class(module_name, global_name) -> object.\n"
5357"\n"
5358"Return an object from a specified module, importing the module if\n"
5359"necessary. Subclasses may override this method (e.g. to restrict\n"
5360"unpickling of arbitrary classes and functions).\n"
5361"\n"
5362"This method is called whenever a class or a function object is\n"
5363"needed. Both arguments passed are str objects.\n");
5364
5365static PyObject *
5366Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5367{
5368 PyObject *global;
5369 PyObject *modules_dict;
5370 PyObject *module;
5371 PyObject *module_name, *global_name;
5372
5373 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5374 &module_name, &global_name))
5375 return NULL;
5376
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005377 /* Try to map the old names used in Python 2.x to the new ones used in
5378 Python 3.x. We do this only with old pickle protocols and when the
5379 user has not disabled the feature. */
5380 if (self->proto < 3 && self->fix_imports) {
5381 PyObject *key;
5382 PyObject *item;
5383
5384 /* Check if the global (i.e., a function or a class) was renamed
5385 or moved to another module. */
5386 key = PyTuple_Pack(2, module_name, global_name);
5387 if (key == NULL)
5388 return NULL;
5389 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5390 Py_DECREF(key);
5391 if (item) {
5392 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5393 PyErr_Format(PyExc_RuntimeError,
5394 "_compat_pickle.NAME_MAPPING values should be "
5395 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5396 return NULL;
5397 }
5398 module_name = PyTuple_GET_ITEM(item, 0);
5399 global_name = PyTuple_GET_ITEM(item, 1);
5400 if (!PyUnicode_Check(module_name) ||
5401 !PyUnicode_Check(global_name)) {
5402 PyErr_Format(PyExc_RuntimeError,
5403 "_compat_pickle.NAME_MAPPING values should be "
5404 "pairs of str, not (%.200s, %.200s)",
5405 Py_TYPE(module_name)->tp_name,
5406 Py_TYPE(global_name)->tp_name);
5407 return NULL;
5408 }
5409 }
5410 else if (PyErr_Occurred()) {
5411 return NULL;
5412 }
5413
5414 /* Check if the module was renamed. */
5415 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5416 if (item) {
5417 if (!PyUnicode_Check(item)) {
5418 PyErr_Format(PyExc_RuntimeError,
5419 "_compat_pickle.IMPORT_MAPPING values should be "
5420 "strings, not %.200s", Py_TYPE(item)->tp_name);
5421 return NULL;
5422 }
5423 module_name = item;
5424 }
5425 else if (PyErr_Occurred()) {
5426 return NULL;
5427 }
5428 }
5429
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005430 modules_dict = PySys_GetObject("modules");
5431 if (modules_dict == NULL)
5432 return NULL;
5433
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005434 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005435 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005436 if (PyErr_Occurred())
5437 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005438 module = PyImport_Import(module_name);
5439 if (module == NULL)
5440 return NULL;
5441 global = PyObject_GetAttr(module, global_name);
5442 Py_DECREF(module);
5443 }
Victor Stinner121aab42011-09-29 23:40:53 +02005444 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005445 global = PyObject_GetAttr(module, global_name);
5446 }
5447 return global;
5448}
5449
5450static struct PyMethodDef Unpickler_methods[] = {
5451 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5452 Unpickler_load_doc},
5453 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5454 Unpickler_find_class_doc},
5455 {NULL, NULL} /* sentinel */
5456};
5457
/* Deallocator of Unpickler objects: untrack the object from the garbage
 * collector, drop every owned reference, release the input buffer and the
 * raw memory blocks, and finally free the object through tp_free.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop GC tracking before the fields start going away. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->peek);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    /* Release the buffer view only if one is currently held. */
    if (self->buffer.buf != NULL) {
        PyBuffer_Release(&self->buffer);
        self->buffer.buf = NULL;
    }

    _Unpickler_MemoCleanup(self);
    PyMem_Free(self->marks);
    PyMem_Free(self->input_line);
    /* encoding and errors are released with plain free(); presumably they
       were allocated with malloc()/strdup() -- verify at the allocation
       site. */
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
5481
5482static int
5483Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5484{
5485 Py_VISIT(self->readline);
5486 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005487 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005488 Py_VISIT(self->stack);
5489 Py_VISIT(self->pers_func);
5490 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005491 return 0;
5492}
5493
5494static int
5495Unpickler_clear(UnpicklerObject *self)
5496{
5497 Py_CLEAR(self->readline);
5498 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005499 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005500 Py_CLEAR(self->stack);
5501 Py_CLEAR(self->pers_func);
5502 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005503 if (self->buffer.buf != NULL) {
5504 PyBuffer_Release(&self->buffer);
5505 self->buffer.buf = NULL;
5506 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005507
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005508 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509 PyMem_Free(self->marks);
5510 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005511 PyMem_Free(self->input_line);
5512 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005513 free(self->encoding);
5514 self->encoding = NULL;
5515 free(self->errors);
5516 self->errors = NULL;
5517
5518 return 0;
5519}
5520
5521PyDoc_STRVAR(Unpickler_doc,
5522"Unpickler(file, *, encoding='ASCII', errors='strict')"
5523"\n"
5524"This takes a binary file for reading a pickle data stream.\n"
5525"\n"
5526"The protocol version of the pickle is detected automatically, so no\n"
5527"proto argument is needed.\n"
5528"\n"
5529"The file-like object must have two methods, a read() method\n"
5530"that takes an integer argument, and a readline() method that\n"
5531"requires no arguments. Both methods should return bytes.\n"
5532"Thus file-like object can be a binary file object opened for\n"
5533"reading, a BytesIO object, or any other custom object that\n"
5534"meets this interface.\n"
5535"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005536"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5537"which are used to control compatiblity support for pickle stream\n"
5538"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5539"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5540"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5541"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5542"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005543
5544static int
5545Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5546{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005547 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005548 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005549 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005550 char *encoding = NULL;
5551 char *errors = NULL;
5552
5553 /* XXX: That is an horrible error message. But, I don't know how to do
5554 better... */
5555 if (Py_SIZE(args) != 1) {
5556 PyErr_Format(PyExc_TypeError,
5557 "%s takes exactly one positional argument (%zd given)",
5558 Py_TYPE(self)->tp_name, Py_SIZE(args));
5559 return -1;
5560 }
5561
5562 /* Arguments parsing needs to be done in the __init__() method to allow
5563 subclasses to define their own __init__() method, which may (or may
5564 not) support Unpickler arguments. However, this means we need to be
5565 extra careful in the other Unpickler methods, since a subclass could
5566 forget to call Unpickler.__init__() thus breaking our internal
5567 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005568 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005569 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005570 return -1;
5571
5572 /* In case of multiple __init__() calls, clear previous content. */
5573 if (self->read != NULL)
5574 (void)Unpickler_clear(self);
5575
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005576 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005577 return -1;
5578
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005579 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005580 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005581
5582 self->fix_imports = PyObject_IsTrue(fix_imports);
5583 if (self->fix_imports == -1)
5584 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005585
5586 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5587 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5588 "persistent_load");
5589 if (self->pers_func == NULL)
5590 return -1;
5591 }
5592 else {
5593 self->pers_func = NULL;
5594 }
5595
5596 self->stack = (Pdata *)Pdata_New();
5597 if (self->stack == NULL)
5598 return -1;
5599
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005600 self->memo_size = 32;
5601 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005602 if (self->memo == NULL)
5603 return -1;
5604
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005605 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005606 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005607
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005608 return 0;
5609}
5610
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005611/* Define a proxy object for the Unpickler's internal memo object. This is to
5612 * avoid breaking code like:
5613 * unpickler.memo.clear()
5614 * and
5615 * unpickler.memo = saved_memo
5616 * Is this a good idea? Not really, but we don't want to break code that uses
5617 * it. Note that we don't implement the entire mapping API here. This is
5618 * intentional, as these should be treated as black-box implementation details.
5619 *
5620 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005621 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005622 */
5623
5624typedef struct {
5625 PyObject_HEAD
5626 UnpicklerObject *unpickler;
5627} UnpicklerMemoProxyObject;
5628
5629PyDoc_STRVAR(ump_clear_doc,
5630"memo.clear() -> None. Remove all items from memo.");
5631
5632static PyObject *
5633ump_clear(UnpicklerMemoProxyObject *self)
5634{
5635 _Unpickler_MemoCleanup(self->unpickler);
5636 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5637 if (self->unpickler->memo == NULL)
5638 return NULL;
5639 Py_RETURN_NONE;
5640}
5641
5642PyDoc_STRVAR(ump_copy_doc,
5643"memo.copy() -> new_memo. Copy the memo to a new object.");
5644
5645static PyObject *
5646ump_copy(UnpicklerMemoProxyObject *self)
5647{
5648 Py_ssize_t i;
5649 PyObject *new_memo = PyDict_New();
5650 if (new_memo == NULL)
5651 return NULL;
5652
5653 for (i = 0; i < self->unpickler->memo_size; i++) {
5654 int status;
5655 PyObject *key, *value;
5656
5657 value = self->unpickler->memo[i];
5658 if (value == NULL)
5659 continue;
5660
5661 key = PyLong_FromSsize_t(i);
5662 if (key == NULL)
5663 goto error;
5664 status = PyDict_SetItem(new_memo, key, value);
5665 Py_DECREF(key);
5666 if (status < 0)
5667 goto error;
5668 }
5669 return new_memo;
5670
5671error:
5672 Py_DECREF(new_memo);
5673 return NULL;
5674}
5675
5676PyDoc_STRVAR(ump_reduce_doc,
5677"memo.__reduce__(). Pickling support.");
5678
5679static PyObject *
5680ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5681{
5682 PyObject *reduce_value;
5683 PyObject *constructor_args;
5684 PyObject *contents = ump_copy(self);
5685 if (contents == NULL)
5686 return NULL;
5687
5688 reduce_value = PyTuple_New(2);
5689 if (reduce_value == NULL) {
5690 Py_DECREF(contents);
5691 return NULL;
5692 }
5693 constructor_args = PyTuple_New(1);
5694 if (constructor_args == NULL) {
5695 Py_DECREF(contents);
5696 Py_DECREF(reduce_value);
5697 return NULL;
5698 }
5699 PyTuple_SET_ITEM(constructor_args, 0, contents);
5700 Py_INCREF((PyObject *)&PyDict_Type);
5701 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5702 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5703 return reduce_value;
5704}
5705
5706static PyMethodDef unpicklerproxy_methods[] = {
5707 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5708 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5709 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5710 {NULL, NULL} /* sentinel */
5711};
5712
5713static void
5714UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5715{
5716 PyObject_GC_UnTrack(self);
5717 Py_XDECREF(self->unpickler);
5718 PyObject_GC_Del((PyObject *)self);
5719}
5720
5721static int
5722UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5723 visitproc visit, void *arg)
5724{
5725 Py_VISIT(self->unpickler);
5726 return 0;
5727}
5728
5729static int
5730UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5731{
5732 Py_CLEAR(self->unpickler);
5733 return 0;
5734}
5735
5736static PyTypeObject UnpicklerMemoProxyType = {
5737 PyVarObject_HEAD_INIT(NULL, 0)
5738 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5739 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5740 0,
5741 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5742 0, /* tp_print */
5743 0, /* tp_getattr */
5744 0, /* tp_setattr */
5745 0, /* tp_compare */
5746 0, /* tp_repr */
5747 0, /* tp_as_number */
5748 0, /* tp_as_sequence */
5749 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005750 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005751 0, /* tp_call */
5752 0, /* tp_str */
5753 PyObject_GenericGetAttr, /* tp_getattro */
5754 PyObject_GenericSetAttr, /* tp_setattro */
5755 0, /* tp_as_buffer */
5756 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5757 0, /* tp_doc */
5758 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5759 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5760 0, /* tp_richcompare */
5761 0, /* tp_weaklistoffset */
5762 0, /* tp_iter */
5763 0, /* tp_iternext */
5764 unpicklerproxy_methods, /* tp_methods */
5765};
5766
5767static PyObject *
5768UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5769{
5770 UnpicklerMemoProxyObject *self;
5771
5772 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5773 &UnpicklerMemoProxyType);
5774 if (self == NULL)
5775 return NULL;
5776 Py_INCREF(unpickler);
5777 self->unpickler = unpickler;
5778 PyObject_GC_Track(self);
5779 return (PyObject *)self;
5780}
5781
5782/*****************************************************************************/
5783
5784
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005785static PyObject *
5786Unpickler_get_memo(UnpicklerObject *self)
5787{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005788 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005789}
5790
5791static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005792Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005793{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005794 PyObject **new_memo;
5795 Py_ssize_t new_memo_size = 0;
5796 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005797
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005798 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005799 PyErr_SetString(PyExc_TypeError,
5800 "attribute deletion is not supported");
5801 return -1;
5802 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005803
5804 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5805 UnpicklerObject *unpickler =
5806 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5807
5808 new_memo_size = unpickler->memo_size;
5809 new_memo = _Unpickler_NewMemo(new_memo_size);
5810 if (new_memo == NULL)
5811 return -1;
5812
5813 for (i = 0; i < new_memo_size; i++) {
5814 Py_XINCREF(unpickler->memo[i]);
5815 new_memo[i] = unpickler->memo[i];
5816 }
5817 }
5818 else if (PyDict_Check(obj)) {
5819 Py_ssize_t i = 0;
5820 PyObject *key, *value;
5821
5822 new_memo_size = PyDict_Size(obj);
5823 new_memo = _Unpickler_NewMemo(new_memo_size);
5824 if (new_memo == NULL)
5825 return -1;
5826
5827 while (PyDict_Next(obj, &i, &key, &value)) {
5828 Py_ssize_t idx;
5829 if (!PyLong_Check(key)) {
5830 PyErr_SetString(PyExc_TypeError,
5831 "memo key must be integers");
5832 goto error;
5833 }
5834 idx = PyLong_AsSsize_t(key);
5835 if (idx == -1 && PyErr_Occurred())
5836 goto error;
5837 if (_Unpickler_MemoPut(self, idx, value) < 0)
5838 goto error;
5839 }
5840 }
5841 else {
5842 PyErr_Format(PyExc_TypeError,
5843 "'memo' attribute must be an UnpicklerMemoProxy object"
5844 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005845 return -1;
5846 }
5847
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005848 _Unpickler_MemoCleanup(self);
5849 self->memo_size = new_memo_size;
5850 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005851
5852 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005853
5854 error:
5855 if (new_memo_size) {
5856 i = new_memo_size;
5857 while (--i >= 0) {
5858 Py_XDECREF(new_memo[i]);
5859 }
5860 PyMem_FREE(new_memo);
5861 }
5862 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005863}
5864
5865static PyObject *
5866Unpickler_get_persload(UnpicklerObject *self)
5867{
5868 if (self->pers_func == NULL)
5869 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5870 else
5871 Py_INCREF(self->pers_func);
5872 return self->pers_func;
5873}
5874
5875static int
5876Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5877{
5878 PyObject *tmp;
5879
5880 if (value == NULL) {
5881 PyErr_SetString(PyExc_TypeError,
5882 "attribute deletion is not supported");
5883 return -1;
5884 }
5885 if (!PyCallable_Check(value)) {
5886 PyErr_SetString(PyExc_TypeError,
5887 "persistent_load must be a callable taking "
5888 "one argument");
5889 return -1;
5890 }
5891
5892 tmp = self->pers_func;
5893 Py_INCREF(value);
5894 self->pers_func = value;
5895 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5896
5897 return 0;
5898}
5899
5900static PyGetSetDef Unpickler_getsets[] = {
5901 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5902 {"persistent_load", (getter)Unpickler_get_persload,
5903 (setter)Unpickler_set_persload},
5904 {NULL}
5905};
5906
5907static PyTypeObject Unpickler_Type = {
5908 PyVarObject_HEAD_INIT(NULL, 0)
5909 "_pickle.Unpickler", /*tp_name*/
5910 sizeof(UnpicklerObject), /*tp_basicsize*/
5911 0, /*tp_itemsize*/
5912 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5913 0, /*tp_print*/
5914 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005915 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005916 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005917 0, /*tp_repr*/
5918 0, /*tp_as_number*/
5919 0, /*tp_as_sequence*/
5920 0, /*tp_as_mapping*/
5921 0, /*tp_hash*/
5922 0, /*tp_call*/
5923 0, /*tp_str*/
5924 0, /*tp_getattro*/
5925 0, /*tp_setattro*/
5926 0, /*tp_as_buffer*/
5927 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5928 Unpickler_doc, /*tp_doc*/
5929 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5930 (inquiry)Unpickler_clear, /*tp_clear*/
5931 0, /*tp_richcompare*/
5932 0, /*tp_weaklistoffset*/
5933 0, /*tp_iter*/
5934 0, /*tp_iternext*/
5935 Unpickler_methods, /*tp_methods*/
5936 0, /*tp_members*/
5937 Unpickler_getsets, /*tp_getset*/
5938 0, /*tp_base*/
5939 0, /*tp_dict*/
5940 0, /*tp_descr_get*/
5941 0, /*tp_descr_set*/
5942 0, /*tp_dictoffset*/
5943 (initproc)Unpickler_init, /*tp_init*/
5944 PyType_GenericAlloc, /*tp_alloc*/
5945 PyType_GenericNew, /*tp_new*/
5946 PyObject_GC_Del, /*tp_free*/
5947 0, /*tp_is_gc*/
5948};
5949
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005950PyDoc_STRVAR(pickle_dump_doc,
5951"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5952"\n"
5953"Write a pickled representation of obj to the open file object file. This\n"
5954"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5955"efficient.\n"
5956"\n"
5957"The optional protocol argument tells the pickler to use the given protocol;\n"
5958"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5959"backward-incompatible protocol designed for Python 3.0.\n"
5960"\n"
5961"Specifying a negative protocol version selects the highest protocol version\n"
5962"supported. The higher the protocol used, the more recent the version of\n"
5963"Python needed to read the pickle produced.\n"
5964"\n"
5965"The file argument must have a write() method that accepts a single bytes\n"
5966"argument. It can thus be a file object opened for binary writing, a\n"
5967"io.BytesIO instance, or any other custom object that meets this interface.\n"
5968"\n"
5969"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5970"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5971"so that the pickle data stream is readable with Python 2.x.\n");
5972
5973static PyObject *
5974pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5975{
5976 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5977 PyObject *obj;
5978 PyObject *file;
5979 PyObject *proto = NULL;
5980 PyObject *fix_imports = Py_True;
5981 PicklerObject *pickler;
5982
5983 /* fix_imports is a keyword-only argument. */
5984 if (Py_SIZE(args) > 3) {
5985 PyErr_Format(PyExc_TypeError,
5986 "pickle.dump() takes at most 3 positional "
5987 "argument (%zd given)", Py_SIZE(args));
5988 return NULL;
5989 }
5990
5991 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5992 &obj, &file, &proto, &fix_imports))
5993 return NULL;
5994
5995 pickler = _Pickler_New();
5996 if (pickler == NULL)
5997 return NULL;
5998
5999 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6000 goto error;
6001
6002 if (_Pickler_SetOutputStream(pickler, file) < 0)
6003 goto error;
6004
6005 if (dump(pickler, obj) < 0)
6006 goto error;
6007
6008 if (_Pickler_FlushToFile(pickler) < 0)
6009 goto error;
6010
6011 Py_DECREF(pickler);
6012 Py_RETURN_NONE;
6013
6014 error:
6015 Py_XDECREF(pickler);
6016 return NULL;
6017}
6018
6019PyDoc_STRVAR(pickle_dumps_doc,
6020"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6021"\n"
6022"Return the pickled representation of the object as a bytes\n"
6023"object, instead of writing it to a file.\n"
6024"\n"
6025"The optional protocol argument tells the pickler to use the given protocol;\n"
6026"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6027"backward-incompatible protocol designed for Python 3.0.\n"
6028"\n"
6029"Specifying a negative protocol version selects the highest protocol version\n"
6030"supported. The higher the protocol used, the more recent the version of\n"
6031"Python needed to read the pickle produced.\n"
6032"\n"
6033"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6034"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6035"so that the pickle data stream is readable with Python 2.x.\n");
6036
6037static PyObject *
6038pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6039{
6040 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6041 PyObject *obj;
6042 PyObject *proto = NULL;
6043 PyObject *result;
6044 PyObject *fix_imports = Py_True;
6045 PicklerObject *pickler;
6046
6047 /* fix_imports is a keyword-only argument. */
6048 if (Py_SIZE(args) > 2) {
6049 PyErr_Format(PyExc_TypeError,
6050 "pickle.dumps() takes at most 2 positional "
6051 "argument (%zd given)", Py_SIZE(args));
6052 return NULL;
6053 }
6054
6055 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6056 &obj, &proto, &fix_imports))
6057 return NULL;
6058
6059 pickler = _Pickler_New();
6060 if (pickler == NULL)
6061 return NULL;
6062
6063 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6064 goto error;
6065
6066 if (dump(pickler, obj) < 0)
6067 goto error;
6068
6069 result = _Pickler_GetString(pickler);
6070 Py_DECREF(pickler);
6071 return result;
6072
6073 error:
6074 Py_XDECREF(pickler);
6075 return NULL;
6076}
6077
6078PyDoc_STRVAR(pickle_load_doc,
6079"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6080"\n"
6081"Read a pickled object representation from the open file object file and\n"
6082"return the reconstituted object hierarchy specified therein. This is\n"
6083"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6084"\n"
6085"The protocol version of the pickle is detected automatically, so no protocol\n"
6086"argument is needed. Bytes past the pickled object's representation are\n"
6087"ignored.\n"
6088"\n"
6089"The argument file must have two methods, a read() method that takes an\n"
6090"integer argument, and a readline() method that requires no arguments. Both\n"
6091"methods should return bytes. Thus *file* can be a binary file object opened\n"
6092"for reading, a BytesIO object, or any other custom object that meets this\n"
6093"interface.\n"
6094"\n"
6095"Optional keyword arguments are fix_imports, encoding and errors,\n"
6096"which are used to control compatiblity support for pickle stream generated\n"
6097"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6098"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6099"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6100"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6101
6102static PyObject *
6103pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6104{
6105 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6106 PyObject *file;
6107 PyObject *fix_imports = Py_True;
6108 PyObject *result;
6109 char *encoding = NULL;
6110 char *errors = NULL;
6111 UnpicklerObject *unpickler;
6112
6113 /* fix_imports, encoding and errors are a keyword-only argument. */
6114 if (Py_SIZE(args) != 1) {
6115 PyErr_Format(PyExc_TypeError,
6116 "pickle.load() takes exactly one positional "
6117 "argument (%zd given)", Py_SIZE(args));
6118 return NULL;
6119 }
6120
6121 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6122 &file, &fix_imports, &encoding, &errors))
6123 return NULL;
6124
6125 unpickler = _Unpickler_New();
6126 if (unpickler == NULL)
6127 return NULL;
6128
6129 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6130 goto error;
6131
6132 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6133 goto error;
6134
6135 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6136 if (unpickler->fix_imports == -1)
6137 goto error;
6138
6139 result = load(unpickler);
6140 Py_DECREF(unpickler);
6141 return result;
6142
6143 error:
6144 Py_XDECREF(unpickler);
6145 return NULL;
6146}
6147
6148PyDoc_STRVAR(pickle_loads_doc,
6149"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6150"\n"
6151"Read a pickled object hierarchy from a bytes object and return the\n"
6152"reconstituted object hierarchy specified therein\n"
6153"\n"
6154"The protocol version of the pickle is detected automatically, so no protocol\n"
6155"argument is needed. Bytes past the pickled object's representation are\n"
6156"ignored.\n"
6157"\n"
6158"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6159"are used to control compatiblity support for pickle stream generated\n"
6160"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6161"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6162"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6163"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6164
6165static PyObject *
6166pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6167{
6168 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6169 PyObject *input;
6170 PyObject *fix_imports = Py_True;
6171 PyObject *result;
6172 char *encoding = NULL;
6173 char *errors = NULL;
6174 UnpicklerObject *unpickler;
6175
6176 /* fix_imports, encoding and errors are a keyword-only argument. */
6177 if (Py_SIZE(args) != 1) {
6178 PyErr_Format(PyExc_TypeError,
6179 "pickle.loads() takes exactly one positional "
6180 "argument (%zd given)", Py_SIZE(args));
6181 return NULL;
6182 }
6183
6184 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6185 &input, &fix_imports, &encoding, &errors))
6186 return NULL;
6187
6188 unpickler = _Unpickler_New();
6189 if (unpickler == NULL)
6190 return NULL;
6191
6192 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6193 goto error;
6194
6195 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6196 goto error;
6197
6198 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6199 if (unpickler->fix_imports == -1)
6200 goto error;
6201
6202 result = load(unpickler);
6203 Py_DECREF(unpickler);
6204 return result;
6205
6206 error:
6207 Py_XDECREF(unpickler);
6208 return NULL;
6209}
6210
6211
6212static struct PyMethodDef pickle_methods[] = {
6213 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6214 pickle_dump_doc},
6215 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6216 pickle_dumps_doc},
6217 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6218 pickle_load_doc},
6219 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6220 pickle_loads_doc},
6221 {NULL, NULL} /* sentinel */
6222};
6223
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006224static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006225initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006226{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006227 PyObject *copyreg = NULL;
6228 PyObject *compat_pickle = NULL;
6229
6230 /* XXX: We should ensure that the types of the dictionaries imported are
6231 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6232 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006233
6234 copyreg = PyImport_ImportModule("copyreg");
6235 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006236 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006237 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6238 if (!dispatch_table)
6239 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006240 extension_registry = \
6241 PyObject_GetAttrString(copyreg, "_extension_registry");
6242 if (!extension_registry)
6243 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006244 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6245 if (!inverted_registry)
6246 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006247 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6248 if (!extension_cache)
6249 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006250 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006251
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006252 /* Load the 2.x -> 3.x stdlib module mapping tables */
6253 compat_pickle = PyImport_ImportModule("_compat_pickle");
6254 if (!compat_pickle)
6255 goto error;
6256 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6257 if (!name_mapping_2to3)
6258 goto error;
6259 if (!PyDict_CheckExact(name_mapping_2to3)) {
6260 PyErr_Format(PyExc_RuntimeError,
6261 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6262 Py_TYPE(name_mapping_2to3)->tp_name);
6263 goto error;
6264 }
6265 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6266 "IMPORT_MAPPING");
6267 if (!import_mapping_2to3)
6268 goto error;
6269 if (!PyDict_CheckExact(import_mapping_2to3)) {
6270 PyErr_Format(PyExc_RuntimeError,
6271 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6272 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6273 goto error;
6274 }
6275 /* ... and the 3.x -> 2.x mapping tables */
6276 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6277 "REVERSE_NAME_MAPPING");
6278 if (!name_mapping_3to2)
6279 goto error;
6280 if (!PyDict_CheckExact(name_mapping_3to2)) {
6281 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006282 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006283 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6284 goto error;
6285 }
6286 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6287 "REVERSE_IMPORT_MAPPING");
6288 if (!import_mapping_3to2)
6289 goto error;
6290 if (!PyDict_CheckExact(import_mapping_3to2)) {
6291 PyErr_Format(PyExc_RuntimeError,
6292 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6293 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6294 goto error;
6295 }
6296 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006297
6298 empty_tuple = PyTuple_New(0);
6299 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006300 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006301 two_tuple = PyTuple_New(2);
6302 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006303 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006304 /* We use this temp container with no regard to refcounts, or to
6305 * keeping containees alive. Exempt from GC, because we don't
6306 * want anything looking at two_tuple() by magic.
6307 */
6308 PyObject_GC_UnTrack(two_tuple);
6309
6310 return 0;
6311
6312 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006313 Py_CLEAR(copyreg);
6314 Py_CLEAR(dispatch_table);
6315 Py_CLEAR(extension_registry);
6316 Py_CLEAR(inverted_registry);
6317 Py_CLEAR(extension_cache);
6318 Py_CLEAR(compat_pickle);
6319 Py_CLEAR(name_mapping_2to3);
6320 Py_CLEAR(import_mapping_2to3);
6321 Py_CLEAR(name_mapping_3to2);
6322 Py_CLEAR(import_mapping_3to2);
6323 Py_CLEAR(empty_tuple);
6324 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006325 return -1;
6326}
6327
6328static struct PyModuleDef _picklemodule = {
6329 PyModuleDef_HEAD_INIT,
6330 "_pickle",
6331 pickle_module_doc,
6332 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006333 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006334 NULL,
6335 NULL,
6336 NULL,
6337 NULL
6338};
6339
6340PyMODINIT_FUNC
6341PyInit__pickle(void)
6342{
6343 PyObject *m;
6344
6345 if (PyType_Ready(&Unpickler_Type) < 0)
6346 return NULL;
6347 if (PyType_Ready(&Pickler_Type) < 0)
6348 return NULL;
6349 if (PyType_Ready(&Pdata_Type) < 0)
6350 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006351 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6352 return NULL;
6353 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6354 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006355
6356 /* Create the module and add the functions. */
6357 m = PyModule_Create(&_picklemodule);
6358 if (m == NULL)
6359 return NULL;
6360
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006361 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006362 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6363 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006364 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006365 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6366 return NULL;
6367
6368 /* Initialize the exceptions. */
6369 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6370 if (PickleError == NULL)
6371 return NULL;
6372 PicklingError = \
6373 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6374 if (PicklingError == NULL)
6375 return NULL;
6376 UnpicklingError = \
6377 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6378 if (UnpicklingError == NULL)
6379 return NULL;
6380
6381 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6382 return NULL;
6383 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6384 return NULL;
6385 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6386 return NULL;
6387
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006388 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006389 return NULL;
6390
6391 return m;
6392}