blob: 0b10009334ffb5beb06e656ea59b00b45ab81d80 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    /* Largest protocol number _Pickler_SetProtocol() accepts; also what a
       negative protocol request is mapped to. */
    HIGHEST_PROTOCOL = 3,
    /* Protocol used when the caller passes no protocol (NULL or None). */
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every enumerator is spelled as a character constant.
   NOTE(review): the protocol 2 constants ('\x80'..'\x8b') are negative ints
   on platforms where plain char is signed -- confirm that code comparing
   them against bytes read from a stream accounts for that. */
enum opcode {
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C'
};
76
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each value is a complete INT ('I') opcode line, newline included.  Any
 * earlier definition of TRUE/FALSE is discarded first so these string
 * versions win.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16
};
108
/* Exception classes for pickle.  These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
/* Internal data type used as the unpickling stack.
   Py_SIZE() of the object is the number of items currently on the stack;
   `data` is a PyMem-allocated array whose first Py_SIZE() slots hold
   references owned by the stack. */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;
    Py_ssize_t allocated;  /* number of slots in data allocated */
} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
/* Type object for Pdata.  Only the name, basic size, item size and
   deallocator slots are filled in explicitly; every remaining slot is
   left to static zero-initialization. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): the macro does not release O when the push fails --
   confirm whether callers are expected to tolerate that. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the enclosing function returns ER.
   NOTE(review): the reference added here is not dropped when the push
   fails -- confirm whether that is intended. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
/* One slot of the pickler memo hashtable.  A NULL me_key marks an unused
   slot; a used slot holds a reference to its key object. */
typedef struct {
    PyObject *me_key;
    Py_ssize_t me_value;
} PyMemoEntry;
308
/* Open-addressing hashtable keyed by object identity (pointer value). */
typedef struct {
    Py_ssize_t mt_mask;       /* mt_allocated - 1; masks a hash to a slot index */
    Py_ssize_t mt_used;       /* number of slots holding a key */
    Py_ssize_t mt_allocated;  /* number of slots in mt_table (a power of two) */
    PyMemoEntry *mt_table;    /* PyMem-allocated slot array */
} PyMemoTable;
315
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached 1-tuple reused by ARG_TUP() for
                                   single-argument calls; NULL until first
                                   needed. */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytes buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Number of bytes of output_buffer in use. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes.  It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
} PicklerObject;
343
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s.  Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    Py_ssize_t memo_size;

    PyObject *arg;              /* Cached 1-tuple reused by ARG_TUP() for
                                   single-argument calls; NULL until first
                                   needed. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */

    Py_buffer buffer;           /* Buffer view acquired on in-memory input;
                                   buffer.buf is NULL while no view is held. */
    char *input_buffer;
    char *input_line;
    Py_ssize_t input_len;
    Py_ssize_t next_read_idx;
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
    PyObject *read;             /* read() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x.  The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use when
                                   decoding strings.  The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping object pointers (PyObject *) to Py_ssize_t
 values.  This is used by the pickler for memoization.  Using a custom
 hashtable rather than PyDict allows us to skip a bunch of unnecessary object
 creation.  This makes a huge performance difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000394#define MT_MINSIZE 8
395#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* Store `obj` as the sole item of the cached 1-tuple (self)->arg, creating
   the tuple on first use and releasing whatever item it held before.  The
   reference to `obj` is consumed either way: the tuple takes it over, or it
   is dropped if the tuple cannot be created (in which case (self)->arg
   stays NULL). */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* Drop the cached tuple if something else still references it after the
   call, so the next ARG_TUP() builds a fresh one.  (self)->arg must be
   non-NULL here. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +0200816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
/* Sentinel value for the `n` argument of _Unpickler_ReadFromFile(): ask for
   a whole line instead of a fixed number of bytes. */
static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Victor Stinner121aab42011-09-29 23:40:53 +0200912
Antoine Pitrou04248a82010-10-12 20:51:21 +0000913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
Victor Stinner121aab42011-09-29 23:40:53 +02001040
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001222 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001224 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1235 "%" PY_FORMAT_SIZE_T "d\n", *value);
1236 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001270 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001271 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1285 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 len = strlen(pdata);
1287 }
1288 else {
1289 if (x < 256) {
1290 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001291 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001292 len = 2;
1293 }
1294 else if (x <= 0xffffffffL) {
1295 pdata[0] = LONG_BINPUT;
1296 pdata[1] = (unsigned char)(x & 0xff);
1297 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1298 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1299 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1300 len = 5;
1301 }
1302 else { /* unlikely */
1303 PyErr_SetString(PicklingError,
1304 "memo id too large for LONG_BINPUT");
1305 return -1;
1306 }
1307 }
1308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 goto error;
1311
1312 if (0) {
1313 error:
1314 status = -1;
1315 }
1316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 return status;
1318}
1319
1320static PyObject *
1321whichmodule(PyObject *global, PyObject *global_name)
1322{
1323 Py_ssize_t i, j;
1324 static PyObject *module_str = NULL;
1325 static PyObject *main_str = NULL;
1326 PyObject *module_name;
1327 PyObject *modules_dict;
1328 PyObject *module;
1329 PyObject *obj;
1330
1331 if (module_str == NULL) {
1332 module_str = PyUnicode_InternFromString("__module__");
1333 if (module_str == NULL)
1334 return NULL;
1335 main_str = PyUnicode_InternFromString("__main__");
1336 if (main_str == NULL)
1337 return NULL;
1338 }
1339
1340 module_name = PyObject_GetAttr(global, module_str);
1341
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001342 /* In some rare cases (e.g., bound methods of extension types),
1343 __module__ can be None. If it is so, then search sys.modules
1344 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 if (module_name == Py_None) {
1346 Py_DECREF(module_name);
1347 goto search;
1348 }
1349
1350 if (module_name) {
1351 return module_name;
1352 }
1353 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1354 PyErr_Clear();
1355 else
1356 return NULL;
1357
1358 search:
1359 modules_dict = PySys_GetObject("modules");
1360 if (modules_dict == NULL)
1361 return NULL;
1362
1363 i = 0;
1364 module_name = NULL;
1365 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001366 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001367 continue;
1368
1369 obj = PyObject_GetAttr(module, global_name);
1370 if (obj == NULL) {
1371 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1372 PyErr_Clear();
1373 else
1374 return NULL;
1375 continue;
1376 }
1377
1378 if (obj != global) {
1379 Py_DECREF(obj);
1380 continue;
1381 }
1382
1383 Py_DECREF(obj);
1384 break;
1385 }
1386
1387 /* If no module is found, use __main__. */
1388 if (!j) {
1389 module_name = main_str;
1390 }
1391
1392 Py_INCREF(module_name);
1393 return module_name;
1394}
1395
1396/* fast_save_enter() and fast_save_leave() are guards against recursive
1397 objects when Pickler is used with the "fast mode" (i.e., with object
1398 memoization disabled). If the nesting of a list or dict object exceed
1399 FAST_NESTING_LIMIT, these guards will start keeping an internal
1400 reference to the seen list or dict objects and check whether these objects
1401 are recursive. These are not strictly necessary, since save() has a
1402 hard-coded recursion limit, but they give a nicer error message than the
1403 typical RuntimeError. */
1404static int
1405fast_save_enter(PicklerObject *self, PyObject *obj)
1406{
1407 /* if fast_nesting < 0, we're doing an error exit. */
1408 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1409 PyObject *key = NULL;
1410 if (self->fast_memo == NULL) {
1411 self->fast_memo = PyDict_New();
1412 if (self->fast_memo == NULL) {
1413 self->fast_nesting = -1;
1414 return 0;
1415 }
1416 }
1417 key = PyLong_FromVoidPtr(obj);
1418 if (key == NULL)
1419 return 0;
1420 if (PyDict_GetItem(self->fast_memo, key)) {
1421 Py_DECREF(key);
1422 PyErr_Format(PyExc_ValueError,
1423 "fast mode: can't pickle cyclic objects "
1424 "including object type %.200s at %p",
1425 obj->ob_type->tp_name, obj);
1426 self->fast_nesting = -1;
1427 return 0;
1428 }
1429 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1430 Py_DECREF(key);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 Py_DECREF(key);
1435 }
1436 return 1;
1437}
1438
1439static int
1440fast_save_leave(PicklerObject *self, PyObject *obj)
1441{
1442 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1443 PyObject *key = PyLong_FromVoidPtr(obj);
1444 if (key == NULL)
1445 return 0;
1446 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1447 Py_DECREF(key);
1448 return 0;
1449 }
1450 Py_DECREF(key);
1451 }
1452 return 1;
1453}
1454
1455static int
1456save_none(PicklerObject *self, PyObject *obj)
1457{
1458 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461
1462 return 0;
1463}
1464
1465static int
1466save_bool(PicklerObject *self, PyObject *obj)
1467{
1468 static const char *buf[2] = { FALSE, TRUE };
1469 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1470 int p = (obj == Py_True);
1471
1472 if (self->proto >= 2) {
1473 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001475 return -1;
1476 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479
1480 return 0;
1481}
1482
1483static int
1484save_int(PicklerObject *self, long x)
1485{
1486 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001487 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488
1489 if (!self->bin
1490#if SIZEOF_LONG > 4
1491 || x > 0x7fffffffL || x < -0x80000000L
1492#endif
1493 ) {
1494 /* Text-mode pickle, or long too big to fit in the 4-byte
1495 * signed BININT format: store as a string.
1496 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001497 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1498 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 return -1;
1501 }
1502 else {
1503 /* Binary pickle and x fits in a signed 4-byte int. */
1504 pdata[1] = (unsigned char)(x & 0xff);
1505 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1508
1509 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1510 if (pdata[2] == 0) {
1511 pdata[0] = BININT1;
1512 len = 2;
1513 }
1514 else {
1515 pdata[0] = BININT2;
1516 len = 3;
1517 }
1518 }
1519 else {
1520 pdata[0] = BININT;
1521 len = 5;
1522 }
1523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001524 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 return -1;
1526 }
1527
1528 return 0;
1529}
1530
/* Pickle a Python int object.

   Values that convert to a C long (and, where long is wider than 4 bytes,
   also fit in a signed 32-bit integer) are handed to save_int().  Anything
   else is written, for protocol 2 and higher, as LONG1 or LONG4 followed by
   the value's little-endian signed byte representation, or, for older
   protocols, as LONG followed by the repr of the object, an 'L' and a
   newline.

   Returns 0 on success, -1 with an exception set on failure. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        /* When long is 4 bytes wide the range test below is compiled out and
           this `else` applies directly to the return statement. */
#if SIZEOF_LONG > 4
        if (val <= 0x7fffffffL && val >= -0x80000000L)
#endif
            return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (_Pickler_Write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* One-byte length prefix. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* Four-byte length prefix, least significant byte first.  `size`
               is used as scratch space for the shifting and is then reset to
               the header length. */
            header[0] = LONG4;
            size = (Py_ssize_t) nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The error label is only reachable through goto; the success path
       skips the assignment and falls through to the shared cleanup. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1657
1658static int
1659save_float(PicklerObject *self, PyObject *obj)
1660{
1661 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1662
1663 if (self->bin) {
1664 char pdata[9];
1665 pdata[0] = BINFLOAT;
1666 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1667 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001668 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001669 return -1;
Victor Stinner121aab42011-09-29 23:40:53 +02001670 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001672 int result = -1;
1673 char *buf = NULL;
1674 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001676 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 goto done;
1678
Mark Dickinson3e09f432009-04-17 08:41:23 +00001679 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001680 if (!buf) {
1681 PyErr_NoMemory();
1682 goto done;
1683 }
1684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001688 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 goto done;
1690
1691 result = 0;
1692done:
1693 PyMem_Free(buf);
1694 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001695 }
1696
1697 return 0;
1698}
1699
1700static int
1701save_bytes(PicklerObject *self, PyObject *obj)
1702{
1703 if (self->proto < 3) {
1704 /* Older pickle protocols do not have an opcode for pickling bytes
1705 objects. Therefore, we need to fake the copy protocol (i.e.,
1706 the __reduce__ method) to permit bytes object unpickling. */
1707 PyObject *reduce_value = NULL;
1708 PyObject *bytelist = NULL;
1709 int status;
1710
1711 bytelist = PySequence_List(obj);
1712 if (bytelist == NULL)
1713 return -1;
1714
1715 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1716 bytelist);
1717 if (reduce_value == NULL) {
1718 Py_DECREF(bytelist);
1719 return -1;
1720 }
1721
1722 /* save_reduce() will memoize the object automatically. */
1723 status = save_reduce(self, reduce_value, obj);
1724 Py_DECREF(reduce_value);
1725 Py_DECREF(bytelist);
1726 return status;
1727 }
1728 else {
1729 Py_ssize_t size;
1730 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001731 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001732
1733 size = PyBytes_Size(obj);
1734 if (size < 0)
1735 return -1;
1736
1737 if (size < 256) {
1738 header[0] = SHORT_BINBYTES;
1739 header[1] = (unsigned char)size;
1740 len = 2;
1741 }
1742 else if (size <= 0xffffffffL) {
1743 header[0] = BINBYTES;
1744 header[1] = (unsigned char)(size & 0xff);
1745 header[2] = (unsigned char)((size >> 8) & 0xff);
1746 header[3] = (unsigned char)((size >> 16) & 0xff);
1747 header[4] = (unsigned char)((size >> 24) & 0xff);
1748 len = 5;
1749 }
1750 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001751 PyErr_SetString(PyExc_OverflowError,
1752 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 return -1; /* string too large */
1754 }
1755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001756 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001757 return -1;
1758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001759 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760 return -1;
1761
1762 if (memo_put(self, obj) < 0)
1763 return -1;
1764
1765 return 0;
1766 }
1767}
1768
1769/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1770 backslash and newline characters to \uXXXX escapes. */
1771static PyObject *
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001772raw_unicode_escape(PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001773{
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001774 static const char *hexdigits = "0123456789abcdef";
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001775 PyObject *repr, *result;
1776 char *p;
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001777 Py_ssize_t i, size, expandsize;
1778 void *data;
1779 unsigned int kind;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001780
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001781 if (PyUnicode_READY(obj))
1782 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001783
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001784 size = PyUnicode_GET_LENGTH(obj);
1785 data = PyUnicode_DATA(obj);
1786 kind = PyUnicode_KIND(obj);
1787 if (kind == PyUnicode_4BYTE_KIND)
1788 expandsize = 10;
1789 else
1790 expandsize = 6;
Victor Stinner121aab42011-09-29 23:40:53 +02001791
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001792 if (size > PY_SSIZE_T_MAX / expandsize)
1793 return PyErr_NoMemory();
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001794 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001795 if (repr == NULL)
1796 return NULL;
1797 if (size == 0)
1798 goto done;
1799
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001800 p = PyByteArray_AS_STRING(repr);
1801 for (i=0; i < size; i++) {
1802 Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001803 /* Map 32-bit characters to '\Uxxxxxxxx' */
1804 if (ch >= 0x10000) {
1805 *p++ = '\\';
1806 *p++ = 'U';
1807 *p++ = hexdigits[(ch >> 28) & 0xf];
1808 *p++ = hexdigits[(ch >> 24) & 0xf];
1809 *p++ = hexdigits[(ch >> 20) & 0xf];
1810 *p++ = hexdigits[(ch >> 16) & 0xf];
1811 *p++ = hexdigits[(ch >> 12) & 0xf];
1812 *p++ = hexdigits[(ch >> 8) & 0xf];
1813 *p++ = hexdigits[(ch >> 4) & 0xf];
1814 *p++ = hexdigits[ch & 15];
1815 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001816 /* Map 16-bit characters to '\uxxxx' */
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001817 else if (ch >= 256 || ch == '\\' || ch == '\n') {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001818 *p++ = '\\';
1819 *p++ = 'u';
1820 *p++ = hexdigits[(ch >> 12) & 0xf];
1821 *p++ = hexdigits[(ch >> 8) & 0xf];
1822 *p++ = hexdigits[(ch >> 4) & 0xf];
1823 *p++ = hexdigits[ch & 15];
1824 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001825 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001826 else
1827 *p++ = (char) ch;
1828 }
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001829 size = p - PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001830
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001831done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001832 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001833 Py_DECREF(repr);
1834 return result;
1835}
1836
1837static int
1838save_unicode(PicklerObject *self, PyObject *obj)
1839{
1840 Py_ssize_t size;
1841 PyObject *encoded = NULL;
1842
1843 if (self->bin) {
1844 char pdata[5];
1845
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001846 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001847 if (encoded == NULL)
1848 goto error;
1849
1850 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001851 if (size > 0xffffffffL) {
1852 PyErr_SetString(PyExc_OverflowError,
1853 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001854 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001855 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001856
1857 pdata[0] = BINUNICODE;
1858 pdata[1] = (unsigned char)(size & 0xff);
1859 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1860 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1861 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001863 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001864 goto error;
1865
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001866 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001867 goto error;
1868 }
1869 else {
1870 const char unicode_op = UNICODE;
1871
Victor Stinnerc806fdc2011-09-29 23:50:23 +02001872 encoded = raw_unicode_escape(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001873 if (encoded == NULL)
1874 goto error;
1875
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001876 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001877 goto error;
1878
1879 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001880 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001881 goto error;
1882
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001883 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001884 goto error;
1885 }
1886 if (memo_put(self, obj) < 0)
1887 goto error;
1888
1889 Py_DECREF(encoded);
1890 return 0;
1891
1892 error:
1893 Py_XDECREF(encoded);
1894 return -1;
1895}
1896
1897/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1898static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001899store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001900{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001901 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902
1903 assert(PyTuple_Size(t) == len);
1904
1905 for (i = 0; i < len; i++) {
1906 PyObject *element = PyTuple_GET_ITEM(t, i);
1907
1908 if (element == NULL)
1909 return -1;
1910 if (save(self, element, 0) < 0)
1911 return -1;
1912 }
1913
1914 return 0;
1915}
1916
1917/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1918 * used across protocols to minimize the space needed to pickle them.
1919 * Tuples are also the only builtin immutable type that can be recursive
1920 * (a tuple can be reached from itself), and that requires some subtle
1921 * magic so that it works in all cases. IOW, this is a long routine.
1922 */
1923static int
1924save_tuple(PicklerObject *self, PyObject *obj)
1925{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001926 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001927
1928 const char mark_op = MARK;
1929 const char tuple_op = TUPLE;
1930 const char pop_op = POP;
1931 const char pop_mark_op = POP_MARK;
1932 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1933
1934 if ((len = PyTuple_Size(obj)) < 0)
1935 return -1;
1936
1937 if (len == 0) {
1938 char pdata[2];
1939
1940 if (self->proto) {
1941 pdata[0] = EMPTY_TUPLE;
1942 len = 1;
1943 }
1944 else {
1945 pdata[0] = MARK;
1946 pdata[1] = TUPLE;
1947 len = 2;
1948 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001949 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001950 return -1;
1951 return 0;
1952 }
1953
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001954 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001955 * saving the tuple elements, the tuple must be recursive, in
1956 * which case we'll pop everything we put on the stack, and fetch
1957 * its value from the memo.
1958 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001959 if (len <= 3 && self->proto >= 2) {
1960 /* Use TUPLE{1,2,3} opcodes. */
1961 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001962 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001963
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001964 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001965 /* pop the len elements */
1966 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001967 if (_Pickler_Write(self, &pop_op, 1) < 0)
1968 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001970 if (memo_get(self, obj) < 0)
1971 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 return 0;
1974 }
1975 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001976 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1977 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001978 }
1979 goto memoize;
1980 }
1981
1982 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1983 * Generate MARK e1 e2 ... TUPLE
1984 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001985 if (_Pickler_Write(self, &mark_op, 1) < 0)
1986 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001987
1988 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001989 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001991 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992 /* pop the stack stuff we pushed */
1993 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
1995 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996 }
1997 else {
1998 /* Note that we pop one more than len, to remove
1999 * the MARK too.
2000 */
2001 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002002 if (_Pickler_Write(self, &pop_op, 1) < 0)
2003 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004 }
2005 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002006 if (memo_get(self, obj) < 0)
2007 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002009 return 0;
2010 }
2011 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002012 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014 }
2015
2016 memoize:
2017 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002020 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021}
2022
2023/* iter is an iterator giving items, and we batch up chunks of
2024 * MARK item item ... item APPENDS
2025 * opcode sequences. Calling code should have arranged to first create an
2026 * empty list, or list-like object, for the APPENDS to operate on.
2027 * Returns 0 on success, <0 on error.
2028 */
2029static int
2030batch_list(PicklerObject *self, PyObject *iter)
2031{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002032 PyObject *obj = NULL;
2033 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 int i, n;
2035
2036 const char mark_op = MARK;
2037 const char append_op = APPEND;
2038 const char appends_op = APPENDS;
2039
2040 assert(iter != NULL);
2041
2042 /* XXX: I think this function could be made faster by avoiding the
2043 iterator interface and fetching objects directly from list using
2044 PyList_GET_ITEM.
2045 */
2046
2047 if (self->proto == 0) {
2048 /* APPENDS isn't available; do one at a time. */
2049 for (;;) {
2050 obj = PyIter_Next(iter);
2051 if (obj == NULL) {
2052 if (PyErr_Occurred())
2053 return -1;
2054 break;
2055 }
2056 i = save(self, obj, 0);
2057 Py_DECREF(obj);
2058 if (i < 0)
2059 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002060 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002061 return -1;
2062 }
2063 return 0;
2064 }
2065
2066 /* proto > 0: write in batches of BATCHSIZE. */
2067 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002068 /* Get first item */
2069 firstitem = PyIter_Next(iter);
2070 if (firstitem == NULL) {
2071 if (PyErr_Occurred())
2072 goto error;
2073
2074 /* nothing more to add */
2075 break;
2076 }
2077
2078 /* Try to get a second item */
2079 obj = PyIter_Next(iter);
2080 if (obj == NULL) {
2081 if (PyErr_Occurred())
2082 goto error;
2083
2084 /* Only one item to write */
2085 if (save(self, firstitem, 0) < 0)
2086 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002087 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002088 goto error;
2089 Py_CLEAR(firstitem);
2090 break;
2091 }
2092
2093 /* More than one item to write */
2094
2095 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002096 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002097 goto error;
2098
2099 if (save(self, firstitem, 0) < 0)
2100 goto error;
2101 Py_CLEAR(firstitem);
2102 n = 1;
2103
2104 /* Fetch and save up to BATCHSIZE items */
2105 while (obj) {
2106 if (save(self, obj, 0) < 0)
2107 goto error;
2108 Py_CLEAR(obj);
2109 n += 1;
2110
2111 if (n == BATCHSIZE)
2112 break;
2113
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002114 obj = PyIter_Next(iter);
2115 if (obj == NULL) {
2116 if (PyErr_Occurred())
2117 goto error;
2118 break;
2119 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002120 }
2121
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002122 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002123 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002124
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002125 } while (n == BATCHSIZE);
2126 return 0;
2127
2128 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002129 Py_XDECREF(firstitem);
2130 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002131 return -1;
2132}
2133
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002134/* This is a variant of batch_list() above, specialized for lists (with no
2135 * support for list subclasses). Like batch_list(), we batch up chunks of
2136 * MARK item item ... item APPENDS
2137 * opcode sequences. Calling code should have arranged to first create an
2138 * empty list, or list-like object, for the APPENDS to operate on.
2139 * Returns 0 on success, -1 on error.
2140 *
2141 * This version is considerably faster than batch_list(), if less general.
2142 *
2143 * Note that this only works for protocols > 0.
2144 */
2145static int
2146batch_list_exact(PicklerObject *self, PyObject *obj)
2147{
2148 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002149 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002150
2151 const char append_op = APPEND;
2152 const char appends_op = APPENDS;
2153 const char mark_op = MARK;
2154
2155 assert(obj != NULL);
2156 assert(self->proto > 0);
2157 assert(PyList_CheckExact(obj));
2158
2159 if (PyList_GET_SIZE(obj) == 1) {
2160 item = PyList_GET_ITEM(obj, 0);
2161 if (save(self, item, 0) < 0)
2162 return -1;
2163 if (_Pickler_Write(self, &append_op, 1) < 0)
2164 return -1;
2165 return 0;
2166 }
2167
2168 /* Write in batches of BATCHSIZE. */
2169 total = 0;
2170 do {
2171 this_batch = 0;
2172 if (_Pickler_Write(self, &mark_op, 1) < 0)
2173 return -1;
2174 while (total < PyList_GET_SIZE(obj)) {
2175 item = PyList_GET_ITEM(obj, total);
2176 if (save(self, item, 0) < 0)
2177 return -1;
2178 total++;
2179 if (++this_batch == BATCHSIZE)
2180 break;
2181 }
2182 if (_Pickler_Write(self, &appends_op, 1) < 0)
2183 return -1;
2184
2185 } while (total < PyList_GET_SIZE(obj));
2186
2187 return 0;
2188}
2189
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002190static int
2191save_list(PicklerObject *self, PyObject *obj)
2192{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002193 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002194 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002195 int status = 0;
2196
2197 if (self->fast && !fast_save_enter(self, obj))
2198 goto error;
2199
2200 /* Create an empty list. */
2201 if (self->bin) {
2202 header[0] = EMPTY_LIST;
2203 len = 1;
2204 }
2205 else {
2206 header[0] = MARK;
2207 header[1] = LIST;
2208 len = 2;
2209 }
2210
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002211 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002212 goto error;
2213
2214 /* Get list length, and bow out early if empty. */
2215 if ((len = PyList_Size(obj)) < 0)
2216 goto error;
2217
2218 if (memo_put(self, obj) < 0)
2219 goto error;
2220
2221 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002222 /* Materialize the list elements. */
2223 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002224 if (Py_EnterRecursiveCall(" while pickling an object"))
2225 goto error;
2226 status = batch_list_exact(self, obj);
2227 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002228 } else {
2229 PyObject *iter = PyObject_GetIter(obj);
2230 if (iter == NULL)
2231 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002232
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002233 if (Py_EnterRecursiveCall(" while pickling an object")) {
2234 Py_DECREF(iter);
2235 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002236 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002237 status = batch_list(self, iter);
2238 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002239 Py_DECREF(iter);
2240 }
2241 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002242 if (0) {
2243 error:
2244 status = -1;
2245 }
2246
2247 if (self->fast && !fast_save_leave(self, obj))
2248 status = -1;
2249
2250 return status;
2251}
2252
2253/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2254 * MARK key value ... key value SETITEMS
2255 * opcode sequences. Calling code should have arranged to first create an
2256 * empty dict, or dict-like object, for the SETITEMS to operate on.
2257 * Returns 0 on success, <0 on error.
2258 *
2259 * This is very much like batch_list(). The difference between saving
2260 * elements directly, and picking apart two-tuples, is so long-winded at
2261 * the C level, though, that attempts to combine these routines were too
2262 * ugly to bear.
2263 */
2264static int
2265batch_dict(PicklerObject *self, PyObject *iter)
2266{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002267 PyObject *obj = NULL;
2268 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002269 int i, n;
2270
2271 const char mark_op = MARK;
2272 const char setitem_op = SETITEM;
2273 const char setitems_op = SETITEMS;
2274
2275 assert(iter != NULL);
2276
2277 if (self->proto == 0) {
2278 /* SETITEMS isn't available; do one at a time. */
2279 for (;;) {
2280 obj = PyIter_Next(iter);
2281 if (obj == NULL) {
2282 if (PyErr_Occurred())
2283 return -1;
2284 break;
2285 }
2286 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2287 PyErr_SetString(PyExc_TypeError, "dict items "
2288 "iterator must return 2-tuples");
2289 return -1;
2290 }
2291 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2292 if (i >= 0)
2293 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2294 Py_DECREF(obj);
2295 if (i < 0)
2296 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002297 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002298 return -1;
2299 }
2300 return 0;
2301 }
2302
2303 /* proto > 0: write in batches of BATCHSIZE. */
2304 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002305 /* Get first item */
2306 firstitem = PyIter_Next(iter);
2307 if (firstitem == NULL) {
2308 if (PyErr_Occurred())
2309 goto error;
2310
2311 /* nothing more to add */
2312 break;
2313 }
2314 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2315 PyErr_SetString(PyExc_TypeError, "dict items "
2316 "iterator must return 2-tuples");
2317 goto error;
2318 }
2319
2320 /* Try to get a second item */
2321 obj = PyIter_Next(iter);
2322 if (obj == NULL) {
2323 if (PyErr_Occurred())
2324 goto error;
2325
2326 /* Only one item to write */
2327 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2328 goto error;
2329 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2330 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002331 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002332 goto error;
2333 Py_CLEAR(firstitem);
2334 break;
2335 }
2336
2337 /* More than one item to write */
2338
2339 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002340 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002341 goto error;
2342
2343 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2344 goto error;
2345 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2346 goto error;
2347 Py_CLEAR(firstitem);
2348 n = 1;
2349
2350 /* Fetch and save up to BATCHSIZE items */
2351 while (obj) {
2352 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2353 PyErr_SetString(PyExc_TypeError, "dict items "
2354 "iterator must return 2-tuples");
2355 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002356 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002357 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2358 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2359 goto error;
2360 Py_CLEAR(obj);
2361 n += 1;
2362
2363 if (n == BATCHSIZE)
2364 break;
2365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002366 obj = PyIter_Next(iter);
2367 if (obj == NULL) {
2368 if (PyErr_Occurred())
2369 goto error;
2370 break;
2371 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002372 }
2373
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002374 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002375 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002376
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002377 } while (n == BATCHSIZE);
2378 return 0;
2379
2380 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002381 Py_XDECREF(firstitem);
2382 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002383 return -1;
2384}
2385
Collin Winter5c9b02d2009-05-25 05:43:30 +00002386/* This is a variant of batch_dict() above that specializes for dicts, with no
2387 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2388 * MARK key value ... key value SETITEMS
2389 * opcode sequences. Calling code should have arranged to first create an
2390 * empty dict, or dict-like object, for the SETITEMS to operate on.
2391 * Returns 0 on success, -1 on error.
2392 *
2393 * Note that this currently doesn't work for protocol 0.
2394 */
2395static int
2396batch_dict_exact(PicklerObject *self, PyObject *obj)
2397{
2398 PyObject *key = NULL, *value = NULL;
2399 int i;
2400 Py_ssize_t dict_size, ppos = 0;
2401
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002402 const char mark_op = MARK;
2403 const char setitem_op = SETITEM;
2404 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002405
2406 assert(obj != NULL);
2407 assert(self->proto > 0);
2408
2409 dict_size = PyDict_Size(obj);
2410
2411 /* Special-case len(d) == 1 to save space. */
2412 if (dict_size == 1) {
2413 PyDict_Next(obj, &ppos, &key, &value);
2414 if (save(self, key, 0) < 0)
2415 return -1;
2416 if (save(self, value, 0) < 0)
2417 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002418 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002419 return -1;
2420 return 0;
2421 }
2422
2423 /* Write in batches of BATCHSIZE. */
2424 do {
2425 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002426 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002427 return -1;
2428 while (PyDict_Next(obj, &ppos, &key, &value)) {
2429 if (save(self, key, 0) < 0)
2430 return -1;
2431 if (save(self, value, 0) < 0)
2432 return -1;
2433 if (++i == BATCHSIZE)
2434 break;
2435 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002436 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002437 return -1;
2438 if (PyDict_Size(obj) != dict_size) {
2439 PyErr_Format(
2440 PyExc_RuntimeError,
2441 "dictionary changed size during iteration");
2442 return -1;
2443 }
2444
2445 } while (i == BATCHSIZE);
2446 return 0;
2447}
2448
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002449static int
2450save_dict(PicklerObject *self, PyObject *obj)
2451{
2452 PyObject *items, *iter;
2453 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002454 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002455 int status = 0;
2456
2457 if (self->fast && !fast_save_enter(self, obj))
2458 goto error;
2459
2460 /* Create an empty dict. */
2461 if (self->bin) {
2462 header[0] = EMPTY_DICT;
2463 len = 1;
2464 }
2465 else {
2466 header[0] = MARK;
2467 header[1] = DICT;
2468 len = 2;
2469 }
2470
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002471 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002472 goto error;
2473
2474 /* Get dict size, and bow out early if empty. */
2475 if ((len = PyDict_Size(obj)) < 0)
2476 goto error;
2477
2478 if (memo_put(self, obj) < 0)
2479 goto error;
2480
2481 if (len != 0) {
2482 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002483 if (PyDict_CheckExact(obj) && self->proto > 0) {
2484 /* We can take certain shortcuts if we know this is a dict and
2485 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002486 if (Py_EnterRecursiveCall(" while pickling an object"))
2487 goto error;
2488 status = batch_dict_exact(self, obj);
2489 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002490 } else {
2491 items = PyObject_CallMethod(obj, "items", "()");
2492 if (items == NULL)
2493 goto error;
2494 iter = PyObject_GetIter(items);
2495 Py_DECREF(items);
2496 if (iter == NULL)
2497 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002498 if (Py_EnterRecursiveCall(" while pickling an object")) {
2499 Py_DECREF(iter);
2500 goto error;
2501 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002502 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002503 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002504 Py_DECREF(iter);
2505 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002506 }
2507
2508 if (0) {
2509 error:
2510 status = -1;
2511 }
2512
2513 if (self->fast && !fast_save_leave(self, obj))
2514 status = -1;
2515
2516 return status;
2517}
2518
2519static int
2520save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2521{
2522 static PyObject *name_str = NULL;
2523 PyObject *global_name = NULL;
2524 PyObject *module_name = NULL;
2525 PyObject *module = NULL;
2526 PyObject *cls;
2527 int status = 0;
2528
2529 const char global_op = GLOBAL;
2530
2531 if (name_str == NULL) {
2532 name_str = PyUnicode_InternFromString("__name__");
2533 if (name_str == NULL)
2534 goto error;
2535 }
2536
2537 if (name) {
2538 global_name = name;
2539 Py_INCREF(global_name);
2540 }
2541 else {
2542 global_name = PyObject_GetAttr(obj, name_str);
2543 if (global_name == NULL)
2544 goto error;
2545 }
2546
2547 module_name = whichmodule(obj, global_name);
2548 if (module_name == NULL)
2549 goto error;
2550
2551 /* XXX: Change to use the import C API directly with level=0 to disallow
2552 relative imports.
2553
2554 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2555 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2556 custom import functions (IMHO, this would be a nice security
2557 feature). The import C API would need to be extended to support the
2558 extra parameters of __import__ to fix that. */
2559 module = PyImport_Import(module_name);
2560 if (module == NULL) {
2561 PyErr_Format(PicklingError,
2562 "Can't pickle %R: import of module %R failed",
2563 obj, module_name);
2564 goto error;
2565 }
2566 cls = PyObject_GetAttr(module, global_name);
2567 if (cls == NULL) {
2568 PyErr_Format(PicklingError,
2569 "Can't pickle %R: attribute lookup %S.%S failed",
2570 obj, module_name, global_name);
2571 goto error;
2572 }
2573 if (cls != obj) {
2574 Py_DECREF(cls);
2575 PyErr_Format(PicklingError,
2576 "Can't pickle %R: it's not the same object as %S.%S",
2577 obj, module_name, global_name);
2578 goto error;
2579 }
2580 Py_DECREF(cls);
2581
2582 if (self->proto >= 2) {
2583 /* See whether this is in the extension registry, and if
2584 * so generate an EXT opcode.
2585 */
2586 PyObject *code_obj; /* extension code as Python object */
2587 long code; /* extension code as C value */
2588 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002589 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002590
2591 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2592 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2593 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2594 /* The object is not registered in the extension registry.
2595 This is the most likely code path. */
2596 if (code_obj == NULL)
2597 goto gen_global;
2598
2599 /* XXX: pickle.py doesn't check neither the type, nor the range
2600 of the value returned by the extension_registry. It should for
2601 consistency. */
2602
2603 /* Verify code_obj has the right type and value. */
2604 if (!PyLong_Check(code_obj)) {
2605 PyErr_Format(PicklingError,
2606 "Can't pickle %R: extension code %R isn't an integer",
2607 obj, code_obj);
2608 goto error;
2609 }
2610 code = PyLong_AS_LONG(code_obj);
2611 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002612 if (!PyErr_Occurred())
2613 PyErr_Format(PicklingError,
2614 "Can't pickle %R: extension code %ld is out of range",
2615 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002616 goto error;
2617 }
2618
2619 /* Generate an EXT opcode. */
2620 if (code <= 0xff) {
2621 pdata[0] = EXT1;
2622 pdata[1] = (unsigned char)code;
2623 n = 2;
2624 }
2625 else if (code <= 0xffff) {
2626 pdata[0] = EXT2;
2627 pdata[1] = (unsigned char)(code & 0xff);
2628 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2629 n = 3;
2630 }
2631 else {
2632 pdata[0] = EXT4;
2633 pdata[1] = (unsigned char)(code & 0xff);
2634 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2635 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2636 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2637 n = 5;
2638 }
2639
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002640 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002641 goto error;
2642 }
2643 else {
2644 /* Generate a normal global opcode if we are using a pickle
2645 protocol <= 2, or if the object is not registered in the
2646 extension registry. */
2647 PyObject *encoded;
2648 PyObject *(*unicode_encoder)(PyObject *);
2649
2650 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002651 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002652 goto error;
2653
2654 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2655 the module name and the global name using UTF-8. We do so only when
2656 we are using the pickle protocol newer than version 3. This is to
2657 ensure compatibility with older Unpickler running on Python 2.x. */
2658 if (self->proto >= 3) {
2659 unicode_encoder = PyUnicode_AsUTF8String;
2660 }
2661 else {
2662 unicode_encoder = PyUnicode_AsASCIIString;
2663 }
2664
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002665 /* For protocol < 3 and if the user didn't request against doing so,
2666 we convert module names to the old 2.x module names. */
2667 if (self->fix_imports) {
2668 PyObject *key;
2669 PyObject *item;
2670
2671 key = PyTuple_Pack(2, module_name, global_name);
2672 if (key == NULL)
2673 goto error;
2674 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2675 Py_DECREF(key);
2676 if (item) {
2677 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2678 PyErr_Format(PyExc_RuntimeError,
2679 "_compat_pickle.REVERSE_NAME_MAPPING values "
2680 "should be 2-tuples, not %.200s",
2681 Py_TYPE(item)->tp_name);
2682 goto error;
2683 }
2684 Py_CLEAR(module_name);
2685 Py_CLEAR(global_name);
2686 module_name = PyTuple_GET_ITEM(item, 0);
2687 global_name = PyTuple_GET_ITEM(item, 1);
2688 if (!PyUnicode_Check(module_name) ||
2689 !PyUnicode_Check(global_name)) {
2690 PyErr_Format(PyExc_RuntimeError,
2691 "_compat_pickle.REVERSE_NAME_MAPPING values "
2692 "should be pairs of str, not (%.200s, %.200s)",
2693 Py_TYPE(module_name)->tp_name,
2694 Py_TYPE(global_name)->tp_name);
2695 goto error;
2696 }
2697 Py_INCREF(module_name);
2698 Py_INCREF(global_name);
2699 }
2700 else if (PyErr_Occurred()) {
2701 goto error;
2702 }
2703
2704 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2705 if (item) {
2706 if (!PyUnicode_Check(item)) {
2707 PyErr_Format(PyExc_RuntimeError,
2708 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2709 "should be strings, not %.200s",
2710 Py_TYPE(item)->tp_name);
2711 goto error;
2712 }
2713 Py_CLEAR(module_name);
2714 module_name = item;
2715 Py_INCREF(module_name);
2716 }
2717 else if (PyErr_Occurred()) {
2718 goto error;
2719 }
2720 }
2721
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002722 /* Save the name of the module. */
2723 encoded = unicode_encoder(module_name);
2724 if (encoded == NULL) {
2725 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2726 PyErr_Format(PicklingError,
2727 "can't pickle module identifier '%S' using "
2728 "pickle protocol %i", module_name, self->proto);
2729 goto error;
2730 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002731 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002732 PyBytes_GET_SIZE(encoded)) < 0) {
2733 Py_DECREF(encoded);
2734 goto error;
2735 }
2736 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002737 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002738 goto error;
2739
2740 /* Save the name of the module. */
2741 encoded = unicode_encoder(global_name);
2742 if (encoded == NULL) {
2743 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2744 PyErr_Format(PicklingError,
2745 "can't pickle global identifier '%S' using "
2746 "pickle protocol %i", global_name, self->proto);
2747 goto error;
2748 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002749 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002750 PyBytes_GET_SIZE(encoded)) < 0) {
2751 Py_DECREF(encoded);
2752 goto error;
2753 }
2754 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002755 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002756 goto error;
2757
2758 /* Memoize the object. */
2759 if (memo_put(self, obj) < 0)
2760 goto error;
2761 }
2762
2763 if (0) {
2764 error:
2765 status = -1;
2766 }
2767 Py_XDECREF(module_name);
2768 Py_XDECREF(global_name);
2769 Py_XDECREF(module);
2770
2771 return status;
2772}
2773
2774static int
2775save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2776{
2777 PyObject *pid = NULL;
2778 int status = 0;
2779
2780 const char persid_op = PERSID;
2781 const char binpersid_op = BINPERSID;
2782
2783 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002784 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002785 if (pid == NULL)
2786 return -1;
2787
2788 if (pid != Py_None) {
2789 if (self->bin) {
2790 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002791 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002792 goto error;
2793 }
2794 else {
2795 PyObject *pid_str = NULL;
2796 char *pid_ascii_bytes;
2797 Py_ssize_t size;
2798
2799 pid_str = PyObject_Str(pid);
2800 if (pid_str == NULL)
2801 goto error;
2802
2803 /* XXX: Should it check whether the persistent id only contains
2804 ASCII characters? And what if the pid contains embedded
2805 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002806 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002807 Py_DECREF(pid_str);
2808 if (pid_ascii_bytes == NULL)
2809 goto error;
2810
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002811 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2812 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2813 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002814 goto error;
2815 }
2816 status = 1;
2817 }
2818
2819 if (0) {
2820 error:
2821 status = -1;
2822 }
2823 Py_XDECREF(pid);
2824
2825 return status;
2826}
2827
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002828static PyObject *
2829get_class(PyObject *obj)
2830{
2831 PyObject *cls;
2832 static PyObject *str_class;
2833
2834 if (str_class == NULL) {
2835 str_class = PyUnicode_InternFromString("__class__");
2836 if (str_class == NULL)
2837 return NULL;
2838 }
2839 cls = PyObject_GetAttr(obj, str_class);
2840 if (cls == NULL) {
2841 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2842 PyErr_Clear();
2843 cls = (PyObject *) Py_TYPE(obj);
2844 Py_INCREF(cls);
2845 }
2846 }
2847 return cls;
2848}
2849
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002850/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2851 * appropriate __reduce__ method for obj.
2852 */
2853static int
2854save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2855{
2856 PyObject *callable;
2857 PyObject *argtup;
2858 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002859 PyObject *listitems = Py_None;
2860 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002861 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002862
2863 int use_newobj = self->proto >= 2;
2864
2865 const char reduce_op = REDUCE;
2866 const char build_op = BUILD;
2867 const char newobj_op = NEWOBJ;
2868
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002869 size = PyTuple_Size(args);
2870 if (size < 2 || size > 5) {
2871 PyErr_SetString(PicklingError, "tuple returned by "
2872 "__reduce__ must contain 2 through 5 elements");
2873 return -1;
2874 }
2875
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002876 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2877 &callable, &argtup, &state, &listitems, &dictitems))
2878 return -1;
2879
2880 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002881 PyErr_SetString(PicklingError, "first item of the tuple "
2882 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002883 return -1;
2884 }
2885 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002886 PyErr_SetString(PicklingError, "second item of the tuple "
2887 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002888 return -1;
2889 }
2890
2891 if (state == Py_None)
2892 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002893
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002894 if (listitems == Py_None)
2895 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002896 else if (!PyIter_Check(listitems)) {
2897 PyErr_Format(PicklingError, "Fourth element of tuple"
2898 "returned by __reduce__ must be an iterator, not %s",
2899 Py_TYPE(listitems)->tp_name);
2900 return -1;
2901 }
2902
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002903 if (dictitems == Py_None)
2904 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002905 else if (!PyIter_Check(dictitems)) {
2906 PyErr_Format(PicklingError, "Fifth element of tuple"
2907 "returned by __reduce__ must be an iterator, not %s",
2908 Py_TYPE(dictitems)->tp_name);
2909 return -1;
2910 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002911
2912 /* Protocol 2 special case: if callable's name is __newobj__, use
2913 NEWOBJ. */
2914 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002915 static PyObject *newobj_str = NULL, *name_str = NULL;
2916 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002917
2918 if (newobj_str == NULL) {
2919 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002920 name_str = PyUnicode_InternFromString("__name__");
2921 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002922 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002923 }
2924
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002925 name = PyObject_GetAttr(callable, name_str);
2926 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002927 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2928 PyErr_Clear();
2929 else
2930 return -1;
2931 use_newobj = 0;
2932 }
2933 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002934 use_newobj = PyUnicode_Check(name) &&
2935 PyUnicode_Compare(name, newobj_str) == 0;
2936 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002937 }
2938 }
2939 if (use_newobj) {
2940 PyObject *cls;
2941 PyObject *newargtup;
2942 PyObject *obj_class;
2943 int p;
2944
2945 /* Sanity checks. */
2946 if (Py_SIZE(argtup) < 1) {
2947 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2948 return -1;
2949 }
2950
2951 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002952 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002953 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002954 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002955 return -1;
2956 }
2957
2958 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002959 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960 p = obj_class != cls; /* true iff a problem */
2961 Py_DECREF(obj_class);
2962 if (p) {
2963 PyErr_SetString(PicklingError, "args[0] from "
2964 "__newobj__ args has the wrong class");
2965 return -1;
2966 }
2967 }
2968 /* XXX: These calls save() are prone to infinite recursion. Imagine
2969 what happen if the value returned by the __reduce__() method of
2970 some extension type contains another object of the same type. Ouch!
2971
2972 Here is a quick example, that I ran into, to illustrate what I
2973 mean:
2974
2975 >>> import pickle, copyreg
2976 >>> copyreg.dispatch_table.pop(complex)
2977 >>> pickle.dumps(1+2j)
2978 Traceback (most recent call last):
2979 ...
2980 RuntimeError: maximum recursion depth exceeded
2981
2982 Removing the complex class from copyreg.dispatch_table made the
2983 __reduce_ex__() method emit another complex object:
2984
2985 >>> (1+1j).__reduce_ex__(2)
2986 (<function __newobj__ at 0xb7b71c3c>,
2987 (<class 'complex'>, (1+1j)), None, None, None)
2988
2989 Thus when save() was called on newargstup (the 2nd item) recursion
2990 ensued. Of course, the bug was in the complex class which had a
2991 broken __getnewargs__() that emitted another complex object. But,
2992 the point, here, is it is quite easy to end up with a broken reduce
2993 function. */
2994
2995 /* Save the class and its __new__ arguments. */
2996 if (save(self, cls, 0) < 0)
2997 return -1;
2998
2999 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3000 if (newargtup == NULL)
3001 return -1;
3002
3003 p = save(self, newargtup, 0);
3004 Py_DECREF(newargtup);
3005 if (p < 0)
3006 return -1;
3007
3008 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003009 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003010 return -1;
3011 }
3012 else { /* Not using NEWOBJ. */
3013 if (save(self, callable, 0) < 0 ||
3014 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003015 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003016 return -1;
3017 }
3018
3019 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3020 the caller do not want to memoize the object. Not particularly useful,
3021 but that is to mimic the behavior save_reduce() in pickle.py when
3022 obj is None. */
3023 if (obj && memo_put(self, obj) < 0)
3024 return -1;
3025
3026 if (listitems && batch_list(self, listitems) < 0)
3027 return -1;
3028
3029 if (dictitems && batch_dict(self, dictitems) < 0)
3030 return -1;
3031
3032 if (state) {
Victor Stinner121aab42011-09-29 23:40:53 +02003033 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003034 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003035 return -1;
3036 }
3037
3038 return 0;
3039}
3040
3041static int
3042save(PicklerObject *self, PyObject *obj, int pers_save)
3043{
3044 PyTypeObject *type;
3045 PyObject *reduce_func = NULL;
3046 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003047 int status = 0;
3048
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003049 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003050 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003051
3052 /* The extra pers_save argument is necessary to avoid calling save_pers()
3053 on its returned object. */
3054 if (!pers_save && self->pers_func) {
3055 /* save_pers() returns:
3056 -1 to signal an error;
3057 0 if it did nothing successfully;
3058 1 if a persistent id was saved.
3059 */
3060 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3061 goto done;
3062 }
3063
3064 type = Py_TYPE(obj);
3065
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003066 /* The old cPickle had an optimization that used switch-case statement
3067 dispatching on the first letter of the type name. This has was removed
3068 since benchmarks shown that this optimization was actually slowing
3069 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003070
3071 /* Atom types; these aren't memoized, so don't check the memo. */
3072
3073 if (obj == Py_None) {
3074 status = save_none(self, obj);
3075 goto done;
3076 }
3077 else if (obj == Py_False || obj == Py_True) {
3078 status = save_bool(self, obj);
3079 goto done;
3080 }
3081 else if (type == &PyLong_Type) {
3082 status = save_long(self, obj);
3083 goto done;
3084 }
3085 else if (type == &PyFloat_Type) {
3086 status = save_float(self, obj);
3087 goto done;
3088 }
3089
3090 /* Check the memo to see if it has the object. If so, generate
3091 a GET (or BINGET) opcode, instead of pickling the object
3092 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003093 if (PyMemoTable_Get(self->memo, obj)) {
3094 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003095 goto error;
3096 goto done;
3097 }
3098
3099 if (type == &PyBytes_Type) {
3100 status = save_bytes(self, obj);
3101 goto done;
3102 }
3103 else if (type == &PyUnicode_Type) {
3104 status = save_unicode(self, obj);
3105 goto done;
3106 }
3107 else if (type == &PyDict_Type) {
3108 status = save_dict(self, obj);
3109 goto done;
3110 }
3111 else if (type == &PyList_Type) {
3112 status = save_list(self, obj);
3113 goto done;
3114 }
3115 else if (type == &PyTuple_Type) {
3116 status = save_tuple(self, obj);
3117 goto done;
3118 }
3119 else if (type == &PyType_Type) {
3120 status = save_global(self, obj, NULL);
3121 goto done;
3122 }
3123 else if (type == &PyFunction_Type) {
3124 status = save_global(self, obj, NULL);
3125 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3126 /* fall back to reduce */
3127 PyErr_Clear();
3128 }
3129 else {
3130 goto done;
3131 }
3132 }
3133 else if (type == &PyCFunction_Type) {
3134 status = save_global(self, obj, NULL);
3135 goto done;
3136 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003137
3138 /* XXX: This part needs some unit tests. */
3139
3140 /* Get a reduction callable, and call it. This may come from
3141 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3142 * or the object's __reduce__ method.
3143 */
3144 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3145 if (reduce_func != NULL) {
3146 /* Here, the reference count of the reduce_func object returned by
3147 PyDict_GetItem needs to be increased to be consistent with the one
3148 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3149 reduce_func at the end of the save() routine.
3150 */
3151 Py_INCREF(reduce_func);
3152 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003153 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003154 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003155 else if (PyType_IsSubtype(type, &PyType_Type)) {
3156 status = save_global(self, obj, NULL);
3157 goto done;
3158 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003159 else {
3160 static PyObject *reduce_str = NULL;
3161 static PyObject *reduce_ex_str = NULL;
3162
3163 /* Cache the name of the reduce methods. */
3164 if (reduce_str == NULL) {
3165 reduce_str = PyUnicode_InternFromString("__reduce__");
3166 if (reduce_str == NULL)
3167 goto error;
3168 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3169 if (reduce_ex_str == NULL)
3170 goto error;
3171 }
3172
3173 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3174 automatically defined as __reduce__. While this is convenient, this
3175 make it impossible to know which method was actually called. Of
3176 course, this is not a big deal. But still, it would be nice to let
3177 the user know which method was called when something go
3178 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3179 don't actually have to check for a __reduce__ method. */
3180
3181 /* Check for a __reduce_ex__ method. */
3182 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3183 if (reduce_func != NULL) {
3184 PyObject *proto;
3185 proto = PyLong_FromLong(self->proto);
3186 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003187 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003188 }
3189 }
3190 else {
3191 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3192 PyErr_Clear();
3193 else
3194 goto error;
3195 /* Check for a __reduce__ method. */
3196 reduce_func = PyObject_GetAttr(obj, reduce_str);
3197 if (reduce_func != NULL) {
3198 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3199 }
3200 else {
3201 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3202 type->tp_name, obj);
3203 goto error;
3204 }
3205 }
3206 }
3207
3208 if (reduce_value == NULL)
3209 goto error;
3210
3211 if (PyUnicode_Check(reduce_value)) {
3212 status = save_global(self, obj, reduce_value);
3213 goto done;
3214 }
3215
3216 if (!PyTuple_Check(reduce_value)) {
3217 PyErr_SetString(PicklingError,
3218 "__reduce__ must return a string or tuple");
3219 goto error;
3220 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003221
3222 status = save_reduce(self, reduce_value, obj);
3223
3224 if (0) {
3225 error:
3226 status = -1;
3227 }
3228 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003229 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003230 Py_XDECREF(reduce_func);
3231 Py_XDECREF(reduce_value);
3232
3233 return status;
3234}
3235
3236static int
3237dump(PicklerObject *self, PyObject *obj)
3238{
3239 const char stop_op = STOP;
3240
3241 if (self->proto >= 2) {
3242 char header[2];
3243
3244 header[0] = PROTO;
3245 assert(self->proto >= 0 && self->proto < 256);
3246 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003247 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003248 return -1;
3249 }
3250
3251 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003252 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003253 return -1;
3254
3255 return 0;
3256}
3257
3258PyDoc_STRVAR(Pickler_clear_memo_doc,
3259"clear_memo() -> None. Clears the pickler's \"memo\"."
3260"\n"
3261"The memo is the data structure that remembers which objects the\n"
3262"pickler has already seen, so that shared or recursive objects are\n"
3263"pickled by reference and not by value. This method is useful when\n"
3264"re-using picklers.");
3265
3266static PyObject *
3267Pickler_clear_memo(PicklerObject *self)
3268{
3269 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003270 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003271
3272 Py_RETURN_NONE;
3273}
3274
3275PyDoc_STRVAR(Pickler_dump_doc,
3276"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3277
3278static PyObject *
3279Pickler_dump(PicklerObject *self, PyObject *args)
3280{
3281 PyObject *obj;
3282
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003283 /* Check whether the Pickler was initialized correctly (issue3664).
3284 Developers often forget to call __init__() in their subclasses, which
3285 would trigger a segfault without this check. */
3286 if (self->write == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02003287 PyErr_Format(PicklingError,
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003288 "Pickler.__init__() was not called by %s.__init__()",
3289 Py_TYPE(self)->tp_name);
3290 return NULL;
3291 }
3292
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003293 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3294 return NULL;
3295
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003296 if (_Pickler_ClearBuffer(self) < 0)
3297 return NULL;
3298
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003299 if (dump(self, obj) < 0)
3300 return NULL;
3301
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003302 if (_Pickler_FlushToFile(self) < 0)
3303 return NULL;
3304
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003305 Py_RETURN_NONE;
3306}
3307
3308static struct PyMethodDef Pickler_methods[] = {
3309 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3310 Pickler_dump_doc},
3311 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3312 Pickler_clear_memo_doc},
3313 {NULL, NULL} /* sentinel */
3314};
3315
3316static void
3317Pickler_dealloc(PicklerObject *self)
3318{
3319 PyObject_GC_UnTrack(self);
3320
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003321 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003322 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003323 Py_XDECREF(self->pers_func);
3324 Py_XDECREF(self->arg);
3325 Py_XDECREF(self->fast_memo);
3326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003327 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003328
3329 Py_TYPE(self)->tp_free((PyObject *)self);
3330}
3331
3332static int
3333Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3334{
3335 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003336 Py_VISIT(self->pers_func);
3337 Py_VISIT(self->arg);
3338 Py_VISIT(self->fast_memo);
3339 return 0;
3340}
3341
3342static int
3343Pickler_clear(PicklerObject *self)
3344{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003345 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003346 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003347 Py_CLEAR(self->pers_func);
3348 Py_CLEAR(self->arg);
3349 Py_CLEAR(self->fast_memo);
3350
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003351 if (self->memo != NULL) {
3352 PyMemoTable *memo = self->memo;
3353 self->memo = NULL;
3354 PyMemoTable_Del(memo);
3355 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003356 return 0;
3357}
3358
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003359
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003360PyDoc_STRVAR(Pickler_doc,
3361"Pickler(file, protocol=None)"
3362"\n"
3363"This takes a binary file for writing a pickle data stream.\n"
3364"\n"
3365"The optional protocol argument tells the pickler to use the\n"
3366"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3367"protocol is 3; a backward-incompatible protocol designed for\n"
3368"Python 3.0.\n"
3369"\n"
3370"Specifying a negative protocol version selects the highest\n"
3371"protocol version supported. The higher the protocol used, the\n"
3372"more recent the version of Python needed to read the pickle\n"
3373"produced.\n"
3374"\n"
3375"The file argument must have a write() method that accepts a single\n"
3376"bytes argument. It can thus be a file object opened for binary\n"
3377"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003378"meets this interface.\n"
3379"\n"
3380"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3381"map the new Python 3.x names to the old module names used in Python\n"
3382"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003383
3384static int
3385Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3386{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003387 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003388 PyObject *file;
3389 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003390 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003391
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003392 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003393 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003394 return -1;
3395
3396 /* In case of multiple __init__() calls, clear previous content. */
3397 if (self->write != NULL)
3398 (void)Pickler_clear(self);
3399
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003400 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3401 return -1;
3402
3403 if (_Pickler_SetOutputStream(self, file) < 0)
3404 return -1;
3405
3406 /* memo and output_buffer may have already been created in _Pickler_New */
3407 if (self->memo == NULL) {
3408 self->memo = PyMemoTable_New();
3409 if (self->memo == NULL)
3410 return -1;
3411 }
3412 self->output_len = 0;
3413 if (self->output_buffer == NULL) {
3414 self->max_output_len = WRITE_BUF_SIZE;
3415 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3416 self->max_output_len);
3417 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003418 return -1;
3419 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003420
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003421 self->arg = NULL;
3422 self->fast = 0;
3423 self->fast_nesting = 0;
3424 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003425 self->pers_func = NULL;
3426 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3427 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3428 "persistent_id");
3429 if (self->pers_func == NULL)
3430 return -1;
3431 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003432 return 0;
3433}
3434
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003435/* Define a proxy object for the Pickler's internal memo object. This is to
3436 * avoid breaking code like:
3437 * pickler.memo.clear()
3438 * and
3439 * pickler.memo = saved_memo
3440 * Is this a good idea? Not really, but we don't want to break code that uses
3441 * it. Note that we don't implement the entire mapping API here. This is
3442 * intentional, as these should be treated as black-box implementation details.
3443 */
3444
3445typedef struct {
3446 PyObject_HEAD
3447 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3448} PicklerMemoProxyObject;
3449
3450PyDoc_STRVAR(pmp_clear_doc,
3451"memo.clear() -> None. Remove all items from memo.");
3452
3453static PyObject *
3454pmp_clear(PicklerMemoProxyObject *self)
3455{
3456 if (self->pickler->memo)
3457 PyMemoTable_Clear(self->pickler->memo);
3458 Py_RETURN_NONE;
3459}
3460
3461PyDoc_STRVAR(pmp_copy_doc,
3462"memo.copy() -> new_memo. Copy the memo to a new object.");
3463
3464static PyObject *
3465pmp_copy(PicklerMemoProxyObject *self)
3466{
3467 Py_ssize_t i;
3468 PyMemoTable *memo;
3469 PyObject *new_memo = PyDict_New();
3470 if (new_memo == NULL)
3471 return NULL;
3472
3473 memo = self->pickler->memo;
3474 for (i = 0; i < memo->mt_allocated; ++i) {
3475 PyMemoEntry entry = memo->mt_table[i];
3476 if (entry.me_key != NULL) {
3477 int status;
3478 PyObject *key, *value;
3479
3480 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003481 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003482
3483 if (key == NULL || value == NULL) {
3484 Py_XDECREF(key);
3485 Py_XDECREF(value);
3486 goto error;
3487 }
3488 status = PyDict_SetItem(new_memo, key, value);
3489 Py_DECREF(key);
3490 Py_DECREF(value);
3491 if (status < 0)
3492 goto error;
3493 }
3494 }
3495 return new_memo;
3496
3497 error:
3498 Py_XDECREF(new_memo);
3499 return NULL;
3500}
3501
3502PyDoc_STRVAR(pmp_reduce_doc,
3503"memo.__reduce__(). Pickling support.");
3504
3505static PyObject *
3506pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3507{
3508 PyObject *reduce_value, *dict_args;
3509 PyObject *contents = pmp_copy(self);
3510 if (contents == NULL)
3511 return NULL;
3512
3513 reduce_value = PyTuple_New(2);
3514 if (reduce_value == NULL) {
3515 Py_DECREF(contents);
3516 return NULL;
3517 }
3518 dict_args = PyTuple_New(1);
3519 if (dict_args == NULL) {
3520 Py_DECREF(contents);
3521 Py_DECREF(reduce_value);
3522 return NULL;
3523 }
3524 PyTuple_SET_ITEM(dict_args, 0, contents);
3525 Py_INCREF((PyObject *)&PyDict_Type);
3526 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3527 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3528 return reduce_value;
3529}
3530
3531static PyMethodDef picklerproxy_methods[] = {
3532 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3533 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3534 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3535 {NULL, NULL} /* sentinel */
3536};
3537
3538static void
3539PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3540{
3541 PyObject_GC_UnTrack(self);
3542 Py_XDECREF(self->pickler);
3543 PyObject_GC_Del((PyObject *)self);
3544}
3545
3546static int
3547PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3548 visitproc visit, void *arg)
3549{
3550 Py_VISIT(self->pickler);
3551 return 0;
3552}
3553
3554static int
3555PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3556{
3557 Py_CLEAR(self->pickler);
3558 return 0;
3559}
3560
3561static PyTypeObject PicklerMemoProxyType = {
3562 PyVarObject_HEAD_INIT(NULL, 0)
3563 "_pickle.PicklerMemoProxy", /*tp_name*/
3564 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3565 0,
3566 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3567 0, /* tp_print */
3568 0, /* tp_getattr */
3569 0, /* tp_setattr */
3570 0, /* tp_compare */
3571 0, /* tp_repr */
3572 0, /* tp_as_number */
3573 0, /* tp_as_sequence */
3574 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003575 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003576 0, /* tp_call */
3577 0, /* tp_str */
3578 PyObject_GenericGetAttr, /* tp_getattro */
3579 PyObject_GenericSetAttr, /* tp_setattro */
3580 0, /* tp_as_buffer */
3581 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3582 0, /* tp_doc */
3583 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3584 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3585 0, /* tp_richcompare */
3586 0, /* tp_weaklistoffset */
3587 0, /* tp_iter */
3588 0, /* tp_iternext */
3589 picklerproxy_methods, /* tp_methods */
3590};
3591
3592static PyObject *
3593PicklerMemoProxy_New(PicklerObject *pickler)
3594{
3595 PicklerMemoProxyObject *self;
3596
3597 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3598 if (self == NULL)
3599 return NULL;
3600 Py_INCREF(pickler);
3601 self->pickler = pickler;
3602 PyObject_GC_Track(self);
3603 return (PyObject *)self;
3604}
3605
3606/*****************************************************************************/
3607
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003608static PyObject *
3609Pickler_get_memo(PicklerObject *self)
3610{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003611 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003612}
3613
3614static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003615Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003616{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003617 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003618
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003619 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003620 PyErr_SetString(PyExc_TypeError,
3621 "attribute deletion is not supported");
3622 return -1;
3623 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003624
3625 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3626 PicklerObject *pickler =
3627 ((PicklerMemoProxyObject *)obj)->pickler;
3628
3629 new_memo = PyMemoTable_Copy(pickler->memo);
3630 if (new_memo == NULL)
3631 return -1;
3632 }
3633 else if (PyDict_Check(obj)) {
3634 Py_ssize_t i = 0;
3635 PyObject *key, *value;
3636
3637 new_memo = PyMemoTable_New();
3638 if (new_memo == NULL)
3639 return -1;
3640
3641 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003642 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003643 PyObject *memo_obj;
3644
3645 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3646 PyErr_SetString(PyExc_TypeError,
3647 "'memo' values must be 2-item tuples");
3648 goto error;
3649 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003650 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003651 if (memo_id == -1 && PyErr_Occurred())
3652 goto error;
3653 memo_obj = PyTuple_GET_ITEM(value, 1);
3654 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3655 goto error;
3656 }
3657 }
3658 else {
3659 PyErr_Format(PyExc_TypeError,
3660 "'memo' attribute must be an PicklerMemoProxy object"
3661 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003662 return -1;
3663 }
3664
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003665 PyMemoTable_Del(self->memo);
3666 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003667
3668 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003669
3670 error:
3671 if (new_memo)
3672 PyMemoTable_Del(new_memo);
3673 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003674}
3675
3676static PyObject *
3677Pickler_get_persid(PicklerObject *self)
3678{
3679 if (self->pers_func == NULL)
3680 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3681 else
3682 Py_INCREF(self->pers_func);
3683 return self->pers_func;
3684}
3685
3686static int
3687Pickler_set_persid(PicklerObject *self, PyObject *value)
3688{
3689 PyObject *tmp;
3690
3691 if (value == NULL) {
3692 PyErr_SetString(PyExc_TypeError,
3693 "attribute deletion is not supported");
3694 return -1;
3695 }
3696 if (!PyCallable_Check(value)) {
3697 PyErr_SetString(PyExc_TypeError,
3698 "persistent_id must be a callable taking one argument");
3699 return -1;
3700 }
3701
3702 tmp = self->pers_func;
3703 Py_INCREF(value);
3704 self->pers_func = value;
3705 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3706
3707 return 0;
3708}
3709
3710static PyMemberDef Pickler_members[] = {
3711 {"bin", T_INT, offsetof(PicklerObject, bin)},
3712 {"fast", T_INT, offsetof(PicklerObject, fast)},
3713 {NULL}
3714};
3715
3716static PyGetSetDef Pickler_getsets[] = {
3717 {"memo", (getter)Pickler_get_memo,
3718 (setter)Pickler_set_memo},
3719 {"persistent_id", (getter)Pickler_get_persid,
3720 (setter)Pickler_set_persid},
3721 {NULL}
3722};
3723
3724static PyTypeObject Pickler_Type = {
3725 PyVarObject_HEAD_INIT(NULL, 0)
3726 "_pickle.Pickler" , /*tp_name*/
3727 sizeof(PicklerObject), /*tp_basicsize*/
3728 0, /*tp_itemsize*/
3729 (destructor)Pickler_dealloc, /*tp_dealloc*/
3730 0, /*tp_print*/
3731 0, /*tp_getattr*/
3732 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003733 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003734 0, /*tp_repr*/
3735 0, /*tp_as_number*/
3736 0, /*tp_as_sequence*/
3737 0, /*tp_as_mapping*/
3738 0, /*tp_hash*/
3739 0, /*tp_call*/
3740 0, /*tp_str*/
3741 0, /*tp_getattro*/
3742 0, /*tp_setattro*/
3743 0, /*tp_as_buffer*/
3744 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3745 Pickler_doc, /*tp_doc*/
3746 (traverseproc)Pickler_traverse, /*tp_traverse*/
3747 (inquiry)Pickler_clear, /*tp_clear*/
3748 0, /*tp_richcompare*/
3749 0, /*tp_weaklistoffset*/
3750 0, /*tp_iter*/
3751 0, /*tp_iternext*/
3752 Pickler_methods, /*tp_methods*/
3753 Pickler_members, /*tp_members*/
3754 Pickler_getsets, /*tp_getset*/
3755 0, /*tp_base*/
3756 0, /*tp_dict*/
3757 0, /*tp_descr_get*/
3758 0, /*tp_descr_set*/
3759 0, /*tp_dictoffset*/
3760 (initproc)Pickler_init, /*tp_init*/
3761 PyType_GenericAlloc, /*tp_alloc*/
3762 PyType_GenericNew, /*tp_new*/
3763 PyObject_GC_Del, /*tp_free*/
3764 0, /*tp_is_gc*/
3765};
3766
Victor Stinner121aab42011-09-29 23:40:53 +02003767/* Temporary helper for calling self.find_class().
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003768
3769 XXX: It would be nice to able to avoid Python function call overhead, by
3770 using directly the C version of find_class(), when find_class() is not
3771 overridden by a subclass. Although, this could become rather hackish. A
3772 simpler optimization would be to call the C function when self is not a
3773 subclass instance. */
3774static PyObject *
3775find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3776{
3777 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3778 module_name, global_name);
3779}
3780
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003781static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003782marker(UnpicklerObject *self)
3783{
3784 if (self->num_marks < 1) {
3785 PyErr_SetString(UnpicklingError, "could not find MARK");
3786 return -1;
3787 }
3788
3789 return self->marks[--self->num_marks];
3790}
3791
3792static int
3793load_none(UnpicklerObject *self)
3794{
3795 PDATA_APPEND(self->stack, Py_None, -1);
3796 return 0;
3797}
3798
3799static int
3800bad_readline(void)
3801{
3802 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3803 return -1;
3804}
3805
3806static int
3807load_int(UnpicklerObject *self)
3808{
3809 PyObject *value;
3810 char *endptr, *s;
3811 Py_ssize_t len;
3812 long x;
3813
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003814 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003815 return -1;
3816 if (len < 2)
3817 return bad_readline();
3818
3819 errno = 0;
Victor Stinner121aab42011-09-29 23:40:53 +02003820 /* XXX: Should the base argument of strtol() be explicitly set to 10?
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003821 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003822 x = strtol(s, &endptr, 0);
3823
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003824 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003825 /* Hm, maybe we've got something long. Let's try reading
3826 * it as a Python long object. */
3827 errno = 0;
3828 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003829 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003830 if (value == NULL) {
3831 PyErr_SetString(PyExc_ValueError,
3832 "could not convert string to int");
3833 return -1;
3834 }
3835 }
3836 else {
3837 if (len == 3 && (x == 0 || x == 1)) {
3838 if ((value = PyBool_FromLong(x)) == NULL)
3839 return -1;
3840 }
3841 else {
3842 if ((value = PyLong_FromLong(x)) == NULL)
3843 return -1;
3844 }
3845 }
3846
3847 PDATA_PUSH(self->stack, value, -1);
3848 return 0;
3849}
3850
3851static int
3852load_bool(UnpicklerObject *self, PyObject *boolean)
3853{
3854 assert(boolean == Py_True || boolean == Py_False);
3855 PDATA_APPEND(self->stack, boolean, -1);
3856 return 0;
3857}
3858
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003859/* s contains x bytes of an unsigned little-endian integer. Return its value
3860 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3861 */
3862static Py_ssize_t
3863calc_binsize(char *bytes, int size)
3864{
3865 unsigned char *s = (unsigned char *)bytes;
3866 size_t x = 0;
3867
3868 assert(size == 4);
3869
3870 x = (size_t) s[0];
3871 x |= (size_t) s[1] << 8;
3872 x |= (size_t) s[2] << 16;
3873 x |= (size_t) s[3] << 24;
3874
3875 if (x > PY_SSIZE_T_MAX)
3876 return -1;
3877 else
3878 return (Py_ssize_t) x;
3879}
3880
/* 'bytes' holds 'size' bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2 the integer is unsigned, but
 * when size is 4 it is signed.  This is an historical source of
 * cross-platform bugs.
 *
 * The value is assembled in an unsigned long so that no shift ever moves a
 * bit into (or past) the sign bit of a signed type, which is undefined
 * behaviour in C when long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  Unsigned negation
     * wraps, so this sets every bit from bit 31 upwards when bit 31 is set.
     */
    if (sizeof(long) > 4 && size == 4) {
        x |= -(x & (1UL << 31));
    }

    return (long)x;
}
3907
3908static int
3909load_binintx(UnpicklerObject *self, char *s, int size)
3910{
3911 PyObject *value;
3912 long x;
3913
3914 x = calc_binint(s, size);
3915
3916 if ((value = PyLong_FromLong(x)) == NULL)
3917 return -1;
3918
3919 PDATA_PUSH(self->stack, value, -1);
3920 return 0;
3921}
3922
3923static int
3924load_binint(UnpicklerObject *self)
3925{
3926 char *s;
3927
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003928 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003929 return -1;
3930
3931 return load_binintx(self, s, 4);
3932}
3933
3934static int
3935load_binint1(UnpicklerObject *self)
3936{
3937 char *s;
3938
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003939 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003940 return -1;
3941
3942 return load_binintx(self, s, 1);
3943}
3944
3945static int
3946load_binint2(UnpicklerObject *self)
3947{
3948 char *s;
3949
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003950 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003951 return -1;
3952
3953 return load_binintx(self, s, 2);
3954}
3955
3956static int
3957load_long(UnpicklerObject *self)
3958{
3959 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003960 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003961 Py_ssize_t len;
3962
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003963 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003964 return -1;
3965 if (len < 2)
3966 return bad_readline();
3967
Mark Dickinson8dd05142009-01-20 20:43:58 +00003968 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3969 the 'L' before calling PyLong_FromString. In order to maintain
3970 compatibility with Python 3.0.0, we don't actually *require*
3971 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003972 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003973 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003974 /* XXX: Should the base argument explicitly set to 10? */
3975 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003976 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003977 return -1;
3978
3979 PDATA_PUSH(self->stack, value, -1);
3980 return 0;
3981}
3982
3983/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3984 * data following.
3985 */
3986static int
3987load_counted_long(UnpicklerObject *self, int size)
3988{
3989 PyObject *value;
3990 char *nbytes;
3991 char *pdata;
3992
3993 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003994 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003995 return -1;
3996
3997 size = calc_binint(nbytes, size);
3998 if (size < 0) {
3999 /* Corrupt or hostile pickle -- we never write one like this */
4000 PyErr_SetString(UnpicklingError,
4001 "LONG pickle has negative byte count");
4002 return -1;
4003 }
4004
4005 if (size == 0)
4006 value = PyLong_FromLong(0L);
4007 else {
4008 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004009 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004010 return -1;
4011 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4012 1 /* little endian */ , 1 /* signed */ );
4013 }
4014 if (value == NULL)
4015 return -1;
4016 PDATA_PUSH(self->stack, value, -1);
4017 return 0;
4018}
4019
4020static int
4021load_float(UnpicklerObject *self)
4022{
4023 PyObject *value;
4024 char *endptr, *s;
4025 Py_ssize_t len;
4026 double d;
4027
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004028 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004029 return -1;
4030 if (len < 2)
4031 return bad_readline();
4032
4033 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004034 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4035 if (d == -1.0 && PyErr_Occurred())
4036 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004037 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004038 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4039 return -1;
4040 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004041 value = PyFloat_FromDouble(d);
4042 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004043 return -1;
4044
4045 PDATA_PUSH(self->stack, value, -1);
4046 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004047}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004048
4049static int
4050load_binfloat(UnpicklerObject *self)
4051{
4052 PyObject *value;
4053 double x;
4054 char *s;
4055
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004056 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004057 return -1;
4058
4059 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4060 if (x == -1.0 && PyErr_Occurred())
4061 return -1;
4062
4063 if ((value = PyFloat_FromDouble(x)) == NULL)
4064 return -1;
4065
4066 PDATA_PUSH(self->stack, value, -1);
4067 return 0;
4068}
4069
4070static int
4071load_string(UnpicklerObject *self)
4072{
4073 PyObject *bytes;
4074 PyObject *str = NULL;
4075 Py_ssize_t len;
4076 char *s, *p;
4077
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004078 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004079 return -1;
4080 if (len < 3)
4081 return bad_readline();
4082 if ((s = strdup(s)) == NULL) {
4083 PyErr_NoMemory();
4084 return -1;
4085 }
4086
4087 /* Strip outermost quotes */
4088 while (s[len - 1] <= ' ')
4089 len--;
4090 if (s[0] == '"' && s[len - 1] == '"') {
4091 s[len - 1] = '\0';
4092 p = s + 1;
4093 len -= 2;
4094 }
4095 else if (s[0] == '\'' && s[len - 1] == '\'') {
4096 s[len - 1] = '\0';
4097 p = s + 1;
4098 len -= 2;
4099 }
4100 else {
4101 free(s);
4102 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4103 return -1;
4104 }
4105
4106 /* Use the PyBytes API to decode the string, since that is what is used
4107 to encode, and then coerce the result to Unicode. */
4108 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4109 free(s);
4110 if (bytes == NULL)
4111 return -1;
4112 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4113 Py_DECREF(bytes);
4114 if (str == NULL)
4115 return -1;
4116
4117 PDATA_PUSH(self->stack, str, -1);
4118 return 0;
4119}
4120
4121static int
4122load_binbytes(UnpicklerObject *self)
4123{
4124 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004125 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004126 char *s;
4127
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004128 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004129 return -1;
4130
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004131 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004132 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004133 PyErr_Format(PyExc_OverflowError,
4134 "BINBYTES exceeds system's maximum size of %zd bytes",
4135 PY_SSIZE_T_MAX
4136 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004137 return -1;
4138 }
4139
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004140 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004141 return -1;
4142 bytes = PyBytes_FromStringAndSize(s, x);
4143 if (bytes == NULL)
4144 return -1;
4145
4146 PDATA_PUSH(self->stack, bytes, -1);
4147 return 0;
4148}
4149
4150static int
4151load_short_binbytes(UnpicklerObject *self)
4152{
4153 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004154 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004155 char *s;
4156
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004157 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004158 return -1;
4159
4160 x = (unsigned char)s[0];
4161
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004162 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004163 return -1;
4164
4165 bytes = PyBytes_FromStringAndSize(s, x);
4166 if (bytes == NULL)
4167 return -1;
4168
4169 PDATA_PUSH(self->stack, bytes, -1);
4170 return 0;
4171}
4172
4173static int
4174load_binstring(UnpicklerObject *self)
4175{
4176 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004177 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004178 char *s;
4179
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004180 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004181 return -1;
4182
4183 x = calc_binint(s, 4);
4184 if (x < 0) {
Victor Stinner121aab42011-09-29 23:40:53 +02004185 PyErr_SetString(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004186 "BINSTRING pickle has negative byte count");
4187 return -1;
4188 }
4189
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004190 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004191 return -1;
4192
4193 /* Convert Python 2.x strings to unicode. */
4194 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4195 if (str == NULL)
4196 return -1;
4197
4198 PDATA_PUSH(self->stack, str, -1);
4199 return 0;
4200}
4201
4202static int
4203load_short_binstring(UnpicklerObject *self)
4204{
4205 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004206 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004207 char *s;
4208
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004209 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004210 return -1;
4211
4212 x = (unsigned char)s[0];
4213
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004214 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004215 return -1;
4216
4217 /* Convert Python 2.x strings to unicode. */
4218 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4219 if (str == NULL)
4220 return -1;
4221
4222 PDATA_PUSH(self->stack, str, -1);
4223 return 0;
4224}
4225
4226static int
4227load_unicode(UnpicklerObject *self)
4228{
4229 PyObject *str;
4230 Py_ssize_t len;
4231 char *s;
4232
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004233 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004234 return -1;
4235 if (len < 1)
4236 return bad_readline();
4237
4238 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4239 if (str == NULL)
4240 return -1;
4241
4242 PDATA_PUSH(self->stack, str, -1);
4243 return 0;
4244}
4245
4246static int
4247load_binunicode(UnpicklerObject *self)
4248{
4249 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004250 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004251 char *s;
4252
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004253 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004254 return -1;
4255
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004256 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004257 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004258 PyErr_Format(PyExc_OverflowError,
4259 "BINUNICODE exceeds system's maximum size of %zd bytes",
4260 PY_SSIZE_T_MAX
4261 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004262 return -1;
4263 }
4264
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004265
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004266 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004267 return -1;
4268
Victor Stinner485fb562010-04-13 11:07:24 +00004269 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004270 if (str == NULL)
4271 return -1;
4272
4273 PDATA_PUSH(self->stack, str, -1);
4274 return 0;
4275}
4276
4277static int
4278load_tuple(UnpicklerObject *self)
4279{
4280 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004281 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004282
4283 if ((i = marker(self)) < 0)
4284 return -1;
4285
4286 tuple = Pdata_poptuple(self->stack, i);
4287 if (tuple == NULL)
4288 return -1;
4289 PDATA_PUSH(self->stack, tuple, -1);
4290 return 0;
4291}
4292
4293static int
4294load_counted_tuple(UnpicklerObject *self, int len)
4295{
4296 PyObject *tuple;
4297
4298 tuple = PyTuple_New(len);
4299 if (tuple == NULL)
4300 return -1;
4301
4302 while (--len >= 0) {
4303 PyObject *item;
4304
4305 PDATA_POP(self->stack, item);
4306 if (item == NULL)
4307 return -1;
4308 PyTuple_SET_ITEM(tuple, len, item);
4309 }
4310 PDATA_PUSH(self->stack, tuple, -1);
4311 return 0;
4312}
4313
4314static int
4315load_empty_list(UnpicklerObject *self)
4316{
4317 PyObject *list;
4318
4319 if ((list = PyList_New(0)) == NULL)
4320 return -1;
4321 PDATA_PUSH(self->stack, list, -1);
4322 return 0;
4323}
4324
4325static int
4326load_empty_dict(UnpicklerObject *self)
4327{
4328 PyObject *dict;
4329
4330 if ((dict = PyDict_New()) == NULL)
4331 return -1;
4332 PDATA_PUSH(self->stack, dict, -1);
4333 return 0;
4334}
4335
4336static int
4337load_list(UnpicklerObject *self)
4338{
4339 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004340 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004341
4342 if ((i = marker(self)) < 0)
4343 return -1;
4344
4345 list = Pdata_poplist(self->stack, i);
4346 if (list == NULL)
4347 return -1;
4348 PDATA_PUSH(self->stack, list, -1);
4349 return 0;
4350}
4351
4352static int
4353load_dict(UnpicklerObject *self)
4354{
4355 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004356 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004357
4358 if ((i = marker(self)) < 0)
4359 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004360 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004361
4362 if ((dict = PyDict_New()) == NULL)
4363 return -1;
4364
4365 for (k = i + 1; k < j; k += 2) {
4366 key = self->stack->data[k - 1];
4367 value = self->stack->data[k];
4368 if (PyDict_SetItem(dict, key, value) < 0) {
4369 Py_DECREF(dict);
4370 return -1;
4371 }
4372 }
4373 Pdata_clear(self->stack, i);
4374 PDATA_PUSH(self->stack, dict, -1);
4375 return 0;
4376}
4377
4378static PyObject *
4379instantiate(PyObject *cls, PyObject *args)
4380{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004381 PyObject *result = NULL;
4382 /* Caller must assure args are a tuple. Normally, args come from
4383 Pdata_poptuple which packs objects from the top of the stack
4384 into a newly created tuple. */
4385 assert(PyTuple_Check(args));
4386 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4387 PyObject_HasAttrString(cls, "__getinitargs__")) {
4388 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004389 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004390 else {
4391 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4392 }
4393 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004394}
4395
4396static int
4397load_obj(UnpicklerObject *self)
4398{
4399 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004400 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004401
4402 if ((i = marker(self)) < 0)
4403 return -1;
4404
4405 args = Pdata_poptuple(self->stack, i + 1);
4406 if (args == NULL)
4407 return -1;
4408
4409 PDATA_POP(self->stack, cls);
4410 if (cls) {
4411 obj = instantiate(cls, args);
4412 Py_DECREF(cls);
4413 }
4414 Py_DECREF(args);
4415 if (obj == NULL)
4416 return -1;
4417
4418 PDATA_PUSH(self->stack, obj, -1);
4419 return 0;
4420}
4421
4422static int
4423load_inst(UnpicklerObject *self)
4424{
4425 PyObject *cls = NULL;
4426 PyObject *args = NULL;
4427 PyObject *obj = NULL;
4428 PyObject *module_name;
4429 PyObject *class_name;
4430 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004431 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004432 char *s;
4433
4434 if ((i = marker(self)) < 0)
4435 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004436 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004437 return -1;
4438 if (len < 2)
4439 return bad_readline();
4440
4441 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4442 identifiers are permitted in Python 3.0, since the INST opcode is only
4443 supported by older protocols on Python 2.x. */
4444 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4445 if (module_name == NULL)
4446 return -1;
4447
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004448 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004449 if (len < 2)
4450 return bad_readline();
4451 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004452 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004453 cls = find_class(self, module_name, class_name);
4454 Py_DECREF(class_name);
4455 }
4456 }
4457 Py_DECREF(module_name);
4458
4459 if (cls == NULL)
4460 return -1;
4461
4462 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4463 obj = instantiate(cls, args);
4464 Py_DECREF(args);
4465 }
4466 Py_DECREF(cls);
4467
4468 if (obj == NULL)
4469 return -1;
4470
4471 PDATA_PUSH(self->stack, obj, -1);
4472 return 0;
4473}
4474
4475static int
4476load_newobj(UnpicklerObject *self)
4477{
4478 PyObject *args = NULL;
4479 PyObject *clsraw = NULL;
4480 PyTypeObject *cls; /* clsraw cast to its true type */
4481 PyObject *obj;
4482
4483 /* Stack is ... cls argtuple, and we want to call
4484 * cls.__new__(cls, *argtuple).
4485 */
4486 PDATA_POP(self->stack, args);
4487 if (args == NULL)
4488 goto error;
4489 if (!PyTuple_Check(args)) {
4490 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4491 goto error;
4492 }
4493
4494 PDATA_POP(self->stack, clsraw);
4495 cls = (PyTypeObject *)clsraw;
4496 if (cls == NULL)
4497 goto error;
4498 if (!PyType_Check(cls)) {
4499 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4500 "isn't a type object");
4501 goto error;
4502 }
4503 if (cls->tp_new == NULL) {
4504 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4505 "has NULL tp_new");
4506 goto error;
4507 }
4508
4509 /* Call __new__. */
4510 obj = cls->tp_new(cls, args, NULL);
4511 if (obj == NULL)
4512 goto error;
4513
4514 Py_DECREF(args);
4515 Py_DECREF(clsraw);
4516 PDATA_PUSH(self->stack, obj, -1);
4517 return 0;
4518
4519 error:
4520 Py_XDECREF(args);
4521 Py_XDECREF(clsraw);
4522 return -1;
4523}
4524
4525static int
4526load_global(UnpicklerObject *self)
4527{
4528 PyObject *global = NULL;
4529 PyObject *module_name;
4530 PyObject *global_name;
4531 Py_ssize_t len;
4532 char *s;
4533
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004534 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004535 return -1;
4536 if (len < 2)
4537 return bad_readline();
4538 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4539 if (!module_name)
4540 return -1;
4541
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004542 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004543 if (len < 2) {
4544 Py_DECREF(module_name);
4545 return bad_readline();
4546 }
4547 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4548 if (global_name) {
4549 global = find_class(self, module_name, global_name);
4550 Py_DECREF(global_name);
4551 }
4552 }
4553 Py_DECREF(module_name);
4554
4555 if (global == NULL)
4556 return -1;
4557 PDATA_PUSH(self->stack, global, -1);
4558 return 0;
4559}
4560
4561static int
4562load_persid(UnpicklerObject *self)
4563{
4564 PyObject *pid;
4565 Py_ssize_t len;
4566 char *s;
4567
4568 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004569 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004570 return -1;
4571 if (len < 2)
4572 return bad_readline();
4573
4574 pid = PyBytes_FromStringAndSize(s, len - 1);
4575 if (pid == NULL)
4576 return -1;
4577
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004578 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004579 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004580 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004581 if (pid == NULL)
4582 return -1;
4583
4584 PDATA_PUSH(self->stack, pid, -1);
4585 return 0;
4586 }
4587 else {
4588 PyErr_SetString(UnpicklingError,
4589 "A load persistent id instruction was encountered,\n"
4590 "but no persistent_load function was specified.");
4591 return -1;
4592 }
4593}
4594
4595static int
4596load_binpersid(UnpicklerObject *self)
4597{
4598 PyObject *pid;
4599
4600 if (self->pers_func) {
4601 PDATA_POP(self->stack, pid);
4602 if (pid == NULL)
4603 return -1;
4604
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004605 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004606 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004607 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004608 if (pid == NULL)
4609 return -1;
4610
4611 PDATA_PUSH(self->stack, pid, -1);
4612 return 0;
4613 }
4614 else {
4615 PyErr_SetString(UnpicklingError,
4616 "A load persistent id instruction was encountered,\n"
4617 "but no persistent_load function was specified.");
4618 return -1;
4619 }
4620}
4621
4622static int
4623load_pop(UnpicklerObject *self)
4624{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004625 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004626
4627 /* Note that we split the (pickle.py) stack into two stacks,
4628 * an object stack and a mark stack. We have to be clever and
4629 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004630 * mark stack first, and only signalling a stack underflow if
4631 * the object stack is empty and the mark stack doesn't match
4632 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004633 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004634 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004636 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004637 len--;
4638 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004639 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004640 } else {
4641 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004642 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004643 return 0;
4644}
4645
4646static int
4647load_pop_mark(UnpicklerObject *self)
4648{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004649 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650
4651 if ((i = marker(self)) < 0)
4652 return -1;
4653
4654 Pdata_clear(self->stack, i);
4655
4656 return 0;
4657}
4658
4659static int
4660load_dup(UnpicklerObject *self)
4661{
4662 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004663 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004664
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004665 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004666 return stack_underflow();
4667 last = self->stack->data[len - 1];
4668 PDATA_APPEND(self->stack, last, -1);
4669 return 0;
4670}
4671
4672static int
4673load_get(UnpicklerObject *self)
4674{
4675 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004676 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004677 Py_ssize_t len;
4678 char *s;
4679
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004680 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004681 return -1;
4682 if (len < 2)
4683 return bad_readline();
4684
4685 key = PyLong_FromString(s, NULL, 10);
4686 if (key == NULL)
4687 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004688 idx = PyLong_AsSsize_t(key);
4689 if (idx == -1 && PyErr_Occurred()) {
4690 Py_DECREF(key);
4691 return -1;
4692 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004693
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004694 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004695 if (value == NULL) {
4696 if (!PyErr_Occurred())
4697 PyErr_SetObject(PyExc_KeyError, key);
4698 Py_DECREF(key);
4699 return -1;
4700 }
4701 Py_DECREF(key);
4702
4703 PDATA_APPEND(self->stack, value, -1);
4704 return 0;
4705}
4706
4707static int
4708load_binget(UnpicklerObject *self)
4709{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004710 PyObject *value;
4711 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004712 char *s;
4713
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004714 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004715 return -1;
4716
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004717 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004718
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004719 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004720 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004721 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004722 if (!PyErr_Occurred())
4723 PyErr_SetObject(PyExc_KeyError, key);
4724 Py_DECREF(key);
4725 return -1;
4726 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727
4728 PDATA_APPEND(self->stack, value, -1);
4729 return 0;
4730}
4731
4732static int
4733load_long_binget(UnpicklerObject *self)
4734{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004735 PyObject *value;
4736 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004737 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004738
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004739 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004740 return -1;
4741
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004742 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004743
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004744 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004745 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004746 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004747 if (!PyErr_Occurred())
4748 PyErr_SetObject(PyExc_KeyError, key);
4749 Py_DECREF(key);
4750 return -1;
4751 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752
4753 PDATA_APPEND(self->stack, value, -1);
4754 return 0;
4755}
4756
4757/* Push an object from the extension registry (EXT[124]). nbytes is
4758 * the number of bytes following the opcode, holding the index (code) value.
4759 */
4760static int
4761load_extension(UnpicklerObject *self, int nbytes)
4762{
4763 char *codebytes; /* the nbytes bytes after the opcode */
4764 long code; /* calc_binint returns long */
4765 PyObject *py_code; /* code as a Python int */
4766 PyObject *obj; /* the object to push */
4767 PyObject *pair; /* (module_name, class_name) */
4768 PyObject *module_name, *class_name;
4769
4770 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004771 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004772 return -1;
4773 code = calc_binint(codebytes, nbytes);
4774 if (code <= 0) { /* note that 0 is forbidden */
4775 /* Corrupt or hostile pickle. */
4776 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4777 return -1;
4778 }
4779
4780 /* Look for the code in the cache. */
4781 py_code = PyLong_FromLong(code);
4782 if (py_code == NULL)
4783 return -1;
4784 obj = PyDict_GetItem(extension_cache, py_code);
4785 if (obj != NULL) {
4786 /* Bingo. */
4787 Py_DECREF(py_code);
4788 PDATA_APPEND(self->stack, obj, -1);
4789 return 0;
4790 }
4791
4792 /* Look up the (module_name, class_name) pair. */
4793 pair = PyDict_GetItem(inverted_registry, py_code);
4794 if (pair == NULL) {
4795 Py_DECREF(py_code);
4796 PyErr_Format(PyExc_ValueError, "unregistered extension "
4797 "code %ld", code);
4798 return -1;
4799 }
4800 /* Since the extension registry is manipulable via Python code,
4801 * confirm that pair is really a 2-tuple of strings.
4802 */
4803 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4804 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4805 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4806 Py_DECREF(py_code);
4807 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4808 "isn't a 2-tuple of strings", code);
4809 return -1;
4810 }
4811 /* Load the object. */
4812 obj = find_class(self, module_name, class_name);
4813 if (obj == NULL) {
4814 Py_DECREF(py_code);
4815 return -1;
4816 }
4817 /* Cache code -> obj. */
4818 code = PyDict_SetItem(extension_cache, py_code, obj);
4819 Py_DECREF(py_code);
4820 if (code < 0) {
4821 Py_DECREF(obj);
4822 return -1;
4823 }
4824 PDATA_PUSH(self->stack, obj, -1);
4825 return 0;
4826}
4827
4828static int
4829load_put(UnpicklerObject *self)
4830{
4831 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004832 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004833 Py_ssize_t len;
4834 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004836 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004837 return -1;
4838 if (len < 2)
4839 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004840 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004841 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004842 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004843
4844 key = PyLong_FromString(s, NULL, 10);
4845 if (key == NULL)
4846 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004847 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004849 if (idx < 0) {
4850 if (!PyErr_Occurred())
4851 PyErr_SetString(PyExc_ValueError,
4852 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004853 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004854 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004855
4856 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004857}
4858
4859static int
4860load_binput(UnpicklerObject *self)
4861{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862 PyObject *value;
4863 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004865
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004866 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004867 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004868
4869 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004870 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004871 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004873 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004875 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004876}
4877
4878static int
4879load_long_binput(UnpicklerObject *self)
4880{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004881 PyObject *value;
4882 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004883 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004884
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004885 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004886 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004887
4888 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004889 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004890 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004891
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004892 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004893 if (idx < 0) {
4894 PyErr_SetString(PyExc_ValueError,
4895 "negative LONG_BINPUT argument");
4896 return -1;
4897 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004898
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004899 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004900}
4901
4902static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004903do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004904{
4905 PyObject *value;
4906 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004907 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004908
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004909 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910 if (x > len || x <= 0)
4911 return stack_underflow();
4912 if (len == x) /* nothing to do */
4913 return 0;
4914
4915 list = self->stack->data[x - 1];
4916
4917 if (PyList_Check(list)) {
4918 PyObject *slice;
4919 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004920 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004921
4922 slice = Pdata_poplist(self->stack, x);
4923 if (!slice)
4924 return -1;
4925 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004926 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004928 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929 }
4930 else {
4931 PyObject *append_func;
4932
4933 append_func = PyObject_GetAttrString(list, "append");
4934 if (append_func == NULL)
4935 return -1;
4936 for (i = x; i < len; i++) {
4937 PyObject *result;
4938
4939 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004940 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004941 if (result == NULL) {
4942 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004943 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004944 return -1;
4945 }
4946 Py_DECREF(result);
4947 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004948 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004949 }
4950
4951 return 0;
4952}
4953
4954static int
4955load_append(UnpicklerObject *self)
4956{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004957 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004958}
4959
4960static int
4961load_appends(UnpicklerObject *self)
4962{
4963 return do_append(self, marker(self));
4964}
4965
4966static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004967do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968{
4969 PyObject *value, *key;
4970 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004971 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004972 int status = 0;
4973
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004974 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004975 if (x > len || x <= 0)
4976 return stack_underflow();
4977 if (len == x) /* nothing to do */
4978 return 0;
Victor Stinner121aab42011-09-29 23:40:53 +02004979 if ((len - x) % 2 != 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004980 /* Currupt or hostile pickle -- we never write one like this. */
4981 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4982 return -1;
4983 }
4984
4985 /* Here, dict does not actually need to be a PyDict; it could be anything
4986 that supports the __setitem__ attribute. */
4987 dict = self->stack->data[x - 1];
4988
4989 for (i = x + 1; i < len; i += 2) {
4990 key = self->stack->data[i - 1];
4991 value = self->stack->data[i];
4992 if (PyObject_SetItem(dict, key, value) < 0) {
4993 status = -1;
4994 break;
4995 }
4996 }
4997
4998 Pdata_clear(self->stack, x);
4999 return status;
5000}
5001
5002static int
5003load_setitem(UnpicklerObject *self)
5004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005005 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005006}
5007
5008static int
5009load_setitems(UnpicklerObject *self)
5010{
5011 return do_setitems(self, marker(self));
5012}
5013
5014static int
5015load_build(UnpicklerObject *self)
5016{
5017 PyObject *state, *inst, *slotstate;
5018 PyObject *setstate;
5019 int status = 0;
5020
5021 /* Stack is ... instance, state. We want to leave instance at
5022 * the stack top, possibly mutated via instance.__setstate__(state).
5023 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005024 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005025 return stack_underflow();
5026
5027 PDATA_POP(self->stack, state);
5028 if (state == NULL)
5029 return -1;
5030
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005031 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005032
5033 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005034 if (setstate == NULL) {
5035 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5036 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005037 else {
5038 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005039 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005040 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005041 }
5042 else {
5043 PyObject *result;
5044
5045 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005046 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005047 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005048 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005049 Py_DECREF(setstate);
5050 if (result == NULL)
5051 return -1;
5052 Py_DECREF(result);
5053 return 0;
5054 }
5055
5056 /* A default __setstate__. First see whether state embeds a
5057 * slot state dict too (a proto 2 addition).
5058 */
5059 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5060 PyObject *tmp = state;
5061
5062 state = PyTuple_GET_ITEM(tmp, 0);
5063 slotstate = PyTuple_GET_ITEM(tmp, 1);
5064 Py_INCREF(state);
5065 Py_INCREF(slotstate);
5066 Py_DECREF(tmp);
5067 }
5068 else
5069 slotstate = NULL;
5070
5071 /* Set inst.__dict__ from the state dict (if any). */
5072 if (state != Py_None) {
5073 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005074 PyObject *d_key, *d_value;
5075 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005076
5077 if (!PyDict_Check(state)) {
5078 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5079 goto error;
5080 }
5081 dict = PyObject_GetAttrString(inst, "__dict__");
5082 if (dict == NULL)
5083 goto error;
5084
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005085 i = 0;
5086 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5087 /* normally the keys for instance attributes are
5088 interned. we should try to do that here. */
5089 Py_INCREF(d_key);
5090 if (PyUnicode_CheckExact(d_key))
5091 PyUnicode_InternInPlace(&d_key);
5092 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5093 Py_DECREF(d_key);
5094 goto error;
5095 }
5096 Py_DECREF(d_key);
5097 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005098 Py_DECREF(dict);
5099 }
5100
5101 /* Also set instance attributes from the slotstate dict (if any). */
5102 if (slotstate != NULL) {
5103 PyObject *d_key, *d_value;
5104 Py_ssize_t i;
5105
5106 if (!PyDict_Check(slotstate)) {
5107 PyErr_SetString(UnpicklingError,
5108 "slot state is not a dictionary");
5109 goto error;
5110 }
5111 i = 0;
5112 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5113 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5114 goto error;
5115 }
5116 }
5117
5118 if (0) {
5119 error:
5120 status = -1;
5121 }
5122
5123 Py_DECREF(state);
5124 Py_XDECREF(slotstate);
5125 return status;
5126}
5127
5128static int
5129load_mark(UnpicklerObject *self)
5130{
5131
5132 /* Note that we split the (pickle.py) stack into two stacks, an
5133 * object stack and a mark stack. Here we push a mark onto the
5134 * mark stack.
5135 */
5136
5137 if ((self->num_marks + 1) >= self->marks_size) {
5138 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005139 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005140
5141 /* Use the size_t type to check for overflow. */
5142 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005143 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005144 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005145 PyErr_NoMemory();
5146 return -1;
5147 }
5148
5149 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005150 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005151 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005152 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5153 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005154 if (marks == NULL) {
5155 PyErr_NoMemory();
5156 return -1;
5157 }
5158 self->marks = marks;
5159 self->marks_size = (Py_ssize_t)alloc;
5160 }
5161
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005162 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005163
5164 return 0;
5165}
5166
5167static int
5168load_reduce(UnpicklerObject *self)
5169{
5170 PyObject *callable = NULL;
5171 PyObject *argtup = NULL;
5172 PyObject *obj = NULL;
5173
5174 PDATA_POP(self->stack, argtup);
5175 if (argtup == NULL)
5176 return -1;
5177 PDATA_POP(self->stack, callable);
5178 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005179 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005180 Py_DECREF(callable);
5181 }
5182 Py_DECREF(argtup);
5183
5184 if (obj == NULL)
5185 return -1;
5186
5187 PDATA_PUSH(self->stack, obj, -1);
5188 return 0;
5189}
5190
5191/* Just raises an error if we don't know the protocol specified. PROTO
5192 * is the first opcode for protocols >= 2.
5193 */
5194static int
5195load_proto(UnpicklerObject *self)
5196{
5197 char *s;
5198 int i;
5199
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005200 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005201 return -1;
5202
5203 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005204 if (i <= HIGHEST_PROTOCOL) {
5205 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005206 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005207 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005208
5209 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5210 return -1;
5211}
5212
5213static PyObject *
5214load(UnpicklerObject *self)
5215{
5216 PyObject *err;
5217 PyObject *value = NULL;
5218 char *s;
5219
5220 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005221 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005222 Pdata_clear(self->stack, 0);
5223
5224 /* Convenient macros for the dispatch while-switch loop just below. */
5225#define OP(opcode, load_func) \
5226 case opcode: if (load_func(self) < 0) break; continue;
5227
5228#define OP_ARG(opcode, load_func, arg) \
5229 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5230
5231 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005232 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005233 break;
5234
5235 switch ((enum opcode)s[0]) {
5236 OP(NONE, load_none)
5237 OP(BININT, load_binint)
5238 OP(BININT1, load_binint1)
5239 OP(BININT2, load_binint2)
5240 OP(INT, load_int)
5241 OP(LONG, load_long)
5242 OP_ARG(LONG1, load_counted_long, 1)
5243 OP_ARG(LONG4, load_counted_long, 4)
5244 OP(FLOAT, load_float)
5245 OP(BINFLOAT, load_binfloat)
5246 OP(BINBYTES, load_binbytes)
5247 OP(SHORT_BINBYTES, load_short_binbytes)
5248 OP(BINSTRING, load_binstring)
5249 OP(SHORT_BINSTRING, load_short_binstring)
5250 OP(STRING, load_string)
5251 OP(UNICODE, load_unicode)
5252 OP(BINUNICODE, load_binunicode)
5253 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5254 OP_ARG(TUPLE1, load_counted_tuple, 1)
5255 OP_ARG(TUPLE2, load_counted_tuple, 2)
5256 OP_ARG(TUPLE3, load_counted_tuple, 3)
5257 OP(TUPLE, load_tuple)
5258 OP(EMPTY_LIST, load_empty_list)
5259 OP(LIST, load_list)
5260 OP(EMPTY_DICT, load_empty_dict)
5261 OP(DICT, load_dict)
5262 OP(OBJ, load_obj)
5263 OP(INST, load_inst)
5264 OP(NEWOBJ, load_newobj)
5265 OP(GLOBAL, load_global)
5266 OP(APPEND, load_append)
5267 OP(APPENDS, load_appends)
5268 OP(BUILD, load_build)
5269 OP(DUP, load_dup)
5270 OP(BINGET, load_binget)
5271 OP(LONG_BINGET, load_long_binget)
5272 OP(GET, load_get)
5273 OP(MARK, load_mark)
5274 OP(BINPUT, load_binput)
5275 OP(LONG_BINPUT, load_long_binput)
5276 OP(PUT, load_put)
5277 OP(POP, load_pop)
5278 OP(POP_MARK, load_pop_mark)
5279 OP(SETITEM, load_setitem)
5280 OP(SETITEMS, load_setitems)
5281 OP(PERSID, load_persid)
5282 OP(BINPERSID, load_binpersid)
5283 OP(REDUCE, load_reduce)
5284 OP(PROTO, load_proto)
5285 OP_ARG(EXT1, load_extension, 1)
5286 OP_ARG(EXT2, load_extension, 2)
5287 OP_ARG(EXT4, load_extension, 4)
5288 OP_ARG(NEWTRUE, load_bool, Py_True)
5289 OP_ARG(NEWFALSE, load_bool, Py_False)
5290
5291 case STOP:
5292 break;
5293
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005294 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005295 if (s[0] == '\0')
5296 PyErr_SetNone(PyExc_EOFError);
5297 else
5298 PyErr_Format(UnpicklingError,
5299 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005300 return NULL;
5301 }
5302
5303 break; /* and we are done! */
5304 }
5305
Antoine Pitrou04248a82010-10-12 20:51:21 +00005306 if (_Unpickler_SkipConsumed(self) < 0)
5307 return NULL;
5308
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005309 /* XXX: It is not clear what this is actually for. */
5310 if ((err = PyErr_Occurred())) {
5311 if (err == PyExc_EOFError) {
5312 PyErr_SetNone(PyExc_EOFError);
5313 }
5314 return NULL;
5315 }
5316
5317 PDATA_POP(self->stack, value);
5318 return value;
5319}
5320
5321PyDoc_STRVAR(Unpickler_load_doc,
5322"load() -> object. Load a pickle."
5323"\n"
5324"Read a pickled object representation from the open file object given in\n"
5325"the constructor, and return the reconstituted object hierarchy specified\n"
5326"therein.\n");
5327
5328static PyObject *
5329Unpickler_load(UnpicklerObject *self)
5330{
5331 /* Check whether the Unpickler was initialized correctly. This prevents
5332 segfaulting if a subclass overridden __init__ with a function that does
5333 not call Unpickler.__init__(). Here, we simply ensure that self->read
5334 is not NULL. */
5335 if (self->read == NULL) {
Victor Stinner121aab42011-09-29 23:40:53 +02005336 PyErr_Format(UnpicklingError,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005337 "Unpickler.__init__() was not called by %s.__init__()",
5338 Py_TYPE(self)->tp_name);
5339 return NULL;
5340 }
5341
5342 return load(self);
5343}
5344
5345/* The name of find_class() is misleading. In newer pickle protocols, this
5346 function is used for loading any global (i.e., functions), not just
5347 classes. The name is kept only for backward compatibility. */
5348
5349PyDoc_STRVAR(Unpickler_find_class_doc,
5350"find_class(module_name, global_name) -> object.\n"
5351"\n"
5352"Return an object from a specified module, importing the module if\n"
5353"necessary. Subclasses may override this method (e.g. to restrict\n"
5354"unpickling of arbitrary classes and functions).\n"
5355"\n"
5356"This method is called whenever a class or a function object is\n"
5357"needed. Both arguments passed are str objects.\n");
5358
5359static PyObject *
5360Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5361{
5362 PyObject *global;
5363 PyObject *modules_dict;
5364 PyObject *module;
5365 PyObject *module_name, *global_name;
5366
5367 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5368 &module_name, &global_name))
5369 return NULL;
5370
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005371 /* Try to map the old names used in Python 2.x to the new ones used in
5372 Python 3.x. We do this only with old pickle protocols and when the
5373 user has not disabled the feature. */
5374 if (self->proto < 3 && self->fix_imports) {
5375 PyObject *key;
5376 PyObject *item;
5377
5378 /* Check if the global (i.e., a function or a class) was renamed
5379 or moved to another module. */
5380 key = PyTuple_Pack(2, module_name, global_name);
5381 if (key == NULL)
5382 return NULL;
5383 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5384 Py_DECREF(key);
5385 if (item) {
5386 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5387 PyErr_Format(PyExc_RuntimeError,
5388 "_compat_pickle.NAME_MAPPING values should be "
5389 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5390 return NULL;
5391 }
5392 module_name = PyTuple_GET_ITEM(item, 0);
5393 global_name = PyTuple_GET_ITEM(item, 1);
5394 if (!PyUnicode_Check(module_name) ||
5395 !PyUnicode_Check(global_name)) {
5396 PyErr_Format(PyExc_RuntimeError,
5397 "_compat_pickle.NAME_MAPPING values should be "
5398 "pairs of str, not (%.200s, %.200s)",
5399 Py_TYPE(module_name)->tp_name,
5400 Py_TYPE(global_name)->tp_name);
5401 return NULL;
5402 }
5403 }
5404 else if (PyErr_Occurred()) {
5405 return NULL;
5406 }
5407
5408 /* Check if the module was renamed. */
5409 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5410 if (item) {
5411 if (!PyUnicode_Check(item)) {
5412 PyErr_Format(PyExc_RuntimeError,
5413 "_compat_pickle.IMPORT_MAPPING values should be "
5414 "strings, not %.200s", Py_TYPE(item)->tp_name);
5415 return NULL;
5416 }
5417 module_name = item;
5418 }
5419 else if (PyErr_Occurred()) {
5420 return NULL;
5421 }
5422 }
5423
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005424 modules_dict = PySys_GetObject("modules");
5425 if (modules_dict == NULL)
5426 return NULL;
5427
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005428 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005429 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005430 if (PyErr_Occurred())
5431 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005432 module = PyImport_Import(module_name);
5433 if (module == NULL)
5434 return NULL;
5435 global = PyObject_GetAttr(module, global_name);
5436 Py_DECREF(module);
5437 }
Victor Stinner121aab42011-09-29 23:40:53 +02005438 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005439 global = PyObject_GetAttr(module, global_name);
5440 }
5441 return global;
5442}
5443
5444static struct PyMethodDef Unpickler_methods[] = {
5445 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5446 Unpickler_load_doc},
5447 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5448 Unpickler_find_class_doc},
5449 {NULL, NULL} /* sentinel */
5450};
5451
5452static void
5453Unpickler_dealloc(UnpicklerObject *self)
5454{
5455 PyObject_GC_UnTrack((PyObject *)self);
5456 Py_XDECREF(self->readline);
5457 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005458 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005459 Py_XDECREF(self->stack);
5460 Py_XDECREF(self->pers_func);
5461 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005462 if (self->buffer.buf != NULL) {
5463 PyBuffer_Release(&self->buffer);
5464 self->buffer.buf = NULL;
5465 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005466
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005467 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005468 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005469 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005470 free(self->encoding);
5471 free(self->errors);
5472
5473 Py_TYPE(self)->tp_free((PyObject *)self);
5474}
5475
5476static int
5477Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5478{
5479 Py_VISIT(self->readline);
5480 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005481 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005482 Py_VISIT(self->stack);
5483 Py_VISIT(self->pers_func);
5484 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005485 return 0;
5486}
5487
5488static int
5489Unpickler_clear(UnpicklerObject *self)
5490{
5491 Py_CLEAR(self->readline);
5492 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005493 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005494 Py_CLEAR(self->stack);
5495 Py_CLEAR(self->pers_func);
5496 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005497 if (self->buffer.buf != NULL) {
5498 PyBuffer_Release(&self->buffer);
5499 self->buffer.buf = NULL;
5500 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005501
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005502 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005503 PyMem_Free(self->marks);
5504 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005505 PyMem_Free(self->input_line);
5506 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005507 free(self->encoding);
5508 self->encoding = NULL;
5509 free(self->errors);
5510 self->errors = NULL;
5511
5512 return 0;
5513}
5514
5515PyDoc_STRVAR(Unpickler_doc,
5516"Unpickler(file, *, encoding='ASCII', errors='strict')"
5517"\n"
5518"This takes a binary file for reading a pickle data stream.\n"
5519"\n"
5520"The protocol version of the pickle is detected automatically, so no\n"
5521"proto argument is needed.\n"
5522"\n"
5523"The file-like object must have two methods, a read() method\n"
5524"that takes an integer argument, and a readline() method that\n"
5525"requires no arguments. Both methods should return bytes.\n"
5526"Thus file-like object can be a binary file object opened for\n"
5527"reading, a BytesIO object, or any other custom object that\n"
5528"meets this interface.\n"
5529"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005530"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5531"which are used to control compatiblity support for pickle stream\n"
5532"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5533"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5534"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5535"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5536"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005537
5538static int
5539Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5540{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005541 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005542 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005543 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005544 char *encoding = NULL;
5545 char *errors = NULL;
5546
5547 /* XXX: That is an horrible error message. But, I don't know how to do
5548 better... */
5549 if (Py_SIZE(args) != 1) {
5550 PyErr_Format(PyExc_TypeError,
5551 "%s takes exactly one positional argument (%zd given)",
5552 Py_TYPE(self)->tp_name, Py_SIZE(args));
5553 return -1;
5554 }
5555
5556 /* Arguments parsing needs to be done in the __init__() method to allow
5557 subclasses to define their own __init__() method, which may (or may
5558 not) support Unpickler arguments. However, this means we need to be
5559 extra careful in the other Unpickler methods, since a subclass could
5560 forget to call Unpickler.__init__() thus breaking our internal
5561 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005562 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005563 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005564 return -1;
5565
5566 /* In case of multiple __init__() calls, clear previous content. */
5567 if (self->read != NULL)
5568 (void)Unpickler_clear(self);
5569
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005570 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005571 return -1;
5572
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005573 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005574 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005575
5576 self->fix_imports = PyObject_IsTrue(fix_imports);
5577 if (self->fix_imports == -1)
5578 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005579
5580 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5581 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5582 "persistent_load");
5583 if (self->pers_func == NULL)
5584 return -1;
5585 }
5586 else {
5587 self->pers_func = NULL;
5588 }
5589
5590 self->stack = (Pdata *)Pdata_New();
5591 if (self->stack == NULL)
5592 return -1;
5593
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005594 self->memo_size = 32;
5595 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005596 if (self->memo == NULL)
5597 return -1;
5598
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005599 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005600 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005601
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005602 return 0;
5603}
5604
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005605/* Define a proxy object for the Unpickler's internal memo object. This is to
5606 * avoid breaking code like:
5607 * unpickler.memo.clear()
5608 * and
5609 * unpickler.memo = saved_memo
5610 * Is this a good idea? Not really, but we don't want to break code that uses
5611 * it. Note that we don't implement the entire mapping API here. This is
5612 * intentional, as these should be treated as black-box implementation details.
5613 *
5614 * We do, however, have to implement pickling/unpickling support because of
Victor Stinner121aab42011-09-29 23:40:53 +02005615 * real-world code like cvs2svn.
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005616 */
5617
5618typedef struct {
5619 PyObject_HEAD
5620 UnpicklerObject *unpickler;
5621} UnpicklerMemoProxyObject;
5622
5623PyDoc_STRVAR(ump_clear_doc,
5624"memo.clear() -> None. Remove all items from memo.");
5625
5626static PyObject *
5627ump_clear(UnpicklerMemoProxyObject *self)
5628{
5629 _Unpickler_MemoCleanup(self->unpickler);
5630 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5631 if (self->unpickler->memo == NULL)
5632 return NULL;
5633 Py_RETURN_NONE;
5634}
5635
5636PyDoc_STRVAR(ump_copy_doc,
5637"memo.copy() -> new_memo. Copy the memo to a new object.");
5638
5639static PyObject *
5640ump_copy(UnpicklerMemoProxyObject *self)
5641{
5642 Py_ssize_t i;
5643 PyObject *new_memo = PyDict_New();
5644 if (new_memo == NULL)
5645 return NULL;
5646
5647 for (i = 0; i < self->unpickler->memo_size; i++) {
5648 int status;
5649 PyObject *key, *value;
5650
5651 value = self->unpickler->memo[i];
5652 if (value == NULL)
5653 continue;
5654
5655 key = PyLong_FromSsize_t(i);
5656 if (key == NULL)
5657 goto error;
5658 status = PyDict_SetItem(new_memo, key, value);
5659 Py_DECREF(key);
5660 if (status < 0)
5661 goto error;
5662 }
5663 return new_memo;
5664
5665error:
5666 Py_DECREF(new_memo);
5667 return NULL;
5668}
5669
5670PyDoc_STRVAR(ump_reduce_doc,
5671"memo.__reduce__(). Pickling support.");
5672
5673static PyObject *
5674ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5675{
5676 PyObject *reduce_value;
5677 PyObject *constructor_args;
5678 PyObject *contents = ump_copy(self);
5679 if (contents == NULL)
5680 return NULL;
5681
5682 reduce_value = PyTuple_New(2);
5683 if (reduce_value == NULL) {
5684 Py_DECREF(contents);
5685 return NULL;
5686 }
5687 constructor_args = PyTuple_New(1);
5688 if (constructor_args == NULL) {
5689 Py_DECREF(contents);
5690 Py_DECREF(reduce_value);
5691 return NULL;
5692 }
5693 PyTuple_SET_ITEM(constructor_args, 0, contents);
5694 Py_INCREF((PyObject *)&PyDict_Type);
5695 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5696 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5697 return reduce_value;
5698}
5699
5700static PyMethodDef unpicklerproxy_methods[] = {
5701 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5702 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5703 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5704 {NULL, NULL} /* sentinel */
5705};
5706
5707static void
5708UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5709{
5710 PyObject_GC_UnTrack(self);
5711 Py_XDECREF(self->unpickler);
5712 PyObject_GC_Del((PyObject *)self);
5713}
5714
5715static int
5716UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5717 visitproc visit, void *arg)
5718{
5719 Py_VISIT(self->unpickler);
5720 return 0;
5721}
5722
5723static int
5724UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5725{
5726 Py_CLEAR(self->unpickler);
5727 return 0;
5728}
5729
5730static PyTypeObject UnpicklerMemoProxyType = {
5731 PyVarObject_HEAD_INIT(NULL, 0)
5732 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5733 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5734 0,
5735 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5736 0, /* tp_print */
5737 0, /* tp_getattr */
5738 0, /* tp_setattr */
5739 0, /* tp_compare */
5740 0, /* tp_repr */
5741 0, /* tp_as_number */
5742 0, /* tp_as_sequence */
5743 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005744 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005745 0, /* tp_call */
5746 0, /* tp_str */
5747 PyObject_GenericGetAttr, /* tp_getattro */
5748 PyObject_GenericSetAttr, /* tp_setattro */
5749 0, /* tp_as_buffer */
5750 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5751 0, /* tp_doc */
5752 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5753 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5754 0, /* tp_richcompare */
5755 0, /* tp_weaklistoffset */
5756 0, /* tp_iter */
5757 0, /* tp_iternext */
5758 unpicklerproxy_methods, /* tp_methods */
5759};
5760
5761static PyObject *
5762UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5763{
5764 UnpicklerMemoProxyObject *self;
5765
5766 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5767 &UnpicklerMemoProxyType);
5768 if (self == NULL)
5769 return NULL;
5770 Py_INCREF(unpickler);
5771 self->unpickler = unpickler;
5772 PyObject_GC_Track(self);
5773 return (PyObject *)self;
5774}
5775
5776/*****************************************************************************/
5777
5778
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005779static PyObject *
5780Unpickler_get_memo(UnpicklerObject *self)
5781{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005782 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005783}
5784
5785static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005786Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005787{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005788 PyObject **new_memo;
5789 Py_ssize_t new_memo_size = 0;
5790 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005791
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005792 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005793 PyErr_SetString(PyExc_TypeError,
5794 "attribute deletion is not supported");
5795 return -1;
5796 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005797
5798 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5799 UnpicklerObject *unpickler =
5800 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5801
5802 new_memo_size = unpickler->memo_size;
5803 new_memo = _Unpickler_NewMemo(new_memo_size);
5804 if (new_memo == NULL)
5805 return -1;
5806
5807 for (i = 0; i < new_memo_size; i++) {
5808 Py_XINCREF(unpickler->memo[i]);
5809 new_memo[i] = unpickler->memo[i];
5810 }
5811 }
5812 else if (PyDict_Check(obj)) {
5813 Py_ssize_t i = 0;
5814 PyObject *key, *value;
5815
5816 new_memo_size = PyDict_Size(obj);
5817 new_memo = _Unpickler_NewMemo(new_memo_size);
5818 if (new_memo == NULL)
5819 return -1;
5820
5821 while (PyDict_Next(obj, &i, &key, &value)) {
5822 Py_ssize_t idx;
5823 if (!PyLong_Check(key)) {
5824 PyErr_SetString(PyExc_TypeError,
5825 "memo key must be integers");
5826 goto error;
5827 }
5828 idx = PyLong_AsSsize_t(key);
5829 if (idx == -1 && PyErr_Occurred())
5830 goto error;
5831 if (_Unpickler_MemoPut(self, idx, value) < 0)
5832 goto error;
5833 }
5834 }
5835 else {
5836 PyErr_Format(PyExc_TypeError,
5837 "'memo' attribute must be an UnpicklerMemoProxy object"
5838 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005839 return -1;
5840 }
5841
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005842 _Unpickler_MemoCleanup(self);
5843 self->memo_size = new_memo_size;
5844 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005845
5846 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005847
5848 error:
5849 if (new_memo_size) {
5850 i = new_memo_size;
5851 while (--i >= 0) {
5852 Py_XDECREF(new_memo[i]);
5853 }
5854 PyMem_FREE(new_memo);
5855 }
5856 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005857}
5858
5859static PyObject *
5860Unpickler_get_persload(UnpicklerObject *self)
5861{
5862 if (self->pers_func == NULL)
5863 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5864 else
5865 Py_INCREF(self->pers_func);
5866 return self->pers_func;
5867}
5868
5869static int
5870Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5871{
5872 PyObject *tmp;
5873
5874 if (value == NULL) {
5875 PyErr_SetString(PyExc_TypeError,
5876 "attribute deletion is not supported");
5877 return -1;
5878 }
5879 if (!PyCallable_Check(value)) {
5880 PyErr_SetString(PyExc_TypeError,
5881 "persistent_load must be a callable taking "
5882 "one argument");
5883 return -1;
5884 }
5885
5886 tmp = self->pers_func;
5887 Py_INCREF(value);
5888 self->pers_func = value;
5889 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5890
5891 return 0;
5892}
5893
5894static PyGetSetDef Unpickler_getsets[] = {
5895 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5896 {"persistent_load", (getter)Unpickler_get_persload,
5897 (setter)Unpickler_set_persload},
5898 {NULL}
5899};
5900
5901static PyTypeObject Unpickler_Type = {
5902 PyVarObject_HEAD_INIT(NULL, 0)
5903 "_pickle.Unpickler", /*tp_name*/
5904 sizeof(UnpicklerObject), /*tp_basicsize*/
5905 0, /*tp_itemsize*/
5906 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5907 0, /*tp_print*/
5908 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005909 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005910 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005911 0, /*tp_repr*/
5912 0, /*tp_as_number*/
5913 0, /*tp_as_sequence*/
5914 0, /*tp_as_mapping*/
5915 0, /*tp_hash*/
5916 0, /*tp_call*/
5917 0, /*tp_str*/
5918 0, /*tp_getattro*/
5919 0, /*tp_setattro*/
5920 0, /*tp_as_buffer*/
5921 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5922 Unpickler_doc, /*tp_doc*/
5923 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5924 (inquiry)Unpickler_clear, /*tp_clear*/
5925 0, /*tp_richcompare*/
5926 0, /*tp_weaklistoffset*/
5927 0, /*tp_iter*/
5928 0, /*tp_iternext*/
5929 Unpickler_methods, /*tp_methods*/
5930 0, /*tp_members*/
5931 Unpickler_getsets, /*tp_getset*/
5932 0, /*tp_base*/
5933 0, /*tp_dict*/
5934 0, /*tp_descr_get*/
5935 0, /*tp_descr_set*/
5936 0, /*tp_dictoffset*/
5937 (initproc)Unpickler_init, /*tp_init*/
5938 PyType_GenericAlloc, /*tp_alloc*/
5939 PyType_GenericNew, /*tp_new*/
5940 PyObject_GC_Del, /*tp_free*/
5941 0, /*tp_is_gc*/
5942};
5943
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005944PyDoc_STRVAR(pickle_dump_doc,
5945"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5946"\n"
5947"Write a pickled representation of obj to the open file object file. This\n"
5948"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5949"efficient.\n"
5950"\n"
5951"The optional protocol argument tells the pickler to use the given protocol;\n"
5952"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5953"backward-incompatible protocol designed for Python 3.0.\n"
5954"\n"
5955"Specifying a negative protocol version selects the highest protocol version\n"
5956"supported. The higher the protocol used, the more recent the version of\n"
5957"Python needed to read the pickle produced.\n"
5958"\n"
5959"The file argument must have a write() method that accepts a single bytes\n"
5960"argument. It can thus be a file object opened for binary writing, a\n"
5961"io.BytesIO instance, or any other custom object that meets this interface.\n"
5962"\n"
5963"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5964"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5965"so that the pickle data stream is readable with Python 2.x.\n");
5966
5967static PyObject *
5968pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5969{
5970 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5971 PyObject *obj;
5972 PyObject *file;
5973 PyObject *proto = NULL;
5974 PyObject *fix_imports = Py_True;
5975 PicklerObject *pickler;
5976
5977 /* fix_imports is a keyword-only argument. */
5978 if (Py_SIZE(args) > 3) {
5979 PyErr_Format(PyExc_TypeError,
5980 "pickle.dump() takes at most 3 positional "
5981 "argument (%zd given)", Py_SIZE(args));
5982 return NULL;
5983 }
5984
5985 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5986 &obj, &file, &proto, &fix_imports))
5987 return NULL;
5988
5989 pickler = _Pickler_New();
5990 if (pickler == NULL)
5991 return NULL;
5992
5993 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5994 goto error;
5995
5996 if (_Pickler_SetOutputStream(pickler, file) < 0)
5997 goto error;
5998
5999 if (dump(pickler, obj) < 0)
6000 goto error;
6001
6002 if (_Pickler_FlushToFile(pickler) < 0)
6003 goto error;
6004
6005 Py_DECREF(pickler);
6006 Py_RETURN_NONE;
6007
6008 error:
6009 Py_XDECREF(pickler);
6010 return NULL;
6011}
6012
6013PyDoc_STRVAR(pickle_dumps_doc,
6014"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6015"\n"
6016"Return the pickled representation of the object as a bytes\n"
6017"object, instead of writing it to a file.\n"
6018"\n"
6019"The optional protocol argument tells the pickler to use the given protocol;\n"
6020"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6021"backward-incompatible protocol designed for Python 3.0.\n"
6022"\n"
6023"Specifying a negative protocol version selects the highest protocol version\n"
6024"supported. The higher the protocol used, the more recent the version of\n"
6025"Python needed to read the pickle produced.\n"
6026"\n"
6027"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6028"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6029"so that the pickle data stream is readable with Python 2.x.\n");
6030
6031static PyObject *
6032pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6033{
6034 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6035 PyObject *obj;
6036 PyObject *proto = NULL;
6037 PyObject *result;
6038 PyObject *fix_imports = Py_True;
6039 PicklerObject *pickler;
6040
6041 /* fix_imports is a keyword-only argument. */
6042 if (Py_SIZE(args) > 2) {
6043 PyErr_Format(PyExc_TypeError,
6044 "pickle.dumps() takes at most 2 positional "
6045 "argument (%zd given)", Py_SIZE(args));
6046 return NULL;
6047 }
6048
6049 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6050 &obj, &proto, &fix_imports))
6051 return NULL;
6052
6053 pickler = _Pickler_New();
6054 if (pickler == NULL)
6055 return NULL;
6056
6057 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6058 goto error;
6059
6060 if (dump(pickler, obj) < 0)
6061 goto error;
6062
6063 result = _Pickler_GetString(pickler);
6064 Py_DECREF(pickler);
6065 return result;
6066
6067 error:
6068 Py_XDECREF(pickler);
6069 return NULL;
6070}
6071
6072PyDoc_STRVAR(pickle_load_doc,
6073"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6074"\n"
6075"Read a pickled object representation from the open file object file and\n"
6076"return the reconstituted object hierarchy specified therein. This is\n"
6077"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6078"\n"
6079"The protocol version of the pickle is detected automatically, so no protocol\n"
6080"argument is needed. Bytes past the pickled object's representation are\n"
6081"ignored.\n"
6082"\n"
6083"The argument file must have two methods, a read() method that takes an\n"
6084"integer argument, and a readline() method that requires no arguments. Both\n"
6085"methods should return bytes. Thus *file* can be a binary file object opened\n"
6086"for reading, a BytesIO object, or any other custom object that meets this\n"
6087"interface.\n"
6088"\n"
6089"Optional keyword arguments are fix_imports, encoding and errors,\n"
6090"which are used to control compatiblity support for pickle stream generated\n"
6091"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6092"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6093"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6094"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6095
6096static PyObject *
6097pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6098{
6099 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6100 PyObject *file;
6101 PyObject *fix_imports = Py_True;
6102 PyObject *result;
6103 char *encoding = NULL;
6104 char *errors = NULL;
6105 UnpicklerObject *unpickler;
6106
6107 /* fix_imports, encoding and errors are a keyword-only argument. */
6108 if (Py_SIZE(args) != 1) {
6109 PyErr_Format(PyExc_TypeError,
6110 "pickle.load() takes exactly one positional "
6111 "argument (%zd given)", Py_SIZE(args));
6112 return NULL;
6113 }
6114
6115 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6116 &file, &fix_imports, &encoding, &errors))
6117 return NULL;
6118
6119 unpickler = _Unpickler_New();
6120 if (unpickler == NULL)
6121 return NULL;
6122
6123 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6124 goto error;
6125
6126 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6127 goto error;
6128
6129 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6130 if (unpickler->fix_imports == -1)
6131 goto error;
6132
6133 result = load(unpickler);
6134 Py_DECREF(unpickler);
6135 return result;
6136
6137 error:
6138 Py_XDECREF(unpickler);
6139 return NULL;
6140}
6141
6142PyDoc_STRVAR(pickle_loads_doc,
6143"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6144"\n"
6145"Read a pickled object hierarchy from a bytes object and return the\n"
6146"reconstituted object hierarchy specified therein\n"
6147"\n"
6148"The protocol version of the pickle is detected automatically, so no protocol\n"
6149"argument is needed. Bytes past the pickled object's representation are\n"
6150"ignored.\n"
6151"\n"
6152"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6153"are used to control compatiblity support for pickle stream generated\n"
6154"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6155"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6156"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6157"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6158
6159static PyObject *
6160pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6161{
6162 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6163 PyObject *input;
6164 PyObject *fix_imports = Py_True;
6165 PyObject *result;
6166 char *encoding = NULL;
6167 char *errors = NULL;
6168 UnpicklerObject *unpickler;
6169
6170 /* fix_imports, encoding and errors are a keyword-only argument. */
6171 if (Py_SIZE(args) != 1) {
6172 PyErr_Format(PyExc_TypeError,
6173 "pickle.loads() takes exactly one positional "
6174 "argument (%zd given)", Py_SIZE(args));
6175 return NULL;
6176 }
6177
6178 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6179 &input, &fix_imports, &encoding, &errors))
6180 return NULL;
6181
6182 unpickler = _Unpickler_New();
6183 if (unpickler == NULL)
6184 return NULL;
6185
6186 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6187 goto error;
6188
6189 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6190 goto error;
6191
6192 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6193 if (unpickler->fix_imports == -1)
6194 goto error;
6195
6196 result = load(unpickler);
6197 Py_DECREF(unpickler);
6198 return result;
6199
6200 error:
6201 Py_XDECREF(unpickler);
6202 return NULL;
6203}
6204
6205
6206static struct PyMethodDef pickle_methods[] = {
6207 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6208 pickle_dump_doc},
6209 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6210 pickle_dumps_doc},
6211 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6212 pickle_load_doc},
6213 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6214 pickle_loads_doc},
6215 {NULL, NULL} /* sentinel */
6216};
6217
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006218static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006219initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006220{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006221 PyObject *copyreg = NULL;
6222 PyObject *compat_pickle = NULL;
6223
6224 /* XXX: We should ensure that the types of the dictionaries imported are
6225 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6226 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006227
6228 copyreg = PyImport_ImportModule("copyreg");
6229 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006230 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006231 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6232 if (!dispatch_table)
6233 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006234 extension_registry = \
6235 PyObject_GetAttrString(copyreg, "_extension_registry");
6236 if (!extension_registry)
6237 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006238 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6239 if (!inverted_registry)
6240 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006241 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6242 if (!extension_cache)
6243 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006244 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006245
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006246 /* Load the 2.x -> 3.x stdlib module mapping tables */
6247 compat_pickle = PyImport_ImportModule("_compat_pickle");
6248 if (!compat_pickle)
6249 goto error;
6250 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6251 if (!name_mapping_2to3)
6252 goto error;
6253 if (!PyDict_CheckExact(name_mapping_2to3)) {
6254 PyErr_Format(PyExc_RuntimeError,
6255 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6256 Py_TYPE(name_mapping_2to3)->tp_name);
6257 goto error;
6258 }
6259 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6260 "IMPORT_MAPPING");
6261 if (!import_mapping_2to3)
6262 goto error;
6263 if (!PyDict_CheckExact(import_mapping_2to3)) {
6264 PyErr_Format(PyExc_RuntimeError,
6265 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6266 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6267 goto error;
6268 }
6269 /* ... and the 3.x -> 2.x mapping tables */
6270 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6271 "REVERSE_NAME_MAPPING");
6272 if (!name_mapping_3to2)
6273 goto error;
6274 if (!PyDict_CheckExact(name_mapping_3to2)) {
6275 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006276 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006277 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6278 goto error;
6279 }
6280 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6281 "REVERSE_IMPORT_MAPPING");
6282 if (!import_mapping_3to2)
6283 goto error;
6284 if (!PyDict_CheckExact(import_mapping_3to2)) {
6285 PyErr_Format(PyExc_RuntimeError,
6286 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6287 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6288 goto error;
6289 }
6290 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006291
6292 empty_tuple = PyTuple_New(0);
6293 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006294 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006295 two_tuple = PyTuple_New(2);
6296 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006297 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006298 /* We use this temp container with no regard to refcounts, or to
6299 * keeping containees alive. Exempt from GC, because we don't
6300 * want anything looking at two_tuple() by magic.
6301 */
6302 PyObject_GC_UnTrack(two_tuple);
6303
6304 return 0;
6305
6306 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006307 Py_CLEAR(copyreg);
6308 Py_CLEAR(dispatch_table);
6309 Py_CLEAR(extension_registry);
6310 Py_CLEAR(inverted_registry);
6311 Py_CLEAR(extension_cache);
6312 Py_CLEAR(compat_pickle);
6313 Py_CLEAR(name_mapping_2to3);
6314 Py_CLEAR(import_mapping_2to3);
6315 Py_CLEAR(name_mapping_3to2);
6316 Py_CLEAR(import_mapping_3to2);
6317 Py_CLEAR(empty_tuple);
6318 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006319 return -1;
6320}
6321
6322static struct PyModuleDef _picklemodule = {
6323 PyModuleDef_HEAD_INIT,
6324 "_pickle",
6325 pickle_module_doc,
6326 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006327 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006328 NULL,
6329 NULL,
6330 NULL,
6331 NULL
6332};
6333
6334PyMODINIT_FUNC
6335PyInit__pickle(void)
6336{
6337 PyObject *m;
6338
6339 if (PyType_Ready(&Unpickler_Type) < 0)
6340 return NULL;
6341 if (PyType_Ready(&Pickler_Type) < 0)
6342 return NULL;
6343 if (PyType_Ready(&Pdata_Type) < 0)
6344 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006345 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6346 return NULL;
6347 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6348 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006349
6350 /* Create the module and add the functions. */
6351 m = PyModule_Create(&_picklemodule);
6352 if (m == NULL)
6353 return NULL;
6354
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006355 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006356 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6357 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006358 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006359 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6360 return NULL;
6361
6362 /* Initialize the exceptions. */
6363 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6364 if (PickleError == NULL)
6365 return NULL;
6366 PicklingError = \
6367 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6368 if (PicklingError == NULL)
6369 return NULL;
6370 UnpicklingError = \
6371 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6372 if (UnpicklingError == NULL)
6373 return NULL;
6374
6375 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6376 return NULL;
6377 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6378 return NULL;
6379 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6380 return NULL;
6381
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006382 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006383 return NULL;
6384
6385 return m;
6386}