blob: 19fac40af9bd1c886d9017dc03f7f1b3535c81f0 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this implementation.  Raise
   HIGHEST_PROTOCOL whenever new opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 3
};
12
/* Pickle opcodes.  Each opcode is a single character; the list must stay in
   step with pickle.py.  pickletools.py carries the extensive documentation. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes added by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes added by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
/* Not opcodes: these are the byte strings used to pickle bools before
 * protocol 2.  Unpicklers that predate bools read them back as ints, while
 * newer unpicklers can tell that a bool was intended.  Protocol 2 added
 * direct ways to pickle bools.
 */
#undef FALSE
#define FALSE "I00\n"
#undef TRUE
#define TRUE "I01\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE.
       Nothing breaks if the two drift apart, but it is unclear that doing
       so would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler's write buffer when pickling to a stream.
       Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push O onto stack D, handing the caller's reference over to the stack.
   If the push fails, the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER)                    \
    do {                                        \
        if (Pdata_push((D), (O)) < 0)           \
            return (ER);                        \
    } while (0)

/* Push O onto stack D after taking a new reference to it.
   If the push fails, the enclosing function returns ER. */
#define PDATA_APPEND(D, O, ER)                  \
    do {                                        \
        Py_INCREF((O));                         \
        if (Pdata_push((D), (O)) < 0)           \
            return (ER);                        \
    } while (0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values.  The
 pickler uses it for memoization.  Compared with a PyDict, it avoids
 creating a lot of unnecessary objects, which makes a huge performance
 difference. */

/* Initial (and minimum) number of slots in a memo table. */
#define MT_MINSIZE 8
/* Right shift applied to the perturbation value on each probe. */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
/* The hashtable tuning constants are not needed past this point. */
#undef PERTURB_SHIFT
#undef MT_MINSIZE
604
605/*************************************************************************/
606
/* Helpers that maintain the cached argument tuple passed to functions, so
   that PyTuple_New() only has to be called once.

   ARG_TUP stores `obj` as item 0 of self->arg, creating the 1-tuple on first
   use and releasing whatever was stored there before.  The reference to
   `obj` is consumed either way: if the tuple cannot be created, `obj` is
   released and self->arg stays NULL.

   FREE_ARG_TUP drops the cached tuple when something else still holds a
   reference to it, so that it is not reused while shared.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */
#define ARG_TUP(self, obj)                                      \
    do {                                                        \
        if ((self)->arg || ((self)->arg = PyTuple_New(1))) {    \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

#define FREE_ARG_TUP(self)                                      \
    do {                                                        \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
1040
1041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001222 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001224 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1235 "%" PY_FORMAT_SIZE_T "d\n", *value);
1236 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001270 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001271 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1285 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 len = strlen(pdata);
1287 }
1288 else {
1289 if (x < 256) {
1290 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001291 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001292 len = 2;
1293 }
1294 else if (x <= 0xffffffffL) {
1295 pdata[0] = LONG_BINPUT;
1296 pdata[1] = (unsigned char)(x & 0xff);
1297 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1298 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1299 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1300 len = 5;
1301 }
1302 else { /* unlikely */
1303 PyErr_SetString(PicklingError,
1304 "memo id too large for LONG_BINPUT");
1305 return -1;
1306 }
1307 }
1308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 goto error;
1311
1312 if (0) {
1313 error:
1314 status = -1;
1315 }
1316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 return status;
1318}
1319
1320static PyObject *
1321whichmodule(PyObject *global, PyObject *global_name)
1322{
1323 Py_ssize_t i, j;
1324 static PyObject *module_str = NULL;
1325 static PyObject *main_str = NULL;
1326 PyObject *module_name;
1327 PyObject *modules_dict;
1328 PyObject *module;
1329 PyObject *obj;
1330
1331 if (module_str == NULL) {
1332 module_str = PyUnicode_InternFromString("__module__");
1333 if (module_str == NULL)
1334 return NULL;
1335 main_str = PyUnicode_InternFromString("__main__");
1336 if (main_str == NULL)
1337 return NULL;
1338 }
1339
1340 module_name = PyObject_GetAttr(global, module_str);
1341
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001342 /* In some rare cases (e.g., bound methods of extension types),
1343 __module__ can be None. If it is so, then search sys.modules
1344 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 if (module_name == Py_None) {
1346 Py_DECREF(module_name);
1347 goto search;
1348 }
1349
1350 if (module_name) {
1351 return module_name;
1352 }
1353 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1354 PyErr_Clear();
1355 else
1356 return NULL;
1357
1358 search:
1359 modules_dict = PySys_GetObject("modules");
1360 if (modules_dict == NULL)
1361 return NULL;
1362
1363 i = 0;
1364 module_name = NULL;
1365 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001366 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001367 continue;
1368
1369 obj = PyObject_GetAttr(module, global_name);
1370 if (obj == NULL) {
1371 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1372 PyErr_Clear();
1373 else
1374 return NULL;
1375 continue;
1376 }
1377
1378 if (obj != global) {
1379 Py_DECREF(obj);
1380 continue;
1381 }
1382
1383 Py_DECREF(obj);
1384 break;
1385 }
1386
1387 /* If no module is found, use __main__. */
1388 if (!j) {
1389 module_name = main_str;
1390 }
1391
1392 Py_INCREF(module_name);
1393 return module_name;
1394}
1395
1396/* fast_save_enter() and fast_save_leave() are guards against recursive
1397 objects when Pickler is used with the "fast mode" (i.e., with object
1398 memoization disabled). If the nesting of a list or dict object exceed
1399 FAST_NESTING_LIMIT, these guards will start keeping an internal
1400 reference to the seen list or dict objects and check whether these objects
1401 are recursive. These are not strictly necessary, since save() has a
1402 hard-coded recursion limit, but they give a nicer error message than the
1403 typical RuntimeError. */
1404static int
1405fast_save_enter(PicklerObject *self, PyObject *obj)
1406{
1407 /* if fast_nesting < 0, we're doing an error exit. */
1408 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1409 PyObject *key = NULL;
1410 if (self->fast_memo == NULL) {
1411 self->fast_memo = PyDict_New();
1412 if (self->fast_memo == NULL) {
1413 self->fast_nesting = -1;
1414 return 0;
1415 }
1416 }
1417 key = PyLong_FromVoidPtr(obj);
1418 if (key == NULL)
1419 return 0;
1420 if (PyDict_GetItem(self->fast_memo, key)) {
1421 Py_DECREF(key);
1422 PyErr_Format(PyExc_ValueError,
1423 "fast mode: can't pickle cyclic objects "
1424 "including object type %.200s at %p",
1425 obj->ob_type->tp_name, obj);
1426 self->fast_nesting = -1;
1427 return 0;
1428 }
1429 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1430 Py_DECREF(key);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 Py_DECREF(key);
1435 }
1436 return 1;
1437}
1438
1439static int
1440fast_save_leave(PicklerObject *self, PyObject *obj)
1441{
1442 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1443 PyObject *key = PyLong_FromVoidPtr(obj);
1444 if (key == NULL)
1445 return 0;
1446 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1447 Py_DECREF(key);
1448 return 0;
1449 }
1450 Py_DECREF(key);
1451 }
1452 return 1;
1453}
1454
1455static int
1456save_none(PicklerObject *self, PyObject *obj)
1457{
1458 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461
1462 return 0;
1463}
1464
1465static int
1466save_bool(PicklerObject *self, PyObject *obj)
1467{
1468 static const char *buf[2] = { FALSE, TRUE };
1469 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1470 int p = (obj == Py_True);
1471
1472 if (self->proto >= 2) {
1473 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001475 return -1;
1476 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479
1480 return 0;
1481}
1482
1483static int
1484save_int(PicklerObject *self, long x)
1485{
1486 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001487 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488
1489 if (!self->bin
1490#if SIZEOF_LONG > 4
1491 || x > 0x7fffffffL || x < -0x80000000L
1492#endif
1493 ) {
1494 /* Text-mode pickle, or long too big to fit in the 4-byte
1495 * signed BININT format: store as a string.
1496 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001497 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1498 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 return -1;
1501 }
1502 else {
1503 /* Binary pickle and x fits in a signed 4-byte int. */
1504 pdata[1] = (unsigned char)(x & 0xff);
1505 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1508
1509 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1510 if (pdata[2] == 0) {
1511 pdata[0] = BININT1;
1512 len = 2;
1513 }
1514 else {
1515 pdata[0] = BININT2;
1516 len = 3;
1517 }
1518 }
1519 else {
1520 pdata[0] = BININT;
1521 len = 5;
1522 }
1523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001524 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 return -1;
1526 }
1527
1528 return 0;
1529}
1530
1531static int
1532save_long(PicklerObject *self, PyObject *obj)
1533{
1534 PyObject *repr = NULL;
1535 Py_ssize_t size;
1536 long val = PyLong_AsLong(obj);
1537 int status = 0;
1538
1539 const char long_op = LONG;
1540
1541 if (val == -1 && PyErr_Occurred()) {
1542 /* out of range for int pickling */
1543 PyErr_Clear();
1544 }
Antoine Pitroue58bffb2011-08-13 20:40:32 +02001545 else
1546#if SIZEOF_LONG > 4
1547 if (val <= 0x7fffffffL && val >= -0x80000000L)
1548#endif
1549 return save_int(self, val);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001550
1551 if (self->proto >= 2) {
1552 /* Linear-time pickling. */
1553 size_t nbits;
1554 size_t nbytes;
1555 unsigned char *pdata;
1556 char header[5];
1557 int i;
1558 int sign = _PyLong_Sign(obj);
1559
1560 if (sign == 0) {
1561 header[0] = LONG1;
1562 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001563 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001564 goto error;
1565 return 0;
1566 }
1567 nbits = _PyLong_NumBits(obj);
1568 if (nbits == (size_t)-1 && PyErr_Occurred())
1569 goto error;
1570 /* How many bytes do we need? There are nbits >> 3 full
1571 * bytes of data, and nbits & 7 leftover bits. If there
1572 * are any leftover bits, then we clearly need another
1573 * byte. Wnat's not so obvious is that we *probably*
1574 * need another byte even if there aren't any leftovers:
1575 * the most-significant bit of the most-significant byte
1576 * acts like a sign bit, and it's usually got a sense
1577 * opposite of the one we need. The exception is longs
1578 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1579 * its own 256's-complement, so has the right sign bit
1580 * even without the extra byte. That's a pain to check
1581 * for in advance, though, so we always grab an extra
1582 * byte at the start, and cut it back later if possible.
1583 */
1584 nbytes = (nbits >> 3) + 1;
1585 if (nbytes > INT_MAX) {
1586 PyErr_SetString(PyExc_OverflowError,
1587 "long too large to pickle");
1588 goto error;
1589 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001590 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001591 if (repr == NULL)
1592 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001593 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001594 i = _PyLong_AsByteArray((PyLongObject *)obj,
1595 pdata, nbytes,
1596 1 /* little endian */ , 1 /* signed */ );
1597 if (i < 0)
1598 goto error;
1599 /* If the long is negative, this may be a byte more than
1600 * needed. This is so iff the MSB is all redundant sign
1601 * bits.
1602 */
1603 if (sign < 0 &&
1604 nbytes > 1 &&
1605 pdata[nbytes - 1] == 0xff &&
1606 (pdata[nbytes - 2] & 0x80) != 0) {
1607 nbytes--;
1608 }
1609
1610 if (nbytes < 256) {
1611 header[0] = LONG1;
1612 header[1] = (unsigned char)nbytes;
1613 size = 2;
1614 }
1615 else {
1616 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001617 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001618 for (i = 1; i < 5; i++) {
1619 header[i] = (unsigned char)(size & 0xff);
1620 size >>= 8;
1621 }
1622 size = 5;
1623 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001624 if (_Pickler_Write(self, header, size) < 0 ||
1625 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001626 goto error;
1627 }
1628 else {
1629 char *string;
1630
Mark Dickinson8dd05142009-01-20 20:43:58 +00001631 /* proto < 2: write the repr and newline. This is quadratic-time (in
1632 the number of digits), in both directions. We add a trailing 'L'
1633 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001634
1635 repr = PyObject_Repr(obj);
1636 if (repr == NULL)
1637 goto error;
1638
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001639 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001640 if (string == NULL)
1641 goto error;
1642
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001643 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1644 _Pickler_Write(self, string, size) < 0 ||
1645 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001646 goto error;
1647 }
1648
1649 if (0) {
1650 error:
1651 status = -1;
1652 }
1653 Py_XDECREF(repr);
1654
1655 return status;
1656}
1657
1658static int
1659save_float(PicklerObject *self, PyObject *obj)
1660{
1661 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1662
1663 if (self->bin) {
1664 char pdata[9];
1665 pdata[0] = BINFLOAT;
1666 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1667 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001668 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001669 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001670 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001672 int result = -1;
1673 char *buf = NULL;
1674 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001676 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 goto done;
1678
Mark Dickinson3e09f432009-04-17 08:41:23 +00001679 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001680 if (!buf) {
1681 PyErr_NoMemory();
1682 goto done;
1683 }
1684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001688 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 goto done;
1690
1691 result = 0;
1692done:
1693 PyMem_Free(buf);
1694 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001695 }
1696
1697 return 0;
1698}
1699
1700static int
1701save_bytes(PicklerObject *self, PyObject *obj)
1702{
1703 if (self->proto < 3) {
1704 /* Older pickle protocols do not have an opcode for pickling bytes
1705 objects. Therefore, we need to fake the copy protocol (i.e.,
1706 the __reduce__ method) to permit bytes object unpickling. */
1707 PyObject *reduce_value = NULL;
1708 PyObject *bytelist = NULL;
1709 int status;
1710
1711 bytelist = PySequence_List(obj);
1712 if (bytelist == NULL)
1713 return -1;
1714
1715 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1716 bytelist);
1717 if (reduce_value == NULL) {
1718 Py_DECREF(bytelist);
1719 return -1;
1720 }
1721
1722 /* save_reduce() will memoize the object automatically. */
1723 status = save_reduce(self, reduce_value, obj);
1724 Py_DECREF(reduce_value);
1725 Py_DECREF(bytelist);
1726 return status;
1727 }
1728 else {
1729 Py_ssize_t size;
1730 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001731 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001732
1733 size = PyBytes_Size(obj);
1734 if (size < 0)
1735 return -1;
1736
1737 if (size < 256) {
1738 header[0] = SHORT_BINBYTES;
1739 header[1] = (unsigned char)size;
1740 len = 2;
1741 }
1742 else if (size <= 0xffffffffL) {
1743 header[0] = BINBYTES;
1744 header[1] = (unsigned char)(size & 0xff);
1745 header[2] = (unsigned char)((size >> 8) & 0xff);
1746 header[3] = (unsigned char)((size >> 16) & 0xff);
1747 header[4] = (unsigned char)((size >> 24) & 0xff);
1748 len = 5;
1749 }
1750 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001751 PyErr_SetString(PyExc_OverflowError,
1752 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 return -1; /* string too large */
1754 }
1755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001756 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001757 return -1;
1758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001759 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760 return -1;
1761
1762 if (memo_put(self, obj) < 0)
1763 return -1;
1764
1765 return 0;
1766 }
1767}
1768
1769/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1770 backslash and newline characters to \uXXXX escapes. */
1771static PyObject *
1772raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1773{
1774 PyObject *repr, *result;
1775 char *p;
1776 char *q;
1777
1778 static const char *hexdigits = "0123456789abcdef";
1779
1780#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001781 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001782#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001783 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001784#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001785
1786 if (size > PY_SSIZE_T_MAX / expandsize)
1787 return PyErr_NoMemory();
1788
1789 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001790 if (repr == NULL)
1791 return NULL;
1792 if (size == 0)
1793 goto done;
1794
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001795 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001796 while (size-- > 0) {
1797 Py_UNICODE ch = *s++;
1798#ifdef Py_UNICODE_WIDE
1799 /* Map 32-bit characters to '\Uxxxxxxxx' */
1800 if (ch >= 0x10000) {
1801 *p++ = '\\';
1802 *p++ = 'U';
1803 *p++ = hexdigits[(ch >> 28) & 0xf];
1804 *p++ = hexdigits[(ch >> 24) & 0xf];
1805 *p++ = hexdigits[(ch >> 20) & 0xf];
1806 *p++ = hexdigits[(ch >> 16) & 0xf];
1807 *p++ = hexdigits[(ch >> 12) & 0xf];
1808 *p++ = hexdigits[(ch >> 8) & 0xf];
1809 *p++ = hexdigits[(ch >> 4) & 0xf];
1810 *p++ = hexdigits[ch & 15];
1811 }
1812 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001813#else
1814 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1815 if (ch >= 0xD800 && ch < 0xDC00) {
1816 Py_UNICODE ch2;
1817 Py_UCS4 ucs;
1818
1819 ch2 = *s++;
1820 size--;
1821 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1822 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1823 *p++ = '\\';
1824 *p++ = 'U';
1825 *p++ = hexdigits[(ucs >> 28) & 0xf];
1826 *p++ = hexdigits[(ucs >> 24) & 0xf];
1827 *p++ = hexdigits[(ucs >> 20) & 0xf];
1828 *p++ = hexdigits[(ucs >> 16) & 0xf];
1829 *p++ = hexdigits[(ucs >> 12) & 0xf];
1830 *p++ = hexdigits[(ucs >> 8) & 0xf];
1831 *p++ = hexdigits[(ucs >> 4) & 0xf];
1832 *p++ = hexdigits[ucs & 0xf];
1833 continue;
1834 }
1835 /* Fall through: isolated surrogates are copied as-is */
1836 s--;
1837 size++;
1838 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001839#endif
1840 /* Map 16-bit characters to '\uxxxx' */
1841 if (ch >= 256 || ch == '\\' || ch == '\n') {
1842 *p++ = '\\';
1843 *p++ = 'u';
1844 *p++ = hexdigits[(ch >> 12) & 0xf];
1845 *p++ = hexdigits[(ch >> 8) & 0xf];
1846 *p++ = hexdigits[(ch >> 4) & 0xf];
1847 *p++ = hexdigits[ch & 15];
1848 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001849 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001850 else
1851 *p++ = (char) ch;
1852 }
1853 size = p - q;
1854
1855 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001856 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001857 Py_DECREF(repr);
1858 return result;
1859}
1860
1861static int
1862save_unicode(PicklerObject *self, PyObject *obj)
1863{
1864 Py_ssize_t size;
1865 PyObject *encoded = NULL;
1866
1867 if (self->bin) {
1868 char pdata[5];
1869
Martin v. Löwisd63a3b82011-09-28 07:41:54 +02001870 encoded = PyUnicode_AsEncodedString(obj, "utf-8", "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001871 if (encoded == NULL)
1872 goto error;
1873
1874 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001875 if (size > 0xffffffffL) {
1876 PyErr_SetString(PyExc_OverflowError,
1877 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001878 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001879 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001880
1881 pdata[0] = BINUNICODE;
1882 pdata[1] = (unsigned char)(size & 0xff);
1883 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1884 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1885 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1886
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001887 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001888 goto error;
1889
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001890 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001891 goto error;
1892 }
1893 else {
1894 const char unicode_op = UNICODE;
1895
1896 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1897 PyUnicode_GET_SIZE(obj));
1898 if (encoded == NULL)
1899 goto error;
1900
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001901 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902 goto error;
1903
1904 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001905 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001906 goto error;
1907
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001908 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001909 goto error;
1910 }
1911 if (memo_put(self, obj) < 0)
1912 goto error;
1913
1914 Py_DECREF(encoded);
1915 return 0;
1916
1917 error:
1918 Py_XDECREF(encoded);
1919 return -1;
1920}
1921
1922/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1923static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001924store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001925{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001926 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001927
1928 assert(PyTuple_Size(t) == len);
1929
1930 for (i = 0; i < len; i++) {
1931 PyObject *element = PyTuple_GET_ITEM(t, i);
1932
1933 if (element == NULL)
1934 return -1;
1935 if (save(self, element, 0) < 0)
1936 return -1;
1937 }
1938
1939 return 0;
1940}
1941
1942/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1943 * used across protocols to minimize the space needed to pickle them.
1944 * Tuples are also the only builtin immutable type that can be recursive
1945 * (a tuple can be reached from itself), and that requires some subtle
1946 * magic so that it works in all cases. IOW, this is a long routine.
1947 */
1948static int
1949save_tuple(PicklerObject *self, PyObject *obj)
1950{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001951 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001952
1953 const char mark_op = MARK;
1954 const char tuple_op = TUPLE;
1955 const char pop_op = POP;
1956 const char pop_mark_op = POP_MARK;
1957 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1958
1959 if ((len = PyTuple_Size(obj)) < 0)
1960 return -1;
1961
1962 if (len == 0) {
1963 char pdata[2];
1964
1965 if (self->proto) {
1966 pdata[0] = EMPTY_TUPLE;
1967 len = 1;
1968 }
1969 else {
1970 pdata[0] = MARK;
1971 pdata[1] = TUPLE;
1972 len = 2;
1973 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001974 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001975 return -1;
1976 return 0;
1977 }
1978
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001979 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001980 * saving the tuple elements, the tuple must be recursive, in
1981 * which case we'll pop everything we put on the stack, and fetch
1982 * its value from the memo.
1983 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001984 if (len <= 3 && self->proto >= 2) {
1985 /* Use TUPLE{1,2,3} opcodes. */
1986 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001987 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001988
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001989 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990 /* pop the len elements */
1991 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001992 if (_Pickler_Write(self, &pop_op, 1) < 0)
1993 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001994 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001995 if (memo_get(self, obj) < 0)
1996 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001997
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001998 return 0;
1999 }
2000 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002001 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2002 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002003 }
2004 goto memoize;
2005 }
2006
2007 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2008 * Generate MARK e1 e2 ... TUPLE
2009 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002010 if (_Pickler_Write(self, &mark_op, 1) < 0)
2011 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002012
2013 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002014 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002016 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002017 /* pop the stack stuff we pushed */
2018 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002019 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2020 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021 }
2022 else {
2023 /* Note that we pop one more than len, to remove
2024 * the MARK too.
2025 */
2026 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002027 if (_Pickler_Write(self, &pop_op, 1) < 0)
2028 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002029 }
2030 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002031 if (memo_get(self, obj) < 0)
2032 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002033
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 return 0;
2035 }
2036 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002037 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2038 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002039 }
2040
2041 memoize:
2042 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002043 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002044
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002045 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002046}
2047
2048/* iter is an iterator giving items, and we batch up chunks of
2049 * MARK item item ... item APPENDS
2050 * opcode sequences. Calling code should have arranged to first create an
2051 * empty list, or list-like object, for the APPENDS to operate on.
2052 * Returns 0 on success, <0 on error.
2053 */
2054static int
2055batch_list(PicklerObject *self, PyObject *iter)
2056{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002057 PyObject *obj = NULL;
2058 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002059 int i, n;
2060
2061 const char mark_op = MARK;
2062 const char append_op = APPEND;
2063 const char appends_op = APPENDS;
2064
2065 assert(iter != NULL);
2066
2067 /* XXX: I think this function could be made faster by avoiding the
2068 iterator interface and fetching objects directly from list using
2069 PyList_GET_ITEM.
2070 */
2071
2072 if (self->proto == 0) {
2073 /* APPENDS isn't available; do one at a time. */
2074 for (;;) {
2075 obj = PyIter_Next(iter);
2076 if (obj == NULL) {
2077 if (PyErr_Occurred())
2078 return -1;
2079 break;
2080 }
2081 i = save(self, obj, 0);
2082 Py_DECREF(obj);
2083 if (i < 0)
2084 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002085 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002086 return -1;
2087 }
2088 return 0;
2089 }
2090
2091 /* proto > 0: write in batches of BATCHSIZE. */
2092 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002093 /* Get first item */
2094 firstitem = PyIter_Next(iter);
2095 if (firstitem == NULL) {
2096 if (PyErr_Occurred())
2097 goto error;
2098
2099 /* nothing more to add */
2100 break;
2101 }
2102
2103 /* Try to get a second item */
2104 obj = PyIter_Next(iter);
2105 if (obj == NULL) {
2106 if (PyErr_Occurred())
2107 goto error;
2108
2109 /* Only one item to write */
2110 if (save(self, firstitem, 0) < 0)
2111 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002112 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002113 goto error;
2114 Py_CLEAR(firstitem);
2115 break;
2116 }
2117
2118 /* More than one item to write */
2119
2120 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002121 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002122 goto error;
2123
2124 if (save(self, firstitem, 0) < 0)
2125 goto error;
2126 Py_CLEAR(firstitem);
2127 n = 1;
2128
2129 /* Fetch and save up to BATCHSIZE items */
2130 while (obj) {
2131 if (save(self, obj, 0) < 0)
2132 goto error;
2133 Py_CLEAR(obj);
2134 n += 1;
2135
2136 if (n == BATCHSIZE)
2137 break;
2138
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002139 obj = PyIter_Next(iter);
2140 if (obj == NULL) {
2141 if (PyErr_Occurred())
2142 goto error;
2143 break;
2144 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002145 }
2146
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002147 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002148 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002149
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002150 } while (n == BATCHSIZE);
2151 return 0;
2152
2153 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002154 Py_XDECREF(firstitem);
2155 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002156 return -1;
2157}
2158
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002159/* This is a variant of batch_list() above, specialized for lists (with no
2160 * support for list subclasses). Like batch_list(), we batch up chunks of
2161 * MARK item item ... item APPENDS
2162 * opcode sequences. Calling code should have arranged to first create an
2163 * empty list, or list-like object, for the APPENDS to operate on.
2164 * Returns 0 on success, -1 on error.
2165 *
2166 * This version is considerably faster than batch_list(), if less general.
2167 *
2168 * Note that this only works for protocols > 0.
2169 */
2170static int
2171batch_list_exact(PicklerObject *self, PyObject *obj)
2172{
2173 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002174 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002175
2176 const char append_op = APPEND;
2177 const char appends_op = APPENDS;
2178 const char mark_op = MARK;
2179
2180 assert(obj != NULL);
2181 assert(self->proto > 0);
2182 assert(PyList_CheckExact(obj));
2183
2184 if (PyList_GET_SIZE(obj) == 1) {
2185 item = PyList_GET_ITEM(obj, 0);
2186 if (save(self, item, 0) < 0)
2187 return -1;
2188 if (_Pickler_Write(self, &append_op, 1) < 0)
2189 return -1;
2190 return 0;
2191 }
2192
2193 /* Write in batches of BATCHSIZE. */
2194 total = 0;
2195 do {
2196 this_batch = 0;
2197 if (_Pickler_Write(self, &mark_op, 1) < 0)
2198 return -1;
2199 while (total < PyList_GET_SIZE(obj)) {
2200 item = PyList_GET_ITEM(obj, total);
2201 if (save(self, item, 0) < 0)
2202 return -1;
2203 total++;
2204 if (++this_batch == BATCHSIZE)
2205 break;
2206 }
2207 if (_Pickler_Write(self, &appends_op, 1) < 0)
2208 return -1;
2209
2210 } while (total < PyList_GET_SIZE(obj));
2211
2212 return 0;
2213}
2214
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002215static int
2216save_list(PicklerObject *self, PyObject *obj)
2217{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002218 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002219 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002220 int status = 0;
2221
2222 if (self->fast && !fast_save_enter(self, obj))
2223 goto error;
2224
2225 /* Create an empty list. */
2226 if (self->bin) {
2227 header[0] = EMPTY_LIST;
2228 len = 1;
2229 }
2230 else {
2231 header[0] = MARK;
2232 header[1] = LIST;
2233 len = 2;
2234 }
2235
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002236 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002237 goto error;
2238
2239 /* Get list length, and bow out early if empty. */
2240 if ((len = PyList_Size(obj)) < 0)
2241 goto error;
2242
2243 if (memo_put(self, obj) < 0)
2244 goto error;
2245
2246 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002247 /* Materialize the list elements. */
2248 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002249 if (Py_EnterRecursiveCall(" while pickling an object"))
2250 goto error;
2251 status = batch_list_exact(self, obj);
2252 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002253 } else {
2254 PyObject *iter = PyObject_GetIter(obj);
2255 if (iter == NULL)
2256 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002257
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002258 if (Py_EnterRecursiveCall(" while pickling an object")) {
2259 Py_DECREF(iter);
2260 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002261 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002262 status = batch_list(self, iter);
2263 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002264 Py_DECREF(iter);
2265 }
2266 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002267 if (0) {
2268 error:
2269 status = -1;
2270 }
2271
2272 if (self->fast && !fast_save_leave(self, obj))
2273 status = -1;
2274
2275 return status;
2276}
2277
2278/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2279 * MARK key value ... key value SETITEMS
2280 * opcode sequences. Calling code should have arranged to first create an
2281 * empty dict, or dict-like object, for the SETITEMS to operate on.
2282 * Returns 0 on success, <0 on error.
2283 *
2284 * This is very much like batch_list(). The difference between saving
2285 * elements directly, and picking apart two-tuples, is so long-winded at
2286 * the C level, though, that attempts to combine these routines were too
2287 * ugly to bear.
2288 */
2289static int
2290batch_dict(PicklerObject *self, PyObject *iter)
2291{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002292 PyObject *obj = NULL;
2293 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002294 int i, n;
2295
2296 const char mark_op = MARK;
2297 const char setitem_op = SETITEM;
2298 const char setitems_op = SETITEMS;
2299
2300 assert(iter != NULL);
2301
2302 if (self->proto == 0) {
2303 /* SETITEMS isn't available; do one at a time. */
2304 for (;;) {
2305 obj = PyIter_Next(iter);
2306 if (obj == NULL) {
2307 if (PyErr_Occurred())
2308 return -1;
2309 break;
2310 }
2311 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2312 PyErr_SetString(PyExc_TypeError, "dict items "
2313 "iterator must return 2-tuples");
2314 return -1;
2315 }
2316 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2317 if (i >= 0)
2318 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2319 Py_DECREF(obj);
2320 if (i < 0)
2321 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002322 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002323 return -1;
2324 }
2325 return 0;
2326 }
2327
2328 /* proto > 0: write in batches of BATCHSIZE. */
2329 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002330 /* Get first item */
2331 firstitem = PyIter_Next(iter);
2332 if (firstitem == NULL) {
2333 if (PyErr_Occurred())
2334 goto error;
2335
2336 /* nothing more to add */
2337 break;
2338 }
2339 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2340 PyErr_SetString(PyExc_TypeError, "dict items "
2341 "iterator must return 2-tuples");
2342 goto error;
2343 }
2344
2345 /* Try to get a second item */
2346 obj = PyIter_Next(iter);
2347 if (obj == NULL) {
2348 if (PyErr_Occurred())
2349 goto error;
2350
2351 /* Only one item to write */
2352 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2353 goto error;
2354 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2355 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002356 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002357 goto error;
2358 Py_CLEAR(firstitem);
2359 break;
2360 }
2361
2362 /* More than one item to write */
2363
2364 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002365 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002366 goto error;
2367
2368 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2369 goto error;
2370 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2371 goto error;
2372 Py_CLEAR(firstitem);
2373 n = 1;
2374
2375 /* Fetch and save up to BATCHSIZE items */
2376 while (obj) {
2377 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2378 PyErr_SetString(PyExc_TypeError, "dict items "
2379 "iterator must return 2-tuples");
2380 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002381 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002382 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2383 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2384 goto error;
2385 Py_CLEAR(obj);
2386 n += 1;
2387
2388 if (n == BATCHSIZE)
2389 break;
2390
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002391 obj = PyIter_Next(iter);
2392 if (obj == NULL) {
2393 if (PyErr_Occurred())
2394 goto error;
2395 break;
2396 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002397 }
2398
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002399 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002400 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002401
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002402 } while (n == BATCHSIZE);
2403 return 0;
2404
2405 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002406 Py_XDECREF(firstitem);
2407 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002408 return -1;
2409}
2410
Collin Winter5c9b02d2009-05-25 05:43:30 +00002411/* This is a variant of batch_dict() above that specializes for dicts, with no
2412 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2413 * MARK key value ... key value SETITEMS
2414 * opcode sequences. Calling code should have arranged to first create an
2415 * empty dict, or dict-like object, for the SETITEMS to operate on.
2416 * Returns 0 on success, -1 on error.
2417 *
2418 * Note that this currently doesn't work for protocol 0.
2419 */
2420static int
2421batch_dict_exact(PicklerObject *self, PyObject *obj)
2422{
2423 PyObject *key = NULL, *value = NULL;
2424 int i;
2425 Py_ssize_t dict_size, ppos = 0;
2426
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002427 const char mark_op = MARK;
2428 const char setitem_op = SETITEM;
2429 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002430
2431 assert(obj != NULL);
2432 assert(self->proto > 0);
2433
2434 dict_size = PyDict_Size(obj);
2435
2436 /* Special-case len(d) == 1 to save space. */
2437 if (dict_size == 1) {
2438 PyDict_Next(obj, &ppos, &key, &value);
2439 if (save(self, key, 0) < 0)
2440 return -1;
2441 if (save(self, value, 0) < 0)
2442 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002443 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002444 return -1;
2445 return 0;
2446 }
2447
2448 /* Write in batches of BATCHSIZE. */
2449 do {
2450 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002451 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002452 return -1;
2453 while (PyDict_Next(obj, &ppos, &key, &value)) {
2454 if (save(self, key, 0) < 0)
2455 return -1;
2456 if (save(self, value, 0) < 0)
2457 return -1;
2458 if (++i == BATCHSIZE)
2459 break;
2460 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002461 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002462 return -1;
2463 if (PyDict_Size(obj) != dict_size) {
2464 PyErr_Format(
2465 PyExc_RuntimeError,
2466 "dictionary changed size during iteration");
2467 return -1;
2468 }
2469
2470 } while (i == BATCHSIZE);
2471 return 0;
2472}
2473
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002474static int
2475save_dict(PicklerObject *self, PyObject *obj)
2476{
2477 PyObject *items, *iter;
2478 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002479 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002480 int status = 0;
2481
2482 if (self->fast && !fast_save_enter(self, obj))
2483 goto error;
2484
2485 /* Create an empty dict. */
2486 if (self->bin) {
2487 header[0] = EMPTY_DICT;
2488 len = 1;
2489 }
2490 else {
2491 header[0] = MARK;
2492 header[1] = DICT;
2493 len = 2;
2494 }
2495
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002496 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002497 goto error;
2498
2499 /* Get dict size, and bow out early if empty. */
2500 if ((len = PyDict_Size(obj)) < 0)
2501 goto error;
2502
2503 if (memo_put(self, obj) < 0)
2504 goto error;
2505
2506 if (len != 0) {
2507 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002508 if (PyDict_CheckExact(obj) && self->proto > 0) {
2509 /* We can take certain shortcuts if we know this is a dict and
2510 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002511 if (Py_EnterRecursiveCall(" while pickling an object"))
2512 goto error;
2513 status = batch_dict_exact(self, obj);
2514 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002515 } else {
2516 items = PyObject_CallMethod(obj, "items", "()");
2517 if (items == NULL)
2518 goto error;
2519 iter = PyObject_GetIter(items);
2520 Py_DECREF(items);
2521 if (iter == NULL)
2522 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002523 if (Py_EnterRecursiveCall(" while pickling an object")) {
2524 Py_DECREF(iter);
2525 goto error;
2526 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002527 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002528 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002529 Py_DECREF(iter);
2530 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002531 }
2532
2533 if (0) {
2534 error:
2535 status = -1;
2536 }
2537
2538 if (self->fast && !fast_save_leave(self, obj))
2539 status = -1;
2540
2541 return status;
2542}
2543
2544static int
2545save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2546{
2547 static PyObject *name_str = NULL;
2548 PyObject *global_name = NULL;
2549 PyObject *module_name = NULL;
2550 PyObject *module = NULL;
2551 PyObject *cls;
2552 int status = 0;
2553
2554 const char global_op = GLOBAL;
2555
2556 if (name_str == NULL) {
2557 name_str = PyUnicode_InternFromString("__name__");
2558 if (name_str == NULL)
2559 goto error;
2560 }
2561
2562 if (name) {
2563 global_name = name;
2564 Py_INCREF(global_name);
2565 }
2566 else {
2567 global_name = PyObject_GetAttr(obj, name_str);
2568 if (global_name == NULL)
2569 goto error;
2570 }
2571
2572 module_name = whichmodule(obj, global_name);
2573 if (module_name == NULL)
2574 goto error;
2575
2576 /* XXX: Change to use the import C API directly with level=0 to disallow
2577 relative imports.
2578
2579 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2580 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2581 custom import functions (IMHO, this would be a nice security
2582 feature). The import C API would need to be extended to support the
2583 extra parameters of __import__ to fix that. */
2584 module = PyImport_Import(module_name);
2585 if (module == NULL) {
2586 PyErr_Format(PicklingError,
2587 "Can't pickle %R: import of module %R failed",
2588 obj, module_name);
2589 goto error;
2590 }
2591 cls = PyObject_GetAttr(module, global_name);
2592 if (cls == NULL) {
2593 PyErr_Format(PicklingError,
2594 "Can't pickle %R: attribute lookup %S.%S failed",
2595 obj, module_name, global_name);
2596 goto error;
2597 }
2598 if (cls != obj) {
2599 Py_DECREF(cls);
2600 PyErr_Format(PicklingError,
2601 "Can't pickle %R: it's not the same object as %S.%S",
2602 obj, module_name, global_name);
2603 goto error;
2604 }
2605 Py_DECREF(cls);
2606
2607 if (self->proto >= 2) {
2608 /* See whether this is in the extension registry, and if
2609 * so generate an EXT opcode.
2610 */
2611 PyObject *code_obj; /* extension code as Python object */
2612 long code; /* extension code as C value */
2613 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002614 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002615
2616 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2617 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2618 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2619 /* The object is not registered in the extension registry.
2620 This is the most likely code path. */
2621 if (code_obj == NULL)
2622 goto gen_global;
2623
2624 /* XXX: pickle.py doesn't check neither the type, nor the range
2625 of the value returned by the extension_registry. It should for
2626 consistency. */
2627
2628 /* Verify code_obj has the right type and value. */
2629 if (!PyLong_Check(code_obj)) {
2630 PyErr_Format(PicklingError,
2631 "Can't pickle %R: extension code %R isn't an integer",
2632 obj, code_obj);
2633 goto error;
2634 }
2635 code = PyLong_AS_LONG(code_obj);
2636 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002637 if (!PyErr_Occurred())
2638 PyErr_Format(PicklingError,
2639 "Can't pickle %R: extension code %ld is out of range",
2640 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002641 goto error;
2642 }
2643
2644 /* Generate an EXT opcode. */
2645 if (code <= 0xff) {
2646 pdata[0] = EXT1;
2647 pdata[1] = (unsigned char)code;
2648 n = 2;
2649 }
2650 else if (code <= 0xffff) {
2651 pdata[0] = EXT2;
2652 pdata[1] = (unsigned char)(code & 0xff);
2653 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2654 n = 3;
2655 }
2656 else {
2657 pdata[0] = EXT4;
2658 pdata[1] = (unsigned char)(code & 0xff);
2659 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2660 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2661 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2662 n = 5;
2663 }
2664
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002665 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002666 goto error;
2667 }
2668 else {
2669 /* Generate a normal global opcode if we are using a pickle
2670 protocol <= 2, or if the object is not registered in the
2671 extension registry. */
2672 PyObject *encoded;
2673 PyObject *(*unicode_encoder)(PyObject *);
2674
2675 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002676 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002677 goto error;
2678
2679 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2680 the module name and the global name using UTF-8. We do so only when
2681 we are using the pickle protocol newer than version 3. This is to
2682 ensure compatibility with older Unpickler running on Python 2.x. */
2683 if (self->proto >= 3) {
2684 unicode_encoder = PyUnicode_AsUTF8String;
2685 }
2686 else {
2687 unicode_encoder = PyUnicode_AsASCIIString;
2688 }
2689
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002690 /* For protocol < 3 and if the user didn't request against doing so,
2691 we convert module names to the old 2.x module names. */
2692 if (self->fix_imports) {
2693 PyObject *key;
2694 PyObject *item;
2695
2696 key = PyTuple_Pack(2, module_name, global_name);
2697 if (key == NULL)
2698 goto error;
2699 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2700 Py_DECREF(key);
2701 if (item) {
2702 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2703 PyErr_Format(PyExc_RuntimeError,
2704 "_compat_pickle.REVERSE_NAME_MAPPING values "
2705 "should be 2-tuples, not %.200s",
2706 Py_TYPE(item)->tp_name);
2707 goto error;
2708 }
2709 Py_CLEAR(module_name);
2710 Py_CLEAR(global_name);
2711 module_name = PyTuple_GET_ITEM(item, 0);
2712 global_name = PyTuple_GET_ITEM(item, 1);
2713 if (!PyUnicode_Check(module_name) ||
2714 !PyUnicode_Check(global_name)) {
2715 PyErr_Format(PyExc_RuntimeError,
2716 "_compat_pickle.REVERSE_NAME_MAPPING values "
2717 "should be pairs of str, not (%.200s, %.200s)",
2718 Py_TYPE(module_name)->tp_name,
2719 Py_TYPE(global_name)->tp_name);
2720 goto error;
2721 }
2722 Py_INCREF(module_name);
2723 Py_INCREF(global_name);
2724 }
2725 else if (PyErr_Occurred()) {
2726 goto error;
2727 }
2728
2729 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2730 if (item) {
2731 if (!PyUnicode_Check(item)) {
2732 PyErr_Format(PyExc_RuntimeError,
2733 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2734 "should be strings, not %.200s",
2735 Py_TYPE(item)->tp_name);
2736 goto error;
2737 }
2738 Py_CLEAR(module_name);
2739 module_name = item;
2740 Py_INCREF(module_name);
2741 }
2742 else if (PyErr_Occurred()) {
2743 goto error;
2744 }
2745 }
2746
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002747 /* Save the name of the module. */
2748 encoded = unicode_encoder(module_name);
2749 if (encoded == NULL) {
2750 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2751 PyErr_Format(PicklingError,
2752 "can't pickle module identifier '%S' using "
2753 "pickle protocol %i", module_name, self->proto);
2754 goto error;
2755 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002756 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002757 PyBytes_GET_SIZE(encoded)) < 0) {
2758 Py_DECREF(encoded);
2759 goto error;
2760 }
2761 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002762 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002763 goto error;
2764
2765 /* Save the name of the module. */
2766 encoded = unicode_encoder(global_name);
2767 if (encoded == NULL) {
2768 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2769 PyErr_Format(PicklingError,
2770 "can't pickle global identifier '%S' using "
2771 "pickle protocol %i", global_name, self->proto);
2772 goto error;
2773 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002774 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002775 PyBytes_GET_SIZE(encoded)) < 0) {
2776 Py_DECREF(encoded);
2777 goto error;
2778 }
2779 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002780 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002781 goto error;
2782
2783 /* Memoize the object. */
2784 if (memo_put(self, obj) < 0)
2785 goto error;
2786 }
2787
2788 if (0) {
2789 error:
2790 status = -1;
2791 }
2792 Py_XDECREF(module_name);
2793 Py_XDECREF(global_name);
2794 Py_XDECREF(module);
2795
2796 return status;
2797}
2798
2799static int
2800save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2801{
2802 PyObject *pid = NULL;
2803 int status = 0;
2804
2805 const char persid_op = PERSID;
2806 const char binpersid_op = BINPERSID;
2807
2808 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002809 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002810 if (pid == NULL)
2811 return -1;
2812
2813 if (pid != Py_None) {
2814 if (self->bin) {
2815 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002816 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002817 goto error;
2818 }
2819 else {
2820 PyObject *pid_str = NULL;
2821 char *pid_ascii_bytes;
2822 Py_ssize_t size;
2823
2824 pid_str = PyObject_Str(pid);
2825 if (pid_str == NULL)
2826 goto error;
2827
2828 /* XXX: Should it check whether the persistent id only contains
2829 ASCII characters? And what if the pid contains embedded
2830 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002831 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002832 Py_DECREF(pid_str);
2833 if (pid_ascii_bytes == NULL)
2834 goto error;
2835
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002836 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2837 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2838 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002839 goto error;
2840 }
2841 status = 1;
2842 }
2843
2844 if (0) {
2845 error:
2846 status = -1;
2847 }
2848 Py_XDECREF(pid);
2849
2850 return status;
2851}
2852
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002853static PyObject *
2854get_class(PyObject *obj)
2855{
2856 PyObject *cls;
2857 static PyObject *str_class;
2858
2859 if (str_class == NULL) {
2860 str_class = PyUnicode_InternFromString("__class__");
2861 if (str_class == NULL)
2862 return NULL;
2863 }
2864 cls = PyObject_GetAttr(obj, str_class);
2865 if (cls == NULL) {
2866 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2867 PyErr_Clear();
2868 cls = (PyObject *) Py_TYPE(obj);
2869 Py_INCREF(cls);
2870 }
2871 }
2872 return cls;
2873}
2874
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002875/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2876 * appropriate __reduce__ method for obj.
2877 */
2878static int
2879save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2880{
2881 PyObject *callable;
2882 PyObject *argtup;
2883 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002884 PyObject *listitems = Py_None;
2885 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002886 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002887
2888 int use_newobj = self->proto >= 2;
2889
2890 const char reduce_op = REDUCE;
2891 const char build_op = BUILD;
2892 const char newobj_op = NEWOBJ;
2893
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002894 size = PyTuple_Size(args);
2895 if (size < 2 || size > 5) {
2896 PyErr_SetString(PicklingError, "tuple returned by "
2897 "__reduce__ must contain 2 through 5 elements");
2898 return -1;
2899 }
2900
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002901 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2902 &callable, &argtup, &state, &listitems, &dictitems))
2903 return -1;
2904
2905 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002906 PyErr_SetString(PicklingError, "first item of the tuple "
2907 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002908 return -1;
2909 }
2910 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002911 PyErr_SetString(PicklingError, "second item of the tuple "
2912 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002913 return -1;
2914 }
2915
2916 if (state == Py_None)
2917 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002918
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002919 if (listitems == Py_None)
2920 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002921 else if (!PyIter_Check(listitems)) {
2922 PyErr_Format(PicklingError, "Fourth element of tuple"
2923 "returned by __reduce__ must be an iterator, not %s",
2924 Py_TYPE(listitems)->tp_name);
2925 return -1;
2926 }
2927
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002928 if (dictitems == Py_None)
2929 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002930 else if (!PyIter_Check(dictitems)) {
2931 PyErr_Format(PicklingError, "Fifth element of tuple"
2932 "returned by __reduce__ must be an iterator, not %s",
2933 Py_TYPE(dictitems)->tp_name);
2934 return -1;
2935 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002936
2937 /* Protocol 2 special case: if callable's name is __newobj__, use
2938 NEWOBJ. */
2939 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002940 static PyObject *newobj_str = NULL, *name_str = NULL;
2941 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002942
2943 if (newobj_str == NULL) {
2944 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002945 name_str = PyUnicode_InternFromString("__name__");
2946 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002947 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002948 }
2949
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002950 name = PyObject_GetAttr(callable, name_str);
2951 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002952 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2953 PyErr_Clear();
2954 else
2955 return -1;
2956 use_newobj = 0;
2957 }
2958 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002959 use_newobj = PyUnicode_Check(name) &&
2960 PyUnicode_Compare(name, newobj_str) == 0;
2961 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002962 }
2963 }
2964 if (use_newobj) {
2965 PyObject *cls;
2966 PyObject *newargtup;
2967 PyObject *obj_class;
2968 int p;
2969
2970 /* Sanity checks. */
2971 if (Py_SIZE(argtup) < 1) {
2972 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2973 return -1;
2974 }
2975
2976 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002977 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002978 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002979 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002980 return -1;
2981 }
2982
2983 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002984 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002985 p = obj_class != cls; /* true iff a problem */
2986 Py_DECREF(obj_class);
2987 if (p) {
2988 PyErr_SetString(PicklingError, "args[0] from "
2989 "__newobj__ args has the wrong class");
2990 return -1;
2991 }
2992 }
2993 /* XXX: These calls save() are prone to infinite recursion. Imagine
2994 what happen if the value returned by the __reduce__() method of
2995 some extension type contains another object of the same type. Ouch!
2996
2997 Here is a quick example, that I ran into, to illustrate what I
2998 mean:
2999
3000 >>> import pickle, copyreg
3001 >>> copyreg.dispatch_table.pop(complex)
3002 >>> pickle.dumps(1+2j)
3003 Traceback (most recent call last):
3004 ...
3005 RuntimeError: maximum recursion depth exceeded
3006
3007 Removing the complex class from copyreg.dispatch_table made the
3008 __reduce_ex__() method emit another complex object:
3009
3010 >>> (1+1j).__reduce_ex__(2)
3011 (<function __newobj__ at 0xb7b71c3c>,
3012 (<class 'complex'>, (1+1j)), None, None, None)
3013
3014 Thus when save() was called on newargstup (the 2nd item) recursion
3015 ensued. Of course, the bug was in the complex class which had a
3016 broken __getnewargs__() that emitted another complex object. But,
3017 the point, here, is it is quite easy to end up with a broken reduce
3018 function. */
3019
3020 /* Save the class and its __new__ arguments. */
3021 if (save(self, cls, 0) < 0)
3022 return -1;
3023
3024 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3025 if (newargtup == NULL)
3026 return -1;
3027
3028 p = save(self, newargtup, 0);
3029 Py_DECREF(newargtup);
3030 if (p < 0)
3031 return -1;
3032
3033 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003034 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003035 return -1;
3036 }
3037 else { /* Not using NEWOBJ. */
3038 if (save(self, callable, 0) < 0 ||
3039 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003040 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003041 return -1;
3042 }
3043
3044 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3045 the caller do not want to memoize the object. Not particularly useful,
3046 but that is to mimic the behavior save_reduce() in pickle.py when
3047 obj is None. */
3048 if (obj && memo_put(self, obj) < 0)
3049 return -1;
3050
3051 if (listitems && batch_list(self, listitems) < 0)
3052 return -1;
3053
3054 if (dictitems && batch_dict(self, dictitems) < 0)
3055 return -1;
3056
3057 if (state) {
3058 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003059 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003060 return -1;
3061 }
3062
3063 return 0;
3064}
3065
3066static int
3067save(PicklerObject *self, PyObject *obj, int pers_save)
3068{
3069 PyTypeObject *type;
3070 PyObject *reduce_func = NULL;
3071 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003072 int status = 0;
3073
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003074 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003075 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003076
3077 /* The extra pers_save argument is necessary to avoid calling save_pers()
3078 on its returned object. */
3079 if (!pers_save && self->pers_func) {
3080 /* save_pers() returns:
3081 -1 to signal an error;
3082 0 if it did nothing successfully;
3083 1 if a persistent id was saved.
3084 */
3085 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3086 goto done;
3087 }
3088
3089 type = Py_TYPE(obj);
3090
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003091 /* The old cPickle had an optimization that used switch-case statement
3092 dispatching on the first letter of the type name. This has was removed
3093 since benchmarks shown that this optimization was actually slowing
3094 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003095
3096 /* Atom types; these aren't memoized, so don't check the memo. */
3097
3098 if (obj == Py_None) {
3099 status = save_none(self, obj);
3100 goto done;
3101 }
3102 else if (obj == Py_False || obj == Py_True) {
3103 status = save_bool(self, obj);
3104 goto done;
3105 }
3106 else if (type == &PyLong_Type) {
3107 status = save_long(self, obj);
3108 goto done;
3109 }
3110 else if (type == &PyFloat_Type) {
3111 status = save_float(self, obj);
3112 goto done;
3113 }
3114
3115 /* Check the memo to see if it has the object. If so, generate
3116 a GET (or BINGET) opcode, instead of pickling the object
3117 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003118 if (PyMemoTable_Get(self->memo, obj)) {
3119 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003120 goto error;
3121 goto done;
3122 }
3123
3124 if (type == &PyBytes_Type) {
3125 status = save_bytes(self, obj);
3126 goto done;
3127 }
3128 else if (type == &PyUnicode_Type) {
3129 status = save_unicode(self, obj);
3130 goto done;
3131 }
3132 else if (type == &PyDict_Type) {
3133 status = save_dict(self, obj);
3134 goto done;
3135 }
3136 else if (type == &PyList_Type) {
3137 status = save_list(self, obj);
3138 goto done;
3139 }
3140 else if (type == &PyTuple_Type) {
3141 status = save_tuple(self, obj);
3142 goto done;
3143 }
3144 else if (type == &PyType_Type) {
3145 status = save_global(self, obj, NULL);
3146 goto done;
3147 }
3148 else if (type == &PyFunction_Type) {
3149 status = save_global(self, obj, NULL);
3150 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3151 /* fall back to reduce */
3152 PyErr_Clear();
3153 }
3154 else {
3155 goto done;
3156 }
3157 }
3158 else if (type == &PyCFunction_Type) {
3159 status = save_global(self, obj, NULL);
3160 goto done;
3161 }
3162 else if (PyType_IsSubtype(type, &PyType_Type)) {
3163 status = save_global(self, obj, NULL);
3164 goto done;
3165 }
3166
3167 /* XXX: This part needs some unit tests. */
3168
3169 /* Get a reduction callable, and call it. This may come from
3170 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3171 * or the object's __reduce__ method.
3172 */
3173 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3174 if (reduce_func != NULL) {
3175 /* Here, the reference count of the reduce_func object returned by
3176 PyDict_GetItem needs to be increased to be consistent with the one
3177 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3178 reduce_func at the end of the save() routine.
3179 */
3180 Py_INCREF(reduce_func);
3181 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003182 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003183 }
3184 else {
3185 static PyObject *reduce_str = NULL;
3186 static PyObject *reduce_ex_str = NULL;
3187
3188 /* Cache the name of the reduce methods. */
3189 if (reduce_str == NULL) {
3190 reduce_str = PyUnicode_InternFromString("__reduce__");
3191 if (reduce_str == NULL)
3192 goto error;
3193 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3194 if (reduce_ex_str == NULL)
3195 goto error;
3196 }
3197
3198 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3199 automatically defined as __reduce__. While this is convenient, this
3200 make it impossible to know which method was actually called. Of
3201 course, this is not a big deal. But still, it would be nice to let
3202 the user know which method was called when something go
3203 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3204 don't actually have to check for a __reduce__ method. */
3205
3206 /* Check for a __reduce_ex__ method. */
3207 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3208 if (reduce_func != NULL) {
3209 PyObject *proto;
3210 proto = PyLong_FromLong(self->proto);
3211 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003212 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003213 }
3214 }
3215 else {
3216 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3217 PyErr_Clear();
3218 else
3219 goto error;
3220 /* Check for a __reduce__ method. */
3221 reduce_func = PyObject_GetAttr(obj, reduce_str);
3222 if (reduce_func != NULL) {
3223 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3224 }
3225 else {
3226 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3227 type->tp_name, obj);
3228 goto error;
3229 }
3230 }
3231 }
3232
3233 if (reduce_value == NULL)
3234 goto error;
3235
3236 if (PyUnicode_Check(reduce_value)) {
3237 status = save_global(self, obj, reduce_value);
3238 goto done;
3239 }
3240
3241 if (!PyTuple_Check(reduce_value)) {
3242 PyErr_SetString(PicklingError,
3243 "__reduce__ must return a string or tuple");
3244 goto error;
3245 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003246
3247 status = save_reduce(self, reduce_value, obj);
3248
3249 if (0) {
3250 error:
3251 status = -1;
3252 }
3253 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003254 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003255 Py_XDECREF(reduce_func);
3256 Py_XDECREF(reduce_value);
3257
3258 return status;
3259}
3260
3261static int
3262dump(PicklerObject *self, PyObject *obj)
3263{
3264 const char stop_op = STOP;
3265
3266 if (self->proto >= 2) {
3267 char header[2];
3268
3269 header[0] = PROTO;
3270 assert(self->proto >= 0 && self->proto < 256);
3271 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003272 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003273 return -1;
3274 }
3275
3276 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003277 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003278 return -1;
3279
3280 return 0;
3281}
3282
3283PyDoc_STRVAR(Pickler_clear_memo_doc,
3284"clear_memo() -> None. Clears the pickler's \"memo\"."
3285"\n"
3286"The memo is the data structure that remembers which objects the\n"
3287"pickler has already seen, so that shared or recursive objects are\n"
3288"pickled by reference and not by value. This method is useful when\n"
3289"re-using picklers.");
3290
3291static PyObject *
3292Pickler_clear_memo(PicklerObject *self)
3293{
3294 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003295 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003296
3297 Py_RETURN_NONE;
3298}
3299
3300PyDoc_STRVAR(Pickler_dump_doc,
3301"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3302
3303static PyObject *
3304Pickler_dump(PicklerObject *self, PyObject *args)
3305{
3306 PyObject *obj;
3307
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003308 /* Check whether the Pickler was initialized correctly (issue3664).
3309 Developers often forget to call __init__() in their subclasses, which
3310 would trigger a segfault without this check. */
3311 if (self->write == NULL) {
3312 PyErr_Format(PicklingError,
3313 "Pickler.__init__() was not called by %s.__init__()",
3314 Py_TYPE(self)->tp_name);
3315 return NULL;
3316 }
3317
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003318 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3319 return NULL;
3320
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003321 if (_Pickler_ClearBuffer(self) < 0)
3322 return NULL;
3323
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003324 if (dump(self, obj) < 0)
3325 return NULL;
3326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003327 if (_Pickler_FlushToFile(self) < 0)
3328 return NULL;
3329
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003330 Py_RETURN_NONE;
3331}
3332
/* Method table for Pickler objects: dump(obj) takes positional arguments,
   clear_memo() takes none.  The all-NULL entry terminates the table. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}  /* sentinel */
};
3340
/* Deallocator for Pickler objects: stop GC tracking, release every owned
   object reference, destroy the memo table, then free the object. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector never sees a half-torn-down object. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    /* The memo is a PyMemoTable, not a Python object, so it has its own
       destructor. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
3356
/* GC traversal for Pickler objects.  Visits write, pers_func, arg and
   fast_memo; output_buffer and memo are not visited here.  Py_VISIT
   relies on the parameters being named exactly `visit` and `arg`. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
3366
3367static int
3368Pickler_clear(PicklerObject *self)
3369{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003370 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003371 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003372 Py_CLEAR(self->pers_func);
3373 Py_CLEAR(self->arg);
3374 Py_CLEAR(self->fast_memo);
3375
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003376 if (self->memo != NULL) {
3377 PyMemoTable *memo = self->memo;
3378 self->memo = NULL;
3379 PyMemoTable_Del(memo);
3380 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003381 return 0;
3382}
3383
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003384
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003385PyDoc_STRVAR(Pickler_doc,
3386"Pickler(file, protocol=None)"
3387"\n"
3388"This takes a binary file for writing a pickle data stream.\n"
3389"\n"
3390"The optional protocol argument tells the pickler to use the\n"
3391"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3392"protocol is 3; a backward-incompatible protocol designed for\n"
3393"Python 3.0.\n"
3394"\n"
3395"Specifying a negative protocol version selects the highest\n"
3396"protocol version supported. The higher the protocol used, the\n"
3397"more recent the version of Python needed to read the pickle\n"
3398"produced.\n"
3399"\n"
3400"The file argument must have a write() method that accepts a single\n"
3401"bytes argument. It can thus be a file object opened for binary\n"
3402"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003403"meets this interface.\n"
3404"\n"
3405"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3406"map the new Python 3.x names to the old module names used in Python\n"
3407"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003408
3409static int
3410Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3411{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003412 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003413 PyObject *file;
3414 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003415 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003416
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003417 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003418 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003419 return -1;
3420
3421 /* In case of multiple __init__() calls, clear previous content. */
3422 if (self->write != NULL)
3423 (void)Pickler_clear(self);
3424
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003425 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3426 return -1;
3427
3428 if (_Pickler_SetOutputStream(self, file) < 0)
3429 return -1;
3430
3431 /* memo and output_buffer may have already been created in _Pickler_New */
3432 if (self->memo == NULL) {
3433 self->memo = PyMemoTable_New();
3434 if (self->memo == NULL)
3435 return -1;
3436 }
3437 self->output_len = 0;
3438 if (self->output_buffer == NULL) {
3439 self->max_output_len = WRITE_BUF_SIZE;
3440 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3441 self->max_output_len);
3442 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003443 return -1;
3444 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003445
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003446 self->arg = NULL;
3447 self->fast = 0;
3448 self->fast_nesting = 0;
3449 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003450 self->pers_func = NULL;
3451 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3452 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3453 "persistent_id");
3454 if (self->pers_func == NULL)
3455 return -1;
3456 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003457 return 0;
3458}
3459
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003460/* Define a proxy object for the Pickler's internal memo object. This is to
3461 * avoid breaking code like:
3462 * pickler.memo.clear()
3463 * and
3464 * pickler.memo = saved_memo
3465 * Is this a good idea? Not really, but we don't want to break code that uses
3466 * it. Note that we don't implement the entire mapping API here. This is
3467 * intentional, as these should be treated as black-box implementation details.
3468 */
3469
/* Instance layout of the memo proxy: a plain object header plus the
   pickler it forwards to. */
typedef struct {
    PyObject_HEAD
    /* Pickler whose memo table we're proxying.  This is an owned reference:
       PicklerMemoProxy_New() increments it and dealloc/clear release it. */
    PicklerObject *pickler;
} PicklerMemoProxyObject;
3474
3475PyDoc_STRVAR(pmp_clear_doc,
3476"memo.clear() -> None. Remove all items from memo.");
3477
3478static PyObject *
3479pmp_clear(PicklerMemoProxyObject *self)
3480{
3481 if (self->pickler->memo)
3482 PyMemoTable_Clear(self->pickler->memo);
3483 Py_RETURN_NONE;
3484}
3485
3486PyDoc_STRVAR(pmp_copy_doc,
3487"memo.copy() -> new_memo. Copy the memo to a new object.");
3488
3489static PyObject *
3490pmp_copy(PicklerMemoProxyObject *self)
3491{
3492 Py_ssize_t i;
3493 PyMemoTable *memo;
3494 PyObject *new_memo = PyDict_New();
3495 if (new_memo == NULL)
3496 return NULL;
3497
3498 memo = self->pickler->memo;
3499 for (i = 0; i < memo->mt_allocated; ++i) {
3500 PyMemoEntry entry = memo->mt_table[i];
3501 if (entry.me_key != NULL) {
3502 int status;
3503 PyObject *key, *value;
3504
3505 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003506 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003507
3508 if (key == NULL || value == NULL) {
3509 Py_XDECREF(key);
3510 Py_XDECREF(value);
3511 goto error;
3512 }
3513 status = PyDict_SetItem(new_memo, key, value);
3514 Py_DECREF(key);
3515 Py_DECREF(value);
3516 if (status < 0)
3517 goto error;
3518 }
3519 }
3520 return new_memo;
3521
3522 error:
3523 Py_XDECREF(new_memo);
3524 return NULL;
3525}
3526
3527PyDoc_STRVAR(pmp_reduce_doc,
3528"memo.__reduce__(). Pickling support.");
3529
3530static PyObject *
3531pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3532{
3533 PyObject *reduce_value, *dict_args;
3534 PyObject *contents = pmp_copy(self);
3535 if (contents == NULL)
3536 return NULL;
3537
3538 reduce_value = PyTuple_New(2);
3539 if (reduce_value == NULL) {
3540 Py_DECREF(contents);
3541 return NULL;
3542 }
3543 dict_args = PyTuple_New(1);
3544 if (dict_args == NULL) {
3545 Py_DECREF(contents);
3546 Py_DECREF(reduce_value);
3547 return NULL;
3548 }
3549 PyTuple_SET_ITEM(dict_args, 0, contents);
3550 Py_INCREF((PyObject *)&PyDict_Type);
3551 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3552 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3553 return reduce_value;
3554}
3555
/* Method table for the Pickler memo proxy.  Only clear(), copy() and
   __reduce__() are provided; the all-NULL entry terminates the table. */
static PyMethodDef picklerproxy_methods[] = {
    {"clear",      (PyCFunction)pmp_clear,  METH_NOARGS,  pmp_clear_doc},
    {"copy",       (PyCFunction)pmp_copy,   METH_NOARGS,  pmp_copy_doc},
    {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
    {NULL, NULL} /* sentinel */
};
3562
/* Deallocator for the memo proxy: stop GC tracking, release the reference
   to the proxied pickler, then free the proxy itself. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
3570
/* GC traversal for the memo proxy: the proxied pickler is the only object
   it references.  Py_VISIT relies on the parameters being named exactly
   `visit` and `arg`. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
3578
/* GC clear hook for the memo proxy: drop the reference to the proxied
   pickler and leave the field NULL.  Always returns 0. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
3585
/* Type object for the Pickler memo proxy.  Slots are filled positionally,
   so their order must not change.  The type is unhashable, takes part in
   cyclic GC, and exposes only the methods in picklerproxy_methods. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /*tp_name*/
    sizeof(PicklerMemoProxyObject),             /*tp_basicsize*/
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    /* tp_flags */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
3616
3617static PyObject *
3618PicklerMemoProxy_New(PicklerObject *pickler)
3619{
3620 PicklerMemoProxyObject *self;
3621
3622 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3623 if (self == NULL)
3624 return NULL;
3625 Py_INCREF(pickler);
3626 self->pickler = pickler;
3627 PyObject_GC_Track(self);
3628 return (PyObject *)self;
3629}
3630
3631/*****************************************************************************/
3632
/* Getter for Pickler.memo: return a new proxy object bound to this pickler
   rather than exposing the internal memo table.  A fresh proxy is created
   on every access.  Returns NULL if the proxy cannot be allocated. */
static PyObject *
Pickler_get_memo(PicklerObject *self)
{
    return PicklerMemoProxy_New(self);
}
3638
3639static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003640Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003641{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003642 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003643
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003644 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003645 PyErr_SetString(PyExc_TypeError,
3646 "attribute deletion is not supported");
3647 return -1;
3648 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003649
3650 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3651 PicklerObject *pickler =
3652 ((PicklerMemoProxyObject *)obj)->pickler;
3653
3654 new_memo = PyMemoTable_Copy(pickler->memo);
3655 if (new_memo == NULL)
3656 return -1;
3657 }
3658 else if (PyDict_Check(obj)) {
3659 Py_ssize_t i = 0;
3660 PyObject *key, *value;
3661
3662 new_memo = PyMemoTable_New();
3663 if (new_memo == NULL)
3664 return -1;
3665
3666 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003667 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003668 PyObject *memo_obj;
3669
3670 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3671 PyErr_SetString(PyExc_TypeError,
3672 "'memo' values must be 2-item tuples");
3673 goto error;
3674 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003675 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003676 if (memo_id == -1 && PyErr_Occurred())
3677 goto error;
3678 memo_obj = PyTuple_GET_ITEM(value, 1);
3679 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3680 goto error;
3681 }
3682 }
3683 else {
3684 PyErr_Format(PyExc_TypeError,
3685 "'memo' attribute must be an PicklerMemoProxy object"
3686 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003687 return -1;
3688 }
3689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003690 PyMemoTable_Del(self->memo);
3691 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003692
3693 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003694
3695 error:
3696 if (new_memo)
3697 PyMemoTable_Del(new_memo);
3698 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003699}
3700
3701static PyObject *
3702Pickler_get_persid(PicklerObject *self)
3703{
3704 if (self->pers_func == NULL)
3705 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3706 else
3707 Py_INCREF(self->pers_func);
3708 return self->pers_func;
3709}
3710
3711static int
3712Pickler_set_persid(PicklerObject *self, PyObject *value)
3713{
3714 PyObject *tmp;
3715
3716 if (value == NULL) {
3717 PyErr_SetString(PyExc_TypeError,
3718 "attribute deletion is not supported");
3719 return -1;
3720 }
3721 if (!PyCallable_Check(value)) {
3722 PyErr_SetString(PyExc_TypeError,
3723 "persistent_id must be a callable taking one argument");
3724 return -1;
3725 }
3726
3727 tmp = self->pers_func;
3728 Py_INCREF(value);
3729 self->pers_func = value;
3730 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3731
3732 return 0;
3733}
3734
/* Plain data members of Pickler objects: the `bin` and `fast` fields of
   PicklerObject, both exposed as C ints (T_INT).  The table ends with a
   {NULL} sentinel entry. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}
};
3740
/* Computed attributes of Pickler objects.  "memo" and "persistent_id" are
   each backed by a getter/setter pair defined above; the table ends with a
   {NULL} sentinel entry. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
3748
/* Type object for "_pickle.Pickler".

   The slots are filled positionally, so their order must not change.
   The type is subclassable (Py_TPFLAGS_BASETYPE) and takes part in cyclic
   garbage collection (Py_TPFLAGS_HAVE_GC with Pickler_traverse and
   Pickler_clear); instances are initialised by Pickler_init. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3791
3792/* Temporary helper for calling self.find_class().
3793
3794 XXX: It would be nice to able to avoid Python function call overhead, by
3795 using directly the C version of find_class(), when find_class() is not
3796 overridden by a subclass. Although, this could become rather hackish. A
3797 simpler optimization would be to call the C function when self is not a
3798 subclass instance. */
3799static PyObject *
3800find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3801{
3802 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3803 module_name, global_name);
3804}
3805
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003806static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003807marker(UnpicklerObject *self)
3808{
3809 if (self->num_marks < 1) {
3810 PyErr_SetString(UnpicklingError, "could not find MARK");
3811 return -1;
3812 }
3813
3814 return self->marks[--self->num_marks];
3815}
3816
3817static int
3818load_none(UnpicklerObject *self)
3819{
3820 PDATA_APPEND(self->stack, Py_None, -1);
3821 return 0;
3822}
3823
/* Raise UnpicklingError("pickle data was truncated") and return -1, so
   that callers can simply write `return bad_readline();` when a line read
   from the pickle is shorter than the opcode requires. */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
3830
3831static int
3832load_int(UnpicklerObject *self)
3833{
3834 PyObject *value;
3835 char *endptr, *s;
3836 Py_ssize_t len;
3837 long x;
3838
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003839 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003840 return -1;
3841 if (len < 2)
3842 return bad_readline();
3843
3844 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003845 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3846 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003847 x = strtol(s, &endptr, 0);
3848
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003849 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003850 /* Hm, maybe we've got something long. Let's try reading
3851 * it as a Python long object. */
3852 errno = 0;
3853 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003854 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003855 if (value == NULL) {
3856 PyErr_SetString(PyExc_ValueError,
3857 "could not convert string to int");
3858 return -1;
3859 }
3860 }
3861 else {
3862 if (len == 3 && (x == 0 || x == 1)) {
3863 if ((value = PyBool_FromLong(x)) == NULL)
3864 return -1;
3865 }
3866 else {
3867 if ((value = PyLong_FromLong(x)) == NULL)
3868 return -1;
3869 }
3870 }
3871
3872 PDATA_PUSH(self->stack, value, -1);
3873 return 0;
3874}
3875
/* Append the given boolean singleton to the unpickler stack.
   `boolean` must be Py_True or Py_False (checked by assert only).
   PDATA_APPEND is given -1 as its error value; 0 is returned otherwise. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
3883
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003884/* s contains x bytes of an unsigned little-endian integer. Return its value
3885 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3886 */
3887static Py_ssize_t
3888calc_binsize(char *bytes, int size)
3889{
3890 unsigned char *s = (unsigned char *)bytes;
3891 size_t x = 0;
3892
3893 assert(size == 4);
3894
3895 x = (size_t) s[0];
3896 x |= (size_t) s[1] << 8;
3897 x |= (size_t) s[2] << 16;
3898 x |= (size_t) s[3] << 24;
3899
3900 if (x > PY_SSIZE_T_MAX)
3901 return -1;
3902 else
3903 return (Py_ssize_t) x;
3904}
3905
/* Decode `size` bytes at `bytes` as a little-endian integer and return it
 * as a C long.  Callers in this file pass a size of 1, 2 or 4.
 *
 * Obscure: when size is 1 or 2 the value is unsigned, but when size is 4
 * (BININT, more accurately BININT4) it is a signed 32-bit two's-complement
 * value.  This is an historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no shift ever
 * moves a bit into the sign position of a signed type (which would be
 * undefined behaviour when long is 32 bits wide), and the sign extension
 * is done arithmetically so the result is the same whatever the width of
 * long.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    if (size == 4 && (x & 0x80000000UL) != 0) {
        /* Negative 32-bit value: x - 2**32, computed without overflow.
         * (0xFFFFFFFF - x) lies in [0, 0x7FFFFFFF], which always fits
         * in a long. */
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
3932
3933static int
3934load_binintx(UnpicklerObject *self, char *s, int size)
3935{
3936 PyObject *value;
3937 long x;
3938
3939 x = calc_binint(s, size);
3940
3941 if ((value = PyLong_FromLong(x)) == NULL)
3942 return -1;
3943
3944 PDATA_PUSH(self->stack, value, -1);
3945 return 0;
3946}
3947
3948static int
3949load_binint(UnpicklerObject *self)
3950{
3951 char *s;
3952
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003953 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003954 return -1;
3955
3956 return load_binintx(self, s, 4);
3957}
3958
3959static int
3960load_binint1(UnpicklerObject *self)
3961{
3962 char *s;
3963
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003964 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003965 return -1;
3966
3967 return load_binintx(self, s, 1);
3968}
3969
3970static int
3971load_binint2(UnpicklerObject *self)
3972{
3973 char *s;
3974
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003975 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003976 return -1;
3977
3978 return load_binintx(self, s, 2);
3979}
3980
3981static int
3982load_long(UnpicklerObject *self)
3983{
3984 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003985 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003986 Py_ssize_t len;
3987
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003988 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003989 return -1;
3990 if (len < 2)
3991 return bad_readline();
3992
Mark Dickinson8dd05142009-01-20 20:43:58 +00003993 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3994 the 'L' before calling PyLong_FromString. In order to maintain
3995 compatibility with Python 3.0.0, we don't actually *require*
3996 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003997 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003998 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003999 /* XXX: Should the base argument explicitly set to 10? */
4000 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004001 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004002 return -1;
4003
4004 PDATA_PUSH(self->stack, value, -1);
4005 return 0;
4006}
4007
4008/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4009 * data following.
4010 */
4011static int
4012load_counted_long(UnpicklerObject *self, int size)
4013{
4014 PyObject *value;
4015 char *nbytes;
4016 char *pdata;
4017
4018 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004019 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004020 return -1;
4021
4022 size = calc_binint(nbytes, size);
4023 if (size < 0) {
4024 /* Corrupt or hostile pickle -- we never write one like this */
4025 PyErr_SetString(UnpicklingError,
4026 "LONG pickle has negative byte count");
4027 return -1;
4028 }
4029
4030 if (size == 0)
4031 value = PyLong_FromLong(0L);
4032 else {
4033 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004034 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004035 return -1;
4036 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4037 1 /* little endian */ , 1 /* signed */ );
4038 }
4039 if (value == NULL)
4040 return -1;
4041 PDATA_PUSH(self->stack, value, -1);
4042 return 0;
4043}
4044
4045static int
4046load_float(UnpicklerObject *self)
4047{
4048 PyObject *value;
4049 char *endptr, *s;
4050 Py_ssize_t len;
4051 double d;
4052
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004053 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004054 return -1;
4055 if (len < 2)
4056 return bad_readline();
4057
4058 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004059 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4060 if (d == -1.0 && PyErr_Occurred())
4061 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004062 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004063 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4064 return -1;
4065 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004066 value = PyFloat_FromDouble(d);
4067 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004068 return -1;
4069
4070 PDATA_PUSH(self->stack, value, -1);
4071 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004072}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004073
4074static int
4075load_binfloat(UnpicklerObject *self)
4076{
4077 PyObject *value;
4078 double x;
4079 char *s;
4080
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004081 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004082 return -1;
4083
4084 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4085 if (x == -1.0 && PyErr_Occurred())
4086 return -1;
4087
4088 if ((value = PyFloat_FromDouble(x)) == NULL)
4089 return -1;
4090
4091 PDATA_PUSH(self->stack, value, -1);
4092 return 0;
4093}
4094
4095static int
4096load_string(UnpicklerObject *self)
4097{
4098 PyObject *bytes;
4099 PyObject *str = NULL;
4100 Py_ssize_t len;
4101 char *s, *p;
4102
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004103 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004104 return -1;
4105 if (len < 3)
4106 return bad_readline();
4107 if ((s = strdup(s)) == NULL) {
4108 PyErr_NoMemory();
4109 return -1;
4110 }
4111
4112 /* Strip outermost quotes */
4113 while (s[len - 1] <= ' ')
4114 len--;
4115 if (s[0] == '"' && s[len - 1] == '"') {
4116 s[len - 1] = '\0';
4117 p = s + 1;
4118 len -= 2;
4119 }
4120 else if (s[0] == '\'' && s[len - 1] == '\'') {
4121 s[len - 1] = '\0';
4122 p = s + 1;
4123 len -= 2;
4124 }
4125 else {
4126 free(s);
4127 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4128 return -1;
4129 }
4130
4131 /* Use the PyBytes API to decode the string, since that is what is used
4132 to encode, and then coerce the result to Unicode. */
4133 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4134 free(s);
4135 if (bytes == NULL)
4136 return -1;
4137 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4138 Py_DECREF(bytes);
4139 if (str == NULL)
4140 return -1;
4141
4142 PDATA_PUSH(self->stack, str, -1);
4143 return 0;
4144}
4145
4146static int
4147load_binbytes(UnpicklerObject *self)
4148{
4149 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004150 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004151 char *s;
4152
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004153 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004154 return -1;
4155
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004156 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004157 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004158 PyErr_Format(PyExc_OverflowError,
4159 "BINBYTES exceeds system's maximum size of %zd bytes",
4160 PY_SSIZE_T_MAX
4161 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004162 return -1;
4163 }
4164
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004165 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004166 return -1;
4167 bytes = PyBytes_FromStringAndSize(s, x);
4168 if (bytes == NULL)
4169 return -1;
4170
4171 PDATA_PUSH(self->stack, bytes, -1);
4172 return 0;
4173}
4174
4175static int
4176load_short_binbytes(UnpicklerObject *self)
4177{
4178 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004179 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004180 char *s;
4181
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004182 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004183 return -1;
4184
4185 x = (unsigned char)s[0];
4186
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004187 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004188 return -1;
4189
4190 bytes = PyBytes_FromStringAndSize(s, x);
4191 if (bytes == NULL)
4192 return -1;
4193
4194 PDATA_PUSH(self->stack, bytes, -1);
4195 return 0;
4196}
4197
4198static int
4199load_binstring(UnpicklerObject *self)
4200{
4201 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004202 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004203 char *s;
4204
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004205 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004206 return -1;
4207
4208 x = calc_binint(s, 4);
4209 if (x < 0) {
4210 PyErr_SetString(UnpicklingError,
4211 "BINSTRING pickle has negative byte count");
4212 return -1;
4213 }
4214
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004215 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004216 return -1;
4217
4218 /* Convert Python 2.x strings to unicode. */
4219 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4220 if (str == NULL)
4221 return -1;
4222
4223 PDATA_PUSH(self->stack, str, -1);
4224 return 0;
4225}
4226
4227static int
4228load_short_binstring(UnpicklerObject *self)
4229{
4230 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004231 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004232 char *s;
4233
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004234 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004235 return -1;
4236
4237 x = (unsigned char)s[0];
4238
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004239 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004240 return -1;
4241
4242 /* Convert Python 2.x strings to unicode. */
4243 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4244 if (str == NULL)
4245 return -1;
4246
4247 PDATA_PUSH(self->stack, str, -1);
4248 return 0;
4249}
4250
4251static int
4252load_unicode(UnpicklerObject *self)
4253{
4254 PyObject *str;
4255 Py_ssize_t len;
4256 char *s;
4257
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004258 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004259 return -1;
4260 if (len < 1)
4261 return bad_readline();
4262
4263 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4264 if (str == NULL)
4265 return -1;
4266
4267 PDATA_PUSH(self->stack, str, -1);
4268 return 0;
4269}
4270
4271static int
4272load_binunicode(UnpicklerObject *self)
4273{
4274 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004275 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004276 char *s;
4277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004278 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004279 return -1;
4280
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004281 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004282 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004283 PyErr_Format(PyExc_OverflowError,
4284 "BINUNICODE exceeds system's maximum size of %zd bytes",
4285 PY_SSIZE_T_MAX
4286 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004287 return -1;
4288 }
4289
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004290
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004291 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004292 return -1;
4293
Victor Stinner485fb562010-04-13 11:07:24 +00004294 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004295 if (str == NULL)
4296 return -1;
4297
4298 PDATA_PUSH(self->stack, str, -1);
4299 return 0;
4300}
4301
4302static int
4303load_tuple(UnpicklerObject *self)
4304{
4305 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004306 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004307
4308 if ((i = marker(self)) < 0)
4309 return -1;
4310
4311 tuple = Pdata_poptuple(self->stack, i);
4312 if (tuple == NULL)
4313 return -1;
4314 PDATA_PUSH(self->stack, tuple, -1);
4315 return 0;
4316}
4317
4318static int
4319load_counted_tuple(UnpicklerObject *self, int len)
4320{
4321 PyObject *tuple;
4322
4323 tuple = PyTuple_New(len);
4324 if (tuple == NULL)
4325 return -1;
4326
4327 while (--len >= 0) {
4328 PyObject *item;
4329
4330 PDATA_POP(self->stack, item);
4331 if (item == NULL)
4332 return -1;
4333 PyTuple_SET_ITEM(tuple, len, item);
4334 }
4335 PDATA_PUSH(self->stack, tuple, -1);
4336 return 0;
4337}
4338
4339static int
4340load_empty_list(UnpicklerObject *self)
4341{
4342 PyObject *list;
4343
4344 if ((list = PyList_New(0)) == NULL)
4345 return -1;
4346 PDATA_PUSH(self->stack, list, -1);
4347 return 0;
4348}
4349
4350static int
4351load_empty_dict(UnpicklerObject *self)
4352{
4353 PyObject *dict;
4354
4355 if ((dict = PyDict_New()) == NULL)
4356 return -1;
4357 PDATA_PUSH(self->stack, dict, -1);
4358 return 0;
4359}
4360
4361static int
4362load_list(UnpicklerObject *self)
4363{
4364 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004365 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004366
4367 if ((i = marker(self)) < 0)
4368 return -1;
4369
4370 list = Pdata_poplist(self->stack, i);
4371 if (list == NULL)
4372 return -1;
4373 PDATA_PUSH(self->stack, list, -1);
4374 return 0;
4375}
4376
4377static int
4378load_dict(UnpicklerObject *self)
4379{
4380 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004381 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004382
4383 if ((i = marker(self)) < 0)
4384 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004385 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004386
4387 if ((dict = PyDict_New()) == NULL)
4388 return -1;
4389
4390 for (k = i + 1; k < j; k += 2) {
4391 key = self->stack->data[k - 1];
4392 value = self->stack->data[k];
4393 if (PyDict_SetItem(dict, key, value) < 0) {
4394 Py_DECREF(dict);
4395 return -1;
4396 }
4397 }
4398 Pdata_clear(self->stack, i);
4399 PDATA_PUSH(self->stack, dict, -1);
4400 return 0;
4401}
4402
4403static PyObject *
4404instantiate(PyObject *cls, PyObject *args)
4405{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004406 PyObject *result = NULL;
4407 /* Caller must assure args are a tuple. Normally, args come from
4408 Pdata_poptuple which packs objects from the top of the stack
4409 into a newly created tuple. */
4410 assert(PyTuple_Check(args));
4411 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4412 PyObject_HasAttrString(cls, "__getinitargs__")) {
4413 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004414 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004415 else {
4416 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4417 }
4418 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004419}
4420
4421static int
4422load_obj(UnpicklerObject *self)
4423{
4424 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004425 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004426
4427 if ((i = marker(self)) < 0)
4428 return -1;
4429
4430 args = Pdata_poptuple(self->stack, i + 1);
4431 if (args == NULL)
4432 return -1;
4433
4434 PDATA_POP(self->stack, cls);
4435 if (cls) {
4436 obj = instantiate(cls, args);
4437 Py_DECREF(cls);
4438 }
4439 Py_DECREF(args);
4440 if (obj == NULL)
4441 return -1;
4442
4443 PDATA_PUSH(self->stack, obj, -1);
4444 return 0;
4445}
4446
4447static int
4448load_inst(UnpicklerObject *self)
4449{
4450 PyObject *cls = NULL;
4451 PyObject *args = NULL;
4452 PyObject *obj = NULL;
4453 PyObject *module_name;
4454 PyObject *class_name;
4455 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004456 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004457 char *s;
4458
4459 if ((i = marker(self)) < 0)
4460 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004461 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004462 return -1;
4463 if (len < 2)
4464 return bad_readline();
4465
4466 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4467 identifiers are permitted in Python 3.0, since the INST opcode is only
4468 supported by older protocols on Python 2.x. */
4469 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4470 if (module_name == NULL)
4471 return -1;
4472
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004473 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004474 if (len < 2)
4475 return bad_readline();
4476 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004477 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004478 cls = find_class(self, module_name, class_name);
4479 Py_DECREF(class_name);
4480 }
4481 }
4482 Py_DECREF(module_name);
4483
4484 if (cls == NULL)
4485 return -1;
4486
4487 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4488 obj = instantiate(cls, args);
4489 Py_DECREF(args);
4490 }
4491 Py_DECREF(cls);
4492
4493 if (obj == NULL)
4494 return -1;
4495
4496 PDATA_PUSH(self->stack, obj, -1);
4497 return 0;
4498}
4499
4500static int
4501load_newobj(UnpicklerObject *self)
4502{
4503 PyObject *args = NULL;
4504 PyObject *clsraw = NULL;
4505 PyTypeObject *cls; /* clsraw cast to its true type */
4506 PyObject *obj;
4507
4508 /* Stack is ... cls argtuple, and we want to call
4509 * cls.__new__(cls, *argtuple).
4510 */
4511 PDATA_POP(self->stack, args);
4512 if (args == NULL)
4513 goto error;
4514 if (!PyTuple_Check(args)) {
4515 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4516 goto error;
4517 }
4518
4519 PDATA_POP(self->stack, clsraw);
4520 cls = (PyTypeObject *)clsraw;
4521 if (cls == NULL)
4522 goto error;
4523 if (!PyType_Check(cls)) {
4524 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4525 "isn't a type object");
4526 goto error;
4527 }
4528 if (cls->tp_new == NULL) {
4529 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4530 "has NULL tp_new");
4531 goto error;
4532 }
4533
4534 /* Call __new__. */
4535 obj = cls->tp_new(cls, args, NULL);
4536 if (obj == NULL)
4537 goto error;
4538
4539 Py_DECREF(args);
4540 Py_DECREF(clsraw);
4541 PDATA_PUSH(self->stack, obj, -1);
4542 return 0;
4543
4544 error:
4545 Py_XDECREF(args);
4546 Py_XDECREF(clsraw);
4547 return -1;
4548}
4549
4550static int
4551load_global(UnpicklerObject *self)
4552{
4553 PyObject *global = NULL;
4554 PyObject *module_name;
4555 PyObject *global_name;
4556 Py_ssize_t len;
4557 char *s;
4558
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004559 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004560 return -1;
4561 if (len < 2)
4562 return bad_readline();
4563 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4564 if (!module_name)
4565 return -1;
4566
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004567 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004568 if (len < 2) {
4569 Py_DECREF(module_name);
4570 return bad_readline();
4571 }
4572 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4573 if (global_name) {
4574 global = find_class(self, module_name, global_name);
4575 Py_DECREF(global_name);
4576 }
4577 }
4578 Py_DECREF(module_name);
4579
4580 if (global == NULL)
4581 return -1;
4582 PDATA_PUSH(self->stack, global, -1);
4583 return 0;
4584}
4585
4586static int
4587load_persid(UnpicklerObject *self)
4588{
4589 PyObject *pid;
4590 Py_ssize_t len;
4591 char *s;
4592
4593 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004594 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004595 return -1;
4596 if (len < 2)
4597 return bad_readline();
4598
4599 pid = PyBytes_FromStringAndSize(s, len - 1);
4600 if (pid == NULL)
4601 return -1;
4602
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004603 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004604 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004605 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004606 if (pid == NULL)
4607 return -1;
4608
4609 PDATA_PUSH(self->stack, pid, -1);
4610 return 0;
4611 }
4612 else {
4613 PyErr_SetString(UnpicklingError,
4614 "A load persistent id instruction was encountered,\n"
4615 "but no persistent_load function was specified.");
4616 return -1;
4617 }
4618}
4619
4620static int
4621load_binpersid(UnpicklerObject *self)
4622{
4623 PyObject *pid;
4624
4625 if (self->pers_func) {
4626 PDATA_POP(self->stack, pid);
4627 if (pid == NULL)
4628 return -1;
4629
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004630 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004631 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004632 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004633 if (pid == NULL)
4634 return -1;
4635
4636 PDATA_PUSH(self->stack, pid, -1);
4637 return 0;
4638 }
4639 else {
4640 PyErr_SetString(UnpicklingError,
4641 "A load persistent id instruction was encountered,\n"
4642 "but no persistent_load function was specified.");
4643 return -1;
4644 }
4645}
4646
4647static int
4648load_pop(UnpicklerObject *self)
4649{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004650 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004651
4652 /* Note that we split the (pickle.py) stack into two stacks,
4653 * an object stack and a mark stack. We have to be clever and
4654 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004655 * mark stack first, and only signalling a stack underflow if
4656 * the object stack is empty and the mark stack doesn't match
4657 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004658 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004659 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004660 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004661 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004662 len--;
4663 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004664 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004665 } else {
4666 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004667 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004668 return 0;
4669}
4670
4671static int
4672load_pop_mark(UnpicklerObject *self)
4673{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004674 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004675
4676 if ((i = marker(self)) < 0)
4677 return -1;
4678
4679 Pdata_clear(self->stack, i);
4680
4681 return 0;
4682}
4683
4684static int
4685load_dup(UnpicklerObject *self)
4686{
4687 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004688 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004690 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004691 return stack_underflow();
4692 last = self->stack->data[len - 1];
4693 PDATA_APPEND(self->stack, last, -1);
4694 return 0;
4695}
4696
4697static int
4698load_get(UnpicklerObject *self)
4699{
4700 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004701 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004702 Py_ssize_t len;
4703 char *s;
4704
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004705 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004706 return -1;
4707 if (len < 2)
4708 return bad_readline();
4709
4710 key = PyLong_FromString(s, NULL, 10);
4711 if (key == NULL)
4712 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004713 idx = PyLong_AsSsize_t(key);
4714 if (idx == -1 && PyErr_Occurred()) {
4715 Py_DECREF(key);
4716 return -1;
4717 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004718
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004719 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004720 if (value == NULL) {
4721 if (!PyErr_Occurred())
4722 PyErr_SetObject(PyExc_KeyError, key);
4723 Py_DECREF(key);
4724 return -1;
4725 }
4726 Py_DECREF(key);
4727
4728 PDATA_APPEND(self->stack, value, -1);
4729 return 0;
4730}
4731
4732static int
4733load_binget(UnpicklerObject *self)
4734{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004735 PyObject *value;
4736 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004737 char *s;
4738
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004739 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004740 return -1;
4741
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004742 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004743
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004744 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004745 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004746 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004747 if (!PyErr_Occurred())
4748 PyErr_SetObject(PyExc_KeyError, key);
4749 Py_DECREF(key);
4750 return -1;
4751 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752
4753 PDATA_APPEND(self->stack, value, -1);
4754 return 0;
4755}
4756
4757static int
4758load_long_binget(UnpicklerObject *self)
4759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004760 PyObject *value;
4761 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004762 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004763
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004764 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765 return -1;
4766
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004767 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004768
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004769 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004770 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004771 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004772 if (!PyErr_Occurred())
4773 PyErr_SetObject(PyExc_KeyError, key);
4774 Py_DECREF(key);
4775 return -1;
4776 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004777
4778 PDATA_APPEND(self->stack, value, -1);
4779 return 0;
4780}
4781
4782/* Push an object from the extension registry (EXT[124]). nbytes is
4783 * the number of bytes following the opcode, holding the index (code) value.
4784 */
4785static int
4786load_extension(UnpicklerObject *self, int nbytes)
4787{
4788 char *codebytes; /* the nbytes bytes after the opcode */
4789 long code; /* calc_binint returns long */
4790 PyObject *py_code; /* code as a Python int */
4791 PyObject *obj; /* the object to push */
4792 PyObject *pair; /* (module_name, class_name) */
4793 PyObject *module_name, *class_name;
4794
4795 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004796 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004797 return -1;
4798 code = calc_binint(codebytes, nbytes);
4799 if (code <= 0) { /* note that 0 is forbidden */
4800 /* Corrupt or hostile pickle. */
4801 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4802 return -1;
4803 }
4804
4805 /* Look for the code in the cache. */
4806 py_code = PyLong_FromLong(code);
4807 if (py_code == NULL)
4808 return -1;
4809 obj = PyDict_GetItem(extension_cache, py_code);
4810 if (obj != NULL) {
4811 /* Bingo. */
4812 Py_DECREF(py_code);
4813 PDATA_APPEND(self->stack, obj, -1);
4814 return 0;
4815 }
4816
4817 /* Look up the (module_name, class_name) pair. */
4818 pair = PyDict_GetItem(inverted_registry, py_code);
4819 if (pair == NULL) {
4820 Py_DECREF(py_code);
4821 PyErr_Format(PyExc_ValueError, "unregistered extension "
4822 "code %ld", code);
4823 return -1;
4824 }
4825 /* Since the extension registry is manipulable via Python code,
4826 * confirm that pair is really a 2-tuple of strings.
4827 */
4828 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4829 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4830 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4831 Py_DECREF(py_code);
4832 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4833 "isn't a 2-tuple of strings", code);
4834 return -1;
4835 }
4836 /* Load the object. */
4837 obj = find_class(self, module_name, class_name);
4838 if (obj == NULL) {
4839 Py_DECREF(py_code);
4840 return -1;
4841 }
4842 /* Cache code -> obj. */
4843 code = PyDict_SetItem(extension_cache, py_code, obj);
4844 Py_DECREF(py_code);
4845 if (code < 0) {
4846 Py_DECREF(obj);
4847 return -1;
4848 }
4849 PDATA_PUSH(self->stack, obj, -1);
4850 return 0;
4851}
4852
4853static int
4854load_put(UnpicklerObject *self)
4855{
4856 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004857 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004858 Py_ssize_t len;
4859 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004860
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004861 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004862 return -1;
4863 if (len < 2)
4864 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004865 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004866 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004867 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004868
4869 key = PyLong_FromString(s, NULL, 10);
4870 if (key == NULL)
4871 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004872 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004873 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004874 if (idx < 0) {
4875 if (!PyErr_Occurred())
4876 PyErr_SetString(PyExc_ValueError,
4877 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004879 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004880
4881 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004882}
4883
4884static int
4885load_binput(UnpicklerObject *self)
4886{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004887 PyObject *value;
4888 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004889 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004890
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004891 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004892 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004893
4894 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004895 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004896 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004898 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004899
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004900 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004901}
4902
4903static int
4904load_long_binput(UnpicklerObject *self)
4905{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004906 PyObject *value;
4907 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004908 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004909
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004910 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004911 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004912
4913 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004914 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004915 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004916
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004917 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004918 if (idx < 0) {
4919 PyErr_SetString(PyExc_ValueError,
4920 "negative LONG_BINPUT argument");
4921 return -1;
4922 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004923
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004924 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004925}
4926
4927static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004928do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929{
4930 PyObject *value;
4931 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004932 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004933
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004934 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004935 if (x > len || x <= 0)
4936 return stack_underflow();
4937 if (len == x) /* nothing to do */
4938 return 0;
4939
4940 list = self->stack->data[x - 1];
4941
4942 if (PyList_Check(list)) {
4943 PyObject *slice;
4944 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004945 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004946
4947 slice = Pdata_poplist(self->stack, x);
4948 if (!slice)
4949 return -1;
4950 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004951 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004952 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004953 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004954 }
4955 else {
4956 PyObject *append_func;
4957
4958 append_func = PyObject_GetAttrString(list, "append");
4959 if (append_func == NULL)
4960 return -1;
4961 for (i = x; i < len; i++) {
4962 PyObject *result;
4963
4964 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004965 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004966 if (result == NULL) {
4967 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004968 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004969 return -1;
4970 }
4971 Py_DECREF(result);
4972 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004973 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004974 }
4975
4976 return 0;
4977}
4978
4979static int
4980load_append(UnpicklerObject *self)
4981{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004982 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004983}
4984
4985static int
4986load_appends(UnpicklerObject *self)
4987{
4988 return do_append(self, marker(self));
4989}
4990
4991static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004992do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004993{
4994 PyObject *value, *key;
4995 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004996 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004997 int status = 0;
4998
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004999 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005000 if (x > len || x <= 0)
5001 return stack_underflow();
5002 if (len == x) /* nothing to do */
5003 return 0;
5004 if ((len - x) % 2 != 0) {
5005 /* Currupt or hostile pickle -- we never write one like this. */
5006 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5007 return -1;
5008 }
5009
5010 /* Here, dict does not actually need to be a PyDict; it could be anything
5011 that supports the __setitem__ attribute. */
5012 dict = self->stack->data[x - 1];
5013
5014 for (i = x + 1; i < len; i += 2) {
5015 key = self->stack->data[i - 1];
5016 value = self->stack->data[i];
5017 if (PyObject_SetItem(dict, key, value) < 0) {
5018 status = -1;
5019 break;
5020 }
5021 }
5022
5023 Pdata_clear(self->stack, x);
5024 return status;
5025}
5026
5027static int
5028load_setitem(UnpicklerObject *self)
5029{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005030 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005031}
5032
5033static int
5034load_setitems(UnpicklerObject *self)
5035{
5036 return do_setitems(self, marker(self));
5037}
5038
5039static int
5040load_build(UnpicklerObject *self)
5041{
5042 PyObject *state, *inst, *slotstate;
5043 PyObject *setstate;
5044 int status = 0;
5045
5046 /* Stack is ... instance, state. We want to leave instance at
5047 * the stack top, possibly mutated via instance.__setstate__(state).
5048 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005049 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005050 return stack_underflow();
5051
5052 PDATA_POP(self->stack, state);
5053 if (state == NULL)
5054 return -1;
5055
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005056 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005057
5058 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005059 if (setstate == NULL) {
5060 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5061 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005062 else {
5063 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005064 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005065 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005066 }
5067 else {
5068 PyObject *result;
5069
5070 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005071 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005072 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005073 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005074 Py_DECREF(setstate);
5075 if (result == NULL)
5076 return -1;
5077 Py_DECREF(result);
5078 return 0;
5079 }
5080
5081 /* A default __setstate__. First see whether state embeds a
5082 * slot state dict too (a proto 2 addition).
5083 */
5084 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5085 PyObject *tmp = state;
5086
5087 state = PyTuple_GET_ITEM(tmp, 0);
5088 slotstate = PyTuple_GET_ITEM(tmp, 1);
5089 Py_INCREF(state);
5090 Py_INCREF(slotstate);
5091 Py_DECREF(tmp);
5092 }
5093 else
5094 slotstate = NULL;
5095
5096 /* Set inst.__dict__ from the state dict (if any). */
5097 if (state != Py_None) {
5098 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005099 PyObject *d_key, *d_value;
5100 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005101
5102 if (!PyDict_Check(state)) {
5103 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5104 goto error;
5105 }
5106 dict = PyObject_GetAttrString(inst, "__dict__");
5107 if (dict == NULL)
5108 goto error;
5109
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005110 i = 0;
5111 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5112 /* normally the keys for instance attributes are
5113 interned. we should try to do that here. */
5114 Py_INCREF(d_key);
5115 if (PyUnicode_CheckExact(d_key))
5116 PyUnicode_InternInPlace(&d_key);
5117 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5118 Py_DECREF(d_key);
5119 goto error;
5120 }
5121 Py_DECREF(d_key);
5122 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005123 Py_DECREF(dict);
5124 }
5125
5126 /* Also set instance attributes from the slotstate dict (if any). */
5127 if (slotstate != NULL) {
5128 PyObject *d_key, *d_value;
5129 Py_ssize_t i;
5130
5131 if (!PyDict_Check(slotstate)) {
5132 PyErr_SetString(UnpicklingError,
5133 "slot state is not a dictionary");
5134 goto error;
5135 }
5136 i = 0;
5137 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5138 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5139 goto error;
5140 }
5141 }
5142
5143 if (0) {
5144 error:
5145 status = -1;
5146 }
5147
5148 Py_DECREF(state);
5149 Py_XDECREF(slotstate);
5150 return status;
5151}
5152
5153static int
5154load_mark(UnpicklerObject *self)
5155{
5156
5157 /* Note that we split the (pickle.py) stack into two stacks, an
5158 * object stack and a mark stack. Here we push a mark onto the
5159 * mark stack.
5160 */
5161
5162 if ((self->num_marks + 1) >= self->marks_size) {
5163 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005164 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005165
5166 /* Use the size_t type to check for overflow. */
5167 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005168 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005169 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005170 PyErr_NoMemory();
5171 return -1;
5172 }
5173
5174 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005175 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005176 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005177 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5178 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005179 if (marks == NULL) {
5180 PyErr_NoMemory();
5181 return -1;
5182 }
5183 self->marks = marks;
5184 self->marks_size = (Py_ssize_t)alloc;
5185 }
5186
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005187 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005188
5189 return 0;
5190}
5191
5192static int
5193load_reduce(UnpicklerObject *self)
5194{
5195 PyObject *callable = NULL;
5196 PyObject *argtup = NULL;
5197 PyObject *obj = NULL;
5198
5199 PDATA_POP(self->stack, argtup);
5200 if (argtup == NULL)
5201 return -1;
5202 PDATA_POP(self->stack, callable);
5203 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005204 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005205 Py_DECREF(callable);
5206 }
5207 Py_DECREF(argtup);
5208
5209 if (obj == NULL)
5210 return -1;
5211
5212 PDATA_PUSH(self->stack, obj, -1);
5213 return 0;
5214}
5215
5216/* Just raises an error if we don't know the protocol specified. PROTO
5217 * is the first opcode for protocols >= 2.
5218 */
5219static int
5220load_proto(UnpicklerObject *self)
5221{
5222 char *s;
5223 int i;
5224
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005225 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005226 return -1;
5227
5228 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005229 if (i <= HIGHEST_PROTOCOL) {
5230 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005231 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005232 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005233
5234 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5235 return -1;
5236}
5237
5238static PyObject *
5239load(UnpicklerObject *self)
5240{
5241 PyObject *err;
5242 PyObject *value = NULL;
5243 char *s;
5244
5245 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005246 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005247 Pdata_clear(self->stack, 0);
5248
5249 /* Convenient macros for the dispatch while-switch loop just below. */
5250#define OP(opcode, load_func) \
5251 case opcode: if (load_func(self) < 0) break; continue;
5252
5253#define OP_ARG(opcode, load_func, arg) \
5254 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5255
5256 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005257 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005258 break;
5259
5260 switch ((enum opcode)s[0]) {
5261 OP(NONE, load_none)
5262 OP(BININT, load_binint)
5263 OP(BININT1, load_binint1)
5264 OP(BININT2, load_binint2)
5265 OP(INT, load_int)
5266 OP(LONG, load_long)
5267 OP_ARG(LONG1, load_counted_long, 1)
5268 OP_ARG(LONG4, load_counted_long, 4)
5269 OP(FLOAT, load_float)
5270 OP(BINFLOAT, load_binfloat)
5271 OP(BINBYTES, load_binbytes)
5272 OP(SHORT_BINBYTES, load_short_binbytes)
5273 OP(BINSTRING, load_binstring)
5274 OP(SHORT_BINSTRING, load_short_binstring)
5275 OP(STRING, load_string)
5276 OP(UNICODE, load_unicode)
5277 OP(BINUNICODE, load_binunicode)
5278 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5279 OP_ARG(TUPLE1, load_counted_tuple, 1)
5280 OP_ARG(TUPLE2, load_counted_tuple, 2)
5281 OP_ARG(TUPLE3, load_counted_tuple, 3)
5282 OP(TUPLE, load_tuple)
5283 OP(EMPTY_LIST, load_empty_list)
5284 OP(LIST, load_list)
5285 OP(EMPTY_DICT, load_empty_dict)
5286 OP(DICT, load_dict)
5287 OP(OBJ, load_obj)
5288 OP(INST, load_inst)
5289 OP(NEWOBJ, load_newobj)
5290 OP(GLOBAL, load_global)
5291 OP(APPEND, load_append)
5292 OP(APPENDS, load_appends)
5293 OP(BUILD, load_build)
5294 OP(DUP, load_dup)
5295 OP(BINGET, load_binget)
5296 OP(LONG_BINGET, load_long_binget)
5297 OP(GET, load_get)
5298 OP(MARK, load_mark)
5299 OP(BINPUT, load_binput)
5300 OP(LONG_BINPUT, load_long_binput)
5301 OP(PUT, load_put)
5302 OP(POP, load_pop)
5303 OP(POP_MARK, load_pop_mark)
5304 OP(SETITEM, load_setitem)
5305 OP(SETITEMS, load_setitems)
5306 OP(PERSID, load_persid)
5307 OP(BINPERSID, load_binpersid)
5308 OP(REDUCE, load_reduce)
5309 OP(PROTO, load_proto)
5310 OP_ARG(EXT1, load_extension, 1)
5311 OP_ARG(EXT2, load_extension, 2)
5312 OP_ARG(EXT4, load_extension, 4)
5313 OP_ARG(NEWTRUE, load_bool, Py_True)
5314 OP_ARG(NEWFALSE, load_bool, Py_False)
5315
5316 case STOP:
5317 break;
5318
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005319 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005320 if (s[0] == '\0')
5321 PyErr_SetNone(PyExc_EOFError);
5322 else
5323 PyErr_Format(UnpicklingError,
5324 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005325 return NULL;
5326 }
5327
5328 break; /* and we are done! */
5329 }
5330
Antoine Pitrou04248a82010-10-12 20:51:21 +00005331 if (_Unpickler_SkipConsumed(self) < 0)
5332 return NULL;
5333
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005334 /* XXX: It is not clear what this is actually for. */
5335 if ((err = PyErr_Occurred())) {
5336 if (err == PyExc_EOFError) {
5337 PyErr_SetNone(PyExc_EOFError);
5338 }
5339 return NULL;
5340 }
5341
5342 PDATA_POP(self->stack, value);
5343 return value;
5344}
5345
5346PyDoc_STRVAR(Unpickler_load_doc,
5347"load() -> object. Load a pickle."
5348"\n"
5349"Read a pickled object representation from the open file object given in\n"
5350"the constructor, and return the reconstituted object hierarchy specified\n"
5351"therein.\n");
5352
5353static PyObject *
5354Unpickler_load(UnpicklerObject *self)
5355{
5356 /* Check whether the Unpickler was initialized correctly. This prevents
5357 segfaulting if a subclass overridden __init__ with a function that does
5358 not call Unpickler.__init__(). Here, we simply ensure that self->read
5359 is not NULL. */
5360 if (self->read == NULL) {
5361 PyErr_Format(UnpicklingError,
5362 "Unpickler.__init__() was not called by %s.__init__()",
5363 Py_TYPE(self)->tp_name);
5364 return NULL;
5365 }
5366
5367 return load(self);
5368}
5369
5370/* The name of find_class() is misleading. In newer pickle protocols, this
5371 function is used for loading any global (i.e., functions), not just
5372 classes. The name is kept only for backward compatibility. */
5373
5374PyDoc_STRVAR(Unpickler_find_class_doc,
5375"find_class(module_name, global_name) -> object.\n"
5376"\n"
5377"Return an object from a specified module, importing the module if\n"
5378"necessary. Subclasses may override this method (e.g. to restrict\n"
5379"unpickling of arbitrary classes and functions).\n"
5380"\n"
5381"This method is called whenever a class or a function object is\n"
5382"needed. Both arguments passed are str objects.\n");
5383
5384static PyObject *
5385Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5386{
5387 PyObject *global;
5388 PyObject *modules_dict;
5389 PyObject *module;
5390 PyObject *module_name, *global_name;
5391
5392 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5393 &module_name, &global_name))
5394 return NULL;
5395
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005396 /* Try to map the old names used in Python 2.x to the new ones used in
5397 Python 3.x. We do this only with old pickle protocols and when the
5398 user has not disabled the feature. */
5399 if (self->proto < 3 && self->fix_imports) {
5400 PyObject *key;
5401 PyObject *item;
5402
5403 /* Check if the global (i.e., a function or a class) was renamed
5404 or moved to another module. */
5405 key = PyTuple_Pack(2, module_name, global_name);
5406 if (key == NULL)
5407 return NULL;
5408 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5409 Py_DECREF(key);
5410 if (item) {
5411 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5412 PyErr_Format(PyExc_RuntimeError,
5413 "_compat_pickle.NAME_MAPPING values should be "
5414 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5415 return NULL;
5416 }
5417 module_name = PyTuple_GET_ITEM(item, 0);
5418 global_name = PyTuple_GET_ITEM(item, 1);
5419 if (!PyUnicode_Check(module_name) ||
5420 !PyUnicode_Check(global_name)) {
5421 PyErr_Format(PyExc_RuntimeError,
5422 "_compat_pickle.NAME_MAPPING values should be "
5423 "pairs of str, not (%.200s, %.200s)",
5424 Py_TYPE(module_name)->tp_name,
5425 Py_TYPE(global_name)->tp_name);
5426 return NULL;
5427 }
5428 }
5429 else if (PyErr_Occurred()) {
5430 return NULL;
5431 }
5432
5433 /* Check if the module was renamed. */
5434 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5435 if (item) {
5436 if (!PyUnicode_Check(item)) {
5437 PyErr_Format(PyExc_RuntimeError,
5438 "_compat_pickle.IMPORT_MAPPING values should be "
5439 "strings, not %.200s", Py_TYPE(item)->tp_name);
5440 return NULL;
5441 }
5442 module_name = item;
5443 }
5444 else if (PyErr_Occurred()) {
5445 return NULL;
5446 }
5447 }
5448
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005449 modules_dict = PySys_GetObject("modules");
5450 if (modules_dict == NULL)
5451 return NULL;
5452
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005453 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005454 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005455 if (PyErr_Occurred())
5456 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005457 module = PyImport_Import(module_name);
5458 if (module == NULL)
5459 return NULL;
5460 global = PyObject_GetAttr(module, global_name);
5461 Py_DECREF(module);
5462 }
5463 else {
5464 global = PyObject_GetAttr(module, global_name);
5465 }
5466 return global;
5467}
5468
5469static struct PyMethodDef Unpickler_methods[] = {
5470 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5471 Unpickler_load_doc},
5472 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5473 Unpickler_find_class_doc},
5474 {NULL, NULL} /* sentinel */
5475};
5476
5477static void
5478Unpickler_dealloc(UnpicklerObject *self)
5479{
5480 PyObject_GC_UnTrack((PyObject *)self);
5481 Py_XDECREF(self->readline);
5482 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005483 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005484 Py_XDECREF(self->stack);
5485 Py_XDECREF(self->pers_func);
5486 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005487 if (self->buffer.buf != NULL) {
5488 PyBuffer_Release(&self->buffer);
5489 self->buffer.buf = NULL;
5490 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005491
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005492 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005493 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005494 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005495 free(self->encoding);
5496 free(self->errors);
5497
5498 Py_TYPE(self)->tp_free((PyObject *)self);
5499}
5500
5501static int
5502Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5503{
5504 Py_VISIT(self->readline);
5505 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005506 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005507 Py_VISIT(self->stack);
5508 Py_VISIT(self->pers_func);
5509 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005510 return 0;
5511}
5512
5513static int
5514Unpickler_clear(UnpicklerObject *self)
5515{
5516 Py_CLEAR(self->readline);
5517 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005518 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005519 Py_CLEAR(self->stack);
5520 Py_CLEAR(self->pers_func);
5521 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005522 if (self->buffer.buf != NULL) {
5523 PyBuffer_Release(&self->buffer);
5524 self->buffer.buf = NULL;
5525 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005526
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005527 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005528 PyMem_Free(self->marks);
5529 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005530 PyMem_Free(self->input_line);
5531 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005532 free(self->encoding);
5533 self->encoding = NULL;
5534 free(self->errors);
5535 self->errors = NULL;
5536
5537 return 0;
5538}
5539
5540PyDoc_STRVAR(Unpickler_doc,
5541"Unpickler(file, *, encoding='ASCII', errors='strict')"
5542"\n"
5543"This takes a binary file for reading a pickle data stream.\n"
5544"\n"
5545"The protocol version of the pickle is detected automatically, so no\n"
5546"proto argument is needed.\n"
5547"\n"
5548"The file-like object must have two methods, a read() method\n"
5549"that takes an integer argument, and a readline() method that\n"
5550"requires no arguments. Both methods should return bytes.\n"
5551"Thus file-like object can be a binary file object opened for\n"
5552"reading, a BytesIO object, or any other custom object that\n"
5553"meets this interface.\n"
5554"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005555"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5556"which are used to control compatiblity support for pickle stream\n"
5557"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5558"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5559"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5560"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5561"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005562
5563static int
5564Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5565{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005566 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005567 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005568 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005569 char *encoding = NULL;
5570 char *errors = NULL;
5571
5572 /* XXX: That is an horrible error message. But, I don't know how to do
5573 better... */
5574 if (Py_SIZE(args) != 1) {
5575 PyErr_Format(PyExc_TypeError,
5576 "%s takes exactly one positional argument (%zd given)",
5577 Py_TYPE(self)->tp_name, Py_SIZE(args));
5578 return -1;
5579 }
5580
5581 /* Arguments parsing needs to be done in the __init__() method to allow
5582 subclasses to define their own __init__() method, which may (or may
5583 not) support Unpickler arguments. However, this means we need to be
5584 extra careful in the other Unpickler methods, since a subclass could
5585 forget to call Unpickler.__init__() thus breaking our internal
5586 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005587 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005588 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005589 return -1;
5590
5591 /* In case of multiple __init__() calls, clear previous content. */
5592 if (self->read != NULL)
5593 (void)Unpickler_clear(self);
5594
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005595 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005596 return -1;
5597
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005598 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005599 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005600
5601 self->fix_imports = PyObject_IsTrue(fix_imports);
5602 if (self->fix_imports == -1)
5603 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005604
5605 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5606 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5607 "persistent_load");
5608 if (self->pers_func == NULL)
5609 return -1;
5610 }
5611 else {
5612 self->pers_func = NULL;
5613 }
5614
5615 self->stack = (Pdata *)Pdata_New();
5616 if (self->stack == NULL)
5617 return -1;
5618
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005619 self->memo_size = 32;
5620 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005621 if (self->memo == NULL)
5622 return -1;
5623
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005624 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005625 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005626
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005627 return 0;
5628}
5629
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005630/* Define a proxy object for the Unpickler's internal memo object. This is to
5631 * avoid breaking code like:
5632 * unpickler.memo.clear()
5633 * and
5634 * unpickler.memo = saved_memo
5635 * Is this a good idea? Not really, but we don't want to break code that uses
5636 * it. Note that we don't implement the entire mapping API here. This is
5637 * intentional, as these should be treated as black-box implementation details.
5638 *
5639 * We do, however, have to implement pickling/unpickling support because of
5640 * real-world code like cvs2svn.
5641 */
5642
/* Instance layout of the proxy returned by the Unpickler `memo` attribute. */
typedef struct {
    PyObject_HEAD
    UnpicklerObject *unpickler;  /* Strong reference: INCREF'd in
                                    UnpicklerMemoProxy_New(), released in
                                    the dealloc/clear slots. */
} UnpicklerMemoProxyObject;
5647
5648PyDoc_STRVAR(ump_clear_doc,
5649"memo.clear() -> None. Remove all items from memo.");
5650
5651static PyObject *
5652ump_clear(UnpicklerMemoProxyObject *self)
5653{
5654 _Unpickler_MemoCleanup(self->unpickler);
5655 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5656 if (self->unpickler->memo == NULL)
5657 return NULL;
5658 Py_RETURN_NONE;
5659}
5660
5661PyDoc_STRVAR(ump_copy_doc,
5662"memo.copy() -> new_memo. Copy the memo to a new object.");
5663
5664static PyObject *
5665ump_copy(UnpicklerMemoProxyObject *self)
5666{
5667 Py_ssize_t i;
5668 PyObject *new_memo = PyDict_New();
5669 if (new_memo == NULL)
5670 return NULL;
5671
5672 for (i = 0; i < self->unpickler->memo_size; i++) {
5673 int status;
5674 PyObject *key, *value;
5675
5676 value = self->unpickler->memo[i];
5677 if (value == NULL)
5678 continue;
5679
5680 key = PyLong_FromSsize_t(i);
5681 if (key == NULL)
5682 goto error;
5683 status = PyDict_SetItem(new_memo, key, value);
5684 Py_DECREF(key);
5685 if (status < 0)
5686 goto error;
5687 }
5688 return new_memo;
5689
5690error:
5691 Py_DECREF(new_memo);
5692 return NULL;
5693}
5694
5695PyDoc_STRVAR(ump_reduce_doc,
5696"memo.__reduce__(). Pickling support.");
5697
5698static PyObject *
5699ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5700{
5701 PyObject *reduce_value;
5702 PyObject *constructor_args;
5703 PyObject *contents = ump_copy(self);
5704 if (contents == NULL)
5705 return NULL;
5706
5707 reduce_value = PyTuple_New(2);
5708 if (reduce_value == NULL) {
5709 Py_DECREF(contents);
5710 return NULL;
5711 }
5712 constructor_args = PyTuple_New(1);
5713 if (constructor_args == NULL) {
5714 Py_DECREF(contents);
5715 Py_DECREF(reduce_value);
5716 return NULL;
5717 }
5718 PyTuple_SET_ITEM(constructor_args, 0, contents);
5719 Py_INCREF((PyObject *)&PyDict_Type);
5720 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5721 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5722 return reduce_value;
5723}
5724
/* Method table of the memo proxy type.  Only a deliberately small part of
   the mapping API is offered: clear(), copy() and pickling support. */
static PyMethodDef unpicklerproxy_methods[] = {
    {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
    {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
    {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
    {NULL, NULL} /* sentinel */
};
5731
/* tp_dealloc slot of the memo proxy: stop GC tracking, drop the reference
   to the proxied Unpickler and free the proxy object itself. */
static void
UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->unpickler);
    PyObject_GC_Del((PyObject *)self);
}
5739
/* tp_traverse slot of the memo proxy: the proxied Unpickler is the only
   object reference it holds. */
static int
UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
                            visitproc visit, void *arg)
{
    Py_VISIT(self->unpickler);
    return 0;
}
5747
/* tp_clear slot of the memo proxy: drop the reference to the proxied
   Unpickler and leave the field NULL. */
static int
UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
{
    Py_CLEAR(self->unpickler);
    return 0;
}
5754
/* Type object of the memo proxy.  Instances are GC-tracked and unhashable;
   slots after tp_methods are left zero-initialized. */
static PyTypeObject UnpicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.UnpicklerMemoProxy",               /* tp_name */
    sizeof(UnpicklerMemoProxyObject),           /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)UnpicklerMemoProxy_dealloc,     /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (formerly tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    PyObject_HashNotImplemented,                /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    0,                                          /* tp_doc */
    (traverseproc)UnpicklerMemoProxy_traverse,  /* tp_traverse */
    (inquiry)UnpicklerMemoProxy_clear,          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    unpicklerproxy_methods,                     /* tp_methods */
};
5785
5786static PyObject *
5787UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5788{
5789 UnpicklerMemoProxyObject *self;
5790
5791 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5792 &UnpicklerMemoProxyType);
5793 if (self == NULL)
5794 return NULL;
5795 Py_INCREF(unpickler);
5796 self->unpickler = unpickler;
5797 PyObject_GC_Track(self);
5798 return (PyObject *)self;
5799}
5800
5801/*****************************************************************************/
5802
5803
/* Getter of the `memo` attribute: every access returns a new proxy object
   wrapping this Unpickler (NULL if the proxy cannot be created). */
static PyObject *
Unpickler_get_memo(UnpicklerObject *self)
{
    return UnpicklerMemoProxy_New(self);
}
5809
5810static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005811Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005812{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005813 PyObject **new_memo;
5814 Py_ssize_t new_memo_size = 0;
5815 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005816
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005817 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005818 PyErr_SetString(PyExc_TypeError,
5819 "attribute deletion is not supported");
5820 return -1;
5821 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005822
5823 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5824 UnpicklerObject *unpickler =
5825 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5826
5827 new_memo_size = unpickler->memo_size;
5828 new_memo = _Unpickler_NewMemo(new_memo_size);
5829 if (new_memo == NULL)
5830 return -1;
5831
5832 for (i = 0; i < new_memo_size; i++) {
5833 Py_XINCREF(unpickler->memo[i]);
5834 new_memo[i] = unpickler->memo[i];
5835 }
5836 }
5837 else if (PyDict_Check(obj)) {
5838 Py_ssize_t i = 0;
5839 PyObject *key, *value;
5840
5841 new_memo_size = PyDict_Size(obj);
5842 new_memo = _Unpickler_NewMemo(new_memo_size);
5843 if (new_memo == NULL)
5844 return -1;
5845
5846 while (PyDict_Next(obj, &i, &key, &value)) {
5847 Py_ssize_t idx;
5848 if (!PyLong_Check(key)) {
5849 PyErr_SetString(PyExc_TypeError,
5850 "memo key must be integers");
5851 goto error;
5852 }
5853 idx = PyLong_AsSsize_t(key);
5854 if (idx == -1 && PyErr_Occurred())
5855 goto error;
5856 if (_Unpickler_MemoPut(self, idx, value) < 0)
5857 goto error;
5858 }
5859 }
5860 else {
5861 PyErr_Format(PyExc_TypeError,
5862 "'memo' attribute must be an UnpicklerMemoProxy object"
5863 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005864 return -1;
5865 }
5866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005867 _Unpickler_MemoCleanup(self);
5868 self->memo_size = new_memo_size;
5869 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005870
5871 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005872
5873 error:
5874 if (new_memo_size) {
5875 i = new_memo_size;
5876 while (--i >= 0) {
5877 Py_XDECREF(new_memo[i]);
5878 }
5879 PyMem_FREE(new_memo);
5880 }
5881 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005882}
5883
5884static PyObject *
5885Unpickler_get_persload(UnpicklerObject *self)
5886{
5887 if (self->pers_func == NULL)
5888 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5889 else
5890 Py_INCREF(self->pers_func);
5891 return self->pers_func;
5892}
5893
5894static int
5895Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5896{
5897 PyObject *tmp;
5898
5899 if (value == NULL) {
5900 PyErr_SetString(PyExc_TypeError,
5901 "attribute deletion is not supported");
5902 return -1;
5903 }
5904 if (!PyCallable_Check(value)) {
5905 PyErr_SetString(PyExc_TypeError,
5906 "persistent_load must be a callable taking "
5907 "one argument");
5908 return -1;
5909 }
5910
5911 tmp = self->pers_func;
5912 Py_INCREF(value);
5913 self->pers_func = value;
5914 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5915
5916 return 0;
5917}
5918
/* Computed attributes of Unpickler: `memo` (proxy over the internal memo
   array) and `persistent_load` (validated callable). */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}  /* sentinel */
};
5925
/* Type object of _pickle.Unpickler: a subclassable, GC-tracked type whose
   arguments are parsed in tp_init (see Unpickler_init). */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5968
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005969PyDoc_STRVAR(pickle_dump_doc,
5970"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5971"\n"
5972"Write a pickled representation of obj to the open file object file. This\n"
5973"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5974"efficient.\n"
5975"\n"
5976"The optional protocol argument tells the pickler to use the given protocol;\n"
5977"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5978"backward-incompatible protocol designed for Python 3.0.\n"
5979"\n"
5980"Specifying a negative protocol version selects the highest protocol version\n"
5981"supported. The higher the protocol used, the more recent the version of\n"
5982"Python needed to read the pickle produced.\n"
5983"\n"
5984"The file argument must have a write() method that accepts a single bytes\n"
5985"argument. It can thus be a file object opened for binary writing, a\n"
5986"io.BytesIO instance, or any other custom object that meets this interface.\n"
5987"\n"
5988"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5989"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5990"so that the pickle data stream is readable with Python 2.x.\n");
5991
5992static PyObject *
5993pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5994{
5995 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5996 PyObject *obj;
5997 PyObject *file;
5998 PyObject *proto = NULL;
5999 PyObject *fix_imports = Py_True;
6000 PicklerObject *pickler;
6001
6002 /* fix_imports is a keyword-only argument. */
6003 if (Py_SIZE(args) > 3) {
6004 PyErr_Format(PyExc_TypeError,
6005 "pickle.dump() takes at most 3 positional "
6006 "argument (%zd given)", Py_SIZE(args));
6007 return NULL;
6008 }
6009
6010 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6011 &obj, &file, &proto, &fix_imports))
6012 return NULL;
6013
6014 pickler = _Pickler_New();
6015 if (pickler == NULL)
6016 return NULL;
6017
6018 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6019 goto error;
6020
6021 if (_Pickler_SetOutputStream(pickler, file) < 0)
6022 goto error;
6023
6024 if (dump(pickler, obj) < 0)
6025 goto error;
6026
6027 if (_Pickler_FlushToFile(pickler) < 0)
6028 goto error;
6029
6030 Py_DECREF(pickler);
6031 Py_RETURN_NONE;
6032
6033 error:
6034 Py_XDECREF(pickler);
6035 return NULL;
6036}
6037
6038PyDoc_STRVAR(pickle_dumps_doc,
6039"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6040"\n"
6041"Return the pickled representation of the object as a bytes\n"
6042"object, instead of writing it to a file.\n"
6043"\n"
6044"The optional protocol argument tells the pickler to use the given protocol;\n"
6045"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6046"backward-incompatible protocol designed for Python 3.0.\n"
6047"\n"
6048"Specifying a negative protocol version selects the highest protocol version\n"
6049"supported. The higher the protocol used, the more recent the version of\n"
6050"Python needed to read the pickle produced.\n"
6051"\n"
6052"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6053"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6054"so that the pickle data stream is readable with Python 2.x.\n");
6055
6056static PyObject *
6057pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6058{
6059 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6060 PyObject *obj;
6061 PyObject *proto = NULL;
6062 PyObject *result;
6063 PyObject *fix_imports = Py_True;
6064 PicklerObject *pickler;
6065
6066 /* fix_imports is a keyword-only argument. */
6067 if (Py_SIZE(args) > 2) {
6068 PyErr_Format(PyExc_TypeError,
6069 "pickle.dumps() takes at most 2 positional "
6070 "argument (%zd given)", Py_SIZE(args));
6071 return NULL;
6072 }
6073
6074 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6075 &obj, &proto, &fix_imports))
6076 return NULL;
6077
6078 pickler = _Pickler_New();
6079 if (pickler == NULL)
6080 return NULL;
6081
6082 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6083 goto error;
6084
6085 if (dump(pickler, obj) < 0)
6086 goto error;
6087
6088 result = _Pickler_GetString(pickler);
6089 Py_DECREF(pickler);
6090 return result;
6091
6092 error:
6093 Py_XDECREF(pickler);
6094 return NULL;
6095}
6096
6097PyDoc_STRVAR(pickle_load_doc,
6098"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6099"\n"
6100"Read a pickled object representation from the open file object file and\n"
6101"return the reconstituted object hierarchy specified therein. This is\n"
6102"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6103"\n"
6104"The protocol version of the pickle is detected automatically, so no protocol\n"
6105"argument is needed. Bytes past the pickled object's representation are\n"
6106"ignored.\n"
6107"\n"
6108"The argument file must have two methods, a read() method that takes an\n"
6109"integer argument, and a readline() method that requires no arguments. Both\n"
6110"methods should return bytes. Thus *file* can be a binary file object opened\n"
6111"for reading, a BytesIO object, or any other custom object that meets this\n"
6112"interface.\n"
6113"\n"
6114"Optional keyword arguments are fix_imports, encoding and errors,\n"
6115"which are used to control compatiblity support for pickle stream generated\n"
6116"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6117"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6118"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6119"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6120
6121static PyObject *
6122pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6123{
6124 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6125 PyObject *file;
6126 PyObject *fix_imports = Py_True;
6127 PyObject *result;
6128 char *encoding = NULL;
6129 char *errors = NULL;
6130 UnpicklerObject *unpickler;
6131
6132 /* fix_imports, encoding and errors are a keyword-only argument. */
6133 if (Py_SIZE(args) != 1) {
6134 PyErr_Format(PyExc_TypeError,
6135 "pickle.load() takes exactly one positional "
6136 "argument (%zd given)", Py_SIZE(args));
6137 return NULL;
6138 }
6139
6140 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6141 &file, &fix_imports, &encoding, &errors))
6142 return NULL;
6143
6144 unpickler = _Unpickler_New();
6145 if (unpickler == NULL)
6146 return NULL;
6147
6148 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6149 goto error;
6150
6151 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6152 goto error;
6153
6154 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6155 if (unpickler->fix_imports == -1)
6156 goto error;
6157
6158 result = load(unpickler);
6159 Py_DECREF(unpickler);
6160 return result;
6161
6162 error:
6163 Py_XDECREF(unpickler);
6164 return NULL;
6165}
6166
6167PyDoc_STRVAR(pickle_loads_doc,
6168"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6169"\n"
6170"Read a pickled object hierarchy from a bytes object and return the\n"
6171"reconstituted object hierarchy specified therein\n"
6172"\n"
6173"The protocol version of the pickle is detected automatically, so no protocol\n"
6174"argument is needed. Bytes past the pickled object's representation are\n"
6175"ignored.\n"
6176"\n"
6177"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6178"are used to control compatiblity support for pickle stream generated\n"
6179"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6180"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6181"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6182"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6183
6184static PyObject *
6185pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6186{
6187 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6188 PyObject *input;
6189 PyObject *fix_imports = Py_True;
6190 PyObject *result;
6191 char *encoding = NULL;
6192 char *errors = NULL;
6193 UnpicklerObject *unpickler;
6194
6195 /* fix_imports, encoding and errors are a keyword-only argument. */
6196 if (Py_SIZE(args) != 1) {
6197 PyErr_Format(PyExc_TypeError,
6198 "pickle.loads() takes exactly one positional "
6199 "argument (%zd given)", Py_SIZE(args));
6200 return NULL;
6201 }
6202
6203 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6204 &input, &fix_imports, &encoding, &errors))
6205 return NULL;
6206
6207 unpickler = _Unpickler_New();
6208 if (unpickler == NULL)
6209 return NULL;
6210
6211 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6212 goto error;
6213
6214 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6215 goto error;
6216
6217 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6218 if (unpickler->fix_imports == -1)
6219 goto error;
6220
6221 result = load(unpickler);
6222 Py_DECREF(unpickler);
6223 return result;
6224
6225 error:
6226 Py_XDECREF(unpickler);
6227 return NULL;
6228}
6229
6230
/* Module-level functions exported by _pickle.  All of them accept keyword
   arguments. */
static struct PyMethodDef pickle_methods[] = {
    {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
     pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
     pickle_dumps_doc},
    {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
     pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
     pickle_loads_doc},
    {NULL, NULL} /* sentinel */
};
6242
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006243static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006244initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006245{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006246 PyObject *copyreg = NULL;
6247 PyObject *compat_pickle = NULL;
6248
6249 /* XXX: We should ensure that the types of the dictionaries imported are
6250 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6251 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006252
6253 copyreg = PyImport_ImportModule("copyreg");
6254 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006255 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006256 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6257 if (!dispatch_table)
6258 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006259 extension_registry = \
6260 PyObject_GetAttrString(copyreg, "_extension_registry");
6261 if (!extension_registry)
6262 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006263 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6264 if (!inverted_registry)
6265 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006266 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6267 if (!extension_cache)
6268 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006269 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006270
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006271 /* Load the 2.x -> 3.x stdlib module mapping tables */
6272 compat_pickle = PyImport_ImportModule("_compat_pickle");
6273 if (!compat_pickle)
6274 goto error;
6275 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6276 if (!name_mapping_2to3)
6277 goto error;
6278 if (!PyDict_CheckExact(name_mapping_2to3)) {
6279 PyErr_Format(PyExc_RuntimeError,
6280 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6281 Py_TYPE(name_mapping_2to3)->tp_name);
6282 goto error;
6283 }
6284 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6285 "IMPORT_MAPPING");
6286 if (!import_mapping_2to3)
6287 goto error;
6288 if (!PyDict_CheckExact(import_mapping_2to3)) {
6289 PyErr_Format(PyExc_RuntimeError,
6290 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6291 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6292 goto error;
6293 }
6294 /* ... and the 3.x -> 2.x mapping tables */
6295 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6296 "REVERSE_NAME_MAPPING");
6297 if (!name_mapping_3to2)
6298 goto error;
6299 if (!PyDict_CheckExact(name_mapping_3to2)) {
6300 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006301 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006302 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6303 goto error;
6304 }
6305 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6306 "REVERSE_IMPORT_MAPPING");
6307 if (!import_mapping_3to2)
6308 goto error;
6309 if (!PyDict_CheckExact(import_mapping_3to2)) {
6310 PyErr_Format(PyExc_RuntimeError,
6311 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6312 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6313 goto error;
6314 }
6315 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006316
6317 empty_tuple = PyTuple_New(0);
6318 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006319 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006320 two_tuple = PyTuple_New(2);
6321 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006322 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006323 /* We use this temp container with no regard to refcounts, or to
6324 * keeping containees alive. Exempt from GC, because we don't
6325 * want anything looking at two_tuple() by magic.
6326 */
6327 PyObject_GC_UnTrack(two_tuple);
6328
6329 return 0;
6330
6331 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006332 Py_CLEAR(copyreg);
6333 Py_CLEAR(dispatch_table);
6334 Py_CLEAR(extension_registry);
6335 Py_CLEAR(inverted_registry);
6336 Py_CLEAR(extension_cache);
6337 Py_CLEAR(compat_pickle);
6338 Py_CLEAR(name_mapping_2to3);
6339 Py_CLEAR(import_mapping_2to3);
6340 Py_CLEAR(name_mapping_3to2);
6341 Py_CLEAR(import_mapping_3to2);
6342 Py_CLEAR(empty_tuple);
6343 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006344 return -1;
6345}
6346
/* Module definition of _pickle. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    -1,                   /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    NULL,                 /* m_traverse */
    NULL,                 /* m_clear */
    NULL                  /* m_free */
};
6358
6359PyMODINIT_FUNC
6360PyInit__pickle(void)
6361{
6362 PyObject *m;
6363
6364 if (PyType_Ready(&Unpickler_Type) < 0)
6365 return NULL;
6366 if (PyType_Ready(&Pickler_Type) < 0)
6367 return NULL;
6368 if (PyType_Ready(&Pdata_Type) < 0)
6369 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006370 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6371 return NULL;
6372 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6373 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006374
6375 /* Create the module and add the functions. */
6376 m = PyModule_Create(&_picklemodule);
6377 if (m == NULL)
6378 return NULL;
6379
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006380 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006381 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6382 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006383 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006384 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6385 return NULL;
6386
6387 /* Initialize the exceptions. */
6388 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6389 if (PickleError == NULL)
6390 return NULL;
6391 PicklingError = \
6392 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6393 if (PicklingError == NULL)
6394 return NULL;
6395 UnpicklingError = \
6396 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6397 if (UnpicklingError == NULL)
6398 return NULL;
6399
6400 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6401 return NULL;
6402 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6403 return NULL;
6404 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6405 return NULL;
6406
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006407 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006408 return NULL;
6409
6410 return m;
6411}