blob: 4389f72a936cbba6ec28453d0352109a95fd02e6 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Protocol numbers supported by this module.
   Bump these when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol this module accepts */
    DEFAULT_PROTOCOL = 3    /* used when the caller passes no protocol */
};
12
/* Pickle opcodes.  Each enumerator's value is the single byte that
   introduces the opcode in a pickle stream.  This list must be kept in
   sync with pickle.py; extensive docs are in pickletools.py. */
enum opcode {
    /* Protocols 0 and 1. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
77/* These aren't opcodes -- they're ways to pickle bools before protocol 2
78 * so that unpicklers written before bools were introduced unpickle them
79 * as ints, but unpicklers after can recognize that bools were intended.
80 * Note that protocol 2 added direct ways to pickle bools.
81 */
82#undef TRUE
83#define TRUE "I01\n"
84#undef FALSE
85#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() emit before issuing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but it is unclear that
       it would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the Pickler's write buffer. */
    WRITE_BUF_SIZE = 4096,

    /* Upper bound on the Pickler's write buffer when pickling to a
       stream.  Ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
133/* XXX: Are these really nescessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER.
   NOTE(review): O is not released on that failure path -- confirm that
   callers do not rely on the macro to drop the reference. */
#define PDATA_PUSH(D, O, ER)                    \
    do {                                        \
        if (Pdata_push((D), (O)) < 0)           \
            return (ER);                        \
    } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the enclosing function returns ER.  O is evaluated twice. */
#define PDATA_APPEND(D, O, ER)                  \
    do {                                        \
        Py_INCREF((O));                         \
        if (Pdata_push((D), (O)) < 0)           \
            return (ER);                        \
    } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping object pointers to Py_ssize_t values. This is
 used by the pickler for memoization. Using a custom hashtable rather than
 PyDict allows us to skip a bunch of unnecessary object creation. This makes
 a huge performance difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000394#define MT_MINSIZE 8
395#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers for creating the argument tuple passed to functions.  This has
   the performance advantage of calling PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* Put `obj` into slot 0 of the cached 1-tuple (self)->arg, creating the
   tuple on first use and releasing whatever the slot held before.  The
   reference to `obj` is consumed: it is stored in the tuple, or dropped
   if the tuple could not be created (in which case (self)->arg stays
   NULL). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple when something else still holds a reference
   to it after the call, so that it is not reused while shared. */
#define FREE_ARG_TUP(self) do {                                 \
        if (Py_REFCNT((self)->arg) > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
626
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefit?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e., call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e., call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But that is probably more related to the function call
   overhead than to the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
1040
1041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001222 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001224 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1235 "%" PY_FORMAT_SIZE_T "d\n", *value);
1236 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001270 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001271 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1285 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 len = strlen(pdata);
1287 }
1288 else {
1289 if (x < 256) {
1290 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001291 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001292 len = 2;
1293 }
1294 else if (x <= 0xffffffffL) {
1295 pdata[0] = LONG_BINPUT;
1296 pdata[1] = (unsigned char)(x & 0xff);
1297 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1298 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1299 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1300 len = 5;
1301 }
1302 else { /* unlikely */
1303 PyErr_SetString(PicklingError,
1304 "memo id too large for LONG_BINPUT");
1305 return -1;
1306 }
1307 }
1308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 goto error;
1311
1312 if (0) {
1313 error:
1314 status = -1;
1315 }
1316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 return status;
1318}
1319
1320static PyObject *
1321whichmodule(PyObject *global, PyObject *global_name)
1322{
1323 Py_ssize_t i, j;
1324 static PyObject *module_str = NULL;
1325 static PyObject *main_str = NULL;
1326 PyObject *module_name;
1327 PyObject *modules_dict;
1328 PyObject *module;
1329 PyObject *obj;
1330
1331 if (module_str == NULL) {
1332 module_str = PyUnicode_InternFromString("__module__");
1333 if (module_str == NULL)
1334 return NULL;
1335 main_str = PyUnicode_InternFromString("__main__");
1336 if (main_str == NULL)
1337 return NULL;
1338 }
1339
1340 module_name = PyObject_GetAttr(global, module_str);
1341
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001342 /* In some rare cases (e.g., bound methods of extension types),
1343 __module__ can be None. If it is so, then search sys.modules
1344 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 if (module_name == Py_None) {
1346 Py_DECREF(module_name);
1347 goto search;
1348 }
1349
1350 if (module_name) {
1351 return module_name;
1352 }
1353 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1354 PyErr_Clear();
1355 else
1356 return NULL;
1357
1358 search:
1359 modules_dict = PySys_GetObject("modules");
1360 if (modules_dict == NULL)
1361 return NULL;
1362
1363 i = 0;
1364 module_name = NULL;
1365 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001366 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001367 continue;
1368
1369 obj = PyObject_GetAttr(module, global_name);
1370 if (obj == NULL) {
1371 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1372 PyErr_Clear();
1373 else
1374 return NULL;
1375 continue;
1376 }
1377
1378 if (obj != global) {
1379 Py_DECREF(obj);
1380 continue;
1381 }
1382
1383 Py_DECREF(obj);
1384 break;
1385 }
1386
1387 /* If no module is found, use __main__. */
1388 if (!j) {
1389 module_name = main_str;
1390 }
1391
1392 Py_INCREF(module_name);
1393 return module_name;
1394}
1395
1396/* fast_save_enter() and fast_save_leave() are guards against recursive
1397 objects when Pickler is used with the "fast mode" (i.e., with object
1398 memoization disabled). If the nesting of a list or dict object exceed
1399 FAST_NESTING_LIMIT, these guards will start keeping an internal
1400 reference to the seen list or dict objects and check whether these objects
1401 are recursive. These are not strictly necessary, since save() has a
1402 hard-coded recursion limit, but they give a nicer error message than the
1403 typical RuntimeError. */
1404static int
1405fast_save_enter(PicklerObject *self, PyObject *obj)
1406{
1407 /* if fast_nesting < 0, we're doing an error exit. */
1408 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1409 PyObject *key = NULL;
1410 if (self->fast_memo == NULL) {
1411 self->fast_memo = PyDict_New();
1412 if (self->fast_memo == NULL) {
1413 self->fast_nesting = -1;
1414 return 0;
1415 }
1416 }
1417 key = PyLong_FromVoidPtr(obj);
1418 if (key == NULL)
1419 return 0;
1420 if (PyDict_GetItem(self->fast_memo, key)) {
1421 Py_DECREF(key);
1422 PyErr_Format(PyExc_ValueError,
1423 "fast mode: can't pickle cyclic objects "
1424 "including object type %.200s at %p",
1425 obj->ob_type->tp_name, obj);
1426 self->fast_nesting = -1;
1427 return 0;
1428 }
1429 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1430 Py_DECREF(key);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 Py_DECREF(key);
1435 }
1436 return 1;
1437}
1438
1439static int
1440fast_save_leave(PicklerObject *self, PyObject *obj)
1441{
1442 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1443 PyObject *key = PyLong_FromVoidPtr(obj);
1444 if (key == NULL)
1445 return 0;
1446 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1447 Py_DECREF(key);
1448 return 0;
1449 }
1450 Py_DECREF(key);
1451 }
1452 return 1;
1453}
1454
1455static int
1456save_none(PicklerObject *self, PyObject *obj)
1457{
1458 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461
1462 return 0;
1463}
1464
1465static int
1466save_bool(PicklerObject *self, PyObject *obj)
1467{
1468 static const char *buf[2] = { FALSE, TRUE };
1469 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1470 int p = (obj == Py_True);
1471
1472 if (self->proto >= 2) {
1473 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001475 return -1;
1476 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479
1480 return 0;
1481}
1482
1483static int
1484save_int(PicklerObject *self, long x)
1485{
1486 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001487 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488
1489 if (!self->bin
1490#if SIZEOF_LONG > 4
1491 || x > 0x7fffffffL || x < -0x80000000L
1492#endif
1493 ) {
1494 /* Text-mode pickle, or long too big to fit in the 4-byte
1495 * signed BININT format: store as a string.
1496 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001497 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1498 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 return -1;
1501 }
1502 else {
1503 /* Binary pickle and x fits in a signed 4-byte int. */
1504 pdata[1] = (unsigned char)(x & 0xff);
1505 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1508
1509 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1510 if (pdata[2] == 0) {
1511 pdata[0] = BININT1;
1512 len = 2;
1513 }
1514 else {
1515 pdata[0] = BININT2;
1516 len = 3;
1517 }
1518 }
1519 else {
1520 pdata[0] = BININT;
1521 len = 5;
1522 }
1523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001524 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 return -1;
1526 }
1527
1528 return 0;
1529}
1530
/* Pickle a Python int object.

   Values that fit in a C long (and, where long is wider than 4 bytes, also
   in a signed 4-byte integer) are handed to save_int().  Other values are
   written, for protocol 2 and higher, as a LONG1 or LONG4 opcode followed by
   the little-endian, signed byte representation of the number; for older
   protocols they are written as the LONG opcode followed by the decimal repr
   and "L\n".

   Returns 0 on success, -1 with an exception set on failure. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
    /* Note: this `else` governs the `if` below when SIZEOF_LONG > 4, and
       the `return` directly otherwise. */
#if SIZEOF_LONG > 4
        if (val <= 0x7fffffffL && val >= -0x80000000L)
#endif
            return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (_Pickler_Write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        /* The bytes object only serves as a scratch buffer here; it is
           released at the end of the function. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* One-byte length. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* Four-byte little-endian length; `size` is used as a scratch
               value for the shifting and then reset to the header size. */
            header[0] = LONG4;
            size = (Py_ssize_t) nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (_Pickler_Write(self, header, size) < 0 ||
            _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time (in
           the number of digits), in both directions.  We add a trailing 'L'
           to the repr, for compatibility with Python 2.x. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (_Pickler_Write(self, &long_op, 1) < 0 ||
            _Pickler_Write(self, string, size) < 0 ||
            _Pickler_Write(self, "L\n", 2) < 0)
            goto error;
    }

    /* The `error` label is only reachable through goto; the normal path
       skips the block and keeps status == 0. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1657
1658static int
1659save_float(PicklerObject *self, PyObject *obj)
1660{
1661 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1662
1663 if (self->bin) {
1664 char pdata[9];
1665 pdata[0] = BINFLOAT;
1666 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1667 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001668 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001669 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001670 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001671 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001672 int result = -1;
1673 char *buf = NULL;
1674 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001675
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001676 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 goto done;
1678
Mark Dickinson3e09f432009-04-17 08:41:23 +00001679 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001680 if (!buf) {
1681 PyErr_NoMemory();
1682 goto done;
1683 }
1684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001688 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001689 goto done;
1690
1691 result = 0;
1692done:
1693 PyMem_Free(buf);
1694 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001695 }
1696
1697 return 0;
1698}
1699
1700static int
1701save_bytes(PicklerObject *self, PyObject *obj)
1702{
1703 if (self->proto < 3) {
1704 /* Older pickle protocols do not have an opcode for pickling bytes
1705 objects. Therefore, we need to fake the copy protocol (i.e.,
1706 the __reduce__ method) to permit bytes object unpickling. */
1707 PyObject *reduce_value = NULL;
1708 PyObject *bytelist = NULL;
1709 int status;
1710
1711 bytelist = PySequence_List(obj);
1712 if (bytelist == NULL)
1713 return -1;
1714
1715 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1716 bytelist);
1717 if (reduce_value == NULL) {
1718 Py_DECREF(bytelist);
1719 return -1;
1720 }
1721
1722 /* save_reduce() will memoize the object automatically. */
1723 status = save_reduce(self, reduce_value, obj);
1724 Py_DECREF(reduce_value);
1725 Py_DECREF(bytelist);
1726 return status;
1727 }
1728 else {
1729 Py_ssize_t size;
1730 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001731 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001732
1733 size = PyBytes_Size(obj);
1734 if (size < 0)
1735 return -1;
1736
1737 if (size < 256) {
1738 header[0] = SHORT_BINBYTES;
1739 header[1] = (unsigned char)size;
1740 len = 2;
1741 }
1742 else if (size <= 0xffffffffL) {
1743 header[0] = BINBYTES;
1744 header[1] = (unsigned char)(size & 0xff);
1745 header[2] = (unsigned char)((size >> 8) & 0xff);
1746 header[3] = (unsigned char)((size >> 16) & 0xff);
1747 header[4] = (unsigned char)((size >> 24) & 0xff);
1748 len = 5;
1749 }
1750 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001751 PyErr_SetString(PyExc_OverflowError,
1752 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001753 return -1; /* string too large */
1754 }
1755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001756 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001757 return -1;
1758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001759 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001760 return -1;
1761
1762 if (memo_put(self, obj) < 0)
1763 return -1;
1764
1765 return 0;
1766 }
1767}
1768
/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
   backslash and newline characters to \uXXXX escapes.

   `s` points to `size` Py_UNICODE code units.  Code units below 256 (other
   than backslash and newline) are copied as single bytes; everything else
   is written as a \uXXXX or \UXXXXXXXX escape.

   Returns a new bytes object, or NULL with an exception set on failure. */
static PyObject *
raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
{
    PyObject *repr, *result;
    char *p;
    char *q;

    static const char *hexdigits = "0123456789abcdef";

    /* Worst-case number of output bytes per input code unit:
       "\Uxxxxxxxx" (10) on wide builds, "\uxxxx" (6) otherwise. */
#ifdef Py_UNICODE_WIDE
    const Py_ssize_t expandsize = 10;
#else
    const Py_ssize_t expandsize = 6;
#endif

    /* Make sure expandsize * size below cannot overflow. */
    if (size > PY_SSIZE_T_MAX / expandsize)
        return PyErr_NoMemory();

    /* Scratch buffer large enough for the worst case; the result is copied
       out of it at its real length at the end. */
    repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
    if (repr == NULL)
        return NULL;
    if (size == 0)
        goto done;

    /* q remembers the start of the output, p is the write cursor. */
    p = q = PyByteArray_AS_STRING(repr);
    while (size-- > 0) {
        Py_UNICODE ch = *s++;
#ifdef Py_UNICODE_WIDE
        /* Map 32-bit characters to '\Uxxxxxxxx' */
        if (ch >= 0x10000) {
            *p++ = '\\';
            *p++ = 'U';
            *p++ = hexdigits[(ch >> 28) & 0xf];
            *p++ = hexdigits[(ch >> 24) & 0xf];
            *p++ = hexdigits[(ch >> 20) & 0xf];
            *p++ = hexdigits[(ch >> 16) & 0xf];
            *p++ = hexdigits[(ch >> 12) & 0xf];
            *p++ = hexdigits[(ch >> 8) & 0xf];
            *p++ = hexdigits[(ch >> 4) & 0xf];
            *p++ = hexdigits[ch & 15];
        }
        else
        /* On wide builds this `else` attaches to the 16-bit test that
           follows the #endif. */
#else
        /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
        if (ch >= 0xD800 && ch < 0xDC00) {
            Py_UNICODE ch2;
            Py_UCS4 ucs;

            /* Tentatively consume the next code unit as the low
               surrogate. */
            ch2 = *s++;
            size--;
            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
                ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
                *p++ = '\\';
                *p++ = 'U';
                *p++ = hexdigits[(ucs >> 28) & 0xf];
                *p++ = hexdigits[(ucs >> 24) & 0xf];
                *p++ = hexdigits[(ucs >> 20) & 0xf];
                *p++ = hexdigits[(ucs >> 16) & 0xf];
                *p++ = hexdigits[(ucs >> 12) & 0xf];
                *p++ = hexdigits[(ucs >> 8) & 0xf];
                *p++ = hexdigits[(ucs >> 4) & 0xf];
                *p++ = hexdigits[ucs & 0xf];
                continue;
            }
            /* Fall through: isolated surrogates are copied as-is */
            /* Undo the tentative read of ch2. */
            s--;
            size++;
        }
#endif
        /* Map 16-bit characters to '\uxxxx' */
        if (ch >= 256 || ch == '\\' || ch == '\n') {
            *p++ = '\\';
            *p++ = 'u';
            *p++ = hexdigits[(ch >> 12) & 0xf];
            *p++ = hexdigits[(ch >> 8) & 0xf];
            *p++ = hexdigits[(ch >> 4) & 0xf];
            *p++ = hexdigits[ch & 15];
        }
        /* Copy everything else as-is */
        else
            *p++ = (char) ch;
    }
    /* From here on `size` is the number of bytes actually written. */
    size = p - q;

  done:
    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
    Py_DECREF(repr);
    return result;
}
1860
1861static int
1862save_unicode(PicklerObject *self, PyObject *obj)
1863{
1864 Py_ssize_t size;
1865 PyObject *encoded = NULL;
1866
1867 if (self->bin) {
1868 char pdata[5];
1869
Victor Stinner485fb562010-04-13 11:07:24 +00001870 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1871 PyUnicode_GET_SIZE(obj),
1872 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001873 if (encoded == NULL)
1874 goto error;
1875
1876 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001877 if (size > 0xffffffffL) {
1878 PyErr_SetString(PyExc_OverflowError,
1879 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001880 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001881 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001882
1883 pdata[0] = BINUNICODE;
1884 pdata[1] = (unsigned char)(size & 0xff);
1885 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1886 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1887 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001889 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001890 goto error;
1891
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001892 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001893 goto error;
1894 }
1895 else {
1896 const char unicode_op = UNICODE;
1897
1898 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1899 PyUnicode_GET_SIZE(obj));
1900 if (encoded == NULL)
1901 goto error;
1902
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001903 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001904 goto error;
1905
1906 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001907 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001908 goto error;
1909
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001910 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001911 goto error;
1912 }
1913 if (memo_put(self, obj) < 0)
1914 goto error;
1915
1916 Py_DECREF(encoded);
1917 return 0;
1918
1919 error:
1920 Py_XDECREF(encoded);
1921 return -1;
1922}
1923
1924/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1925static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001926store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001927{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001928 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001929
1930 assert(PyTuple_Size(t) == len);
1931
1932 for (i = 0; i < len; i++) {
1933 PyObject *element = PyTuple_GET_ITEM(t, i);
1934
1935 if (element == NULL)
1936 return -1;
1937 if (save(self, element, 0) < 0)
1938 return -1;
1939 }
1940
1941 return 0;
1942}
1943
1944/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1945 * used across protocols to minimize the space needed to pickle them.
1946 * Tuples are also the only builtin immutable type that can be recursive
1947 * (a tuple can be reached from itself), and that requires some subtle
1948 * magic so that it works in all cases. IOW, this is a long routine.
1949 */
1950static int
1951save_tuple(PicklerObject *self, PyObject *obj)
1952{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001953 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001954
1955 const char mark_op = MARK;
1956 const char tuple_op = TUPLE;
1957 const char pop_op = POP;
1958 const char pop_mark_op = POP_MARK;
1959 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1960
1961 if ((len = PyTuple_Size(obj)) < 0)
1962 return -1;
1963
1964 if (len == 0) {
1965 char pdata[2];
1966
1967 if (self->proto) {
1968 pdata[0] = EMPTY_TUPLE;
1969 len = 1;
1970 }
1971 else {
1972 pdata[0] = MARK;
1973 pdata[1] = TUPLE;
1974 len = 2;
1975 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001976 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001977 return -1;
1978 return 0;
1979 }
1980
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001981 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001982 * saving the tuple elements, the tuple must be recursive, in
1983 * which case we'll pop everything we put on the stack, and fetch
1984 * its value from the memo.
1985 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001986 if (len <= 3 && self->proto >= 2) {
1987 /* Use TUPLE{1,2,3} opcodes. */
1988 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001989 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001990
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001991 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001992 /* pop the len elements */
1993 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 if (_Pickler_Write(self, &pop_op, 1) < 0)
1995 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001997 if (memo_get(self, obj) < 0)
1998 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001999
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002000 return 0;
2001 }
2002 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002003 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2004 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005 }
2006 goto memoize;
2007 }
2008
2009 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2010 * Generate MARK e1 e2 ... TUPLE
2011 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002012 if (_Pickler_Write(self, &mark_op, 1) < 0)
2013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014
2015 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002016 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002017
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019 /* pop the stack stuff we pushed */
2020 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002021 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2022 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002023 }
2024 else {
2025 /* Note that we pop one more than len, to remove
2026 * the MARK too.
2027 */
2028 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002029 if (_Pickler_Write(self, &pop_op, 1) < 0)
2030 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002031 }
2032 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002033 if (memo_get(self, obj) < 0)
2034 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002035
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002036 return 0;
2037 }
2038 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002039 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2040 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002041 }
2042
2043 memoize:
2044 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002045 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002046
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002047 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048}
2049
2050/* iter is an iterator giving items, and we batch up chunks of
2051 * MARK item item ... item APPENDS
2052 * opcode sequences. Calling code should have arranged to first create an
2053 * empty list, or list-like object, for the APPENDS to operate on.
2054 * Returns 0 on success, <0 on error.
2055 */
2056static int
2057batch_list(PicklerObject *self, PyObject *iter)
2058{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002059 PyObject *obj = NULL;
2060 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002061 int i, n;
2062
2063 const char mark_op = MARK;
2064 const char append_op = APPEND;
2065 const char appends_op = APPENDS;
2066
2067 assert(iter != NULL);
2068
2069 /* XXX: I think this function could be made faster by avoiding the
2070 iterator interface and fetching objects directly from list using
2071 PyList_GET_ITEM.
2072 */
2073
2074 if (self->proto == 0) {
2075 /* APPENDS isn't available; do one at a time. */
2076 for (;;) {
2077 obj = PyIter_Next(iter);
2078 if (obj == NULL) {
2079 if (PyErr_Occurred())
2080 return -1;
2081 break;
2082 }
2083 i = save(self, obj, 0);
2084 Py_DECREF(obj);
2085 if (i < 0)
2086 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002087 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002088 return -1;
2089 }
2090 return 0;
2091 }
2092
2093 /* proto > 0: write in batches of BATCHSIZE. */
2094 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002095 /* Get first item */
2096 firstitem = PyIter_Next(iter);
2097 if (firstitem == NULL) {
2098 if (PyErr_Occurred())
2099 goto error;
2100
2101 /* nothing more to add */
2102 break;
2103 }
2104
2105 /* Try to get a second item */
2106 obj = PyIter_Next(iter);
2107 if (obj == NULL) {
2108 if (PyErr_Occurred())
2109 goto error;
2110
2111 /* Only one item to write */
2112 if (save(self, firstitem, 0) < 0)
2113 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002114 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002115 goto error;
2116 Py_CLEAR(firstitem);
2117 break;
2118 }
2119
2120 /* More than one item to write */
2121
2122 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002123 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002124 goto error;
2125
2126 if (save(self, firstitem, 0) < 0)
2127 goto error;
2128 Py_CLEAR(firstitem);
2129 n = 1;
2130
2131 /* Fetch and save up to BATCHSIZE items */
2132 while (obj) {
2133 if (save(self, obj, 0) < 0)
2134 goto error;
2135 Py_CLEAR(obj);
2136 n += 1;
2137
2138 if (n == BATCHSIZE)
2139 break;
2140
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002141 obj = PyIter_Next(iter);
2142 if (obj == NULL) {
2143 if (PyErr_Occurred())
2144 goto error;
2145 break;
2146 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002147 }
2148
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002149 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002150 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002151
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002152 } while (n == BATCHSIZE);
2153 return 0;
2154
2155 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002156 Py_XDECREF(firstitem);
2157 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002158 return -1;
2159}
2160
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002161/* This is a variant of batch_list() above, specialized for lists (with no
2162 * support for list subclasses). Like batch_list(), we batch up chunks of
2163 * MARK item item ... item APPENDS
2164 * opcode sequences. Calling code should have arranged to first create an
2165 * empty list, or list-like object, for the APPENDS to operate on.
2166 * Returns 0 on success, -1 on error.
2167 *
2168 * This version is considerably faster than batch_list(), if less general.
2169 *
2170 * Note that this only works for protocols > 0.
2171 */
2172static int
2173batch_list_exact(PicklerObject *self, PyObject *obj)
2174{
2175 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002176 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002177
2178 const char append_op = APPEND;
2179 const char appends_op = APPENDS;
2180 const char mark_op = MARK;
2181
2182 assert(obj != NULL);
2183 assert(self->proto > 0);
2184 assert(PyList_CheckExact(obj));
2185
2186 if (PyList_GET_SIZE(obj) == 1) {
2187 item = PyList_GET_ITEM(obj, 0);
2188 if (save(self, item, 0) < 0)
2189 return -1;
2190 if (_Pickler_Write(self, &append_op, 1) < 0)
2191 return -1;
2192 return 0;
2193 }
2194
2195 /* Write in batches of BATCHSIZE. */
2196 total = 0;
2197 do {
2198 this_batch = 0;
2199 if (_Pickler_Write(self, &mark_op, 1) < 0)
2200 return -1;
2201 while (total < PyList_GET_SIZE(obj)) {
2202 item = PyList_GET_ITEM(obj, total);
2203 if (save(self, item, 0) < 0)
2204 return -1;
2205 total++;
2206 if (++this_batch == BATCHSIZE)
2207 break;
2208 }
2209 if (_Pickler_Write(self, &appends_op, 1) < 0)
2210 return -1;
2211
2212 } while (total < PyList_GET_SIZE(obj));
2213
2214 return 0;
2215}
2216
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002217static int
2218save_list(PicklerObject *self, PyObject *obj)
2219{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002220 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002221 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002222 int status = 0;
2223
2224 if (self->fast && !fast_save_enter(self, obj))
2225 goto error;
2226
2227 /* Create an empty list. */
2228 if (self->bin) {
2229 header[0] = EMPTY_LIST;
2230 len = 1;
2231 }
2232 else {
2233 header[0] = MARK;
2234 header[1] = LIST;
2235 len = 2;
2236 }
2237
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002238 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002239 goto error;
2240
2241 /* Get list length, and bow out early if empty. */
2242 if ((len = PyList_Size(obj)) < 0)
2243 goto error;
2244
2245 if (memo_put(self, obj) < 0)
2246 goto error;
2247
2248 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002249 /* Materialize the list elements. */
2250 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002251 if (Py_EnterRecursiveCall(" while pickling an object"))
2252 goto error;
2253 status = batch_list_exact(self, obj);
2254 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002255 } else {
2256 PyObject *iter = PyObject_GetIter(obj);
2257 if (iter == NULL)
2258 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002259
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002260 if (Py_EnterRecursiveCall(" while pickling an object")) {
2261 Py_DECREF(iter);
2262 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002263 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002264 status = batch_list(self, iter);
2265 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002266 Py_DECREF(iter);
2267 }
2268 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002269 if (0) {
2270 error:
2271 status = -1;
2272 }
2273
2274 if (self->fast && !fast_save_leave(self, obj))
2275 status = -1;
2276
2277 return status;
2278}
2279
2280/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2281 * MARK key value ... key value SETITEMS
2282 * opcode sequences. Calling code should have arranged to first create an
2283 * empty dict, or dict-like object, for the SETITEMS to operate on.
2284 * Returns 0 on success, <0 on error.
2285 *
2286 * This is very much like batch_list(). The difference between saving
2287 * elements directly, and picking apart two-tuples, is so long-winded at
2288 * the C level, though, that attempts to combine these routines were too
2289 * ugly to bear.
2290 */
2291static int
2292batch_dict(PicklerObject *self, PyObject *iter)
2293{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002294 PyObject *obj = NULL;
2295 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002296 int i, n;
2297
2298 const char mark_op = MARK;
2299 const char setitem_op = SETITEM;
2300 const char setitems_op = SETITEMS;
2301
2302 assert(iter != NULL);
2303
2304 if (self->proto == 0) {
2305 /* SETITEMS isn't available; do one at a time. */
2306 for (;;) {
2307 obj = PyIter_Next(iter);
2308 if (obj == NULL) {
2309 if (PyErr_Occurred())
2310 return -1;
2311 break;
2312 }
2313 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2314 PyErr_SetString(PyExc_TypeError, "dict items "
2315 "iterator must return 2-tuples");
2316 return -1;
2317 }
2318 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2319 if (i >= 0)
2320 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2321 Py_DECREF(obj);
2322 if (i < 0)
2323 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002324 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002325 return -1;
2326 }
2327 return 0;
2328 }
2329
2330 /* proto > 0: write in batches of BATCHSIZE. */
2331 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002332 /* Get first item */
2333 firstitem = PyIter_Next(iter);
2334 if (firstitem == NULL) {
2335 if (PyErr_Occurred())
2336 goto error;
2337
2338 /* nothing more to add */
2339 break;
2340 }
2341 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2342 PyErr_SetString(PyExc_TypeError, "dict items "
2343 "iterator must return 2-tuples");
2344 goto error;
2345 }
2346
2347 /* Try to get a second item */
2348 obj = PyIter_Next(iter);
2349 if (obj == NULL) {
2350 if (PyErr_Occurred())
2351 goto error;
2352
2353 /* Only one item to write */
2354 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2355 goto error;
2356 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2357 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002358 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002359 goto error;
2360 Py_CLEAR(firstitem);
2361 break;
2362 }
2363
2364 /* More than one item to write */
2365
2366 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002367 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002368 goto error;
2369
2370 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2371 goto error;
2372 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2373 goto error;
2374 Py_CLEAR(firstitem);
2375 n = 1;
2376
2377 /* Fetch and save up to BATCHSIZE items */
2378 while (obj) {
2379 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2380 PyErr_SetString(PyExc_TypeError, "dict items "
2381 "iterator must return 2-tuples");
2382 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002383 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002384 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2385 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2386 goto error;
2387 Py_CLEAR(obj);
2388 n += 1;
2389
2390 if (n == BATCHSIZE)
2391 break;
2392
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002393 obj = PyIter_Next(iter);
2394 if (obj == NULL) {
2395 if (PyErr_Occurred())
2396 goto error;
2397 break;
2398 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002399 }
2400
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002401 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002402 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002403
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002404 } while (n == BATCHSIZE);
2405 return 0;
2406
2407 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002408 Py_XDECREF(firstitem);
2409 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002410 return -1;
2411}
2412
Collin Winter5c9b02d2009-05-25 05:43:30 +00002413/* This is a variant of batch_dict() above that specializes for dicts, with no
2414 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2415 * MARK key value ... key value SETITEMS
2416 * opcode sequences. Calling code should have arranged to first create an
2417 * empty dict, or dict-like object, for the SETITEMS to operate on.
2418 * Returns 0 on success, -1 on error.
2419 *
2420 * Note that this currently doesn't work for protocol 0.
2421 */
2422static int
2423batch_dict_exact(PicklerObject *self, PyObject *obj)
2424{
2425 PyObject *key = NULL, *value = NULL;
2426 int i;
2427 Py_ssize_t dict_size, ppos = 0;
2428
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002429 const char mark_op = MARK;
2430 const char setitem_op = SETITEM;
2431 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002432
2433 assert(obj != NULL);
2434 assert(self->proto > 0);
2435
2436 dict_size = PyDict_Size(obj);
2437
2438 /* Special-case len(d) == 1 to save space. */
2439 if (dict_size == 1) {
2440 PyDict_Next(obj, &ppos, &key, &value);
2441 if (save(self, key, 0) < 0)
2442 return -1;
2443 if (save(self, value, 0) < 0)
2444 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002445 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002446 return -1;
2447 return 0;
2448 }
2449
2450 /* Write in batches of BATCHSIZE. */
2451 do {
2452 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002453 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002454 return -1;
2455 while (PyDict_Next(obj, &ppos, &key, &value)) {
2456 if (save(self, key, 0) < 0)
2457 return -1;
2458 if (save(self, value, 0) < 0)
2459 return -1;
2460 if (++i == BATCHSIZE)
2461 break;
2462 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002463 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002464 return -1;
2465 if (PyDict_Size(obj) != dict_size) {
2466 PyErr_Format(
2467 PyExc_RuntimeError,
2468 "dictionary changed size during iteration");
2469 return -1;
2470 }
2471
2472 } while (i == BATCHSIZE);
2473 return 0;
2474}
2475
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002476static int
2477save_dict(PicklerObject *self, PyObject *obj)
2478{
2479 PyObject *items, *iter;
2480 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002481 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002482 int status = 0;
2483
2484 if (self->fast && !fast_save_enter(self, obj))
2485 goto error;
2486
2487 /* Create an empty dict. */
2488 if (self->bin) {
2489 header[0] = EMPTY_DICT;
2490 len = 1;
2491 }
2492 else {
2493 header[0] = MARK;
2494 header[1] = DICT;
2495 len = 2;
2496 }
2497
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002498 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002499 goto error;
2500
2501 /* Get dict size, and bow out early if empty. */
2502 if ((len = PyDict_Size(obj)) < 0)
2503 goto error;
2504
2505 if (memo_put(self, obj) < 0)
2506 goto error;
2507
2508 if (len != 0) {
2509 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002510 if (PyDict_CheckExact(obj) && self->proto > 0) {
2511 /* We can take certain shortcuts if we know this is a dict and
2512 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002513 if (Py_EnterRecursiveCall(" while pickling an object"))
2514 goto error;
2515 status = batch_dict_exact(self, obj);
2516 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002517 } else {
2518 items = PyObject_CallMethod(obj, "items", "()");
2519 if (items == NULL)
2520 goto error;
2521 iter = PyObject_GetIter(items);
2522 Py_DECREF(items);
2523 if (iter == NULL)
2524 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002525 if (Py_EnterRecursiveCall(" while pickling an object")) {
2526 Py_DECREF(iter);
2527 goto error;
2528 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002529 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002530 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002531 Py_DECREF(iter);
2532 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002533 }
2534
2535 if (0) {
2536 error:
2537 status = -1;
2538 }
2539
2540 if (self->fast && !fast_save_leave(self, obj))
2541 status = -1;
2542
2543 return status;
2544}
2545
2546static int
2547save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2548{
2549 static PyObject *name_str = NULL;
2550 PyObject *global_name = NULL;
2551 PyObject *module_name = NULL;
2552 PyObject *module = NULL;
2553 PyObject *cls;
2554 int status = 0;
2555
2556 const char global_op = GLOBAL;
2557
2558 if (name_str == NULL) {
2559 name_str = PyUnicode_InternFromString("__name__");
2560 if (name_str == NULL)
2561 goto error;
2562 }
2563
2564 if (name) {
2565 global_name = name;
2566 Py_INCREF(global_name);
2567 }
2568 else {
2569 global_name = PyObject_GetAttr(obj, name_str);
2570 if (global_name == NULL)
2571 goto error;
2572 }
2573
2574 module_name = whichmodule(obj, global_name);
2575 if (module_name == NULL)
2576 goto error;
2577
2578 /* XXX: Change to use the import C API directly with level=0 to disallow
2579 relative imports.
2580
2581 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2582 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2583 custom import functions (IMHO, this would be a nice security
2584 feature). The import C API would need to be extended to support the
2585 extra parameters of __import__ to fix that. */
2586 module = PyImport_Import(module_name);
2587 if (module == NULL) {
2588 PyErr_Format(PicklingError,
2589 "Can't pickle %R: import of module %R failed",
2590 obj, module_name);
2591 goto error;
2592 }
2593 cls = PyObject_GetAttr(module, global_name);
2594 if (cls == NULL) {
2595 PyErr_Format(PicklingError,
2596 "Can't pickle %R: attribute lookup %S.%S failed",
2597 obj, module_name, global_name);
2598 goto error;
2599 }
2600 if (cls != obj) {
2601 Py_DECREF(cls);
2602 PyErr_Format(PicklingError,
2603 "Can't pickle %R: it's not the same object as %S.%S",
2604 obj, module_name, global_name);
2605 goto error;
2606 }
2607 Py_DECREF(cls);
2608
2609 if (self->proto >= 2) {
2610 /* See whether this is in the extension registry, and if
2611 * so generate an EXT opcode.
2612 */
2613 PyObject *code_obj; /* extension code as Python object */
2614 long code; /* extension code as C value */
2615 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002616 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002617
2618 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2619 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2620 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2621 /* The object is not registered in the extension registry.
2622 This is the most likely code path. */
2623 if (code_obj == NULL)
2624 goto gen_global;
2625
2626 /* XXX: pickle.py doesn't check neither the type, nor the range
2627 of the value returned by the extension_registry. It should for
2628 consistency. */
2629
2630 /* Verify code_obj has the right type and value. */
2631 if (!PyLong_Check(code_obj)) {
2632 PyErr_Format(PicklingError,
2633 "Can't pickle %R: extension code %R isn't an integer",
2634 obj, code_obj);
2635 goto error;
2636 }
2637 code = PyLong_AS_LONG(code_obj);
2638 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002639 if (!PyErr_Occurred())
2640 PyErr_Format(PicklingError,
2641 "Can't pickle %R: extension code %ld is out of range",
2642 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002643 goto error;
2644 }
2645
2646 /* Generate an EXT opcode. */
2647 if (code <= 0xff) {
2648 pdata[0] = EXT1;
2649 pdata[1] = (unsigned char)code;
2650 n = 2;
2651 }
2652 else if (code <= 0xffff) {
2653 pdata[0] = EXT2;
2654 pdata[1] = (unsigned char)(code & 0xff);
2655 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2656 n = 3;
2657 }
2658 else {
2659 pdata[0] = EXT4;
2660 pdata[1] = (unsigned char)(code & 0xff);
2661 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2662 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2663 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2664 n = 5;
2665 }
2666
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002667 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002668 goto error;
2669 }
2670 else {
2671 /* Generate a normal global opcode if we are using a pickle
2672 protocol <= 2, or if the object is not registered in the
2673 extension registry. */
2674 PyObject *encoded;
2675 PyObject *(*unicode_encoder)(PyObject *);
2676
2677 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002678 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002679 goto error;
2680
2681 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2682 the module name and the global name using UTF-8. We do so only when
2683 we are using the pickle protocol newer than version 3. This is to
2684 ensure compatibility with older Unpickler running on Python 2.x. */
2685 if (self->proto >= 3) {
2686 unicode_encoder = PyUnicode_AsUTF8String;
2687 }
2688 else {
2689 unicode_encoder = PyUnicode_AsASCIIString;
2690 }
2691
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002692 /* For protocol < 3 and if the user didn't request against doing so,
2693 we convert module names to the old 2.x module names. */
2694 if (self->fix_imports) {
2695 PyObject *key;
2696 PyObject *item;
2697
2698 key = PyTuple_Pack(2, module_name, global_name);
2699 if (key == NULL)
2700 goto error;
2701 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2702 Py_DECREF(key);
2703 if (item) {
2704 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2705 PyErr_Format(PyExc_RuntimeError,
2706 "_compat_pickle.REVERSE_NAME_MAPPING values "
2707 "should be 2-tuples, not %.200s",
2708 Py_TYPE(item)->tp_name);
2709 goto error;
2710 }
2711 Py_CLEAR(module_name);
2712 Py_CLEAR(global_name);
2713 module_name = PyTuple_GET_ITEM(item, 0);
2714 global_name = PyTuple_GET_ITEM(item, 1);
2715 if (!PyUnicode_Check(module_name) ||
2716 !PyUnicode_Check(global_name)) {
2717 PyErr_Format(PyExc_RuntimeError,
2718 "_compat_pickle.REVERSE_NAME_MAPPING values "
2719 "should be pairs of str, not (%.200s, %.200s)",
2720 Py_TYPE(module_name)->tp_name,
2721 Py_TYPE(global_name)->tp_name);
2722 goto error;
2723 }
2724 Py_INCREF(module_name);
2725 Py_INCREF(global_name);
2726 }
2727 else if (PyErr_Occurred()) {
2728 goto error;
2729 }
2730
2731 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2732 if (item) {
2733 if (!PyUnicode_Check(item)) {
2734 PyErr_Format(PyExc_RuntimeError,
2735 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2736 "should be strings, not %.200s",
2737 Py_TYPE(item)->tp_name);
2738 goto error;
2739 }
2740 Py_CLEAR(module_name);
2741 module_name = item;
2742 Py_INCREF(module_name);
2743 }
2744 else if (PyErr_Occurred()) {
2745 goto error;
2746 }
2747 }
2748
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002749 /* Save the name of the module. */
2750 encoded = unicode_encoder(module_name);
2751 if (encoded == NULL) {
2752 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2753 PyErr_Format(PicklingError,
2754 "can't pickle module identifier '%S' using "
2755 "pickle protocol %i", module_name, self->proto);
2756 goto error;
2757 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002758 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002759 PyBytes_GET_SIZE(encoded)) < 0) {
2760 Py_DECREF(encoded);
2761 goto error;
2762 }
2763 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002764 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002765 goto error;
2766
2767 /* Save the name of the module. */
2768 encoded = unicode_encoder(global_name);
2769 if (encoded == NULL) {
2770 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2771 PyErr_Format(PicklingError,
2772 "can't pickle global identifier '%S' using "
2773 "pickle protocol %i", global_name, self->proto);
2774 goto error;
2775 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002776 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002777 PyBytes_GET_SIZE(encoded)) < 0) {
2778 Py_DECREF(encoded);
2779 goto error;
2780 }
2781 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002782 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002783 goto error;
2784
2785 /* Memoize the object. */
2786 if (memo_put(self, obj) < 0)
2787 goto error;
2788 }
2789
2790 if (0) {
2791 error:
2792 status = -1;
2793 }
2794 Py_XDECREF(module_name);
2795 Py_XDECREF(global_name);
2796 Py_XDECREF(module);
2797
2798 return status;
2799}
2800
2801static int
2802save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2803{
2804 PyObject *pid = NULL;
2805 int status = 0;
2806
2807 const char persid_op = PERSID;
2808 const char binpersid_op = BINPERSID;
2809
2810 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002811 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002812 if (pid == NULL)
2813 return -1;
2814
2815 if (pid != Py_None) {
2816 if (self->bin) {
2817 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002818 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002819 goto error;
2820 }
2821 else {
2822 PyObject *pid_str = NULL;
2823 char *pid_ascii_bytes;
2824 Py_ssize_t size;
2825
2826 pid_str = PyObject_Str(pid);
2827 if (pid_str == NULL)
2828 goto error;
2829
2830 /* XXX: Should it check whether the persistent id only contains
2831 ASCII characters? And what if the pid contains embedded
2832 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002833 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002834 Py_DECREF(pid_str);
2835 if (pid_ascii_bytes == NULL)
2836 goto error;
2837
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002838 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2839 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2840 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002841 goto error;
2842 }
2843 status = 1;
2844 }
2845
2846 if (0) {
2847 error:
2848 status = -1;
2849 }
2850 Py_XDECREF(pid);
2851
2852 return status;
2853}
2854
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002855static PyObject *
2856get_class(PyObject *obj)
2857{
2858 PyObject *cls;
2859 static PyObject *str_class;
2860
2861 if (str_class == NULL) {
2862 str_class = PyUnicode_InternFromString("__class__");
2863 if (str_class == NULL)
2864 return NULL;
2865 }
2866 cls = PyObject_GetAttr(obj, str_class);
2867 if (cls == NULL) {
2868 if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
2869 PyErr_Clear();
2870 cls = (PyObject *) Py_TYPE(obj);
2871 Py_INCREF(cls);
2872 }
2873 }
2874 return cls;
2875}
2876
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002877/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2878 * appropriate __reduce__ method for obj.
2879 */
2880static int
2881save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2882{
2883 PyObject *callable;
2884 PyObject *argtup;
2885 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002886 PyObject *listitems = Py_None;
2887 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002888 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002889
2890 int use_newobj = self->proto >= 2;
2891
2892 const char reduce_op = REDUCE;
2893 const char build_op = BUILD;
2894 const char newobj_op = NEWOBJ;
2895
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002896 size = PyTuple_Size(args);
2897 if (size < 2 || size > 5) {
2898 PyErr_SetString(PicklingError, "tuple returned by "
2899 "__reduce__ must contain 2 through 5 elements");
2900 return -1;
2901 }
2902
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002903 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2904 &callable, &argtup, &state, &listitems, &dictitems))
2905 return -1;
2906
2907 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002908 PyErr_SetString(PicklingError, "first item of the tuple "
2909 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002910 return -1;
2911 }
2912 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002913 PyErr_SetString(PicklingError, "second item of the tuple "
2914 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002915 return -1;
2916 }
2917
2918 if (state == Py_None)
2919 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002920
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002921 if (listitems == Py_None)
2922 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002923 else if (!PyIter_Check(listitems)) {
2924 PyErr_Format(PicklingError, "Fourth element of tuple"
2925 "returned by __reduce__ must be an iterator, not %s",
2926 Py_TYPE(listitems)->tp_name);
2927 return -1;
2928 }
2929
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002930 if (dictitems == Py_None)
2931 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002932 else if (!PyIter_Check(dictitems)) {
2933 PyErr_Format(PicklingError, "Fifth element of tuple"
2934 "returned by __reduce__ must be an iterator, not %s",
2935 Py_TYPE(dictitems)->tp_name);
2936 return -1;
2937 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002938
2939 /* Protocol 2 special case: if callable's name is __newobj__, use
2940 NEWOBJ. */
2941 if (use_newobj) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002942 static PyObject *newobj_str = NULL, *name_str = NULL;
2943 PyObject *name;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002944
2945 if (newobj_str == NULL) {
2946 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002947 name_str = PyUnicode_InternFromString("__name__");
2948 if (newobj_str == NULL || name_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002949 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002950 }
2951
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002952 name = PyObject_GetAttr(callable, name_str);
2953 if (name == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002954 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2955 PyErr_Clear();
2956 else
2957 return -1;
2958 use_newobj = 0;
2959 }
2960 else {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002961 use_newobj = PyUnicode_Check(name) &&
2962 PyUnicode_Compare(name, newobj_str) == 0;
2963 Py_DECREF(name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002964 }
2965 }
2966 if (use_newobj) {
2967 PyObject *cls;
2968 PyObject *newargtup;
2969 PyObject *obj_class;
2970 int p;
2971
2972 /* Sanity checks. */
2973 if (Py_SIZE(argtup) < 1) {
2974 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2975 return -1;
2976 }
2977
2978 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002979 if (!PyType_Check(cls)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002980 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002981 "__newobj__ args is not a type");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002982 return -1;
2983 }
2984
2985 if (obj != NULL) {
Antoine Pitrou16c4ce12011-03-11 21:30:43 +01002986 obj_class = get_class(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002987 p = obj_class != cls; /* true iff a problem */
2988 Py_DECREF(obj_class);
2989 if (p) {
2990 PyErr_SetString(PicklingError, "args[0] from "
2991 "__newobj__ args has the wrong class");
2992 return -1;
2993 }
2994 }
2995 /* XXX: These calls save() are prone to infinite recursion. Imagine
2996 what happen if the value returned by the __reduce__() method of
2997 some extension type contains another object of the same type. Ouch!
2998
2999 Here is a quick example, that I ran into, to illustrate what I
3000 mean:
3001
3002 >>> import pickle, copyreg
3003 >>> copyreg.dispatch_table.pop(complex)
3004 >>> pickle.dumps(1+2j)
3005 Traceback (most recent call last):
3006 ...
3007 RuntimeError: maximum recursion depth exceeded
3008
3009 Removing the complex class from copyreg.dispatch_table made the
3010 __reduce_ex__() method emit another complex object:
3011
3012 >>> (1+1j).__reduce_ex__(2)
3013 (<function __newobj__ at 0xb7b71c3c>,
3014 (<class 'complex'>, (1+1j)), None, None, None)
3015
3016 Thus when save() was called on newargstup (the 2nd item) recursion
3017 ensued. Of course, the bug was in the complex class which had a
3018 broken __getnewargs__() that emitted another complex object. But,
3019 the point, here, is it is quite easy to end up with a broken reduce
3020 function. */
3021
3022 /* Save the class and its __new__ arguments. */
3023 if (save(self, cls, 0) < 0)
3024 return -1;
3025
3026 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3027 if (newargtup == NULL)
3028 return -1;
3029
3030 p = save(self, newargtup, 0);
3031 Py_DECREF(newargtup);
3032 if (p < 0)
3033 return -1;
3034
3035 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003036 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003037 return -1;
3038 }
3039 else { /* Not using NEWOBJ. */
3040 if (save(self, callable, 0) < 0 ||
3041 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003042 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003043 return -1;
3044 }
3045
3046 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3047 the caller do not want to memoize the object. Not particularly useful,
3048 but that is to mimic the behavior save_reduce() in pickle.py when
3049 obj is None. */
3050 if (obj && memo_put(self, obj) < 0)
3051 return -1;
3052
3053 if (listitems && batch_list(self, listitems) < 0)
3054 return -1;
3055
3056 if (dictitems && batch_dict(self, dictitems) < 0)
3057 return -1;
3058
3059 if (state) {
3060 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003061 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003062 return -1;
3063 }
3064
3065 return 0;
3066}
3067
3068static int
3069save(PicklerObject *self, PyObject *obj, int pers_save)
3070{
3071 PyTypeObject *type;
3072 PyObject *reduce_func = NULL;
3073 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003074 int status = 0;
3075
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003076 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003077 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003078
3079 /* The extra pers_save argument is necessary to avoid calling save_pers()
3080 on its returned object. */
3081 if (!pers_save && self->pers_func) {
3082 /* save_pers() returns:
3083 -1 to signal an error;
3084 0 if it did nothing successfully;
3085 1 if a persistent id was saved.
3086 */
3087 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3088 goto done;
3089 }
3090
3091 type = Py_TYPE(obj);
3092
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003093 /* The old cPickle had an optimization that used switch-case statement
3094 dispatching on the first letter of the type name. This has was removed
3095 since benchmarks shown that this optimization was actually slowing
3096 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003097
3098 /* Atom types; these aren't memoized, so don't check the memo. */
3099
3100 if (obj == Py_None) {
3101 status = save_none(self, obj);
3102 goto done;
3103 }
3104 else if (obj == Py_False || obj == Py_True) {
3105 status = save_bool(self, obj);
3106 goto done;
3107 }
3108 else if (type == &PyLong_Type) {
3109 status = save_long(self, obj);
3110 goto done;
3111 }
3112 else if (type == &PyFloat_Type) {
3113 status = save_float(self, obj);
3114 goto done;
3115 }
3116
3117 /* Check the memo to see if it has the object. If so, generate
3118 a GET (or BINGET) opcode, instead of pickling the object
3119 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003120 if (PyMemoTable_Get(self->memo, obj)) {
3121 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003122 goto error;
3123 goto done;
3124 }
3125
3126 if (type == &PyBytes_Type) {
3127 status = save_bytes(self, obj);
3128 goto done;
3129 }
3130 else if (type == &PyUnicode_Type) {
3131 status = save_unicode(self, obj);
3132 goto done;
3133 }
3134 else if (type == &PyDict_Type) {
3135 status = save_dict(self, obj);
3136 goto done;
3137 }
3138 else if (type == &PyList_Type) {
3139 status = save_list(self, obj);
3140 goto done;
3141 }
3142 else if (type == &PyTuple_Type) {
3143 status = save_tuple(self, obj);
3144 goto done;
3145 }
3146 else if (type == &PyType_Type) {
3147 status = save_global(self, obj, NULL);
3148 goto done;
3149 }
3150 else if (type == &PyFunction_Type) {
3151 status = save_global(self, obj, NULL);
3152 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3153 /* fall back to reduce */
3154 PyErr_Clear();
3155 }
3156 else {
3157 goto done;
3158 }
3159 }
3160 else if (type == &PyCFunction_Type) {
3161 status = save_global(self, obj, NULL);
3162 goto done;
3163 }
3164 else if (PyType_IsSubtype(type, &PyType_Type)) {
3165 status = save_global(self, obj, NULL);
3166 goto done;
3167 }
3168
3169 /* XXX: This part needs some unit tests. */
3170
3171 /* Get a reduction callable, and call it. This may come from
3172 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3173 * or the object's __reduce__ method.
3174 */
3175 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3176 if (reduce_func != NULL) {
3177 /* Here, the reference count of the reduce_func object returned by
3178 PyDict_GetItem needs to be increased to be consistent with the one
3179 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3180 reduce_func at the end of the save() routine.
3181 */
3182 Py_INCREF(reduce_func);
3183 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003184 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003185 }
3186 else {
3187 static PyObject *reduce_str = NULL;
3188 static PyObject *reduce_ex_str = NULL;
3189
3190 /* Cache the name of the reduce methods. */
3191 if (reduce_str == NULL) {
3192 reduce_str = PyUnicode_InternFromString("__reduce__");
3193 if (reduce_str == NULL)
3194 goto error;
3195 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3196 if (reduce_ex_str == NULL)
3197 goto error;
3198 }
3199
3200 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3201 automatically defined as __reduce__. While this is convenient, this
3202 make it impossible to know which method was actually called. Of
3203 course, this is not a big deal. But still, it would be nice to let
3204 the user know which method was called when something go
3205 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3206 don't actually have to check for a __reduce__ method. */
3207
3208 /* Check for a __reduce_ex__ method. */
3209 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3210 if (reduce_func != NULL) {
3211 PyObject *proto;
3212 proto = PyLong_FromLong(self->proto);
3213 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003214 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003215 }
3216 }
3217 else {
3218 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3219 PyErr_Clear();
3220 else
3221 goto error;
3222 /* Check for a __reduce__ method. */
3223 reduce_func = PyObject_GetAttr(obj, reduce_str);
3224 if (reduce_func != NULL) {
3225 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3226 }
3227 else {
3228 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3229 type->tp_name, obj);
3230 goto error;
3231 }
3232 }
3233 }
3234
3235 if (reduce_value == NULL)
3236 goto error;
3237
3238 if (PyUnicode_Check(reduce_value)) {
3239 status = save_global(self, obj, reduce_value);
3240 goto done;
3241 }
3242
3243 if (!PyTuple_Check(reduce_value)) {
3244 PyErr_SetString(PicklingError,
3245 "__reduce__ must return a string or tuple");
3246 goto error;
3247 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003248
3249 status = save_reduce(self, reduce_value, obj);
3250
3251 if (0) {
3252 error:
3253 status = -1;
3254 }
3255 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003256 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003257 Py_XDECREF(reduce_func);
3258 Py_XDECREF(reduce_value);
3259
3260 return status;
3261}
3262
3263static int
3264dump(PicklerObject *self, PyObject *obj)
3265{
3266 const char stop_op = STOP;
3267
3268 if (self->proto >= 2) {
3269 char header[2];
3270
3271 header[0] = PROTO;
3272 assert(self->proto >= 0 && self->proto < 256);
3273 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003274 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003275 return -1;
3276 }
3277
3278 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003279 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003280 return -1;
3281
3282 return 0;
3283}
3284
3285PyDoc_STRVAR(Pickler_clear_memo_doc,
3286"clear_memo() -> None. Clears the pickler's \"memo\"."
3287"\n"
3288"The memo is the data structure that remembers which objects the\n"
3289"pickler has already seen, so that shared or recursive objects are\n"
3290"pickled by reference and not by value. This method is useful when\n"
3291"re-using picklers.");
3292
3293static PyObject *
3294Pickler_clear_memo(PicklerObject *self)
3295{
3296 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003297 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003298
3299 Py_RETURN_NONE;
3300}
3301
3302PyDoc_STRVAR(Pickler_dump_doc,
3303"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3304
3305static PyObject *
3306Pickler_dump(PicklerObject *self, PyObject *args)
3307{
3308 PyObject *obj;
3309
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003310 /* Check whether the Pickler was initialized correctly (issue3664).
3311 Developers often forget to call __init__() in their subclasses, which
3312 would trigger a segfault without this check. */
3313 if (self->write == NULL) {
3314 PyErr_Format(PicklingError,
3315 "Pickler.__init__() was not called by %s.__init__()",
3316 Py_TYPE(self)->tp_name);
3317 return NULL;
3318 }
3319
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003320 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3321 return NULL;
3322
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003323 if (_Pickler_ClearBuffer(self) < 0)
3324 return NULL;
3325
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003326 if (dump(self, obj) < 0)
3327 return NULL;
3328
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003329 if (_Pickler_FlushToFile(self) < 0)
3330 return NULL;
3331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003332 Py_RETURN_NONE;
3333}
3334
3335static struct PyMethodDef Pickler_methods[] = {
3336 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3337 Pickler_dump_doc},
3338 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3339 Pickler_clear_memo_doc},
3340 {NULL, NULL} /* sentinel */
3341};
3342
3343static void
3344Pickler_dealloc(PicklerObject *self)
3345{
3346 PyObject_GC_UnTrack(self);
3347
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003348 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003349 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003350 Py_XDECREF(self->pers_func);
3351 Py_XDECREF(self->arg);
3352 Py_XDECREF(self->fast_memo);
3353
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003354 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003355
3356 Py_TYPE(self)->tp_free((PyObject *)self);
3357}
3358
3359static int
3360Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3361{
3362 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003363 Py_VISIT(self->pers_func);
3364 Py_VISIT(self->arg);
3365 Py_VISIT(self->fast_memo);
3366 return 0;
3367}
3368
3369static int
3370Pickler_clear(PicklerObject *self)
3371{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003372 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003373 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003374 Py_CLEAR(self->pers_func);
3375 Py_CLEAR(self->arg);
3376 Py_CLEAR(self->fast_memo);
3377
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003378 if (self->memo != NULL) {
3379 PyMemoTable *memo = self->memo;
3380 self->memo = NULL;
3381 PyMemoTable_Del(memo);
3382 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003383 return 0;
3384}
3385
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003386
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003387PyDoc_STRVAR(Pickler_doc,
3388"Pickler(file, protocol=None)"
3389"\n"
3390"This takes a binary file for writing a pickle data stream.\n"
3391"\n"
3392"The optional protocol argument tells the pickler to use the\n"
3393"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3394"protocol is 3; a backward-incompatible protocol designed for\n"
3395"Python 3.0.\n"
3396"\n"
3397"Specifying a negative protocol version selects the highest\n"
3398"protocol version supported. The higher the protocol used, the\n"
3399"more recent the version of Python needed to read the pickle\n"
3400"produced.\n"
3401"\n"
3402"The file argument must have a write() method that accepts a single\n"
3403"bytes argument. It can thus be a file object opened for binary\n"
3404"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003405"meets this interface.\n"
3406"\n"
3407"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3408"map the new Python 3.x names to the old module names used in Python\n"
3409"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003410
3411static int
3412Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3413{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003414 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003415 PyObject *file;
3416 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003417 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003418
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003419 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003420 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003421 return -1;
3422
3423 /* In case of multiple __init__() calls, clear previous content. */
3424 if (self->write != NULL)
3425 (void)Pickler_clear(self);
3426
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003427 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3428 return -1;
3429
3430 if (_Pickler_SetOutputStream(self, file) < 0)
3431 return -1;
3432
3433 /* memo and output_buffer may have already been created in _Pickler_New */
3434 if (self->memo == NULL) {
3435 self->memo = PyMemoTable_New();
3436 if (self->memo == NULL)
3437 return -1;
3438 }
3439 self->output_len = 0;
3440 if (self->output_buffer == NULL) {
3441 self->max_output_len = WRITE_BUF_SIZE;
3442 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3443 self->max_output_len);
3444 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003445 return -1;
3446 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003447
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003448 self->arg = NULL;
3449 self->fast = 0;
3450 self->fast_nesting = 0;
3451 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003452 self->pers_func = NULL;
3453 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3454 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3455 "persistent_id");
3456 if (self->pers_func == NULL)
3457 return -1;
3458 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003459 return 0;
3460}
3461
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003462/* Define a proxy object for the Pickler's internal memo object. This is to
3463 * avoid breaking code like:
3464 * pickler.memo.clear()
3465 * and
3466 * pickler.memo = saved_memo
3467 * Is this a good idea? Not really, but we don't want to break code that uses
3468 * it. Note that we don't implement the entire mapping API here. This is
3469 * intentional, as these should be treated as black-box implementation details.
3470 */
3471
3472typedef struct {
3473 PyObject_HEAD
3474 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3475} PicklerMemoProxyObject;
3476
3477PyDoc_STRVAR(pmp_clear_doc,
3478"memo.clear() -> None. Remove all items from memo.");
3479
3480static PyObject *
3481pmp_clear(PicklerMemoProxyObject *self)
3482{
3483 if (self->pickler->memo)
3484 PyMemoTable_Clear(self->pickler->memo);
3485 Py_RETURN_NONE;
3486}
3487
3488PyDoc_STRVAR(pmp_copy_doc,
3489"memo.copy() -> new_memo. Copy the memo to a new object.");
3490
3491static PyObject *
3492pmp_copy(PicklerMemoProxyObject *self)
3493{
3494 Py_ssize_t i;
3495 PyMemoTable *memo;
3496 PyObject *new_memo = PyDict_New();
3497 if (new_memo == NULL)
3498 return NULL;
3499
3500 memo = self->pickler->memo;
3501 for (i = 0; i < memo->mt_allocated; ++i) {
3502 PyMemoEntry entry = memo->mt_table[i];
3503 if (entry.me_key != NULL) {
3504 int status;
3505 PyObject *key, *value;
3506
3507 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003508 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003509
3510 if (key == NULL || value == NULL) {
3511 Py_XDECREF(key);
3512 Py_XDECREF(value);
3513 goto error;
3514 }
3515 status = PyDict_SetItem(new_memo, key, value);
3516 Py_DECREF(key);
3517 Py_DECREF(value);
3518 if (status < 0)
3519 goto error;
3520 }
3521 }
3522 return new_memo;
3523
3524 error:
3525 Py_XDECREF(new_memo);
3526 return NULL;
3527}
3528
3529PyDoc_STRVAR(pmp_reduce_doc,
3530"memo.__reduce__(). Pickling support.");
3531
3532static PyObject *
3533pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3534{
3535 PyObject *reduce_value, *dict_args;
3536 PyObject *contents = pmp_copy(self);
3537 if (contents == NULL)
3538 return NULL;
3539
3540 reduce_value = PyTuple_New(2);
3541 if (reduce_value == NULL) {
3542 Py_DECREF(contents);
3543 return NULL;
3544 }
3545 dict_args = PyTuple_New(1);
3546 if (dict_args == NULL) {
3547 Py_DECREF(contents);
3548 Py_DECREF(reduce_value);
3549 return NULL;
3550 }
3551 PyTuple_SET_ITEM(dict_args, 0, contents);
3552 Py_INCREF((PyObject *)&PyDict_Type);
3553 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3554 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3555 return reduce_value;
3556}
3557
3558static PyMethodDef picklerproxy_methods[] = {
3559 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3560 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3561 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3562 {NULL, NULL} /* sentinel */
3563};
3564
3565static void
3566PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3567{
3568 PyObject_GC_UnTrack(self);
3569 Py_XDECREF(self->pickler);
3570 PyObject_GC_Del((PyObject *)self);
3571}
3572
3573static int
3574PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3575 visitproc visit, void *arg)
3576{
3577 Py_VISIT(self->pickler);
3578 return 0;
3579}
3580
3581static int
3582PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3583{
3584 Py_CLEAR(self->pickler);
3585 return 0;
3586}
3587
3588static PyTypeObject PicklerMemoProxyType = {
3589 PyVarObject_HEAD_INIT(NULL, 0)
3590 "_pickle.PicklerMemoProxy", /*tp_name*/
3591 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3592 0,
3593 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3594 0, /* tp_print */
3595 0, /* tp_getattr */
3596 0, /* tp_setattr */
3597 0, /* tp_compare */
3598 0, /* tp_repr */
3599 0, /* tp_as_number */
3600 0, /* tp_as_sequence */
3601 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003602 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003603 0, /* tp_call */
3604 0, /* tp_str */
3605 PyObject_GenericGetAttr, /* tp_getattro */
3606 PyObject_GenericSetAttr, /* tp_setattro */
3607 0, /* tp_as_buffer */
3608 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3609 0, /* tp_doc */
3610 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3611 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3612 0, /* tp_richcompare */
3613 0, /* tp_weaklistoffset */
3614 0, /* tp_iter */
3615 0, /* tp_iternext */
3616 picklerproxy_methods, /* tp_methods */
3617};
3618
3619static PyObject *
3620PicklerMemoProxy_New(PicklerObject *pickler)
3621{
3622 PicklerMemoProxyObject *self;
3623
3624 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3625 if (self == NULL)
3626 return NULL;
3627 Py_INCREF(pickler);
3628 self->pickler = pickler;
3629 PyObject_GC_Track(self);
3630 return (PyObject *)self;
3631}
3632
3633/*****************************************************************************/
3634
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003635static PyObject *
3636Pickler_get_memo(PicklerObject *self)
3637{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003638 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003639}
3640
3641static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003642Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003643{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003644 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003645
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003646 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003647 PyErr_SetString(PyExc_TypeError,
3648 "attribute deletion is not supported");
3649 return -1;
3650 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003651
3652 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3653 PicklerObject *pickler =
3654 ((PicklerMemoProxyObject *)obj)->pickler;
3655
3656 new_memo = PyMemoTable_Copy(pickler->memo);
3657 if (new_memo == NULL)
3658 return -1;
3659 }
3660 else if (PyDict_Check(obj)) {
3661 Py_ssize_t i = 0;
3662 PyObject *key, *value;
3663
3664 new_memo = PyMemoTable_New();
3665 if (new_memo == NULL)
3666 return -1;
3667
3668 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003669 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003670 PyObject *memo_obj;
3671
3672 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3673 PyErr_SetString(PyExc_TypeError,
3674 "'memo' values must be 2-item tuples");
3675 goto error;
3676 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003677 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003678 if (memo_id == -1 && PyErr_Occurred())
3679 goto error;
3680 memo_obj = PyTuple_GET_ITEM(value, 1);
3681 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3682 goto error;
3683 }
3684 }
3685 else {
3686 PyErr_Format(PyExc_TypeError,
3687 "'memo' attribute must be an PicklerMemoProxy object"
3688 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003689 return -1;
3690 }
3691
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003692 PyMemoTable_Del(self->memo);
3693 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003694
3695 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003696
3697 error:
3698 if (new_memo)
3699 PyMemoTable_Del(new_memo);
3700 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003701}
3702
3703static PyObject *
3704Pickler_get_persid(PicklerObject *self)
3705{
3706 if (self->pers_func == NULL)
3707 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3708 else
3709 Py_INCREF(self->pers_func);
3710 return self->pers_func;
3711}
3712
3713static int
3714Pickler_set_persid(PicklerObject *self, PyObject *value)
3715{
3716 PyObject *tmp;
3717
3718 if (value == NULL) {
3719 PyErr_SetString(PyExc_TypeError,
3720 "attribute deletion is not supported");
3721 return -1;
3722 }
3723 if (!PyCallable_Check(value)) {
3724 PyErr_SetString(PyExc_TypeError,
3725 "persistent_id must be a callable taking one argument");
3726 return -1;
3727 }
3728
3729 tmp = self->pers_func;
3730 Py_INCREF(value);
3731 self->pers_func = value;
3732 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3733
3734 return 0;
3735}
3736
3737static PyMemberDef Pickler_members[] = {
3738 {"bin", T_INT, offsetof(PicklerObject, bin)},
3739 {"fast", T_INT, offsetof(PicklerObject, fast)},
3740 {NULL}
3741};
3742
3743static PyGetSetDef Pickler_getsets[] = {
3744 {"memo", (getter)Pickler_get_memo,
3745 (setter)Pickler_set_memo},
3746 {"persistent_id", (getter)Pickler_get_persid,
3747 (setter)Pickler_set_persid},
3748 {NULL}
3749};
3750
/* Type object for "_pickle.Pickler".

   The initializer is positional, so the order of the entries below must
   follow the slot order of PyTypeObject; each entry is labelled with the
   slot it fills.  The flags mark the type as subclassable
   (Py_TPFLAGS_BASETYPE) and as participating in garbage collection
   (Py_TPFLAGS_HAVE_GC), which is why tp_traverse/tp_clear are provided and
   tp_free is PyObject_GC_Del. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3793
3794/* Temporary helper for calling self.find_class().
3795
3796 XXX: It would be nice to able to avoid Python function call overhead, by
3797 using directly the C version of find_class(), when find_class() is not
3798 overridden by a subclass. Although, this could become rather hackish. A
3799 simpler optimization would be to call the C function when self is not a
3800 subclass instance. */
3801static PyObject *
3802find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3803{
3804 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3805 module_name, global_name);
3806}
3807
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003808static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003809marker(UnpicklerObject *self)
3810{
3811 if (self->num_marks < 1) {
3812 PyErr_SetString(UnpicklingError, "could not find MARK");
3813 return -1;
3814 }
3815
3816 return self->marks[--self->num_marks];
3817}
3818
3819static int
3820load_none(UnpicklerObject *self)
3821{
3822 PDATA_APPEND(self->stack, Py_None, -1);
3823 return 0;
3824}
3825
3826static int
3827bad_readline(void)
3828{
3829 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3830 return -1;
3831}
3832
3833static int
3834load_int(UnpicklerObject *self)
3835{
3836 PyObject *value;
3837 char *endptr, *s;
3838 Py_ssize_t len;
3839 long x;
3840
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003841 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003842 return -1;
3843 if (len < 2)
3844 return bad_readline();
3845
3846 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003847 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3848 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003849 x = strtol(s, &endptr, 0);
3850
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003851 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003852 /* Hm, maybe we've got something long. Let's try reading
3853 * it as a Python long object. */
3854 errno = 0;
3855 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003856 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003857 if (value == NULL) {
3858 PyErr_SetString(PyExc_ValueError,
3859 "could not convert string to int");
3860 return -1;
3861 }
3862 }
3863 else {
3864 if (len == 3 && (x == 0 || x == 1)) {
3865 if ((value = PyBool_FromLong(x)) == NULL)
3866 return -1;
3867 }
3868 else {
3869 if ((value = PyLong_FromLong(x)) == NULL)
3870 return -1;
3871 }
3872 }
3873
3874 PDATA_PUSH(self->stack, value, -1);
3875 return 0;
3876}
3877
3878static int
3879load_bool(UnpicklerObject *self, PyObject *boolean)
3880{
3881 assert(boolean == Py_True || boolean == Py_False);
3882 PDATA_APPEND(self->stack, boolean, -1);
3883 return 0;
3884}
3885
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003886/* s contains x bytes of an unsigned little-endian integer. Return its value
3887 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3888 */
3889static Py_ssize_t
3890calc_binsize(char *bytes, int size)
3891{
3892 unsigned char *s = (unsigned char *)bytes;
3893 size_t x = 0;
3894
3895 assert(size == 4);
3896
3897 x = (size_t) s[0];
3898 x |= (size_t) s[1] << 8;
3899 x |= (size_t) s[2] << 16;
3900 x |= (size_t) s[3] << 24;
3901
3902 if (x > PY_SSIZE_T_MAX)
3903 return -1;
3904 else
3905 return (Py_ssize_t) x;
3906}
3907
/* `bytes` points at `size` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (which is undefined
 * behaviour when long is 32 bits wide); the sign is applied afterwards.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  With a 4-byte
     * long the conversion below already yields the negative value.
     */
    if (size == 4 && sizeof(long) > 4 && (x & 0x80000000UL) != 0) {
        x |= ~0xFFFFFFFFUL;
    }

    return (long)x;
}
3934
3935static int
3936load_binintx(UnpicklerObject *self, char *s, int size)
3937{
3938 PyObject *value;
3939 long x;
3940
3941 x = calc_binint(s, size);
3942
3943 if ((value = PyLong_FromLong(x)) == NULL)
3944 return -1;
3945
3946 PDATA_PUSH(self->stack, value, -1);
3947 return 0;
3948}
3949
3950static int
3951load_binint(UnpicklerObject *self)
3952{
3953 char *s;
3954
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003955 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003956 return -1;
3957
3958 return load_binintx(self, s, 4);
3959}
3960
3961static int
3962load_binint1(UnpicklerObject *self)
3963{
3964 char *s;
3965
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003966 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003967 return -1;
3968
3969 return load_binintx(self, s, 1);
3970}
3971
3972static int
3973load_binint2(UnpicklerObject *self)
3974{
3975 char *s;
3976
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003977 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003978 return -1;
3979
3980 return load_binintx(self, s, 2);
3981}
3982
3983static int
3984load_long(UnpicklerObject *self)
3985{
3986 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003987 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003988 Py_ssize_t len;
3989
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003990 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003991 return -1;
3992 if (len < 2)
3993 return bad_readline();
3994
Mark Dickinson8dd05142009-01-20 20:43:58 +00003995 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3996 the 'L' before calling PyLong_FromString. In order to maintain
3997 compatibility with Python 3.0.0, we don't actually *require*
3998 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003999 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004000 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004001 /* XXX: Should the base argument explicitly set to 10? */
4002 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004003 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004004 return -1;
4005
4006 PDATA_PUSH(self->stack, value, -1);
4007 return 0;
4008}
4009
4010/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4011 * data following.
4012 */
4013static int
4014load_counted_long(UnpicklerObject *self, int size)
4015{
4016 PyObject *value;
4017 char *nbytes;
4018 char *pdata;
4019
4020 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004021 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004022 return -1;
4023
4024 size = calc_binint(nbytes, size);
4025 if (size < 0) {
4026 /* Corrupt or hostile pickle -- we never write one like this */
4027 PyErr_SetString(UnpicklingError,
4028 "LONG pickle has negative byte count");
4029 return -1;
4030 }
4031
4032 if (size == 0)
4033 value = PyLong_FromLong(0L);
4034 else {
4035 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004036 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004037 return -1;
4038 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4039 1 /* little endian */ , 1 /* signed */ );
4040 }
4041 if (value == NULL)
4042 return -1;
4043 PDATA_PUSH(self->stack, value, -1);
4044 return 0;
4045}
4046
4047static int
4048load_float(UnpicklerObject *self)
4049{
4050 PyObject *value;
4051 char *endptr, *s;
4052 Py_ssize_t len;
4053 double d;
4054
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004055 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004056 return -1;
4057 if (len < 2)
4058 return bad_readline();
4059
4060 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004061 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4062 if (d == -1.0 && PyErr_Occurred())
4063 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004064 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004065 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4066 return -1;
4067 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004068 value = PyFloat_FromDouble(d);
4069 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004070 return -1;
4071
4072 PDATA_PUSH(self->stack, value, -1);
4073 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004074}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004075
4076static int
4077load_binfloat(UnpicklerObject *self)
4078{
4079 PyObject *value;
4080 double x;
4081 char *s;
4082
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004083 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004084 return -1;
4085
4086 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4087 if (x == -1.0 && PyErr_Occurred())
4088 return -1;
4089
4090 if ((value = PyFloat_FromDouble(x)) == NULL)
4091 return -1;
4092
4093 PDATA_PUSH(self->stack, value, -1);
4094 return 0;
4095}
4096
4097static int
4098load_string(UnpicklerObject *self)
4099{
4100 PyObject *bytes;
4101 PyObject *str = NULL;
4102 Py_ssize_t len;
4103 char *s, *p;
4104
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004105 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004106 return -1;
4107 if (len < 3)
4108 return bad_readline();
4109 if ((s = strdup(s)) == NULL) {
4110 PyErr_NoMemory();
4111 return -1;
4112 }
4113
4114 /* Strip outermost quotes */
4115 while (s[len - 1] <= ' ')
4116 len--;
4117 if (s[0] == '"' && s[len - 1] == '"') {
4118 s[len - 1] = '\0';
4119 p = s + 1;
4120 len -= 2;
4121 }
4122 else if (s[0] == '\'' && s[len - 1] == '\'') {
4123 s[len - 1] = '\0';
4124 p = s + 1;
4125 len -= 2;
4126 }
4127 else {
4128 free(s);
4129 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4130 return -1;
4131 }
4132
4133 /* Use the PyBytes API to decode the string, since that is what is used
4134 to encode, and then coerce the result to Unicode. */
4135 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4136 free(s);
4137 if (bytes == NULL)
4138 return -1;
4139 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4140 Py_DECREF(bytes);
4141 if (str == NULL)
4142 return -1;
4143
4144 PDATA_PUSH(self->stack, str, -1);
4145 return 0;
4146}
4147
4148static int
4149load_binbytes(UnpicklerObject *self)
4150{
4151 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004152 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004153 char *s;
4154
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004155 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004156 return -1;
4157
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004158 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004159 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004160 PyErr_Format(PyExc_OverflowError,
4161 "BINBYTES exceeds system's maximum size of %zd bytes",
4162 PY_SSIZE_T_MAX
4163 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004164 return -1;
4165 }
4166
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004167 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004168 return -1;
4169 bytes = PyBytes_FromStringAndSize(s, x);
4170 if (bytes == NULL)
4171 return -1;
4172
4173 PDATA_PUSH(self->stack, bytes, -1);
4174 return 0;
4175}
4176
4177static int
4178load_short_binbytes(UnpicklerObject *self)
4179{
4180 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004181 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004182 char *s;
4183
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004184 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004185 return -1;
4186
4187 x = (unsigned char)s[0];
4188
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004189 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004190 return -1;
4191
4192 bytes = PyBytes_FromStringAndSize(s, x);
4193 if (bytes == NULL)
4194 return -1;
4195
4196 PDATA_PUSH(self->stack, bytes, -1);
4197 return 0;
4198}
4199
4200static int
4201load_binstring(UnpicklerObject *self)
4202{
4203 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004204 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004205 char *s;
4206
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004207 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004208 return -1;
4209
4210 x = calc_binint(s, 4);
4211 if (x < 0) {
4212 PyErr_SetString(UnpicklingError,
4213 "BINSTRING pickle has negative byte count");
4214 return -1;
4215 }
4216
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004217 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004218 return -1;
4219
4220 /* Convert Python 2.x strings to unicode. */
4221 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4222 if (str == NULL)
4223 return -1;
4224
4225 PDATA_PUSH(self->stack, str, -1);
4226 return 0;
4227}
4228
4229static int
4230load_short_binstring(UnpicklerObject *self)
4231{
4232 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004233 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004234 char *s;
4235
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004236 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004237 return -1;
4238
4239 x = (unsigned char)s[0];
4240
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004241 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004242 return -1;
4243
4244 /* Convert Python 2.x strings to unicode. */
4245 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4246 if (str == NULL)
4247 return -1;
4248
4249 PDATA_PUSH(self->stack, str, -1);
4250 return 0;
4251}
4252
4253static int
4254load_unicode(UnpicklerObject *self)
4255{
4256 PyObject *str;
4257 Py_ssize_t len;
4258 char *s;
4259
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004260 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004261 return -1;
4262 if (len < 1)
4263 return bad_readline();
4264
4265 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4266 if (str == NULL)
4267 return -1;
4268
4269 PDATA_PUSH(self->stack, str, -1);
4270 return 0;
4271}
4272
4273static int
4274load_binunicode(UnpicklerObject *self)
4275{
4276 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004277 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004278 char *s;
4279
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004280 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004281 return -1;
4282
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004283 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004284 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004285 PyErr_Format(PyExc_OverflowError,
4286 "BINUNICODE exceeds system's maximum size of %zd bytes",
4287 PY_SSIZE_T_MAX
4288 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004289 return -1;
4290 }
4291
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004293 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004294 return -1;
4295
Victor Stinner485fb562010-04-13 11:07:24 +00004296 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004297 if (str == NULL)
4298 return -1;
4299
4300 PDATA_PUSH(self->stack, str, -1);
4301 return 0;
4302}
4303
4304static int
4305load_tuple(UnpicklerObject *self)
4306{
4307 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004308 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004309
4310 if ((i = marker(self)) < 0)
4311 return -1;
4312
4313 tuple = Pdata_poptuple(self->stack, i);
4314 if (tuple == NULL)
4315 return -1;
4316 PDATA_PUSH(self->stack, tuple, -1);
4317 return 0;
4318}
4319
4320static int
4321load_counted_tuple(UnpicklerObject *self, int len)
4322{
4323 PyObject *tuple;
4324
4325 tuple = PyTuple_New(len);
4326 if (tuple == NULL)
4327 return -1;
4328
4329 while (--len >= 0) {
4330 PyObject *item;
4331
4332 PDATA_POP(self->stack, item);
4333 if (item == NULL)
4334 return -1;
4335 PyTuple_SET_ITEM(tuple, len, item);
4336 }
4337 PDATA_PUSH(self->stack, tuple, -1);
4338 return 0;
4339}
4340
4341static int
4342load_empty_list(UnpicklerObject *self)
4343{
4344 PyObject *list;
4345
4346 if ((list = PyList_New(0)) == NULL)
4347 return -1;
4348 PDATA_PUSH(self->stack, list, -1);
4349 return 0;
4350}
4351
4352static int
4353load_empty_dict(UnpicklerObject *self)
4354{
4355 PyObject *dict;
4356
4357 if ((dict = PyDict_New()) == NULL)
4358 return -1;
4359 PDATA_PUSH(self->stack, dict, -1);
4360 return 0;
4361}
4362
4363static int
4364load_list(UnpicklerObject *self)
4365{
4366 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004367 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004368
4369 if ((i = marker(self)) < 0)
4370 return -1;
4371
4372 list = Pdata_poplist(self->stack, i);
4373 if (list == NULL)
4374 return -1;
4375 PDATA_PUSH(self->stack, list, -1);
4376 return 0;
4377}
4378
4379static int
4380load_dict(UnpicklerObject *self)
4381{
4382 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004383 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004384
4385 if ((i = marker(self)) < 0)
4386 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004387 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004388
4389 if ((dict = PyDict_New()) == NULL)
4390 return -1;
4391
4392 for (k = i + 1; k < j; k += 2) {
4393 key = self->stack->data[k - 1];
4394 value = self->stack->data[k];
4395 if (PyDict_SetItem(dict, key, value) < 0) {
4396 Py_DECREF(dict);
4397 return -1;
4398 }
4399 }
4400 Pdata_clear(self->stack, i);
4401 PDATA_PUSH(self->stack, dict, -1);
4402 return 0;
4403}
4404
4405static PyObject *
4406instantiate(PyObject *cls, PyObject *args)
4407{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004408 PyObject *result = NULL;
4409 /* Caller must assure args are a tuple. Normally, args come from
4410 Pdata_poptuple which packs objects from the top of the stack
4411 into a newly created tuple. */
4412 assert(PyTuple_Check(args));
4413 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4414 PyObject_HasAttrString(cls, "__getinitargs__")) {
4415 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004416 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004417 else {
4418 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4419 }
4420 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004421}
4422
4423static int
4424load_obj(UnpicklerObject *self)
4425{
4426 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004427 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004428
4429 if ((i = marker(self)) < 0)
4430 return -1;
4431
4432 args = Pdata_poptuple(self->stack, i + 1);
4433 if (args == NULL)
4434 return -1;
4435
4436 PDATA_POP(self->stack, cls);
4437 if (cls) {
4438 obj = instantiate(cls, args);
4439 Py_DECREF(cls);
4440 }
4441 Py_DECREF(args);
4442 if (obj == NULL)
4443 return -1;
4444
4445 PDATA_PUSH(self->stack, obj, -1);
4446 return 0;
4447}
4448
4449static int
4450load_inst(UnpicklerObject *self)
4451{
4452 PyObject *cls = NULL;
4453 PyObject *args = NULL;
4454 PyObject *obj = NULL;
4455 PyObject *module_name;
4456 PyObject *class_name;
4457 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004458 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004459 char *s;
4460
4461 if ((i = marker(self)) < 0)
4462 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004463 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004464 return -1;
4465 if (len < 2)
4466 return bad_readline();
4467
4468 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4469 identifiers are permitted in Python 3.0, since the INST opcode is only
4470 supported by older protocols on Python 2.x. */
4471 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4472 if (module_name == NULL)
4473 return -1;
4474
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004475 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004476 if (len < 2)
4477 return bad_readline();
4478 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004479 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004480 cls = find_class(self, module_name, class_name);
4481 Py_DECREF(class_name);
4482 }
4483 }
4484 Py_DECREF(module_name);
4485
4486 if (cls == NULL)
4487 return -1;
4488
4489 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4490 obj = instantiate(cls, args);
4491 Py_DECREF(args);
4492 }
4493 Py_DECREF(cls);
4494
4495 if (obj == NULL)
4496 return -1;
4497
4498 PDATA_PUSH(self->stack, obj, -1);
4499 return 0;
4500}
4501
4502static int
4503load_newobj(UnpicklerObject *self)
4504{
4505 PyObject *args = NULL;
4506 PyObject *clsraw = NULL;
4507 PyTypeObject *cls; /* clsraw cast to its true type */
4508 PyObject *obj;
4509
4510 /* Stack is ... cls argtuple, and we want to call
4511 * cls.__new__(cls, *argtuple).
4512 */
4513 PDATA_POP(self->stack, args);
4514 if (args == NULL)
4515 goto error;
4516 if (!PyTuple_Check(args)) {
4517 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4518 goto error;
4519 }
4520
4521 PDATA_POP(self->stack, clsraw);
4522 cls = (PyTypeObject *)clsraw;
4523 if (cls == NULL)
4524 goto error;
4525 if (!PyType_Check(cls)) {
4526 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4527 "isn't a type object");
4528 goto error;
4529 }
4530 if (cls->tp_new == NULL) {
4531 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4532 "has NULL tp_new");
4533 goto error;
4534 }
4535
4536 /* Call __new__. */
4537 obj = cls->tp_new(cls, args, NULL);
4538 if (obj == NULL)
4539 goto error;
4540
4541 Py_DECREF(args);
4542 Py_DECREF(clsraw);
4543 PDATA_PUSH(self->stack, obj, -1);
4544 return 0;
4545
4546 error:
4547 Py_XDECREF(args);
4548 Py_XDECREF(clsraw);
4549 return -1;
4550}
4551
4552static int
4553load_global(UnpicklerObject *self)
4554{
4555 PyObject *global = NULL;
4556 PyObject *module_name;
4557 PyObject *global_name;
4558 Py_ssize_t len;
4559 char *s;
4560
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004561 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004562 return -1;
4563 if (len < 2)
4564 return bad_readline();
4565 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4566 if (!module_name)
4567 return -1;
4568
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004569 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004570 if (len < 2) {
4571 Py_DECREF(module_name);
4572 return bad_readline();
4573 }
4574 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4575 if (global_name) {
4576 global = find_class(self, module_name, global_name);
4577 Py_DECREF(global_name);
4578 }
4579 }
4580 Py_DECREF(module_name);
4581
4582 if (global == NULL)
4583 return -1;
4584 PDATA_PUSH(self->stack, global, -1);
4585 return 0;
4586}
4587
4588static int
4589load_persid(UnpicklerObject *self)
4590{
4591 PyObject *pid;
4592 Py_ssize_t len;
4593 char *s;
4594
4595 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004596 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004597 return -1;
4598 if (len < 2)
4599 return bad_readline();
4600
4601 pid = PyBytes_FromStringAndSize(s, len - 1);
4602 if (pid == NULL)
4603 return -1;
4604
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004605 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004606 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004607 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004608 if (pid == NULL)
4609 return -1;
4610
4611 PDATA_PUSH(self->stack, pid, -1);
4612 return 0;
4613 }
4614 else {
4615 PyErr_SetString(UnpicklingError,
4616 "A load persistent id instruction was encountered,\n"
4617 "but no persistent_load function was specified.");
4618 return -1;
4619 }
4620}
4621
4622static int
4623load_binpersid(UnpicklerObject *self)
4624{
4625 PyObject *pid;
4626
4627 if (self->pers_func) {
4628 PDATA_POP(self->stack, pid);
4629 if (pid == NULL)
4630 return -1;
4631
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004632 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004633 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004634 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635 if (pid == NULL)
4636 return -1;
4637
4638 PDATA_PUSH(self->stack, pid, -1);
4639 return 0;
4640 }
4641 else {
4642 PyErr_SetString(UnpicklingError,
4643 "A load persistent id instruction was encountered,\n"
4644 "but no persistent_load function was specified.");
4645 return -1;
4646 }
4647}
4648
4649static int
4650load_pop(UnpicklerObject *self)
4651{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004652 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004653
4654 /* Note that we split the (pickle.py) stack into two stacks,
4655 * an object stack and a mark stack. We have to be clever and
4656 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004657 * mark stack first, and only signalling a stack underflow if
4658 * the object stack is empty and the mark stack doesn't match
4659 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004660 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004661 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004662 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004663 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004664 len--;
4665 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004666 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004667 } else {
4668 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004669 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004670 return 0;
4671}
4672
4673static int
4674load_pop_mark(UnpicklerObject *self)
4675{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004676 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004677
4678 if ((i = marker(self)) < 0)
4679 return -1;
4680
4681 Pdata_clear(self->stack, i);
4682
4683 return 0;
4684}
4685
4686static int
4687load_dup(UnpicklerObject *self)
4688{
4689 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004690 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004691
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004692 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004693 return stack_underflow();
4694 last = self->stack->data[len - 1];
4695 PDATA_APPEND(self->stack, last, -1);
4696 return 0;
4697}
4698
4699static int
4700load_get(UnpicklerObject *self)
4701{
4702 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004703 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004704 Py_ssize_t len;
4705 char *s;
4706
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004707 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004708 return -1;
4709 if (len < 2)
4710 return bad_readline();
4711
4712 key = PyLong_FromString(s, NULL, 10);
4713 if (key == NULL)
4714 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004715 idx = PyLong_AsSsize_t(key);
4716 if (idx == -1 && PyErr_Occurred()) {
4717 Py_DECREF(key);
4718 return -1;
4719 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004720
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004721 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004722 if (value == NULL) {
4723 if (!PyErr_Occurred())
4724 PyErr_SetObject(PyExc_KeyError, key);
4725 Py_DECREF(key);
4726 return -1;
4727 }
4728 Py_DECREF(key);
4729
4730 PDATA_APPEND(self->stack, value, -1);
4731 return 0;
4732}
4733
4734static int
4735load_binget(UnpicklerObject *self)
4736{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004737 PyObject *value;
4738 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004739 char *s;
4740
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004741 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004742 return -1;
4743
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004744 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004745
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004746 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004747 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004748 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004749 if (!PyErr_Occurred())
4750 PyErr_SetObject(PyExc_KeyError, key);
4751 Py_DECREF(key);
4752 return -1;
4753 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004754
4755 PDATA_APPEND(self->stack, value, -1);
4756 return 0;
4757}
4758
4759static int
4760load_long_binget(UnpicklerObject *self)
4761{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004762 PyObject *value;
4763 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004765
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004766 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004767 return -1;
4768
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004769 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004770
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004771 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004772 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004773 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004774 if (!PyErr_Occurred())
4775 PyErr_SetObject(PyExc_KeyError, key);
4776 Py_DECREF(key);
4777 return -1;
4778 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779
4780 PDATA_APPEND(self->stack, value, -1);
4781 return 0;
4782}
4783
4784/* Push an object from the extension registry (EXT[124]). nbytes is
4785 * the number of bytes following the opcode, holding the index (code) value.
4786 */
4787static int
4788load_extension(UnpicklerObject *self, int nbytes)
4789{
4790 char *codebytes; /* the nbytes bytes after the opcode */
4791 long code; /* calc_binint returns long */
4792 PyObject *py_code; /* code as a Python int */
4793 PyObject *obj; /* the object to push */
4794 PyObject *pair; /* (module_name, class_name) */
4795 PyObject *module_name, *class_name;
4796
4797 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004798 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004799 return -1;
4800 code = calc_binint(codebytes, nbytes);
4801 if (code <= 0) { /* note that 0 is forbidden */
4802 /* Corrupt or hostile pickle. */
4803 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4804 return -1;
4805 }
4806
4807 /* Look for the code in the cache. */
4808 py_code = PyLong_FromLong(code);
4809 if (py_code == NULL)
4810 return -1;
4811 obj = PyDict_GetItem(extension_cache, py_code);
4812 if (obj != NULL) {
4813 /* Bingo. */
4814 Py_DECREF(py_code);
4815 PDATA_APPEND(self->stack, obj, -1);
4816 return 0;
4817 }
4818
4819 /* Look up the (module_name, class_name) pair. */
4820 pair = PyDict_GetItem(inverted_registry, py_code);
4821 if (pair == NULL) {
4822 Py_DECREF(py_code);
4823 PyErr_Format(PyExc_ValueError, "unregistered extension "
4824 "code %ld", code);
4825 return -1;
4826 }
4827 /* Since the extension registry is manipulable via Python code,
4828 * confirm that pair is really a 2-tuple of strings.
4829 */
4830 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4831 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4832 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4833 Py_DECREF(py_code);
4834 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4835 "isn't a 2-tuple of strings", code);
4836 return -1;
4837 }
4838 /* Load the object. */
4839 obj = find_class(self, module_name, class_name);
4840 if (obj == NULL) {
4841 Py_DECREF(py_code);
4842 return -1;
4843 }
4844 /* Cache code -> obj. */
4845 code = PyDict_SetItem(extension_cache, py_code, obj);
4846 Py_DECREF(py_code);
4847 if (code < 0) {
4848 Py_DECREF(obj);
4849 return -1;
4850 }
4851 PDATA_PUSH(self->stack, obj, -1);
4852 return 0;
4853}
4854
4855static int
4856load_put(UnpicklerObject *self)
4857{
4858 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004859 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004860 Py_ssize_t len;
4861 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004863 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864 return -1;
4865 if (len < 2)
4866 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004867 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004868 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004869 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004870
4871 key = PyLong_FromString(s, NULL, 10);
4872 if (key == NULL)
4873 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004874 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004875 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004876 if (idx < 0) {
4877 if (!PyErr_Occurred())
4878 PyErr_SetString(PyExc_ValueError,
4879 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004880 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004881 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004882
4883 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004884}
4885
4886static int
4887load_binput(UnpicklerObject *self)
4888{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004889 PyObject *value;
4890 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004891 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004892
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004893 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004894 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004895
4896 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004897 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004898 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004899
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004900 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004901
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004902 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004903}
4904
4905static int
4906load_long_binput(UnpicklerObject *self)
4907{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004908 PyObject *value;
4909 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004911
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004912 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004913 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004914
4915 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004916 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004917 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004918
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004919 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004920 if (idx < 0) {
4921 PyErr_SetString(PyExc_ValueError,
4922 "negative LONG_BINPUT argument");
4923 return -1;
4924 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004925
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004926 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004927}
4928
4929static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004930do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004931{
4932 PyObject *value;
4933 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004934 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004935
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004936 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004937 if (x > len || x <= 0)
4938 return stack_underflow();
4939 if (len == x) /* nothing to do */
4940 return 0;
4941
4942 list = self->stack->data[x - 1];
4943
4944 if (PyList_Check(list)) {
4945 PyObject *slice;
4946 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004947 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004948
4949 slice = Pdata_poplist(self->stack, x);
4950 if (!slice)
4951 return -1;
4952 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004953 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004954 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004955 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004956 }
4957 else {
4958 PyObject *append_func;
4959
4960 append_func = PyObject_GetAttrString(list, "append");
4961 if (append_func == NULL)
4962 return -1;
4963 for (i = x; i < len; i++) {
4964 PyObject *result;
4965
4966 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004967 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968 if (result == NULL) {
4969 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004970 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004971 return -1;
4972 }
4973 Py_DECREF(result);
4974 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004975 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004976 }
4977
4978 return 0;
4979}
4980
4981static int
4982load_append(UnpicklerObject *self)
4983{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004984 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004985}
4986
4987static int
4988load_appends(UnpicklerObject *self)
4989{
4990 return do_append(self, marker(self));
4991}
4992
4993static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004994do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004995{
4996 PyObject *value, *key;
4997 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004998 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004999 int status = 0;
5000
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005001 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005002 if (x > len || x <= 0)
5003 return stack_underflow();
5004 if (len == x) /* nothing to do */
5005 return 0;
5006 if ((len - x) % 2 != 0) {
5007 /* Currupt or hostile pickle -- we never write one like this. */
5008 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5009 return -1;
5010 }
5011
5012 /* Here, dict does not actually need to be a PyDict; it could be anything
5013 that supports the __setitem__ attribute. */
5014 dict = self->stack->data[x - 1];
5015
5016 for (i = x + 1; i < len; i += 2) {
5017 key = self->stack->data[i - 1];
5018 value = self->stack->data[i];
5019 if (PyObject_SetItem(dict, key, value) < 0) {
5020 status = -1;
5021 break;
5022 }
5023 }
5024
5025 Pdata_clear(self->stack, x);
5026 return status;
5027}
5028
5029static int
5030load_setitem(UnpicklerObject *self)
5031{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005032 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005033}
5034
5035static int
5036load_setitems(UnpicklerObject *self)
5037{
5038 return do_setitems(self, marker(self));
5039}
5040
5041static int
5042load_build(UnpicklerObject *self)
5043{
5044 PyObject *state, *inst, *slotstate;
5045 PyObject *setstate;
5046 int status = 0;
5047
5048 /* Stack is ... instance, state. We want to leave instance at
5049 * the stack top, possibly mutated via instance.__setstate__(state).
5050 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005051 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005052 return stack_underflow();
5053
5054 PDATA_POP(self->stack, state);
5055 if (state == NULL)
5056 return -1;
5057
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005058 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005059
5060 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005061 if (setstate == NULL) {
5062 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5063 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005064 else {
5065 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005066 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005067 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005068 }
5069 else {
5070 PyObject *result;
5071
5072 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005073 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005074 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005075 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005076 Py_DECREF(setstate);
5077 if (result == NULL)
5078 return -1;
5079 Py_DECREF(result);
5080 return 0;
5081 }
5082
5083 /* A default __setstate__. First see whether state embeds a
5084 * slot state dict too (a proto 2 addition).
5085 */
5086 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5087 PyObject *tmp = state;
5088
5089 state = PyTuple_GET_ITEM(tmp, 0);
5090 slotstate = PyTuple_GET_ITEM(tmp, 1);
5091 Py_INCREF(state);
5092 Py_INCREF(slotstate);
5093 Py_DECREF(tmp);
5094 }
5095 else
5096 slotstate = NULL;
5097
5098 /* Set inst.__dict__ from the state dict (if any). */
5099 if (state != Py_None) {
5100 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005101 PyObject *d_key, *d_value;
5102 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005103
5104 if (!PyDict_Check(state)) {
5105 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5106 goto error;
5107 }
5108 dict = PyObject_GetAttrString(inst, "__dict__");
5109 if (dict == NULL)
5110 goto error;
5111
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005112 i = 0;
5113 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5114 /* normally the keys for instance attributes are
5115 interned. we should try to do that here. */
5116 Py_INCREF(d_key);
5117 if (PyUnicode_CheckExact(d_key))
5118 PyUnicode_InternInPlace(&d_key);
5119 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5120 Py_DECREF(d_key);
5121 goto error;
5122 }
5123 Py_DECREF(d_key);
5124 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005125 Py_DECREF(dict);
5126 }
5127
5128 /* Also set instance attributes from the slotstate dict (if any). */
5129 if (slotstate != NULL) {
5130 PyObject *d_key, *d_value;
5131 Py_ssize_t i;
5132
5133 if (!PyDict_Check(slotstate)) {
5134 PyErr_SetString(UnpicklingError,
5135 "slot state is not a dictionary");
5136 goto error;
5137 }
5138 i = 0;
5139 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5140 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5141 goto error;
5142 }
5143 }
5144
5145 if (0) {
5146 error:
5147 status = -1;
5148 }
5149
5150 Py_DECREF(state);
5151 Py_XDECREF(slotstate);
5152 return status;
5153}
5154
5155static int
5156load_mark(UnpicklerObject *self)
5157{
5158
5159 /* Note that we split the (pickle.py) stack into two stacks, an
5160 * object stack and a mark stack. Here we push a mark onto the
5161 * mark stack.
5162 */
5163
5164 if ((self->num_marks + 1) >= self->marks_size) {
5165 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005166 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005167
5168 /* Use the size_t type to check for overflow. */
5169 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005170 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005171 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005172 PyErr_NoMemory();
5173 return -1;
5174 }
5175
5176 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005177 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005178 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005179 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5180 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005181 if (marks == NULL) {
5182 PyErr_NoMemory();
5183 return -1;
5184 }
5185 self->marks = marks;
5186 self->marks_size = (Py_ssize_t)alloc;
5187 }
5188
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005189 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005190
5191 return 0;
5192}
5193
5194static int
5195load_reduce(UnpicklerObject *self)
5196{
5197 PyObject *callable = NULL;
5198 PyObject *argtup = NULL;
5199 PyObject *obj = NULL;
5200
5201 PDATA_POP(self->stack, argtup);
5202 if (argtup == NULL)
5203 return -1;
5204 PDATA_POP(self->stack, callable);
5205 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005206 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005207 Py_DECREF(callable);
5208 }
5209 Py_DECREF(argtup);
5210
5211 if (obj == NULL)
5212 return -1;
5213
5214 PDATA_PUSH(self->stack, obj, -1);
5215 return 0;
5216}
5217
5218/* Just raises an error if we don't know the protocol specified. PROTO
5219 * is the first opcode for protocols >= 2.
5220 */
5221static int
5222load_proto(UnpicklerObject *self)
5223{
5224 char *s;
5225 int i;
5226
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005227 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005228 return -1;
5229
5230 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005231 if (i <= HIGHEST_PROTOCOL) {
5232 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005233 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005234 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005235
5236 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5237 return -1;
5238}
5239
5240static PyObject *
5241load(UnpicklerObject *self)
5242{
5243 PyObject *err;
5244 PyObject *value = NULL;
5245 char *s;
5246
5247 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005248 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005249 Pdata_clear(self->stack, 0);
5250
5251 /* Convenient macros for the dispatch while-switch loop just below. */
5252#define OP(opcode, load_func) \
5253 case opcode: if (load_func(self) < 0) break; continue;
5254
5255#define OP_ARG(opcode, load_func, arg) \
5256 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5257
5258 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005259 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005260 break;
5261
5262 switch ((enum opcode)s[0]) {
5263 OP(NONE, load_none)
5264 OP(BININT, load_binint)
5265 OP(BININT1, load_binint1)
5266 OP(BININT2, load_binint2)
5267 OP(INT, load_int)
5268 OP(LONG, load_long)
5269 OP_ARG(LONG1, load_counted_long, 1)
5270 OP_ARG(LONG4, load_counted_long, 4)
5271 OP(FLOAT, load_float)
5272 OP(BINFLOAT, load_binfloat)
5273 OP(BINBYTES, load_binbytes)
5274 OP(SHORT_BINBYTES, load_short_binbytes)
5275 OP(BINSTRING, load_binstring)
5276 OP(SHORT_BINSTRING, load_short_binstring)
5277 OP(STRING, load_string)
5278 OP(UNICODE, load_unicode)
5279 OP(BINUNICODE, load_binunicode)
5280 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5281 OP_ARG(TUPLE1, load_counted_tuple, 1)
5282 OP_ARG(TUPLE2, load_counted_tuple, 2)
5283 OP_ARG(TUPLE3, load_counted_tuple, 3)
5284 OP(TUPLE, load_tuple)
5285 OP(EMPTY_LIST, load_empty_list)
5286 OP(LIST, load_list)
5287 OP(EMPTY_DICT, load_empty_dict)
5288 OP(DICT, load_dict)
5289 OP(OBJ, load_obj)
5290 OP(INST, load_inst)
5291 OP(NEWOBJ, load_newobj)
5292 OP(GLOBAL, load_global)
5293 OP(APPEND, load_append)
5294 OP(APPENDS, load_appends)
5295 OP(BUILD, load_build)
5296 OP(DUP, load_dup)
5297 OP(BINGET, load_binget)
5298 OP(LONG_BINGET, load_long_binget)
5299 OP(GET, load_get)
5300 OP(MARK, load_mark)
5301 OP(BINPUT, load_binput)
5302 OP(LONG_BINPUT, load_long_binput)
5303 OP(PUT, load_put)
5304 OP(POP, load_pop)
5305 OP(POP_MARK, load_pop_mark)
5306 OP(SETITEM, load_setitem)
5307 OP(SETITEMS, load_setitems)
5308 OP(PERSID, load_persid)
5309 OP(BINPERSID, load_binpersid)
5310 OP(REDUCE, load_reduce)
5311 OP(PROTO, load_proto)
5312 OP_ARG(EXT1, load_extension, 1)
5313 OP_ARG(EXT2, load_extension, 2)
5314 OP_ARG(EXT4, load_extension, 4)
5315 OP_ARG(NEWTRUE, load_bool, Py_True)
5316 OP_ARG(NEWFALSE, load_bool, Py_False)
5317
5318 case STOP:
5319 break;
5320
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005321 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005322 if (s[0] == '\0')
5323 PyErr_SetNone(PyExc_EOFError);
5324 else
5325 PyErr_Format(UnpicklingError,
5326 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005327 return NULL;
5328 }
5329
5330 break; /* and we are done! */
5331 }
5332
Antoine Pitrou04248a82010-10-12 20:51:21 +00005333 if (_Unpickler_SkipConsumed(self) < 0)
5334 return NULL;
5335
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005336 /* XXX: It is not clear what this is actually for. */
5337 if ((err = PyErr_Occurred())) {
5338 if (err == PyExc_EOFError) {
5339 PyErr_SetNone(PyExc_EOFError);
5340 }
5341 return NULL;
5342 }
5343
5344 PDATA_POP(self->stack, value);
5345 return value;
5346}
5347
5348PyDoc_STRVAR(Unpickler_load_doc,
5349"load() -> object. Load a pickle."
5350"\n"
5351"Read a pickled object representation from the open file object given in\n"
5352"the constructor, and return the reconstituted object hierarchy specified\n"
5353"therein.\n");
5354
5355static PyObject *
5356Unpickler_load(UnpicklerObject *self)
5357{
5358 /* Check whether the Unpickler was initialized correctly. This prevents
5359 segfaulting if a subclass overridden __init__ with a function that does
5360 not call Unpickler.__init__(). Here, we simply ensure that self->read
5361 is not NULL. */
5362 if (self->read == NULL) {
5363 PyErr_Format(UnpicklingError,
5364 "Unpickler.__init__() was not called by %s.__init__()",
5365 Py_TYPE(self)->tp_name);
5366 return NULL;
5367 }
5368
5369 return load(self);
5370}
5371
5372/* The name of find_class() is misleading. In newer pickle protocols, this
5373 function is used for loading any global (i.e., functions), not just
5374 classes. The name is kept only for backward compatibility. */
5375
5376PyDoc_STRVAR(Unpickler_find_class_doc,
5377"find_class(module_name, global_name) -> object.\n"
5378"\n"
5379"Return an object from a specified module, importing the module if\n"
5380"necessary. Subclasses may override this method (e.g. to restrict\n"
5381"unpickling of arbitrary classes and functions).\n"
5382"\n"
5383"This method is called whenever a class or a function object is\n"
5384"needed. Both arguments passed are str objects.\n");
5385
5386static PyObject *
5387Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5388{
5389 PyObject *global;
5390 PyObject *modules_dict;
5391 PyObject *module;
5392 PyObject *module_name, *global_name;
5393
5394 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5395 &module_name, &global_name))
5396 return NULL;
5397
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005398 /* Try to map the old names used in Python 2.x to the new ones used in
5399 Python 3.x. We do this only with old pickle protocols and when the
5400 user has not disabled the feature. */
5401 if (self->proto < 3 && self->fix_imports) {
5402 PyObject *key;
5403 PyObject *item;
5404
5405 /* Check if the global (i.e., a function or a class) was renamed
5406 or moved to another module. */
5407 key = PyTuple_Pack(2, module_name, global_name);
5408 if (key == NULL)
5409 return NULL;
5410 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5411 Py_DECREF(key);
5412 if (item) {
5413 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5414 PyErr_Format(PyExc_RuntimeError,
5415 "_compat_pickle.NAME_MAPPING values should be "
5416 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5417 return NULL;
5418 }
5419 module_name = PyTuple_GET_ITEM(item, 0);
5420 global_name = PyTuple_GET_ITEM(item, 1);
5421 if (!PyUnicode_Check(module_name) ||
5422 !PyUnicode_Check(global_name)) {
5423 PyErr_Format(PyExc_RuntimeError,
5424 "_compat_pickle.NAME_MAPPING values should be "
5425 "pairs of str, not (%.200s, %.200s)",
5426 Py_TYPE(module_name)->tp_name,
5427 Py_TYPE(global_name)->tp_name);
5428 return NULL;
5429 }
5430 }
5431 else if (PyErr_Occurred()) {
5432 return NULL;
5433 }
5434
5435 /* Check if the module was renamed. */
5436 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5437 if (item) {
5438 if (!PyUnicode_Check(item)) {
5439 PyErr_Format(PyExc_RuntimeError,
5440 "_compat_pickle.IMPORT_MAPPING values should be "
5441 "strings, not %.200s", Py_TYPE(item)->tp_name);
5442 return NULL;
5443 }
5444 module_name = item;
5445 }
5446 else if (PyErr_Occurred()) {
5447 return NULL;
5448 }
5449 }
5450
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005451 modules_dict = PySys_GetObject("modules");
5452 if (modules_dict == NULL)
5453 return NULL;
5454
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005455 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005456 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005457 if (PyErr_Occurred())
5458 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005459 module = PyImport_Import(module_name);
5460 if (module == NULL)
5461 return NULL;
5462 global = PyObject_GetAttr(module, global_name);
5463 Py_DECREF(module);
5464 }
5465 else {
5466 global = PyObject_GetAttr(module, global_name);
5467 }
5468 return global;
5469}
5470
5471static struct PyMethodDef Unpickler_methods[] = {
5472 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5473 Unpickler_load_doc},
5474 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5475 Unpickler_find_class_doc},
5476 {NULL, NULL} /* sentinel */
5477};
5478
5479static void
5480Unpickler_dealloc(UnpicklerObject *self)
5481{
5482 PyObject_GC_UnTrack((PyObject *)self);
5483 Py_XDECREF(self->readline);
5484 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005485 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005486 Py_XDECREF(self->stack);
5487 Py_XDECREF(self->pers_func);
5488 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005489 if (self->buffer.buf != NULL) {
5490 PyBuffer_Release(&self->buffer);
5491 self->buffer.buf = NULL;
5492 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005493
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005494 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005495 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005496 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005497 free(self->encoding);
5498 free(self->errors);
5499
5500 Py_TYPE(self)->tp_free((PyObject *)self);
5501}
5502
5503static int
5504Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5505{
5506 Py_VISIT(self->readline);
5507 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005508 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509 Py_VISIT(self->stack);
5510 Py_VISIT(self->pers_func);
5511 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005512 return 0;
5513}
5514
5515static int
5516Unpickler_clear(UnpicklerObject *self)
5517{
5518 Py_CLEAR(self->readline);
5519 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005520 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005521 Py_CLEAR(self->stack);
5522 Py_CLEAR(self->pers_func);
5523 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005524 if (self->buffer.buf != NULL) {
5525 PyBuffer_Release(&self->buffer);
5526 self->buffer.buf = NULL;
5527 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005528
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005529 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005530 PyMem_Free(self->marks);
5531 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005532 PyMem_Free(self->input_line);
5533 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005534 free(self->encoding);
5535 self->encoding = NULL;
5536 free(self->errors);
5537 self->errors = NULL;
5538
5539 return 0;
5540}
5541
5542PyDoc_STRVAR(Unpickler_doc,
5543"Unpickler(file, *, encoding='ASCII', errors='strict')"
5544"\n"
5545"This takes a binary file for reading a pickle data stream.\n"
5546"\n"
5547"The protocol version of the pickle is detected automatically, so no\n"
5548"proto argument is needed.\n"
5549"\n"
5550"The file-like object must have two methods, a read() method\n"
5551"that takes an integer argument, and a readline() method that\n"
5552"requires no arguments. Both methods should return bytes.\n"
5553"Thus file-like object can be a binary file object opened for\n"
5554"reading, a BytesIO object, or any other custom object that\n"
5555"meets this interface.\n"
5556"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005557"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5558"which are used to control compatiblity support for pickle stream\n"
5559"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5560"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5561"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5562"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5563"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005564
5565static int
5566Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5567{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005568 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005569 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005570 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005571 char *encoding = NULL;
5572 char *errors = NULL;
5573
5574 /* XXX: That is an horrible error message. But, I don't know how to do
5575 better... */
5576 if (Py_SIZE(args) != 1) {
5577 PyErr_Format(PyExc_TypeError,
5578 "%s takes exactly one positional argument (%zd given)",
5579 Py_TYPE(self)->tp_name, Py_SIZE(args));
5580 return -1;
5581 }
5582
5583 /* Arguments parsing needs to be done in the __init__() method to allow
5584 subclasses to define their own __init__() method, which may (or may
5585 not) support Unpickler arguments. However, this means we need to be
5586 extra careful in the other Unpickler methods, since a subclass could
5587 forget to call Unpickler.__init__() thus breaking our internal
5588 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005589 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005590 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005591 return -1;
5592
5593 /* In case of multiple __init__() calls, clear previous content. */
5594 if (self->read != NULL)
5595 (void)Unpickler_clear(self);
5596
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005597 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005598 return -1;
5599
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005600 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005601 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005602
5603 self->fix_imports = PyObject_IsTrue(fix_imports);
5604 if (self->fix_imports == -1)
5605 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005606
5607 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5608 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5609 "persistent_load");
5610 if (self->pers_func == NULL)
5611 return -1;
5612 }
5613 else {
5614 self->pers_func = NULL;
5615 }
5616
5617 self->stack = (Pdata *)Pdata_New();
5618 if (self->stack == NULL)
5619 return -1;
5620
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005621 self->memo_size = 32;
5622 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005623 if (self->memo == NULL)
5624 return -1;
5625
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005626 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005627 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005628
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005629 return 0;
5630}
5631
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005632/* Define a proxy object for the Unpickler's internal memo object. This is to
5633 * avoid breaking code like:
5634 * unpickler.memo.clear()
5635 * and
5636 * unpickler.memo = saved_memo
5637 * Is this a good idea? Not really, but we don't want to break code that uses
5638 * it. Note that we don't implement the entire mapping API here. This is
5639 * intentional, as these should be treated as black-box implementation details.
5640 *
5641 * We do, however, have to implement pickling/unpickling support because of
5642 * real-world code like cvs2svn.
5643 */
5644
5645typedef struct {
5646 PyObject_HEAD
5647 UnpicklerObject *unpickler;
5648} UnpicklerMemoProxyObject;
5649
5650PyDoc_STRVAR(ump_clear_doc,
5651"memo.clear() -> None. Remove all items from memo.");
5652
5653static PyObject *
5654ump_clear(UnpicklerMemoProxyObject *self)
5655{
5656 _Unpickler_MemoCleanup(self->unpickler);
5657 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5658 if (self->unpickler->memo == NULL)
5659 return NULL;
5660 Py_RETURN_NONE;
5661}
5662
5663PyDoc_STRVAR(ump_copy_doc,
5664"memo.copy() -> new_memo. Copy the memo to a new object.");
5665
5666static PyObject *
5667ump_copy(UnpicklerMemoProxyObject *self)
5668{
5669 Py_ssize_t i;
5670 PyObject *new_memo = PyDict_New();
5671 if (new_memo == NULL)
5672 return NULL;
5673
5674 for (i = 0; i < self->unpickler->memo_size; i++) {
5675 int status;
5676 PyObject *key, *value;
5677
5678 value = self->unpickler->memo[i];
5679 if (value == NULL)
5680 continue;
5681
5682 key = PyLong_FromSsize_t(i);
5683 if (key == NULL)
5684 goto error;
5685 status = PyDict_SetItem(new_memo, key, value);
5686 Py_DECREF(key);
5687 if (status < 0)
5688 goto error;
5689 }
5690 return new_memo;
5691
5692error:
5693 Py_DECREF(new_memo);
5694 return NULL;
5695}
5696
5697PyDoc_STRVAR(ump_reduce_doc,
5698"memo.__reduce__(). Pickling support.");
5699
5700static PyObject *
5701ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5702{
5703 PyObject *reduce_value;
5704 PyObject *constructor_args;
5705 PyObject *contents = ump_copy(self);
5706 if (contents == NULL)
5707 return NULL;
5708
5709 reduce_value = PyTuple_New(2);
5710 if (reduce_value == NULL) {
5711 Py_DECREF(contents);
5712 return NULL;
5713 }
5714 constructor_args = PyTuple_New(1);
5715 if (constructor_args == NULL) {
5716 Py_DECREF(contents);
5717 Py_DECREF(reduce_value);
5718 return NULL;
5719 }
5720 PyTuple_SET_ITEM(constructor_args, 0, contents);
5721 Py_INCREF((PyObject *)&PyDict_Type);
5722 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5723 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5724 return reduce_value;
5725}
5726
5727static PyMethodDef unpicklerproxy_methods[] = {
5728 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5729 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5730 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5731 {NULL, NULL} /* sentinel */
5732};
5733
5734static void
5735UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5736{
5737 PyObject_GC_UnTrack(self);
5738 Py_XDECREF(self->unpickler);
5739 PyObject_GC_Del((PyObject *)self);
5740}
5741
5742static int
5743UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5744 visitproc visit, void *arg)
5745{
5746 Py_VISIT(self->unpickler);
5747 return 0;
5748}
5749
5750static int
5751UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5752{
5753 Py_CLEAR(self->unpickler);
5754 return 0;
5755}
5756
5757static PyTypeObject UnpicklerMemoProxyType = {
5758 PyVarObject_HEAD_INIT(NULL, 0)
5759 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5760 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5761 0,
5762 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5763 0, /* tp_print */
5764 0, /* tp_getattr */
5765 0, /* tp_setattr */
5766 0, /* tp_compare */
5767 0, /* tp_repr */
5768 0, /* tp_as_number */
5769 0, /* tp_as_sequence */
5770 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005771 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005772 0, /* tp_call */
5773 0, /* tp_str */
5774 PyObject_GenericGetAttr, /* tp_getattro */
5775 PyObject_GenericSetAttr, /* tp_setattro */
5776 0, /* tp_as_buffer */
5777 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5778 0, /* tp_doc */
5779 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5780 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5781 0, /* tp_richcompare */
5782 0, /* tp_weaklistoffset */
5783 0, /* tp_iter */
5784 0, /* tp_iternext */
5785 unpicklerproxy_methods, /* tp_methods */
5786};
5787
5788static PyObject *
5789UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5790{
5791 UnpicklerMemoProxyObject *self;
5792
5793 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5794 &UnpicklerMemoProxyType);
5795 if (self == NULL)
5796 return NULL;
5797 Py_INCREF(unpickler);
5798 self->unpickler = unpickler;
5799 PyObject_GC_Track(self);
5800 return (PyObject *)self;
5801}
5802
5803/*****************************************************************************/
5804
5805
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005806static PyObject *
5807Unpickler_get_memo(UnpicklerObject *self)
5808{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005809 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005810}
5811
5812static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005813Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005814{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005815 PyObject **new_memo;
5816 Py_ssize_t new_memo_size = 0;
5817 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005818
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005819 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005820 PyErr_SetString(PyExc_TypeError,
5821 "attribute deletion is not supported");
5822 return -1;
5823 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005824
5825 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5826 UnpicklerObject *unpickler =
5827 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5828
5829 new_memo_size = unpickler->memo_size;
5830 new_memo = _Unpickler_NewMemo(new_memo_size);
5831 if (new_memo == NULL)
5832 return -1;
5833
5834 for (i = 0; i < new_memo_size; i++) {
5835 Py_XINCREF(unpickler->memo[i]);
5836 new_memo[i] = unpickler->memo[i];
5837 }
5838 }
5839 else if (PyDict_Check(obj)) {
5840 Py_ssize_t i = 0;
5841 PyObject *key, *value;
5842
5843 new_memo_size = PyDict_Size(obj);
5844 new_memo = _Unpickler_NewMemo(new_memo_size);
5845 if (new_memo == NULL)
5846 return -1;
5847
5848 while (PyDict_Next(obj, &i, &key, &value)) {
5849 Py_ssize_t idx;
5850 if (!PyLong_Check(key)) {
5851 PyErr_SetString(PyExc_TypeError,
5852 "memo key must be integers");
5853 goto error;
5854 }
5855 idx = PyLong_AsSsize_t(key);
5856 if (idx == -1 && PyErr_Occurred())
5857 goto error;
5858 if (_Unpickler_MemoPut(self, idx, value) < 0)
5859 goto error;
5860 }
5861 }
5862 else {
5863 PyErr_Format(PyExc_TypeError,
5864 "'memo' attribute must be an UnpicklerMemoProxy object"
5865 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005866 return -1;
5867 }
5868
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005869 _Unpickler_MemoCleanup(self);
5870 self->memo_size = new_memo_size;
5871 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005872
5873 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005874
5875 error:
5876 if (new_memo_size) {
5877 i = new_memo_size;
5878 while (--i >= 0) {
5879 Py_XDECREF(new_memo[i]);
5880 }
5881 PyMem_FREE(new_memo);
5882 }
5883 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005884}
5885
5886static PyObject *
5887Unpickler_get_persload(UnpicklerObject *self)
5888{
5889 if (self->pers_func == NULL)
5890 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5891 else
5892 Py_INCREF(self->pers_func);
5893 return self->pers_func;
5894}
5895
5896static int
5897Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5898{
5899 PyObject *tmp;
5900
5901 if (value == NULL) {
5902 PyErr_SetString(PyExc_TypeError,
5903 "attribute deletion is not supported");
5904 return -1;
5905 }
5906 if (!PyCallable_Check(value)) {
5907 PyErr_SetString(PyExc_TypeError,
5908 "persistent_load must be a callable taking "
5909 "one argument");
5910 return -1;
5911 }
5912
5913 tmp = self->pers_func;
5914 Py_INCREF(value);
5915 self->pers_func = value;
5916 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5917
5918 return 0;
5919}
5920
5921static PyGetSetDef Unpickler_getsets[] = {
5922 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5923 {"persistent_load", (getter)Unpickler_get_persload,
5924 (setter)Unpickler_set_persload},
5925 {NULL}
5926};
5927
5928static PyTypeObject Unpickler_Type = {
5929 PyVarObject_HEAD_INIT(NULL, 0)
5930 "_pickle.Unpickler", /*tp_name*/
5931 sizeof(UnpicklerObject), /*tp_basicsize*/
5932 0, /*tp_itemsize*/
5933 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5934 0, /*tp_print*/
5935 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005936 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005937 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005938 0, /*tp_repr*/
5939 0, /*tp_as_number*/
5940 0, /*tp_as_sequence*/
5941 0, /*tp_as_mapping*/
5942 0, /*tp_hash*/
5943 0, /*tp_call*/
5944 0, /*tp_str*/
5945 0, /*tp_getattro*/
5946 0, /*tp_setattro*/
5947 0, /*tp_as_buffer*/
5948 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5949 Unpickler_doc, /*tp_doc*/
5950 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5951 (inquiry)Unpickler_clear, /*tp_clear*/
5952 0, /*tp_richcompare*/
5953 0, /*tp_weaklistoffset*/
5954 0, /*tp_iter*/
5955 0, /*tp_iternext*/
5956 Unpickler_methods, /*tp_methods*/
5957 0, /*tp_members*/
5958 Unpickler_getsets, /*tp_getset*/
5959 0, /*tp_base*/
5960 0, /*tp_dict*/
5961 0, /*tp_descr_get*/
5962 0, /*tp_descr_set*/
5963 0, /*tp_dictoffset*/
5964 (initproc)Unpickler_init, /*tp_init*/
5965 PyType_GenericAlloc, /*tp_alloc*/
5966 PyType_GenericNew, /*tp_new*/
5967 PyObject_GC_Del, /*tp_free*/
5968 0, /*tp_is_gc*/
5969};
5970
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005971PyDoc_STRVAR(pickle_dump_doc,
5972"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5973"\n"
5974"Write a pickled representation of obj to the open file object file. This\n"
5975"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5976"efficient.\n"
5977"\n"
5978"The optional protocol argument tells the pickler to use the given protocol;\n"
5979"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5980"backward-incompatible protocol designed for Python 3.0.\n"
5981"\n"
5982"Specifying a negative protocol version selects the highest protocol version\n"
5983"supported. The higher the protocol used, the more recent the version of\n"
5984"Python needed to read the pickle produced.\n"
5985"\n"
5986"The file argument must have a write() method that accepts a single bytes\n"
5987"argument. It can thus be a file object opened for binary writing, a\n"
5988"io.BytesIO instance, or any other custom object that meets this interface.\n"
5989"\n"
5990"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5991"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5992"so that the pickle data stream is readable with Python 2.x.\n");
5993
5994static PyObject *
5995pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5996{
5997 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5998 PyObject *obj;
5999 PyObject *file;
6000 PyObject *proto = NULL;
6001 PyObject *fix_imports = Py_True;
6002 PicklerObject *pickler;
6003
6004 /* fix_imports is a keyword-only argument. */
6005 if (Py_SIZE(args) > 3) {
6006 PyErr_Format(PyExc_TypeError,
6007 "pickle.dump() takes at most 3 positional "
6008 "argument (%zd given)", Py_SIZE(args));
6009 return NULL;
6010 }
6011
6012 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6013 &obj, &file, &proto, &fix_imports))
6014 return NULL;
6015
6016 pickler = _Pickler_New();
6017 if (pickler == NULL)
6018 return NULL;
6019
6020 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6021 goto error;
6022
6023 if (_Pickler_SetOutputStream(pickler, file) < 0)
6024 goto error;
6025
6026 if (dump(pickler, obj) < 0)
6027 goto error;
6028
6029 if (_Pickler_FlushToFile(pickler) < 0)
6030 goto error;
6031
6032 Py_DECREF(pickler);
6033 Py_RETURN_NONE;
6034
6035 error:
6036 Py_XDECREF(pickler);
6037 return NULL;
6038}
6039
6040PyDoc_STRVAR(pickle_dumps_doc,
6041"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6042"\n"
6043"Return the pickled representation of the object as a bytes\n"
6044"object, instead of writing it to a file.\n"
6045"\n"
6046"The optional protocol argument tells the pickler to use the given protocol;\n"
6047"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6048"backward-incompatible protocol designed for Python 3.0.\n"
6049"\n"
6050"Specifying a negative protocol version selects the highest protocol version\n"
6051"supported. The higher the protocol used, the more recent the version of\n"
6052"Python needed to read the pickle produced.\n"
6053"\n"
6054"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6055"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6056"so that the pickle data stream is readable with Python 2.x.\n");
6057
6058static PyObject *
6059pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6060{
6061 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6062 PyObject *obj;
6063 PyObject *proto = NULL;
6064 PyObject *result;
6065 PyObject *fix_imports = Py_True;
6066 PicklerObject *pickler;
6067
6068 /* fix_imports is a keyword-only argument. */
6069 if (Py_SIZE(args) > 2) {
6070 PyErr_Format(PyExc_TypeError,
6071 "pickle.dumps() takes at most 2 positional "
6072 "argument (%zd given)", Py_SIZE(args));
6073 return NULL;
6074 }
6075
6076 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6077 &obj, &proto, &fix_imports))
6078 return NULL;
6079
6080 pickler = _Pickler_New();
6081 if (pickler == NULL)
6082 return NULL;
6083
6084 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6085 goto error;
6086
6087 if (dump(pickler, obj) < 0)
6088 goto error;
6089
6090 result = _Pickler_GetString(pickler);
6091 Py_DECREF(pickler);
6092 return result;
6093
6094 error:
6095 Py_XDECREF(pickler);
6096 return NULL;
6097}
6098
6099PyDoc_STRVAR(pickle_load_doc,
6100"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6101"\n"
6102"Read a pickled object representation from the open file object file and\n"
6103"return the reconstituted object hierarchy specified therein. This is\n"
6104"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6105"\n"
6106"The protocol version of the pickle is detected automatically, so no protocol\n"
6107"argument is needed. Bytes past the pickled object's representation are\n"
6108"ignored.\n"
6109"\n"
6110"The argument file must have two methods, a read() method that takes an\n"
6111"integer argument, and a readline() method that requires no arguments. Both\n"
6112"methods should return bytes. Thus *file* can be a binary file object opened\n"
6113"for reading, a BytesIO object, or any other custom object that meets this\n"
6114"interface.\n"
6115"\n"
6116"Optional keyword arguments are fix_imports, encoding and errors,\n"
6117"which are used to control compatiblity support for pickle stream generated\n"
6118"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6119"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6120"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6121"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6122
6123static PyObject *
6124pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6125{
6126 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6127 PyObject *file;
6128 PyObject *fix_imports = Py_True;
6129 PyObject *result;
6130 char *encoding = NULL;
6131 char *errors = NULL;
6132 UnpicklerObject *unpickler;
6133
6134 /* fix_imports, encoding and errors are a keyword-only argument. */
6135 if (Py_SIZE(args) != 1) {
6136 PyErr_Format(PyExc_TypeError,
6137 "pickle.load() takes exactly one positional "
6138 "argument (%zd given)", Py_SIZE(args));
6139 return NULL;
6140 }
6141
6142 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6143 &file, &fix_imports, &encoding, &errors))
6144 return NULL;
6145
6146 unpickler = _Unpickler_New();
6147 if (unpickler == NULL)
6148 return NULL;
6149
6150 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6151 goto error;
6152
6153 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6154 goto error;
6155
6156 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6157 if (unpickler->fix_imports == -1)
6158 goto error;
6159
6160 result = load(unpickler);
6161 Py_DECREF(unpickler);
6162 return result;
6163
6164 error:
6165 Py_XDECREF(unpickler);
6166 return NULL;
6167}
6168
6169PyDoc_STRVAR(pickle_loads_doc,
6170"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6171"\n"
6172"Read a pickled object hierarchy from a bytes object and return the\n"
6173"reconstituted object hierarchy specified therein\n"
6174"\n"
6175"The protocol version of the pickle is detected automatically, so no protocol\n"
6176"argument is needed. Bytes past the pickled object's representation are\n"
6177"ignored.\n"
6178"\n"
6179"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6180"are used to control compatiblity support for pickle stream generated\n"
6181"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6182"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6183"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6184"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6185
6186static PyObject *
6187pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6188{
6189 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6190 PyObject *input;
6191 PyObject *fix_imports = Py_True;
6192 PyObject *result;
6193 char *encoding = NULL;
6194 char *errors = NULL;
6195 UnpicklerObject *unpickler;
6196
6197 /* fix_imports, encoding and errors are a keyword-only argument. */
6198 if (Py_SIZE(args) != 1) {
6199 PyErr_Format(PyExc_TypeError,
6200 "pickle.loads() takes exactly one positional "
6201 "argument (%zd given)", Py_SIZE(args));
6202 return NULL;
6203 }
6204
6205 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6206 &input, &fix_imports, &encoding, &errors))
6207 return NULL;
6208
6209 unpickler = _Unpickler_New();
6210 if (unpickler == NULL)
6211 return NULL;
6212
6213 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6214 goto error;
6215
6216 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6217 goto error;
6218
6219 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6220 if (unpickler->fix_imports == -1)
6221 goto error;
6222
6223 result = load(unpickler);
6224 Py_DECREF(unpickler);
6225 return result;
6226
6227 error:
6228 Py_XDECREF(unpickler);
6229 return NULL;
6230}
6231
6232
6233static struct PyMethodDef pickle_methods[] = {
6234 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6235 pickle_dump_doc},
6236 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6237 pickle_dumps_doc},
6238 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6239 pickle_load_doc},
6240 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6241 pickle_loads_doc},
6242 {NULL, NULL} /* sentinel */
6243};
6244
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006245static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006246initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006247{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006248 PyObject *copyreg = NULL;
6249 PyObject *compat_pickle = NULL;
6250
6251 /* XXX: We should ensure that the types of the dictionaries imported are
6252 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6253 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006254
6255 copyreg = PyImport_ImportModule("copyreg");
6256 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006257 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006258 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6259 if (!dispatch_table)
6260 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006261 extension_registry = \
6262 PyObject_GetAttrString(copyreg, "_extension_registry");
6263 if (!extension_registry)
6264 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006265 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6266 if (!inverted_registry)
6267 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006268 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6269 if (!extension_cache)
6270 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006271 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006272
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006273 /* Load the 2.x -> 3.x stdlib module mapping tables */
6274 compat_pickle = PyImport_ImportModule("_compat_pickle");
6275 if (!compat_pickle)
6276 goto error;
6277 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6278 if (!name_mapping_2to3)
6279 goto error;
6280 if (!PyDict_CheckExact(name_mapping_2to3)) {
6281 PyErr_Format(PyExc_RuntimeError,
6282 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6283 Py_TYPE(name_mapping_2to3)->tp_name);
6284 goto error;
6285 }
6286 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6287 "IMPORT_MAPPING");
6288 if (!import_mapping_2to3)
6289 goto error;
6290 if (!PyDict_CheckExact(import_mapping_2to3)) {
6291 PyErr_Format(PyExc_RuntimeError,
6292 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6293 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6294 goto error;
6295 }
6296 /* ... and the 3.x -> 2.x mapping tables */
6297 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6298 "REVERSE_NAME_MAPPING");
6299 if (!name_mapping_3to2)
6300 goto error;
6301 if (!PyDict_CheckExact(name_mapping_3to2)) {
6302 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006303 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006304 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6305 goto error;
6306 }
6307 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6308 "REVERSE_IMPORT_MAPPING");
6309 if (!import_mapping_3to2)
6310 goto error;
6311 if (!PyDict_CheckExact(import_mapping_3to2)) {
6312 PyErr_Format(PyExc_RuntimeError,
6313 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6314 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6315 goto error;
6316 }
6317 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006318
6319 empty_tuple = PyTuple_New(0);
6320 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006321 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006322 two_tuple = PyTuple_New(2);
6323 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006324 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006325 /* We use this temp container with no regard to refcounts, or to
6326 * keeping containees alive. Exempt from GC, because we don't
6327 * want anything looking at two_tuple() by magic.
6328 */
6329 PyObject_GC_UnTrack(two_tuple);
6330
6331 return 0;
6332
6333 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006334 Py_CLEAR(copyreg);
6335 Py_CLEAR(dispatch_table);
6336 Py_CLEAR(extension_registry);
6337 Py_CLEAR(inverted_registry);
6338 Py_CLEAR(extension_cache);
6339 Py_CLEAR(compat_pickle);
6340 Py_CLEAR(name_mapping_2to3);
6341 Py_CLEAR(import_mapping_2to3);
6342 Py_CLEAR(name_mapping_3to2);
6343 Py_CLEAR(import_mapping_3to2);
6344 Py_CLEAR(empty_tuple);
6345 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006346 return -1;
6347}
6348
6349static struct PyModuleDef _picklemodule = {
6350 PyModuleDef_HEAD_INIT,
6351 "_pickle",
6352 pickle_module_doc,
6353 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006354 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006355 NULL,
6356 NULL,
6357 NULL,
6358 NULL
6359};
6360
6361PyMODINIT_FUNC
6362PyInit__pickle(void)
6363{
6364 PyObject *m;
6365
6366 if (PyType_Ready(&Unpickler_Type) < 0)
6367 return NULL;
6368 if (PyType_Ready(&Pickler_Type) < 0)
6369 return NULL;
6370 if (PyType_Ready(&Pdata_Type) < 0)
6371 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006372 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6373 return NULL;
6374 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6375 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006376
6377 /* Create the module and add the functions. */
6378 m = PyModule_Create(&_picklemodule);
6379 if (m == NULL)
6380 return NULL;
6381
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006382 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006383 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6384 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006385 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006386 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6387 return NULL;
6388
6389 /* Initialize the exceptions. */
6390 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6391 if (PickleError == NULL)
6392 return NULL;
6393 PicklingError = \
6394 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6395 if (PicklingError == NULL)
6396 return NULL;
6397 UnpicklingError = \
6398 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6399 if (UnpicklingError == NULL)
6400 return NULL;
6401
6402 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6403 return NULL;
6404 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6405 return NULL;
6406 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6407 return NULL;
6408
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006409 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006410 return NULL;
6411
6412 return m;
6413}