blob: 18eaa38bdb6c11ffef1a1557024e5cf5de6e349a [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version limits.  Bump these when new opcodes are added
   to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* largest protocol number accepted */
    DEFAULT_PROTOCOL = 3    /* protocol used when none is requested */
};
12
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C'
};
76
77/* These aren't opcodes -- they're ways to pickle bools before protocol 2
78 * so that unpicklers written before bools were introduced unpickle them
79 * as ints, but unpicklers after can recognize that bools were intended.
80 * Note that protocol 2 added direct ways to pickle bools.
81 */
82#undef TRUE
83#define TRUE "I01\n"
84#undef FALSE
85#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in sync with pickle.Pickler._BATCHSIZE.
       Nothing will break if this gets out of sync with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams). */
    PREFETCH = 8192 * 16
};
108
109/* Exception classes for pickle. These should override the ones defined in
110 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *PickleError = NULL;
112static PyObject *PicklingError = NULL;
113static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000114
115/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* For EXT[124] opcodes. */
118/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000119static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000120/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000121static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000122/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000123static PyObject *extension_cache = NULL;
124
125/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
126static PyObject *name_mapping_2to3 = NULL;
127/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
128static PyObject *import_mapping_2to3 = NULL;
129/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
130static PyObject *name_mapping_3to2 = NULL;
131static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000132
/* XXX: Are these really necessary? */
134/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000135static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000136/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000137static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
146/* Internal data type used as the unpickling stack. */
147typedef struct {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000148 PyObject_VAR_HEAD
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000149 PyObject **data;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000150 Py_ssize_t allocated; /* number of slots in data allocated */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000151} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
164static PyTypeObject Pdata_Type = {
165 PyVarObject_HEAD_INIT(NULL, 0)
166 "_pickle.Pdata", /*tp_name*/
167 sizeof(Pdata), /*tp_basicsize*/
168 0, /*tp_itemsize*/
169 (destructor)Pdata_dealloc, /*tp_dealloc*/
170};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
261/* Push an object on stack, transferring its ownership to the stack. */
262#define PDATA_PUSH(D, O, ER) do { \
263 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
264
265/* Push an object on stack, adding a new reference to the object. */
266#define PDATA_APPEND(D, O, ER) do { \
267 Py_INCREF((O)); \
268 if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000304typedef struct {
305 PyObject *me_key;
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200306 Py_ssize_t me_value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000307} PyMemoEntry;
308
309typedef struct {
310 Py_ssize_t mt_mask;
311 Py_ssize_t mt_used;
312 Py_ssize_t mt_allocated;
313 PyMemoEntry *mt_table;
314} PyMemoTable;
315
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000316typedef struct PicklerObject {
317 PyObject_HEAD
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000318 PyMemoTable *memo; /* Memo table, keep track of the seen
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000319 objects to support self-referential objects
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000320 pickling. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000321 PyObject *pers_func; /* persistent_id() method, can be NULL */
322 PyObject *arg;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000323
324 PyObject *write; /* write() method of the output stream. */
325 PyObject *output_buffer; /* Write into a local bytearray buffer before
326 flushing to the stream. */
327 Py_ssize_t output_len; /* Length of output_buffer. */
328 Py_ssize_t max_output_len; /* Allocation size of output_buffer. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000329 int proto; /* Pickle protocol number, >= 0 */
330 int bin; /* Boolean, true if proto > 0 */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200331 Py_ssize_t buf_size; /* Size of the current buffered pickle data */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000332 int fast; /* Enable fast mode if set to a true value.
333 The fast mode disable the usage of memo,
334 therefore speeding the pickling process by
335 not generating superfluous PUT opcodes. It
336 should not be used if with self-referential
337 objects. */
338 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000339 int fix_imports; /* Indicate whether Pickler should fix
340 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000341 PyObject *fast_memo;
342} PicklerObject;
343
344typedef struct UnpicklerObject {
345 PyObject_HEAD
346 Pdata *stack; /* Pickle data stack, store unpickled objects. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000347
348 /* The unpickler memo is just an array of PyObject *s. Using a dict
349 is unnecessary, since the keys are contiguous ints. */
350 PyObject **memo;
351 Py_ssize_t memo_size;
352
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000353 PyObject *arg;
354 PyObject *pers_func; /* persistent_load() method, can be NULL. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000355
356 Py_buffer buffer;
357 char *input_buffer;
358 char *input_line;
359 Py_ssize_t input_len;
360 Py_ssize_t next_read_idx;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000361 Py_ssize_t prefetched_idx; /* index of first prefetched byte */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000362 PyObject *read; /* read() method of the input stream. */
363 PyObject *readline; /* readline() method of the input stream. */
Antoine Pitrou04248a82010-10-12 20:51:21 +0000364 PyObject *peek; /* peek() method of the input stream, or NULL */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000365
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000366 char *encoding; /* Name of the encoding to be used for
367 decoding strings pickled using Python
368 2.x. The default value is "ASCII" */
369 char *errors; /* Name of errors handling scheme to used when
370 decoding strings. The default value is
371 "strict". */
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -0500372 Py_ssize_t *marks; /* Mark stack, used for unpickling container
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000373 objects. */
374 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
375 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000376 int proto; /* Protocol of the pickle loaded. */
377 int fix_imports; /* Indicate whether Unpickler should fix
378 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000379} UnpicklerObject;
380
381/* Forward declarations */
382static int save(PicklerObject *, PyObject *, int);
383static int save_reduce(PicklerObject *, PyObject *, PyObject *);
384static PyTypeObject Pickler_Type;
385static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping void* to Py_ssize_t. This is used by the pickler
 for memoization. Using a custom hashtable rather than PyDict allows us to
 skip a bunch of unnecessary object creation. This makes a huge performance
 difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000394#define MT_MINSIZE 8
395#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
607/* Helpers for creating the argument tuple passed to functions. This has the
608 performance advantage of calling PyTuple_New() only once.
609
610 XXX(avassalotti): Inline directly in _Pickler_FastCall() and
611 _Unpickler_FastCall(). */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000612#define ARG_TUP(self, obj) do { \
613 if ((self)->arg || ((self)->arg=PyTuple_New(1))) { \
614 Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0)); \
615 PyTuple_SET_ITEM((self)->arg, 0, (obj)); \
616 } \
617 else { \
618 Py_DECREF((obj)); \
619 } \
620 } while (0)
621
622#define FREE_ARG_TUP(self) do { \
623 if ((self)->arg->ob_refcnt > 1) \
624 Py_CLEAR((self)->arg); \
625 } while (0)
626
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefits?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e, call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But, that is probably more related to the function call
   overhead, than the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
1040
1041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001222 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001224 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1235 "%" PY_FORMAT_SIZE_T "d\n", *value);
1236 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001270 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001271 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1285 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 len = strlen(pdata);
1287 }
1288 else {
1289 if (x < 256) {
1290 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001291 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001292 len = 2;
1293 }
1294 else if (x <= 0xffffffffL) {
1295 pdata[0] = LONG_BINPUT;
1296 pdata[1] = (unsigned char)(x & 0xff);
1297 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1298 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1299 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1300 len = 5;
1301 }
1302 else { /* unlikely */
1303 PyErr_SetString(PicklingError,
1304 "memo id too large for LONG_BINPUT");
1305 return -1;
1306 }
1307 }
1308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 goto error;
1311
1312 if (0) {
1313 error:
1314 status = -1;
1315 }
1316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 return status;
1318}
1319
1320static PyObject *
1321whichmodule(PyObject *global, PyObject *global_name)
1322{
1323 Py_ssize_t i, j;
1324 static PyObject *module_str = NULL;
1325 static PyObject *main_str = NULL;
1326 PyObject *module_name;
1327 PyObject *modules_dict;
1328 PyObject *module;
1329 PyObject *obj;
1330
1331 if (module_str == NULL) {
1332 module_str = PyUnicode_InternFromString("__module__");
1333 if (module_str == NULL)
1334 return NULL;
1335 main_str = PyUnicode_InternFromString("__main__");
1336 if (main_str == NULL)
1337 return NULL;
1338 }
1339
1340 module_name = PyObject_GetAttr(global, module_str);
1341
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001342 /* In some rare cases (e.g., bound methods of extension types),
1343 __module__ can be None. If it is so, then search sys.modules
1344 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 if (module_name == Py_None) {
1346 Py_DECREF(module_name);
1347 goto search;
1348 }
1349
1350 if (module_name) {
1351 return module_name;
1352 }
1353 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1354 PyErr_Clear();
1355 else
1356 return NULL;
1357
1358 search:
1359 modules_dict = PySys_GetObject("modules");
1360 if (modules_dict == NULL)
1361 return NULL;
1362
1363 i = 0;
1364 module_name = NULL;
1365 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001366 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001367 continue;
1368
1369 obj = PyObject_GetAttr(module, global_name);
1370 if (obj == NULL) {
1371 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1372 PyErr_Clear();
1373 else
1374 return NULL;
1375 continue;
1376 }
1377
1378 if (obj != global) {
1379 Py_DECREF(obj);
1380 continue;
1381 }
1382
1383 Py_DECREF(obj);
1384 break;
1385 }
1386
1387 /* If no module is found, use __main__. */
1388 if (!j) {
1389 module_name = main_str;
1390 }
1391
1392 Py_INCREF(module_name);
1393 return module_name;
1394}
1395
1396/* fast_save_enter() and fast_save_leave() are guards against recursive
1397 objects when Pickler is used with the "fast mode" (i.e., with object
1398 memoization disabled). If the nesting of a list or dict object exceed
1399 FAST_NESTING_LIMIT, these guards will start keeping an internal
1400 reference to the seen list or dict objects and check whether these objects
1401 are recursive. These are not strictly necessary, since save() has a
1402 hard-coded recursion limit, but they give a nicer error message than the
1403 typical RuntimeError. */
1404static int
1405fast_save_enter(PicklerObject *self, PyObject *obj)
1406{
1407 /* if fast_nesting < 0, we're doing an error exit. */
1408 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1409 PyObject *key = NULL;
1410 if (self->fast_memo == NULL) {
1411 self->fast_memo = PyDict_New();
1412 if (self->fast_memo == NULL) {
1413 self->fast_nesting = -1;
1414 return 0;
1415 }
1416 }
1417 key = PyLong_FromVoidPtr(obj);
1418 if (key == NULL)
1419 return 0;
1420 if (PyDict_GetItem(self->fast_memo, key)) {
1421 Py_DECREF(key);
1422 PyErr_Format(PyExc_ValueError,
1423 "fast mode: can't pickle cyclic objects "
1424 "including object type %.200s at %p",
1425 obj->ob_type->tp_name, obj);
1426 self->fast_nesting = -1;
1427 return 0;
1428 }
1429 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1430 Py_DECREF(key);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 Py_DECREF(key);
1435 }
1436 return 1;
1437}
1438
1439static int
1440fast_save_leave(PicklerObject *self, PyObject *obj)
1441{
1442 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1443 PyObject *key = PyLong_FromVoidPtr(obj);
1444 if (key == NULL)
1445 return 0;
1446 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1447 Py_DECREF(key);
1448 return 0;
1449 }
1450 Py_DECREF(key);
1451 }
1452 return 1;
1453}
1454
1455static int
1456save_none(PicklerObject *self, PyObject *obj)
1457{
1458 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461
1462 return 0;
1463}
1464
1465static int
1466save_bool(PicklerObject *self, PyObject *obj)
1467{
1468 static const char *buf[2] = { FALSE, TRUE };
1469 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1470 int p = (obj == Py_True);
1471
1472 if (self->proto >= 2) {
1473 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001475 return -1;
1476 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479
1480 return 0;
1481}
1482
1483static int
1484save_int(PicklerObject *self, long x)
1485{
1486 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001487 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488
1489 if (!self->bin
1490#if SIZEOF_LONG > 4
1491 || x > 0x7fffffffL || x < -0x80000000L
1492#endif
1493 ) {
1494 /* Text-mode pickle, or long too big to fit in the 4-byte
1495 * signed BININT format: store as a string.
1496 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001497 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1498 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 return -1;
1501 }
1502 else {
1503 /* Binary pickle and x fits in a signed 4-byte int. */
1504 pdata[1] = (unsigned char)(x & 0xff);
1505 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1508
1509 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1510 if (pdata[2] == 0) {
1511 pdata[0] = BININT1;
1512 len = 2;
1513 }
1514 else {
1515 pdata[0] = BININT2;
1516 len = 3;
1517 }
1518 }
1519 else {
1520 pdata[0] = BININT;
1521 len = 5;
1522 }
1523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001524 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 return -1;
1526 }
1527
1528 return 0;
1529}
1530
1531static int
1532save_long(PicklerObject *self, PyObject *obj)
1533{
1534 PyObject *repr = NULL;
1535 Py_ssize_t size;
1536 long val = PyLong_AsLong(obj);
1537 int status = 0;
1538
1539 const char long_op = LONG;
1540
1541 if (val == -1 && PyErr_Occurred()) {
1542 /* out of range for int pickling */
1543 PyErr_Clear();
1544 }
1545 else
1546 return save_int(self, val);
1547
1548 if (self->proto >= 2) {
1549 /* Linear-time pickling. */
1550 size_t nbits;
1551 size_t nbytes;
1552 unsigned char *pdata;
1553 char header[5];
1554 int i;
1555 int sign = _PyLong_Sign(obj);
1556
1557 if (sign == 0) {
1558 header[0] = LONG1;
1559 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001560 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001561 goto error;
1562 return 0;
1563 }
1564 nbits = _PyLong_NumBits(obj);
1565 if (nbits == (size_t)-1 && PyErr_Occurred())
1566 goto error;
1567 /* How many bytes do we need? There are nbits >> 3 full
1568 * bytes of data, and nbits & 7 leftover bits. If there
1569 * are any leftover bits, then we clearly need another
1570 * byte. Wnat's not so obvious is that we *probably*
1571 * need another byte even if there aren't any leftovers:
1572 * the most-significant bit of the most-significant byte
1573 * acts like a sign bit, and it's usually got a sense
1574 * opposite of the one we need. The exception is longs
1575 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1576 * its own 256's-complement, so has the right sign bit
1577 * even without the extra byte. That's a pain to check
1578 * for in advance, though, so we always grab an extra
1579 * byte at the start, and cut it back later if possible.
1580 */
1581 nbytes = (nbits >> 3) + 1;
Antoine Pitroubf6ecf92012-11-24 20:40:21 +01001582 if (nbytes > 0x7fffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001583 PyErr_SetString(PyExc_OverflowError,
1584 "long too large to pickle");
1585 goto error;
1586 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001587 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001588 if (repr == NULL)
1589 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001590 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001591 i = _PyLong_AsByteArray((PyLongObject *)obj,
1592 pdata, nbytes,
1593 1 /* little endian */ , 1 /* signed */ );
1594 if (i < 0)
1595 goto error;
1596 /* If the long is negative, this may be a byte more than
1597 * needed. This is so iff the MSB is all redundant sign
1598 * bits.
1599 */
1600 if (sign < 0 &&
1601 nbytes > 1 &&
1602 pdata[nbytes - 1] == 0xff &&
1603 (pdata[nbytes - 2] & 0x80) != 0) {
1604 nbytes--;
1605 }
1606
1607 if (nbytes < 256) {
1608 header[0] = LONG1;
1609 header[1] = (unsigned char)nbytes;
1610 size = 2;
1611 }
1612 else {
1613 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001614 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001615 for (i = 1; i < 5; i++) {
1616 header[i] = (unsigned char)(size & 0xff);
1617 size >>= 8;
1618 }
1619 size = 5;
1620 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001621 if (_Pickler_Write(self, header, size) < 0 ||
1622 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 goto error;
1624 }
1625 else {
1626 char *string;
1627
Mark Dickinson8dd05142009-01-20 20:43:58 +00001628 /* proto < 2: write the repr and newline. This is quadratic-time (in
1629 the number of digits), in both directions. We add a trailing 'L'
1630 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001631
1632 repr = PyObject_Repr(obj);
1633 if (repr == NULL)
1634 goto error;
1635
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001636 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001637 if (string == NULL)
1638 goto error;
1639
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001640 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1641 _Pickler_Write(self, string, size) < 0 ||
1642 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001643 goto error;
1644 }
1645
1646 if (0) {
1647 error:
1648 status = -1;
1649 }
1650 Py_XDECREF(repr);
1651
1652 return status;
1653}
1654
1655static int
1656save_float(PicklerObject *self, PyObject *obj)
1657{
1658 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1659
1660 if (self->bin) {
1661 char pdata[9];
1662 pdata[0] = BINFLOAT;
1663 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1664 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001665 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001666 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001667 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001668 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001669 int result = -1;
1670 char *buf = NULL;
1671 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001672
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001673 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001674 goto done;
1675
Mark Dickinson3e09f432009-04-17 08:41:23 +00001676 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 if (!buf) {
1678 PyErr_NoMemory();
1679 goto done;
1680 }
1681
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001682 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001683 goto done;
1684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
1688 result = 0;
1689done:
1690 PyMem_Free(buf);
1691 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001692 }
1693
1694 return 0;
1695}
1696
1697static int
1698save_bytes(PicklerObject *self, PyObject *obj)
1699{
1700 if (self->proto < 3) {
1701 /* Older pickle protocols do not have an opcode for pickling bytes
1702 objects. Therefore, we need to fake the copy protocol (i.e.,
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001703 the __reduce__ method) to permit bytes object unpickling.
1704
1705 Here we use a hack to be compatible with Python 2. Since in Python
1706 2 'bytes' is just an alias for 'str' (which has different
1707 parameters than the actual bytes object), we use codecs.encode
1708 to create the appropriate 'str' object when unpickled using
1709 Python 2 *and* the appropriate 'bytes' object when unpickled
1710 using Python 3. Again this is a hack and we don't need to do this
1711 with newer protocols. */
1712 static PyObject *codecs_encode = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001713 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001714 int status;
1715
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001716 if (codecs_encode == NULL) {
1717 PyObject *codecs_module = PyImport_ImportModule("codecs");
1718 if (codecs_module == NULL) {
1719 return -1;
1720 }
1721 codecs_encode = PyObject_GetAttrString(codecs_module, "encode");
1722 Py_DECREF(codecs_module);
1723 if (codecs_encode == NULL) {
1724 return -1;
1725 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001726 }
1727
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001728 if (PyBytes_GET_SIZE(obj) == 0) {
1729 reduce_value = Py_BuildValue("(O())", (PyObject*)&PyBytes_Type);
1730 }
1731 else {
1732 static PyObject *latin1 = NULL;
1733 PyObject *unicode_str =
1734 PyUnicode_DecodeLatin1(PyBytes_AS_STRING(obj),
1735 PyBytes_GET_SIZE(obj),
1736 "strict");
1737 if (unicode_str == NULL)
1738 return -1;
1739 if (latin1 == NULL) {
1740 latin1 = PyUnicode_InternFromString("latin1");
1741 if (latin1 == NULL)
1742 return -1;
1743 }
1744 reduce_value = Py_BuildValue("(O(OO))",
1745 codecs_encode, unicode_str, latin1);
1746 Py_DECREF(unicode_str);
1747 }
1748
1749 if (reduce_value == NULL)
1750 return -1;
1751
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001752 /* save_reduce() will memoize the object automatically. */
1753 status = save_reduce(self, reduce_value, obj);
1754 Py_DECREF(reduce_value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001755 return status;
1756 }
1757 else {
1758 Py_ssize_t size;
1759 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001760 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001761
Alexandre Vassalotti3bfc65a2011-12-13 13:08:09 -05001762 size = PyBytes_GET_SIZE(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001763 if (size < 0)
1764 return -1;
1765
1766 if (size < 256) {
1767 header[0] = SHORT_BINBYTES;
1768 header[1] = (unsigned char)size;
1769 len = 2;
1770 }
1771 else if (size <= 0xffffffffL) {
1772 header[0] = BINBYTES;
1773 header[1] = (unsigned char)(size & 0xff);
1774 header[2] = (unsigned char)((size >> 8) & 0xff);
1775 header[3] = (unsigned char)((size >> 16) & 0xff);
1776 header[4] = (unsigned char)((size >> 24) & 0xff);
1777 len = 5;
1778 }
1779 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001780 PyErr_SetString(PyExc_OverflowError,
1781 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001782 return -1; /* string too large */
1783 }
1784
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001785 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001786 return -1;
1787
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001788 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001789 return -1;
1790
1791 if (memo_put(self, obj) < 0)
1792 return -1;
1793
1794 return 0;
1795 }
1796}
1797
1798/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1799 backslash and newline characters to \uXXXX escapes. */
1800static PyObject *
1801raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1802{
1803 PyObject *repr, *result;
1804 char *p;
1805 char *q;
1806
1807 static const char *hexdigits = "0123456789abcdef";
1808
1809#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001810 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001811#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001812 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001813#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001814
1815 if (size > PY_SSIZE_T_MAX / expandsize)
1816 return PyErr_NoMemory();
1817
1818 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001819 if (repr == NULL)
1820 return NULL;
1821 if (size == 0)
1822 goto done;
1823
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001824 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001825 while (size-- > 0) {
1826 Py_UNICODE ch = *s++;
1827#ifdef Py_UNICODE_WIDE
1828 /* Map 32-bit characters to '\Uxxxxxxxx' */
1829 if (ch >= 0x10000) {
1830 *p++ = '\\';
1831 *p++ = 'U';
1832 *p++ = hexdigits[(ch >> 28) & 0xf];
1833 *p++ = hexdigits[(ch >> 24) & 0xf];
1834 *p++ = hexdigits[(ch >> 20) & 0xf];
1835 *p++ = hexdigits[(ch >> 16) & 0xf];
1836 *p++ = hexdigits[(ch >> 12) & 0xf];
1837 *p++ = hexdigits[(ch >> 8) & 0xf];
1838 *p++ = hexdigits[(ch >> 4) & 0xf];
1839 *p++ = hexdigits[ch & 15];
1840 }
1841 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001842#else
1843 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1844 if (ch >= 0xD800 && ch < 0xDC00) {
1845 Py_UNICODE ch2;
1846 Py_UCS4 ucs;
1847
1848 ch2 = *s++;
1849 size--;
1850 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1851 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1852 *p++ = '\\';
1853 *p++ = 'U';
1854 *p++ = hexdigits[(ucs >> 28) & 0xf];
1855 *p++ = hexdigits[(ucs >> 24) & 0xf];
1856 *p++ = hexdigits[(ucs >> 20) & 0xf];
1857 *p++ = hexdigits[(ucs >> 16) & 0xf];
1858 *p++ = hexdigits[(ucs >> 12) & 0xf];
1859 *p++ = hexdigits[(ucs >> 8) & 0xf];
1860 *p++ = hexdigits[(ucs >> 4) & 0xf];
1861 *p++ = hexdigits[ucs & 0xf];
1862 continue;
1863 }
1864 /* Fall through: isolated surrogates are copied as-is */
1865 s--;
1866 size++;
1867 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001868#endif
1869 /* Map 16-bit characters to '\uxxxx' */
1870 if (ch >= 256 || ch == '\\' || ch == '\n') {
1871 *p++ = '\\';
1872 *p++ = 'u';
1873 *p++ = hexdigits[(ch >> 12) & 0xf];
1874 *p++ = hexdigits[(ch >> 8) & 0xf];
1875 *p++ = hexdigits[(ch >> 4) & 0xf];
1876 *p++ = hexdigits[ch & 15];
1877 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001878 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001879 else
1880 *p++ = (char) ch;
1881 }
1882 size = p - q;
1883
1884 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001885 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001886 Py_DECREF(repr);
1887 return result;
1888}
1889
1890static int
1891save_unicode(PicklerObject *self, PyObject *obj)
1892{
1893 Py_ssize_t size;
1894 PyObject *encoded = NULL;
1895
1896 if (self->bin) {
1897 char pdata[5];
1898
Victor Stinner485fb562010-04-13 11:07:24 +00001899 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1900 PyUnicode_GET_SIZE(obj),
1901 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001902 if (encoded == NULL)
1903 goto error;
1904
1905 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001906 if (size > 0xffffffffL) {
1907 PyErr_SetString(PyExc_OverflowError,
1908 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001909 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001910 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001911
1912 pdata[0] = BINUNICODE;
1913 pdata[1] = (unsigned char)(size & 0xff);
1914 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1915 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1916 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1917
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001918 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001919 goto error;
1920
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001921 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001922 goto error;
1923 }
1924 else {
1925 const char unicode_op = UNICODE;
1926
1927 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1928 PyUnicode_GET_SIZE(obj));
1929 if (encoded == NULL)
1930 goto error;
1931
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001932 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001933 goto error;
1934
1935 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001936 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001937 goto error;
1938
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001939 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001940 goto error;
1941 }
1942 if (memo_put(self, obj) < 0)
1943 goto error;
1944
1945 Py_DECREF(encoded);
1946 return 0;
1947
1948 error:
1949 Py_XDECREF(encoded);
1950 return -1;
1951}
1952
1953/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1954static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001955store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001956{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001957 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001958
1959 assert(PyTuple_Size(t) == len);
1960
1961 for (i = 0; i < len; i++) {
1962 PyObject *element = PyTuple_GET_ITEM(t, i);
1963
1964 if (element == NULL)
1965 return -1;
1966 if (save(self, element, 0) < 0)
1967 return -1;
1968 }
1969
1970 return 0;
1971}
1972
1973/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1974 * used across protocols to minimize the space needed to pickle them.
1975 * Tuples are also the only builtin immutable type that can be recursive
1976 * (a tuple can be reached from itself), and that requires some subtle
1977 * magic so that it works in all cases. IOW, this is a long routine.
1978 */
1979static int
1980save_tuple(PicklerObject *self, PyObject *obj)
1981{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001982 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001983
1984 const char mark_op = MARK;
1985 const char tuple_op = TUPLE;
1986 const char pop_op = POP;
1987 const char pop_mark_op = POP_MARK;
1988 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1989
1990 if ((len = PyTuple_Size(obj)) < 0)
1991 return -1;
1992
1993 if (len == 0) {
1994 char pdata[2];
1995
1996 if (self->proto) {
1997 pdata[0] = EMPTY_TUPLE;
1998 len = 1;
1999 }
2000 else {
2001 pdata[0] = MARK;
2002 pdata[1] = TUPLE;
2003 len = 2;
2004 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002005 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006 return -1;
2007 return 0;
2008 }
2009
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002010 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002011 * saving the tuple elements, the tuple must be recursive, in
2012 * which case we'll pop everything we put on the stack, and fetch
2013 * its value from the memo.
2014 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002015 if (len <= 3 && self->proto >= 2) {
2016 /* Use TUPLE{1,2,3} opcodes. */
2017 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002020 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021 /* pop the len elements */
2022 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002023 if (_Pickler_Write(self, &pop_op, 1) < 0)
2024 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002025 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002026 if (memo_get(self, obj) < 0)
2027 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002029 return 0;
2030 }
2031 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002032 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2033 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002034 }
2035 goto memoize;
2036 }
2037
2038 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2039 * Generate MARK e1 e2 ... TUPLE
2040 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002041 if (_Pickler_Write(self, &mark_op, 1) < 0)
2042 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002043
2044 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002045 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002046
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002047 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002048 /* pop the stack stuff we pushed */
2049 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002050 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2051 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002052 }
2053 else {
2054 /* Note that we pop one more than len, to remove
2055 * the MARK too.
2056 */
2057 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002058 if (_Pickler_Write(self, &pop_op, 1) < 0)
2059 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002060 }
2061 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002062 if (memo_get(self, obj) < 0)
2063 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002064
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002065 return 0;
2066 }
2067 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002068 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2069 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002070 }
2071
2072 memoize:
2073 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002074 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002075
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002076 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002077}
2078
2079/* iter is an iterator giving items, and we batch up chunks of
2080 * MARK item item ... item APPENDS
2081 * opcode sequences. Calling code should have arranged to first create an
2082 * empty list, or list-like object, for the APPENDS to operate on.
2083 * Returns 0 on success, <0 on error.
2084 */
2085static int
2086batch_list(PicklerObject *self, PyObject *iter)
2087{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002088 PyObject *obj = NULL;
2089 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002090 int i, n;
2091
2092 const char mark_op = MARK;
2093 const char append_op = APPEND;
2094 const char appends_op = APPENDS;
2095
2096 assert(iter != NULL);
2097
2098 /* XXX: I think this function could be made faster by avoiding the
2099 iterator interface and fetching objects directly from list using
2100 PyList_GET_ITEM.
2101 */
2102
2103 if (self->proto == 0) {
2104 /* APPENDS isn't available; do one at a time. */
2105 for (;;) {
2106 obj = PyIter_Next(iter);
2107 if (obj == NULL) {
2108 if (PyErr_Occurred())
2109 return -1;
2110 break;
2111 }
2112 i = save(self, obj, 0);
2113 Py_DECREF(obj);
2114 if (i < 0)
2115 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002116 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002117 return -1;
2118 }
2119 return 0;
2120 }
2121
2122 /* proto > 0: write in batches of BATCHSIZE. */
2123 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002124 /* Get first item */
2125 firstitem = PyIter_Next(iter);
2126 if (firstitem == NULL) {
2127 if (PyErr_Occurred())
2128 goto error;
2129
2130 /* nothing more to add */
2131 break;
2132 }
2133
2134 /* Try to get a second item */
2135 obj = PyIter_Next(iter);
2136 if (obj == NULL) {
2137 if (PyErr_Occurred())
2138 goto error;
2139
2140 /* Only one item to write */
2141 if (save(self, firstitem, 0) < 0)
2142 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002143 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002144 goto error;
2145 Py_CLEAR(firstitem);
2146 break;
2147 }
2148
2149 /* More than one item to write */
2150
2151 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002152 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002153 goto error;
2154
2155 if (save(self, firstitem, 0) < 0)
2156 goto error;
2157 Py_CLEAR(firstitem);
2158 n = 1;
2159
2160 /* Fetch and save up to BATCHSIZE items */
2161 while (obj) {
2162 if (save(self, obj, 0) < 0)
2163 goto error;
2164 Py_CLEAR(obj);
2165 n += 1;
2166
2167 if (n == BATCHSIZE)
2168 break;
2169
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002170 obj = PyIter_Next(iter);
2171 if (obj == NULL) {
2172 if (PyErr_Occurred())
2173 goto error;
2174 break;
2175 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002176 }
2177
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002178 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002179 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002180
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002181 } while (n == BATCHSIZE);
2182 return 0;
2183
2184 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002185 Py_XDECREF(firstitem);
2186 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002187 return -1;
2188}
2189
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002190/* This is a variant of batch_list() above, specialized for lists (with no
2191 * support for list subclasses). Like batch_list(), we batch up chunks of
2192 * MARK item item ... item APPENDS
2193 * opcode sequences. Calling code should have arranged to first create an
2194 * empty list, or list-like object, for the APPENDS to operate on.
2195 * Returns 0 on success, -1 on error.
2196 *
2197 * This version is considerably faster than batch_list(), if less general.
2198 *
2199 * Note that this only works for protocols > 0.
2200 */
2201static int
2202batch_list_exact(PicklerObject *self, PyObject *obj)
2203{
2204 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002205 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002206
2207 const char append_op = APPEND;
2208 const char appends_op = APPENDS;
2209 const char mark_op = MARK;
2210
2211 assert(obj != NULL);
2212 assert(self->proto > 0);
2213 assert(PyList_CheckExact(obj));
2214
2215 if (PyList_GET_SIZE(obj) == 1) {
2216 item = PyList_GET_ITEM(obj, 0);
2217 if (save(self, item, 0) < 0)
2218 return -1;
2219 if (_Pickler_Write(self, &append_op, 1) < 0)
2220 return -1;
2221 return 0;
2222 }
2223
2224 /* Write in batches of BATCHSIZE. */
2225 total = 0;
2226 do {
2227 this_batch = 0;
2228 if (_Pickler_Write(self, &mark_op, 1) < 0)
2229 return -1;
2230 while (total < PyList_GET_SIZE(obj)) {
2231 item = PyList_GET_ITEM(obj, total);
2232 if (save(self, item, 0) < 0)
2233 return -1;
2234 total++;
2235 if (++this_batch == BATCHSIZE)
2236 break;
2237 }
2238 if (_Pickler_Write(self, &appends_op, 1) < 0)
2239 return -1;
2240
2241 } while (total < PyList_GET_SIZE(obj));
2242
2243 return 0;
2244}
2245
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002246static int
2247save_list(PicklerObject *self, PyObject *obj)
2248{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002249 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002250 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002251 int status = 0;
2252
2253 if (self->fast && !fast_save_enter(self, obj))
2254 goto error;
2255
2256 /* Create an empty list. */
2257 if (self->bin) {
2258 header[0] = EMPTY_LIST;
2259 len = 1;
2260 }
2261 else {
2262 header[0] = MARK;
2263 header[1] = LIST;
2264 len = 2;
2265 }
2266
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002267 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002268 goto error;
2269
2270 /* Get list length, and bow out early if empty. */
2271 if ((len = PyList_Size(obj)) < 0)
2272 goto error;
2273
2274 if (memo_put(self, obj) < 0)
2275 goto error;
2276
2277 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002278 /* Materialize the list elements. */
2279 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002280 if (Py_EnterRecursiveCall(" while pickling an object"))
2281 goto error;
2282 status = batch_list_exact(self, obj);
2283 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002284 } else {
2285 PyObject *iter = PyObject_GetIter(obj);
2286 if (iter == NULL)
2287 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002288
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002289 if (Py_EnterRecursiveCall(" while pickling an object")) {
2290 Py_DECREF(iter);
2291 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002292 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002293 status = batch_list(self, iter);
2294 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002295 Py_DECREF(iter);
2296 }
2297 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002298 if (0) {
2299 error:
2300 status = -1;
2301 }
2302
2303 if (self->fast && !fast_save_leave(self, obj))
2304 status = -1;
2305
2306 return status;
2307}
2308
2309/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2310 * MARK key value ... key value SETITEMS
2311 * opcode sequences. Calling code should have arranged to first create an
2312 * empty dict, or dict-like object, for the SETITEMS to operate on.
2313 * Returns 0 on success, <0 on error.
2314 *
2315 * This is very much like batch_list(). The difference between saving
2316 * elements directly, and picking apart two-tuples, is so long-winded at
2317 * the C level, though, that attempts to combine these routines were too
2318 * ugly to bear.
2319 */
2320static int
2321batch_dict(PicklerObject *self, PyObject *iter)
2322{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002323 PyObject *obj = NULL;
2324 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002325 int i, n;
2326
2327 const char mark_op = MARK;
2328 const char setitem_op = SETITEM;
2329 const char setitems_op = SETITEMS;
2330
2331 assert(iter != NULL);
2332
2333 if (self->proto == 0) {
2334 /* SETITEMS isn't available; do one at a time. */
2335 for (;;) {
2336 obj = PyIter_Next(iter);
2337 if (obj == NULL) {
2338 if (PyErr_Occurred())
2339 return -1;
2340 break;
2341 }
2342 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2343 PyErr_SetString(PyExc_TypeError, "dict items "
2344 "iterator must return 2-tuples");
2345 return -1;
2346 }
2347 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2348 if (i >= 0)
2349 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2350 Py_DECREF(obj);
2351 if (i < 0)
2352 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002353 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002354 return -1;
2355 }
2356 return 0;
2357 }
2358
2359 /* proto > 0: write in batches of BATCHSIZE. */
2360 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002361 /* Get first item */
2362 firstitem = PyIter_Next(iter);
2363 if (firstitem == NULL) {
2364 if (PyErr_Occurred())
2365 goto error;
2366
2367 /* nothing more to add */
2368 break;
2369 }
2370 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2371 PyErr_SetString(PyExc_TypeError, "dict items "
2372 "iterator must return 2-tuples");
2373 goto error;
2374 }
2375
2376 /* Try to get a second item */
2377 obj = PyIter_Next(iter);
2378 if (obj == NULL) {
2379 if (PyErr_Occurred())
2380 goto error;
2381
2382 /* Only one item to write */
2383 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2384 goto error;
2385 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2386 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002387 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002388 goto error;
2389 Py_CLEAR(firstitem);
2390 break;
2391 }
2392
2393 /* More than one item to write */
2394
2395 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002396 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002397 goto error;
2398
2399 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2400 goto error;
2401 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2402 goto error;
2403 Py_CLEAR(firstitem);
2404 n = 1;
2405
2406 /* Fetch and save up to BATCHSIZE items */
2407 while (obj) {
2408 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2409 PyErr_SetString(PyExc_TypeError, "dict items "
2410 "iterator must return 2-tuples");
2411 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002412 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002413 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2414 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2415 goto error;
2416 Py_CLEAR(obj);
2417 n += 1;
2418
2419 if (n == BATCHSIZE)
2420 break;
2421
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002422 obj = PyIter_Next(iter);
2423 if (obj == NULL) {
2424 if (PyErr_Occurred())
2425 goto error;
2426 break;
2427 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002428 }
2429
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002430 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002431 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002432
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002433 } while (n == BATCHSIZE);
2434 return 0;
2435
2436 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002437 Py_XDECREF(firstitem);
2438 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002439 return -1;
2440}
2441
Collin Winter5c9b02d2009-05-25 05:43:30 +00002442/* This is a variant of batch_dict() above that specializes for dicts, with no
2443 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2444 * MARK key value ... key value SETITEMS
2445 * opcode sequences. Calling code should have arranged to first create an
2446 * empty dict, or dict-like object, for the SETITEMS to operate on.
2447 * Returns 0 on success, -1 on error.
2448 *
2449 * Note that this currently doesn't work for protocol 0.
2450 */
2451static int
2452batch_dict_exact(PicklerObject *self, PyObject *obj)
2453{
2454 PyObject *key = NULL, *value = NULL;
2455 int i;
2456 Py_ssize_t dict_size, ppos = 0;
2457
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002458 const char mark_op = MARK;
2459 const char setitem_op = SETITEM;
2460 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002461
2462 assert(obj != NULL);
2463 assert(self->proto > 0);
2464
2465 dict_size = PyDict_Size(obj);
2466
2467 /* Special-case len(d) == 1 to save space. */
2468 if (dict_size == 1) {
2469 PyDict_Next(obj, &ppos, &key, &value);
2470 if (save(self, key, 0) < 0)
2471 return -1;
2472 if (save(self, value, 0) < 0)
2473 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002474 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002475 return -1;
2476 return 0;
2477 }
2478
2479 /* Write in batches of BATCHSIZE. */
2480 do {
2481 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002482 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002483 return -1;
2484 while (PyDict_Next(obj, &ppos, &key, &value)) {
2485 if (save(self, key, 0) < 0)
2486 return -1;
2487 if (save(self, value, 0) < 0)
2488 return -1;
2489 if (++i == BATCHSIZE)
2490 break;
2491 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002492 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002493 return -1;
2494 if (PyDict_Size(obj) != dict_size) {
2495 PyErr_Format(
2496 PyExc_RuntimeError,
2497 "dictionary changed size during iteration");
2498 return -1;
2499 }
2500
2501 } while (i == BATCHSIZE);
2502 return 0;
2503}
2504
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002505static int
2506save_dict(PicklerObject *self, PyObject *obj)
2507{
2508 PyObject *items, *iter;
2509 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002510 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002511 int status = 0;
2512
2513 if (self->fast && !fast_save_enter(self, obj))
2514 goto error;
2515
2516 /* Create an empty dict. */
2517 if (self->bin) {
2518 header[0] = EMPTY_DICT;
2519 len = 1;
2520 }
2521 else {
2522 header[0] = MARK;
2523 header[1] = DICT;
2524 len = 2;
2525 }
2526
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002527 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002528 goto error;
2529
2530 /* Get dict size, and bow out early if empty. */
2531 if ((len = PyDict_Size(obj)) < 0)
2532 goto error;
2533
2534 if (memo_put(self, obj) < 0)
2535 goto error;
2536
2537 if (len != 0) {
2538 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002539 if (PyDict_CheckExact(obj) && self->proto > 0) {
2540 /* We can take certain shortcuts if we know this is a dict and
2541 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002542 if (Py_EnterRecursiveCall(" while pickling an object"))
2543 goto error;
2544 status = batch_dict_exact(self, obj);
2545 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002546 } else {
2547 items = PyObject_CallMethod(obj, "items", "()");
2548 if (items == NULL)
2549 goto error;
2550 iter = PyObject_GetIter(items);
2551 Py_DECREF(items);
2552 if (iter == NULL)
2553 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002554 if (Py_EnterRecursiveCall(" while pickling an object")) {
2555 Py_DECREF(iter);
2556 goto error;
2557 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002558 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002559 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002560 Py_DECREF(iter);
2561 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002562 }
2563
2564 if (0) {
2565 error:
2566 status = -1;
2567 }
2568
2569 if (self->fast && !fast_save_leave(self, obj))
2570 status = -1;
2571
2572 return status;
2573}
2574
2575static int
2576save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2577{
2578 static PyObject *name_str = NULL;
2579 PyObject *global_name = NULL;
2580 PyObject *module_name = NULL;
2581 PyObject *module = NULL;
2582 PyObject *cls;
2583 int status = 0;
2584
2585 const char global_op = GLOBAL;
2586
2587 if (name_str == NULL) {
2588 name_str = PyUnicode_InternFromString("__name__");
2589 if (name_str == NULL)
2590 goto error;
2591 }
2592
2593 if (name) {
2594 global_name = name;
2595 Py_INCREF(global_name);
2596 }
2597 else {
2598 global_name = PyObject_GetAttr(obj, name_str);
2599 if (global_name == NULL)
2600 goto error;
2601 }
2602
2603 module_name = whichmodule(obj, global_name);
2604 if (module_name == NULL)
2605 goto error;
2606
2607 /* XXX: Change to use the import C API directly with level=0 to disallow
2608 relative imports.
2609
2610 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2611 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2612 custom import functions (IMHO, this would be a nice security
2613 feature). The import C API would need to be extended to support the
2614 extra parameters of __import__ to fix that. */
2615 module = PyImport_Import(module_name);
2616 if (module == NULL) {
2617 PyErr_Format(PicklingError,
2618 "Can't pickle %R: import of module %R failed",
2619 obj, module_name);
2620 goto error;
2621 }
2622 cls = PyObject_GetAttr(module, global_name);
2623 if (cls == NULL) {
2624 PyErr_Format(PicklingError,
2625 "Can't pickle %R: attribute lookup %S.%S failed",
2626 obj, module_name, global_name);
2627 goto error;
2628 }
2629 if (cls != obj) {
2630 Py_DECREF(cls);
2631 PyErr_Format(PicklingError,
2632 "Can't pickle %R: it's not the same object as %S.%S",
2633 obj, module_name, global_name);
2634 goto error;
2635 }
2636 Py_DECREF(cls);
2637
2638 if (self->proto >= 2) {
2639 /* See whether this is in the extension registry, and if
2640 * so generate an EXT opcode.
2641 */
2642 PyObject *code_obj; /* extension code as Python object */
2643 long code; /* extension code as C value */
2644 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002645 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002646
2647 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2648 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2649 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2650 /* The object is not registered in the extension registry.
2651 This is the most likely code path. */
2652 if (code_obj == NULL)
2653 goto gen_global;
2654
2655 /* XXX: pickle.py doesn't check neither the type, nor the range
2656 of the value returned by the extension_registry. It should for
2657 consistency. */
2658
2659 /* Verify code_obj has the right type and value. */
2660 if (!PyLong_Check(code_obj)) {
2661 PyErr_Format(PicklingError,
2662 "Can't pickle %R: extension code %R isn't an integer",
2663 obj, code_obj);
2664 goto error;
2665 }
2666 code = PyLong_AS_LONG(code_obj);
2667 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002668 if (!PyErr_Occurred())
2669 PyErr_Format(PicklingError,
2670 "Can't pickle %R: extension code %ld is out of range",
2671 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002672 goto error;
2673 }
2674
2675 /* Generate an EXT opcode. */
2676 if (code <= 0xff) {
2677 pdata[0] = EXT1;
2678 pdata[1] = (unsigned char)code;
2679 n = 2;
2680 }
2681 else if (code <= 0xffff) {
2682 pdata[0] = EXT2;
2683 pdata[1] = (unsigned char)(code & 0xff);
2684 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2685 n = 3;
2686 }
2687 else {
2688 pdata[0] = EXT4;
2689 pdata[1] = (unsigned char)(code & 0xff);
2690 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2691 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2692 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2693 n = 5;
2694 }
2695
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002696 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002697 goto error;
2698 }
2699 else {
2700 /* Generate a normal global opcode if we are using a pickle
2701 protocol <= 2, or if the object is not registered in the
2702 extension registry. */
2703 PyObject *encoded;
2704 PyObject *(*unicode_encoder)(PyObject *);
2705
2706 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002707 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002708 goto error;
2709
2710 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2711 the module name and the global name using UTF-8. We do so only when
2712 we are using the pickle protocol newer than version 3. This is to
2713 ensure compatibility with older Unpickler running on Python 2.x. */
2714 if (self->proto >= 3) {
2715 unicode_encoder = PyUnicode_AsUTF8String;
2716 }
2717 else {
2718 unicode_encoder = PyUnicode_AsASCIIString;
2719 }
2720
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002721 /* For protocol < 3 and if the user didn't request against doing so,
2722 we convert module names to the old 2.x module names. */
2723 if (self->fix_imports) {
2724 PyObject *key;
2725 PyObject *item;
2726
2727 key = PyTuple_Pack(2, module_name, global_name);
2728 if (key == NULL)
2729 goto error;
2730 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2731 Py_DECREF(key);
2732 if (item) {
2733 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2734 PyErr_Format(PyExc_RuntimeError,
2735 "_compat_pickle.REVERSE_NAME_MAPPING values "
2736 "should be 2-tuples, not %.200s",
2737 Py_TYPE(item)->tp_name);
2738 goto error;
2739 }
2740 Py_CLEAR(module_name);
2741 Py_CLEAR(global_name);
2742 module_name = PyTuple_GET_ITEM(item, 0);
2743 global_name = PyTuple_GET_ITEM(item, 1);
2744 if (!PyUnicode_Check(module_name) ||
2745 !PyUnicode_Check(global_name)) {
2746 PyErr_Format(PyExc_RuntimeError,
2747 "_compat_pickle.REVERSE_NAME_MAPPING values "
2748 "should be pairs of str, not (%.200s, %.200s)",
2749 Py_TYPE(module_name)->tp_name,
2750 Py_TYPE(global_name)->tp_name);
2751 goto error;
2752 }
2753 Py_INCREF(module_name);
2754 Py_INCREF(global_name);
2755 }
2756 else if (PyErr_Occurred()) {
2757 goto error;
2758 }
2759
2760 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2761 if (item) {
2762 if (!PyUnicode_Check(item)) {
2763 PyErr_Format(PyExc_RuntimeError,
2764 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2765 "should be strings, not %.200s",
2766 Py_TYPE(item)->tp_name);
2767 goto error;
2768 }
2769 Py_CLEAR(module_name);
2770 module_name = item;
2771 Py_INCREF(module_name);
2772 }
2773 else if (PyErr_Occurred()) {
2774 goto error;
2775 }
2776 }
2777
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002778 /* Save the name of the module. */
2779 encoded = unicode_encoder(module_name);
2780 if (encoded == NULL) {
2781 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2782 PyErr_Format(PicklingError,
2783 "can't pickle module identifier '%S' using "
2784 "pickle protocol %i", module_name, self->proto);
2785 goto error;
2786 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002787 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002788 PyBytes_GET_SIZE(encoded)) < 0) {
2789 Py_DECREF(encoded);
2790 goto error;
2791 }
2792 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002793 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002794 goto error;
2795
2796 /* Save the name of the module. */
2797 encoded = unicode_encoder(global_name);
2798 if (encoded == NULL) {
2799 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2800 PyErr_Format(PicklingError,
2801 "can't pickle global identifier '%S' using "
2802 "pickle protocol %i", global_name, self->proto);
2803 goto error;
2804 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002805 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002806 PyBytes_GET_SIZE(encoded)) < 0) {
2807 Py_DECREF(encoded);
2808 goto error;
2809 }
2810 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002811 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002812 goto error;
2813
2814 /* Memoize the object. */
2815 if (memo_put(self, obj) < 0)
2816 goto error;
2817 }
2818
2819 if (0) {
2820 error:
2821 status = -1;
2822 }
2823 Py_XDECREF(module_name);
2824 Py_XDECREF(global_name);
2825 Py_XDECREF(module);
2826
2827 return status;
2828}
2829
2830static int
2831save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2832{
2833 PyObject *pid = NULL;
2834 int status = 0;
2835
2836 const char persid_op = PERSID;
2837 const char binpersid_op = BINPERSID;
2838
2839 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002840 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002841 if (pid == NULL)
2842 return -1;
2843
2844 if (pid != Py_None) {
2845 if (self->bin) {
2846 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002847 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002848 goto error;
2849 }
2850 else {
2851 PyObject *pid_str = NULL;
2852 char *pid_ascii_bytes;
2853 Py_ssize_t size;
2854
2855 pid_str = PyObject_Str(pid);
2856 if (pid_str == NULL)
2857 goto error;
2858
2859 /* XXX: Should it check whether the persistent id only contains
2860 ASCII characters? And what if the pid contains embedded
2861 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002862 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002863 Py_DECREF(pid_str);
2864 if (pid_ascii_bytes == NULL)
2865 goto error;
2866
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002867 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2868 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2869 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002870 goto error;
2871 }
2872 status = 1;
2873 }
2874
2875 if (0) {
2876 error:
2877 status = -1;
2878 }
2879 Py_XDECREF(pid);
2880
2881 return status;
2882}
2883
2884/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2885 * appropriate __reduce__ method for obj.
2886 */
2887static int
2888save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2889{
2890 PyObject *callable;
2891 PyObject *argtup;
2892 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002893 PyObject *listitems = Py_None;
2894 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002895 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002896
2897 int use_newobj = self->proto >= 2;
2898
2899 const char reduce_op = REDUCE;
2900 const char build_op = BUILD;
2901 const char newobj_op = NEWOBJ;
2902
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002903 size = PyTuple_Size(args);
2904 if (size < 2 || size > 5) {
2905 PyErr_SetString(PicklingError, "tuple returned by "
2906 "__reduce__ must contain 2 through 5 elements");
2907 return -1;
2908 }
2909
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002910 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2911 &callable, &argtup, &state, &listitems, &dictitems))
2912 return -1;
2913
2914 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002915 PyErr_SetString(PicklingError, "first item of the tuple "
2916 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002917 return -1;
2918 }
2919 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002920 PyErr_SetString(PicklingError, "second item of the tuple "
2921 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002922 return -1;
2923 }
2924
2925 if (state == Py_None)
2926 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002927
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002928 if (listitems == Py_None)
2929 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002930 else if (!PyIter_Check(listitems)) {
2931 PyErr_Format(PicklingError, "Fourth element of tuple"
2932 "returned by __reduce__ must be an iterator, not %s",
2933 Py_TYPE(listitems)->tp_name);
2934 return -1;
2935 }
2936
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002937 if (dictitems == Py_None)
2938 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002939 else if (!PyIter_Check(dictitems)) {
2940 PyErr_Format(PicklingError, "Fifth element of tuple"
2941 "returned by __reduce__ must be an iterator, not %s",
2942 Py_TYPE(dictitems)->tp_name);
2943 return -1;
2944 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002945
2946 /* Protocol 2 special case: if callable's name is __newobj__, use
2947 NEWOBJ. */
2948 if (use_newobj) {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002949 static PyObject *newobj_str = NULL;
2950 PyObject *name_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002951
2952 if (newobj_str == NULL) {
2953 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrouff150f22010-10-22 21:41:05 +00002954 if (newobj_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002955 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002956 }
2957
Antoine Pitrouff150f22010-10-22 21:41:05 +00002958 name_str = PyObject_GetAttrString(callable, "__name__");
2959 if (name_str == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002960 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2961 PyErr_Clear();
2962 else
2963 return -1;
2964 use_newobj = 0;
2965 }
2966 else {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002967 use_newobj = PyUnicode_Check(name_str) &&
2968 PyUnicode_Compare(name_str, newobj_str) == 0;
2969 Py_DECREF(name_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002970 }
2971 }
2972 if (use_newobj) {
2973 PyObject *cls;
2974 PyObject *newargtup;
2975 PyObject *obj_class;
2976 int p;
2977
2978 /* Sanity checks. */
2979 if (Py_SIZE(argtup) < 1) {
2980 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2981 return -1;
2982 }
2983
2984 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrouff150f22010-10-22 21:41:05 +00002985 if (!PyObject_HasAttrString(cls, "__new__")) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002986 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrouff150f22010-10-22 21:41:05 +00002987 "__newobj__ args has no __new__");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002988 return -1;
2989 }
2990
2991 if (obj != NULL) {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002992 obj_class = PyObject_GetAttrString(obj, "__class__");
2993 if (obj_class == NULL) {
2994 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2995 PyErr_Clear();
2996 else
2997 return -1;
2998 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002999 p = obj_class != cls; /* true iff a problem */
3000 Py_DECREF(obj_class);
3001 if (p) {
3002 PyErr_SetString(PicklingError, "args[0] from "
3003 "__newobj__ args has the wrong class");
3004 return -1;
3005 }
3006 }
3007 /* XXX: These calls save() are prone to infinite recursion. Imagine
3008 what happen if the value returned by the __reduce__() method of
3009 some extension type contains another object of the same type. Ouch!
3010
3011 Here is a quick example, that I ran into, to illustrate what I
3012 mean:
3013
3014 >>> import pickle, copyreg
3015 >>> copyreg.dispatch_table.pop(complex)
3016 >>> pickle.dumps(1+2j)
3017 Traceback (most recent call last):
3018 ...
3019 RuntimeError: maximum recursion depth exceeded
3020
3021 Removing the complex class from copyreg.dispatch_table made the
3022 __reduce_ex__() method emit another complex object:
3023
3024 >>> (1+1j).__reduce_ex__(2)
3025 (<function __newobj__ at 0xb7b71c3c>,
3026 (<class 'complex'>, (1+1j)), None, None, None)
3027
3028 Thus when save() was called on newargstup (the 2nd item) recursion
3029 ensued. Of course, the bug was in the complex class which had a
3030 broken __getnewargs__() that emitted another complex object. But,
3031 the point, here, is it is quite easy to end up with a broken reduce
3032 function. */
3033
3034 /* Save the class and its __new__ arguments. */
3035 if (save(self, cls, 0) < 0)
3036 return -1;
3037
3038 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3039 if (newargtup == NULL)
3040 return -1;
3041
3042 p = save(self, newargtup, 0);
3043 Py_DECREF(newargtup);
3044 if (p < 0)
3045 return -1;
3046
3047 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003048 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003049 return -1;
3050 }
3051 else { /* Not using NEWOBJ. */
3052 if (save(self, callable, 0) < 0 ||
3053 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003054 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003055 return -1;
3056 }
3057
3058 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3059 the caller do not want to memoize the object. Not particularly useful,
3060 but that is to mimic the behavior save_reduce() in pickle.py when
3061 obj is None. */
3062 if (obj && memo_put(self, obj) < 0)
3063 return -1;
3064
3065 if (listitems && batch_list(self, listitems) < 0)
3066 return -1;
3067
3068 if (dictitems && batch_dict(self, dictitems) < 0)
3069 return -1;
3070
3071 if (state) {
3072 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003073 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003074 return -1;
3075 }
3076
3077 return 0;
3078}
3079
3080static int
3081save(PicklerObject *self, PyObject *obj, int pers_save)
3082{
3083 PyTypeObject *type;
3084 PyObject *reduce_func = NULL;
3085 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003086 int status = 0;
3087
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003088 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003089 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003090
3091 /* The extra pers_save argument is necessary to avoid calling save_pers()
3092 on its returned object. */
3093 if (!pers_save && self->pers_func) {
3094 /* save_pers() returns:
3095 -1 to signal an error;
3096 0 if it did nothing successfully;
3097 1 if a persistent id was saved.
3098 */
3099 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3100 goto done;
3101 }
3102
3103 type = Py_TYPE(obj);
3104
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003105 /* The old cPickle had an optimization that used switch-case statement
3106 dispatching on the first letter of the type name. This has was removed
3107 since benchmarks shown that this optimization was actually slowing
3108 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003109
3110 /* Atom types; these aren't memoized, so don't check the memo. */
3111
3112 if (obj == Py_None) {
3113 status = save_none(self, obj);
3114 goto done;
3115 }
3116 else if (obj == Py_False || obj == Py_True) {
3117 status = save_bool(self, obj);
3118 goto done;
3119 }
3120 else if (type == &PyLong_Type) {
3121 status = save_long(self, obj);
3122 goto done;
3123 }
3124 else if (type == &PyFloat_Type) {
3125 status = save_float(self, obj);
3126 goto done;
3127 }
3128
3129 /* Check the memo to see if it has the object. If so, generate
3130 a GET (or BINGET) opcode, instead of pickling the object
3131 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003132 if (PyMemoTable_Get(self->memo, obj)) {
3133 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003134 goto error;
3135 goto done;
3136 }
3137
3138 if (type == &PyBytes_Type) {
3139 status = save_bytes(self, obj);
3140 goto done;
3141 }
3142 else if (type == &PyUnicode_Type) {
3143 status = save_unicode(self, obj);
3144 goto done;
3145 }
3146 else if (type == &PyDict_Type) {
3147 status = save_dict(self, obj);
3148 goto done;
3149 }
3150 else if (type == &PyList_Type) {
3151 status = save_list(self, obj);
3152 goto done;
3153 }
3154 else if (type == &PyTuple_Type) {
3155 status = save_tuple(self, obj);
3156 goto done;
3157 }
3158 else if (type == &PyType_Type) {
3159 status = save_global(self, obj, NULL);
3160 goto done;
3161 }
3162 else if (type == &PyFunction_Type) {
3163 status = save_global(self, obj, NULL);
3164 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3165 /* fall back to reduce */
3166 PyErr_Clear();
3167 }
3168 else {
3169 goto done;
3170 }
3171 }
3172 else if (type == &PyCFunction_Type) {
3173 status = save_global(self, obj, NULL);
3174 goto done;
3175 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003176
3177 /* XXX: This part needs some unit tests. */
3178
3179 /* Get a reduction callable, and call it. This may come from
3180 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3181 * or the object's __reduce__ method.
3182 */
3183 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3184 if (reduce_func != NULL) {
3185 /* Here, the reference count of the reduce_func object returned by
3186 PyDict_GetItem needs to be increased to be consistent with the one
3187 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3188 reduce_func at the end of the save() routine.
3189 */
3190 Py_INCREF(reduce_func);
3191 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003192 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003193 }
Antoine Pitrouffd41d92011-10-04 09:23:04 +02003194 else if (PyType_IsSubtype(type, &PyType_Type)) {
3195 status = save_global(self, obj, NULL);
3196 goto done;
3197 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003198 else {
3199 static PyObject *reduce_str = NULL;
3200 static PyObject *reduce_ex_str = NULL;
3201
3202 /* Cache the name of the reduce methods. */
3203 if (reduce_str == NULL) {
3204 reduce_str = PyUnicode_InternFromString("__reduce__");
3205 if (reduce_str == NULL)
3206 goto error;
3207 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3208 if (reduce_ex_str == NULL)
3209 goto error;
3210 }
3211
3212 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3213 automatically defined as __reduce__. While this is convenient, this
3214 make it impossible to know which method was actually called. Of
3215 course, this is not a big deal. But still, it would be nice to let
3216 the user know which method was called when something go
3217 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3218 don't actually have to check for a __reduce__ method. */
3219
3220 /* Check for a __reduce_ex__ method. */
3221 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3222 if (reduce_func != NULL) {
3223 PyObject *proto;
3224 proto = PyLong_FromLong(self->proto);
3225 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003226 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003227 }
3228 }
3229 else {
3230 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3231 PyErr_Clear();
3232 else
3233 goto error;
3234 /* Check for a __reduce__ method. */
3235 reduce_func = PyObject_GetAttr(obj, reduce_str);
3236 if (reduce_func != NULL) {
3237 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3238 }
3239 else {
3240 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3241 type->tp_name, obj);
3242 goto error;
3243 }
3244 }
3245 }
3246
3247 if (reduce_value == NULL)
3248 goto error;
3249
3250 if (PyUnicode_Check(reduce_value)) {
3251 status = save_global(self, obj, reduce_value);
3252 goto done;
3253 }
3254
3255 if (!PyTuple_Check(reduce_value)) {
3256 PyErr_SetString(PicklingError,
3257 "__reduce__ must return a string or tuple");
3258 goto error;
3259 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003260
3261 status = save_reduce(self, reduce_value, obj);
3262
3263 if (0) {
3264 error:
3265 status = -1;
3266 }
3267 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003268 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003269 Py_XDECREF(reduce_func);
3270 Py_XDECREF(reduce_value);
3271
3272 return status;
3273}
3274
3275static int
3276dump(PicklerObject *self, PyObject *obj)
3277{
3278 const char stop_op = STOP;
3279
3280 if (self->proto >= 2) {
3281 char header[2];
3282
3283 header[0] = PROTO;
3284 assert(self->proto >= 0 && self->proto < 256);
3285 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003286 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003287 return -1;
3288 }
3289
3290 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003291 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003292 return -1;
3293
3294 return 0;
3295}
3296
3297PyDoc_STRVAR(Pickler_clear_memo_doc,
3298"clear_memo() -> None. Clears the pickler's \"memo\"."
3299"\n"
3300"The memo is the data structure that remembers which objects the\n"
3301"pickler has already seen, so that shared or recursive objects are\n"
3302"pickled by reference and not by value. This method is useful when\n"
3303"re-using picklers.");
3304
3305static PyObject *
3306Pickler_clear_memo(PicklerObject *self)
3307{
3308 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003309 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003310
3311 Py_RETURN_NONE;
3312}
3313
3314PyDoc_STRVAR(Pickler_dump_doc,
3315"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3316
3317static PyObject *
3318Pickler_dump(PicklerObject *self, PyObject *args)
3319{
3320 PyObject *obj;
3321
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003322 /* Check whether the Pickler was initialized correctly (issue3664).
3323 Developers often forget to call __init__() in their subclasses, which
3324 would trigger a segfault without this check. */
3325 if (self->write == NULL) {
3326 PyErr_Format(PicklingError,
3327 "Pickler.__init__() was not called by %s.__init__()",
3328 Py_TYPE(self)->tp_name);
3329 return NULL;
3330 }
3331
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003332 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3333 return NULL;
3334
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003335 if (_Pickler_ClearBuffer(self) < 0)
3336 return NULL;
3337
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003338 if (dump(self, obj) < 0)
3339 return NULL;
3340
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003341 if (_Pickler_FlushToFile(self) < 0)
3342 return NULL;
3343
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003344 Py_RETURN_NONE;
3345}
3346
3347static struct PyMethodDef Pickler_methods[] = {
3348 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3349 Pickler_dump_doc},
3350 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3351 Pickler_clear_memo_doc},
3352 {NULL, NULL} /* sentinel */
3353};
3354
3355static void
3356Pickler_dealloc(PicklerObject *self)
3357{
3358 PyObject_GC_UnTrack(self);
3359
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003360 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003361 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003362 Py_XDECREF(self->pers_func);
3363 Py_XDECREF(self->arg);
3364 Py_XDECREF(self->fast_memo);
3365
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003366 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367
3368 Py_TYPE(self)->tp_free((PyObject *)self);
3369}
3370
3371static int
3372Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3373{
3374 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003375 Py_VISIT(self->pers_func);
3376 Py_VISIT(self->arg);
3377 Py_VISIT(self->fast_memo);
3378 return 0;
3379}
3380
3381static int
3382Pickler_clear(PicklerObject *self)
3383{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003384 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003385 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003386 Py_CLEAR(self->pers_func);
3387 Py_CLEAR(self->arg);
3388 Py_CLEAR(self->fast_memo);
3389
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003390 if (self->memo != NULL) {
3391 PyMemoTable *memo = self->memo;
3392 self->memo = NULL;
3393 PyMemoTable_Del(memo);
3394 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003395 return 0;
3396}
3397
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003398
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003399PyDoc_STRVAR(Pickler_doc,
3400"Pickler(file, protocol=None)"
3401"\n"
3402"This takes a binary file for writing a pickle data stream.\n"
3403"\n"
3404"The optional protocol argument tells the pickler to use the\n"
3405"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3406"protocol is 3; a backward-incompatible protocol designed for\n"
3407"Python 3.0.\n"
3408"\n"
3409"Specifying a negative protocol version selects the highest\n"
3410"protocol version supported. The higher the protocol used, the\n"
3411"more recent the version of Python needed to read the pickle\n"
3412"produced.\n"
3413"\n"
3414"The file argument must have a write() method that accepts a single\n"
3415"bytes argument. It can thus be a file object opened for binary\n"
3416"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003417"meets this interface.\n"
3418"\n"
3419"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3420"map the new Python 3.x names to the old module names used in Python\n"
3421"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003422
3423static int
3424Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3425{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003426 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003427 PyObject *file;
3428 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003429 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003430
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003431 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003432 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003433 return -1;
3434
3435 /* In case of multiple __init__() calls, clear previous content. */
3436 if (self->write != NULL)
3437 (void)Pickler_clear(self);
3438
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003439 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3440 return -1;
3441
3442 if (_Pickler_SetOutputStream(self, file) < 0)
3443 return -1;
3444
3445 /* memo and output_buffer may have already been created in _Pickler_New */
3446 if (self->memo == NULL) {
3447 self->memo = PyMemoTable_New();
3448 if (self->memo == NULL)
3449 return -1;
3450 }
3451 self->output_len = 0;
3452 if (self->output_buffer == NULL) {
3453 self->max_output_len = WRITE_BUF_SIZE;
3454 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3455 self->max_output_len);
3456 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003457 return -1;
3458 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003459
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003460 self->arg = NULL;
3461 self->fast = 0;
3462 self->fast_nesting = 0;
3463 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003464 self->pers_func = NULL;
3465 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3466 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3467 "persistent_id");
3468 if (self->pers_func == NULL)
3469 return -1;
3470 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003471 return 0;
3472}
3473
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003474/* Define a proxy object for the Pickler's internal memo object. This is to
3475 * avoid breaking code like:
3476 * pickler.memo.clear()
3477 * and
3478 * pickler.memo = saved_memo
3479 * Is this a good idea? Not really, but we don't want to break code that uses
3480 * it. Note that we don't implement the entire mapping API here. This is
3481 * intentional, as these should be treated as black-box implementation details.
3482 */
3483
3484typedef struct {
3485 PyObject_HEAD
3486 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3487} PicklerMemoProxyObject;
3488
3489PyDoc_STRVAR(pmp_clear_doc,
3490"memo.clear() -> None. Remove all items from memo.");
3491
3492static PyObject *
3493pmp_clear(PicklerMemoProxyObject *self)
3494{
3495 if (self->pickler->memo)
3496 PyMemoTable_Clear(self->pickler->memo);
3497 Py_RETURN_NONE;
3498}
3499
3500PyDoc_STRVAR(pmp_copy_doc,
3501"memo.copy() -> new_memo. Copy the memo to a new object.");
3502
3503static PyObject *
3504pmp_copy(PicklerMemoProxyObject *self)
3505{
3506 Py_ssize_t i;
3507 PyMemoTable *memo;
3508 PyObject *new_memo = PyDict_New();
3509 if (new_memo == NULL)
3510 return NULL;
3511
3512 memo = self->pickler->memo;
3513 for (i = 0; i < memo->mt_allocated; ++i) {
3514 PyMemoEntry entry = memo->mt_table[i];
3515 if (entry.me_key != NULL) {
3516 int status;
3517 PyObject *key, *value;
3518
3519 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003520 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003521
3522 if (key == NULL || value == NULL) {
3523 Py_XDECREF(key);
3524 Py_XDECREF(value);
3525 goto error;
3526 }
3527 status = PyDict_SetItem(new_memo, key, value);
3528 Py_DECREF(key);
3529 Py_DECREF(value);
3530 if (status < 0)
3531 goto error;
3532 }
3533 }
3534 return new_memo;
3535
3536 error:
3537 Py_XDECREF(new_memo);
3538 return NULL;
3539}
3540
3541PyDoc_STRVAR(pmp_reduce_doc,
3542"memo.__reduce__(). Pickling support.");
3543
3544static PyObject *
3545pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3546{
3547 PyObject *reduce_value, *dict_args;
3548 PyObject *contents = pmp_copy(self);
3549 if (contents == NULL)
3550 return NULL;
3551
3552 reduce_value = PyTuple_New(2);
3553 if (reduce_value == NULL) {
3554 Py_DECREF(contents);
3555 return NULL;
3556 }
3557 dict_args = PyTuple_New(1);
3558 if (dict_args == NULL) {
3559 Py_DECREF(contents);
3560 Py_DECREF(reduce_value);
3561 return NULL;
3562 }
3563 PyTuple_SET_ITEM(dict_args, 0, contents);
3564 Py_INCREF((PyObject *)&PyDict_Type);
3565 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3566 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3567 return reduce_value;
3568}
3569
3570static PyMethodDef picklerproxy_methods[] = {
3571 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3572 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3573 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3574 {NULL, NULL} /* sentinel */
3575};
3576
3577static void
3578PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3579{
3580 PyObject_GC_UnTrack(self);
3581 Py_XDECREF(self->pickler);
3582 PyObject_GC_Del((PyObject *)self);
3583}
3584
3585static int
3586PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3587 visitproc visit, void *arg)
3588{
3589 Py_VISIT(self->pickler);
3590 return 0;
3591}
3592
3593static int
3594PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3595{
3596 Py_CLEAR(self->pickler);
3597 return 0;
3598}
3599
3600static PyTypeObject PicklerMemoProxyType = {
3601 PyVarObject_HEAD_INIT(NULL, 0)
3602 "_pickle.PicklerMemoProxy", /*tp_name*/
3603 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3604 0,
3605 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3606 0, /* tp_print */
3607 0, /* tp_getattr */
3608 0, /* tp_setattr */
3609 0, /* tp_compare */
3610 0, /* tp_repr */
3611 0, /* tp_as_number */
3612 0, /* tp_as_sequence */
3613 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003614 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003615 0, /* tp_call */
3616 0, /* tp_str */
3617 PyObject_GenericGetAttr, /* tp_getattro */
3618 PyObject_GenericSetAttr, /* tp_setattro */
3619 0, /* tp_as_buffer */
3620 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3621 0, /* tp_doc */
3622 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3623 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3624 0, /* tp_richcompare */
3625 0, /* tp_weaklistoffset */
3626 0, /* tp_iter */
3627 0, /* tp_iternext */
3628 picklerproxy_methods, /* tp_methods */
3629};
3630
3631static PyObject *
3632PicklerMemoProxy_New(PicklerObject *pickler)
3633{
3634 PicklerMemoProxyObject *self;
3635
3636 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3637 if (self == NULL)
3638 return NULL;
3639 Py_INCREF(pickler);
3640 self->pickler = pickler;
3641 PyObject_GC_Track(self);
3642 return (PyObject *)self;
3643}
3644
3645/*****************************************************************************/
3646
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003647static PyObject *
3648Pickler_get_memo(PicklerObject *self)
3649{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003650 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003651}
3652
3653static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003654Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003655{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003656 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003657
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003658 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003659 PyErr_SetString(PyExc_TypeError,
3660 "attribute deletion is not supported");
3661 return -1;
3662 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003663
3664 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3665 PicklerObject *pickler =
3666 ((PicklerMemoProxyObject *)obj)->pickler;
3667
3668 new_memo = PyMemoTable_Copy(pickler->memo);
3669 if (new_memo == NULL)
3670 return -1;
3671 }
3672 else if (PyDict_Check(obj)) {
3673 Py_ssize_t i = 0;
3674 PyObject *key, *value;
3675
3676 new_memo = PyMemoTable_New();
3677 if (new_memo == NULL)
3678 return -1;
3679
3680 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003681 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003682 PyObject *memo_obj;
3683
3684 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3685 PyErr_SetString(PyExc_TypeError,
3686 "'memo' values must be 2-item tuples");
3687 goto error;
3688 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003689 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003690 if (memo_id == -1 && PyErr_Occurred())
3691 goto error;
3692 memo_obj = PyTuple_GET_ITEM(value, 1);
3693 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3694 goto error;
3695 }
3696 }
3697 else {
3698 PyErr_Format(PyExc_TypeError,
3699 "'memo' attribute must be an PicklerMemoProxy object"
3700 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003701 return -1;
3702 }
3703
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003704 PyMemoTable_Del(self->memo);
3705 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003706
3707 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003708
3709 error:
3710 if (new_memo)
3711 PyMemoTable_Del(new_memo);
3712 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003713}
3714
3715static PyObject *
3716Pickler_get_persid(PicklerObject *self)
3717{
3718 if (self->pers_func == NULL)
3719 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3720 else
3721 Py_INCREF(self->pers_func);
3722 return self->pers_func;
3723}
3724
3725static int
3726Pickler_set_persid(PicklerObject *self, PyObject *value)
3727{
3728 PyObject *tmp;
3729
3730 if (value == NULL) {
3731 PyErr_SetString(PyExc_TypeError,
3732 "attribute deletion is not supported");
3733 return -1;
3734 }
3735 if (!PyCallable_Check(value)) {
3736 PyErr_SetString(PyExc_TypeError,
3737 "persistent_id must be a callable taking one argument");
3738 return -1;
3739 }
3740
3741 tmp = self->pers_func;
3742 Py_INCREF(value);
3743 self->pers_func = value;
3744 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3745
3746 return 0;
3747}
3748
3749static PyMemberDef Pickler_members[] = {
3750 {"bin", T_INT, offsetof(PicklerObject, bin)},
3751 {"fast", T_INT, offsetof(PicklerObject, fast)},
3752 {NULL}
3753};
3754
3755static PyGetSetDef Pickler_getsets[] = {
3756 {"memo", (getter)Pickler_get_memo,
3757 (setter)Pickler_set_memo},
3758 {"persistent_id", (getter)Pickler_get_persid,
3759 (setter)Pickler_set_persid},
3760 {NULL}
3761};
3762
3763static PyTypeObject Pickler_Type = {
3764 PyVarObject_HEAD_INIT(NULL, 0)
3765 "_pickle.Pickler" , /*tp_name*/
3766 sizeof(PicklerObject), /*tp_basicsize*/
3767 0, /*tp_itemsize*/
3768 (destructor)Pickler_dealloc, /*tp_dealloc*/
3769 0, /*tp_print*/
3770 0, /*tp_getattr*/
3771 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00003772 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003773 0, /*tp_repr*/
3774 0, /*tp_as_number*/
3775 0, /*tp_as_sequence*/
3776 0, /*tp_as_mapping*/
3777 0, /*tp_hash*/
3778 0, /*tp_call*/
3779 0, /*tp_str*/
3780 0, /*tp_getattro*/
3781 0, /*tp_setattro*/
3782 0, /*tp_as_buffer*/
3783 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3784 Pickler_doc, /*tp_doc*/
3785 (traverseproc)Pickler_traverse, /*tp_traverse*/
3786 (inquiry)Pickler_clear, /*tp_clear*/
3787 0, /*tp_richcompare*/
3788 0, /*tp_weaklistoffset*/
3789 0, /*tp_iter*/
3790 0, /*tp_iternext*/
3791 Pickler_methods, /*tp_methods*/
3792 Pickler_members, /*tp_members*/
3793 Pickler_getsets, /*tp_getset*/
3794 0, /*tp_base*/
3795 0, /*tp_dict*/
3796 0, /*tp_descr_get*/
3797 0, /*tp_descr_set*/
3798 0, /*tp_dictoffset*/
3799 (initproc)Pickler_init, /*tp_init*/
3800 PyType_GenericAlloc, /*tp_alloc*/
3801 PyType_GenericNew, /*tp_new*/
3802 PyObject_GC_Del, /*tp_free*/
3803 0, /*tp_is_gc*/
3804};
3805
3806/* Temporary helper for calling self.find_class().
3807
3808 XXX: It would be nice to able to avoid Python function call overhead, by
3809 using directly the C version of find_class(), when find_class() is not
3810 overridden by a subclass. Although, this could become rather hackish. A
3811 simpler optimization would be to call the C function when self is not a
3812 subclass instance. */
3813static PyObject *
3814find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3815{
3816 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3817 module_name, global_name);
3818}
3819
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003820static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003821marker(UnpicklerObject *self)
3822{
3823 if (self->num_marks < 1) {
3824 PyErr_SetString(UnpicklingError, "could not find MARK");
3825 return -1;
3826 }
3827
3828 return self->marks[--self->num_marks];
3829}
3830
3831static int
3832load_none(UnpicklerObject *self)
3833{
3834 PDATA_APPEND(self->stack, Py_None, -1);
3835 return 0;
3836}
3837
3838static int
3839bad_readline(void)
3840{
3841 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3842 return -1;
3843}
3844
3845static int
3846load_int(UnpicklerObject *self)
3847{
3848 PyObject *value;
3849 char *endptr, *s;
3850 Py_ssize_t len;
3851 long x;
3852
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003853 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003854 return -1;
3855 if (len < 2)
3856 return bad_readline();
3857
3858 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003859 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3860 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003861 x = strtol(s, &endptr, 0);
3862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003863 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003864 /* Hm, maybe we've got something long. Let's try reading
3865 * it as a Python long object. */
3866 errno = 0;
3867 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003868 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003869 if (value == NULL) {
3870 PyErr_SetString(PyExc_ValueError,
3871 "could not convert string to int");
3872 return -1;
3873 }
3874 }
3875 else {
3876 if (len == 3 && (x == 0 || x == 1)) {
3877 if ((value = PyBool_FromLong(x)) == NULL)
3878 return -1;
3879 }
3880 else {
3881 if ((value = PyLong_FromLong(x)) == NULL)
3882 return -1;
3883 }
3884 }
3885
3886 PDATA_PUSH(self->stack, value, -1);
3887 return 0;
3888}
3889
3890static int
3891load_bool(UnpicklerObject *self, PyObject *boolean)
3892{
3893 assert(boolean == Py_True || boolean == Py_False);
3894 PDATA_APPEND(self->stack, boolean, -1);
3895 return 0;
3896}
3897
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003898/* s contains x bytes of an unsigned little-endian integer. Return its value
3899 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3900 */
3901static Py_ssize_t
3902calc_binsize(char *bytes, int size)
3903{
3904 unsigned char *s = (unsigned char *)bytes;
3905 size_t x = 0;
3906
3907 assert(size == 4);
3908
3909 x = (size_t) s[0];
3910 x |= (size_t) s[1] << 8;
3911 x |= (size_t) s[2] << 16;
3912 x |= (size_t) s[3] << 24;
3913
3914 if (x > PY_SSIZE_T_MAX)
3915 return -1;
3916 else
3917 return (Py_ssize_t) x;
3918}
3919
/* 'bytes' points to 'size' bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no shift can overflow
 * a signed type, which would be undefined behaviour when long is 32 bits
 * wide and the top byte has its high bit set.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  Where long is
     * exactly 32 bits the mask below is 0 and this is a no-op.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        x |= ~0xffffffffUL;
    }

    /* Reinterpret the two's-complement bit pattern as a signed value. */
    return (long)x;
}
3946
3947static int
3948load_binintx(UnpicklerObject *self, char *s, int size)
3949{
3950 PyObject *value;
3951 long x;
3952
3953 x = calc_binint(s, size);
3954
3955 if ((value = PyLong_FromLong(x)) == NULL)
3956 return -1;
3957
3958 PDATA_PUSH(self->stack, value, -1);
3959 return 0;
3960}
3961
3962static int
3963load_binint(UnpicklerObject *self)
3964{
3965 char *s;
3966
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003967 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003968 return -1;
3969
3970 return load_binintx(self, s, 4);
3971}
3972
3973static int
3974load_binint1(UnpicklerObject *self)
3975{
3976 char *s;
3977
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003978 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003979 return -1;
3980
3981 return load_binintx(self, s, 1);
3982}
3983
3984static int
3985load_binint2(UnpicklerObject *self)
3986{
3987 char *s;
3988
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003989 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003990 return -1;
3991
3992 return load_binintx(self, s, 2);
3993}
3994
3995static int
3996load_long(UnpicklerObject *self)
3997{
3998 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003999 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004000 Py_ssize_t len;
4001
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004002 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004003 return -1;
4004 if (len < 2)
4005 return bad_readline();
4006
Mark Dickinson8dd05142009-01-20 20:43:58 +00004007 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
4008 the 'L' before calling PyLong_FromString. In order to maintain
4009 compatibility with Python 3.0.0, we don't actually *require*
4010 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004011 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00004012 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00004013 /* XXX: Should the base argument explicitly set to 10? */
4014 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00004015 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004016 return -1;
4017
4018 PDATA_PUSH(self->stack, value, -1);
4019 return 0;
4020}
4021
4022/* 'size' bytes contain the # of bytes of little-endian 256's-complement
4023 * data following.
4024 */
4025static int
4026load_counted_long(UnpicklerObject *self, int size)
4027{
4028 PyObject *value;
4029 char *nbytes;
4030 char *pdata;
4031
4032 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004033 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004034 return -1;
4035
4036 size = calc_binint(nbytes, size);
4037 if (size < 0) {
4038 /* Corrupt or hostile pickle -- we never write one like this */
4039 PyErr_SetString(UnpicklingError,
4040 "LONG pickle has negative byte count");
4041 return -1;
4042 }
4043
4044 if (size == 0)
4045 value = PyLong_FromLong(0L);
4046 else {
4047 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004048 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004049 return -1;
4050 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4051 1 /* little endian */ , 1 /* signed */ );
4052 }
4053 if (value == NULL)
4054 return -1;
4055 PDATA_PUSH(self->stack, value, -1);
4056 return 0;
4057}
4058
4059static int
4060load_float(UnpicklerObject *self)
4061{
4062 PyObject *value;
4063 char *endptr, *s;
4064 Py_ssize_t len;
4065 double d;
4066
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004067 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004068 return -1;
4069 if (len < 2)
4070 return bad_readline();
4071
4072 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004073 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4074 if (d == -1.0 && PyErr_Occurred())
4075 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004076 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004077 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4078 return -1;
4079 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004080 value = PyFloat_FromDouble(d);
4081 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004082 return -1;
4083
4084 PDATA_PUSH(self->stack, value, -1);
4085 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004086}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004087
4088static int
4089load_binfloat(UnpicklerObject *self)
4090{
4091 PyObject *value;
4092 double x;
4093 char *s;
4094
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004095 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004096 return -1;
4097
4098 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4099 if (x == -1.0 && PyErr_Occurred())
4100 return -1;
4101
4102 if ((value = PyFloat_FromDouble(x)) == NULL)
4103 return -1;
4104
4105 PDATA_PUSH(self->stack, value, -1);
4106 return 0;
4107}
4108
4109static int
4110load_string(UnpicklerObject *self)
4111{
4112 PyObject *bytes;
4113 PyObject *str = NULL;
4114 Py_ssize_t len;
4115 char *s, *p;
4116
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004117 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004118 return -1;
4119 if (len < 3)
4120 return bad_readline();
4121 if ((s = strdup(s)) == NULL) {
4122 PyErr_NoMemory();
4123 return -1;
4124 }
4125
4126 /* Strip outermost quotes */
4127 while (s[len - 1] <= ' ')
4128 len--;
4129 if (s[0] == '"' && s[len - 1] == '"') {
4130 s[len - 1] = '\0';
4131 p = s + 1;
4132 len -= 2;
4133 }
4134 else if (s[0] == '\'' && s[len - 1] == '\'') {
4135 s[len - 1] = '\0';
4136 p = s + 1;
4137 len -= 2;
4138 }
4139 else {
4140 free(s);
4141 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4142 return -1;
4143 }
4144
4145 /* Use the PyBytes API to decode the string, since that is what is used
4146 to encode, and then coerce the result to Unicode. */
4147 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4148 free(s);
4149 if (bytes == NULL)
4150 return -1;
4151 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4152 Py_DECREF(bytes);
4153 if (str == NULL)
4154 return -1;
4155
4156 PDATA_PUSH(self->stack, str, -1);
4157 return 0;
4158}
4159
4160static int
4161load_binbytes(UnpicklerObject *self)
4162{
4163 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004164 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004165 char *s;
4166
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004167 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004168 return -1;
4169
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004170 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004171 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004172 PyErr_Format(PyExc_OverflowError,
4173 "BINBYTES exceeds system's maximum size of %zd bytes",
4174 PY_SSIZE_T_MAX
4175 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004176 return -1;
4177 }
4178
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004179 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004180 return -1;
4181 bytes = PyBytes_FromStringAndSize(s, x);
4182 if (bytes == NULL)
4183 return -1;
4184
4185 PDATA_PUSH(self->stack, bytes, -1);
4186 return 0;
4187}
4188
4189static int
4190load_short_binbytes(UnpicklerObject *self)
4191{
4192 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004193 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004194 char *s;
4195
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004196 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004197 return -1;
4198
4199 x = (unsigned char)s[0];
4200
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004201 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004202 return -1;
4203
4204 bytes = PyBytes_FromStringAndSize(s, x);
4205 if (bytes == NULL)
4206 return -1;
4207
4208 PDATA_PUSH(self->stack, bytes, -1);
4209 return 0;
4210}
4211
4212static int
4213load_binstring(UnpicklerObject *self)
4214{
4215 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004216 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004217 char *s;
4218
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004219 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004220 return -1;
4221
4222 x = calc_binint(s, 4);
4223 if (x < 0) {
4224 PyErr_SetString(UnpicklingError,
4225 "BINSTRING pickle has negative byte count");
4226 return -1;
4227 }
4228
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004229 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004230 return -1;
4231
4232 /* Convert Python 2.x strings to unicode. */
4233 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4234 if (str == NULL)
4235 return -1;
4236
4237 PDATA_PUSH(self->stack, str, -1);
4238 return 0;
4239}
4240
4241static int
4242load_short_binstring(UnpicklerObject *self)
4243{
4244 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004245 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004246 char *s;
4247
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004248 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004249 return -1;
4250
4251 x = (unsigned char)s[0];
4252
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004253 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004254 return -1;
4255
4256 /* Convert Python 2.x strings to unicode. */
4257 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4258 if (str == NULL)
4259 return -1;
4260
4261 PDATA_PUSH(self->stack, str, -1);
4262 return 0;
4263}
4264
4265static int
4266load_unicode(UnpicklerObject *self)
4267{
4268 PyObject *str;
4269 Py_ssize_t len;
4270 char *s;
4271
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004272 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004273 return -1;
4274 if (len < 1)
4275 return bad_readline();
4276
4277 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4278 if (str == NULL)
4279 return -1;
4280
4281 PDATA_PUSH(self->stack, str, -1);
4282 return 0;
4283}
4284
4285static int
4286load_binunicode(UnpicklerObject *self)
4287{
4288 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004289 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004290 char *s;
4291
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004292 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004293 return -1;
4294
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004295 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004296 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004297 PyErr_Format(PyExc_OverflowError,
4298 "BINUNICODE exceeds system's maximum size of %zd bytes",
4299 PY_SSIZE_T_MAX
4300 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004301 return -1;
4302 }
4303
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004304
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004305 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004306 return -1;
4307
Victor Stinner485fb562010-04-13 11:07:24 +00004308 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004309 if (str == NULL)
4310 return -1;
4311
4312 PDATA_PUSH(self->stack, str, -1);
4313 return 0;
4314}
4315
4316static int
4317load_tuple(UnpicklerObject *self)
4318{
4319 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004320 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004321
4322 if ((i = marker(self)) < 0)
4323 return -1;
4324
4325 tuple = Pdata_poptuple(self->stack, i);
4326 if (tuple == NULL)
4327 return -1;
4328 PDATA_PUSH(self->stack, tuple, -1);
4329 return 0;
4330}
4331
4332static int
4333load_counted_tuple(UnpicklerObject *self, int len)
4334{
4335 PyObject *tuple;
4336
4337 tuple = PyTuple_New(len);
4338 if (tuple == NULL)
4339 return -1;
4340
4341 while (--len >= 0) {
4342 PyObject *item;
4343
4344 PDATA_POP(self->stack, item);
4345 if (item == NULL)
4346 return -1;
4347 PyTuple_SET_ITEM(tuple, len, item);
4348 }
4349 PDATA_PUSH(self->stack, tuple, -1);
4350 return 0;
4351}
4352
4353static int
4354load_empty_list(UnpicklerObject *self)
4355{
4356 PyObject *list;
4357
4358 if ((list = PyList_New(0)) == NULL)
4359 return -1;
4360 PDATA_PUSH(self->stack, list, -1);
4361 return 0;
4362}
4363
4364static int
4365load_empty_dict(UnpicklerObject *self)
4366{
4367 PyObject *dict;
4368
4369 if ((dict = PyDict_New()) == NULL)
4370 return -1;
4371 PDATA_PUSH(self->stack, dict, -1);
4372 return 0;
4373}
4374
4375static int
4376load_list(UnpicklerObject *self)
4377{
4378 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004379 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004380
4381 if ((i = marker(self)) < 0)
4382 return -1;
4383
4384 list = Pdata_poplist(self->stack, i);
4385 if (list == NULL)
4386 return -1;
4387 PDATA_PUSH(self->stack, list, -1);
4388 return 0;
4389}
4390
4391static int
4392load_dict(UnpicklerObject *self)
4393{
4394 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004395 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004396
4397 if ((i = marker(self)) < 0)
4398 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004399 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004400
4401 if ((dict = PyDict_New()) == NULL)
4402 return -1;
4403
4404 for (k = i + 1; k < j; k += 2) {
4405 key = self->stack->data[k - 1];
4406 value = self->stack->data[k];
4407 if (PyDict_SetItem(dict, key, value) < 0) {
4408 Py_DECREF(dict);
4409 return -1;
4410 }
4411 }
4412 Pdata_clear(self->stack, i);
4413 PDATA_PUSH(self->stack, dict, -1);
4414 return 0;
4415}
4416
4417static PyObject *
4418instantiate(PyObject *cls, PyObject *args)
4419{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004420 PyObject *result = NULL;
4421 /* Caller must assure args are a tuple. Normally, args come from
4422 Pdata_poptuple which packs objects from the top of the stack
4423 into a newly created tuple. */
4424 assert(PyTuple_Check(args));
4425 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4426 PyObject_HasAttrString(cls, "__getinitargs__")) {
4427 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004428 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004429 else {
4430 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4431 }
4432 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004433}
4434
4435static int
4436load_obj(UnpicklerObject *self)
4437{
4438 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004439 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004440
4441 if ((i = marker(self)) < 0)
4442 return -1;
4443
4444 args = Pdata_poptuple(self->stack, i + 1);
4445 if (args == NULL)
4446 return -1;
4447
4448 PDATA_POP(self->stack, cls);
4449 if (cls) {
4450 obj = instantiate(cls, args);
4451 Py_DECREF(cls);
4452 }
4453 Py_DECREF(args);
4454 if (obj == NULL)
4455 return -1;
4456
4457 PDATA_PUSH(self->stack, obj, -1);
4458 return 0;
4459}
4460
4461static int
4462load_inst(UnpicklerObject *self)
4463{
4464 PyObject *cls = NULL;
4465 PyObject *args = NULL;
4466 PyObject *obj = NULL;
4467 PyObject *module_name;
4468 PyObject *class_name;
4469 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004470 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004471 char *s;
4472
4473 if ((i = marker(self)) < 0)
4474 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004475 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004476 return -1;
4477 if (len < 2)
4478 return bad_readline();
4479
4480 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4481 identifiers are permitted in Python 3.0, since the INST opcode is only
4482 supported by older protocols on Python 2.x. */
4483 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4484 if (module_name == NULL)
4485 return -1;
4486
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004487 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004488 if (len < 2)
4489 return bad_readline();
4490 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004491 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004492 cls = find_class(self, module_name, class_name);
4493 Py_DECREF(class_name);
4494 }
4495 }
4496 Py_DECREF(module_name);
4497
4498 if (cls == NULL)
4499 return -1;
4500
4501 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4502 obj = instantiate(cls, args);
4503 Py_DECREF(args);
4504 }
4505 Py_DECREF(cls);
4506
4507 if (obj == NULL)
4508 return -1;
4509
4510 PDATA_PUSH(self->stack, obj, -1);
4511 return 0;
4512}
4513
4514static int
4515load_newobj(UnpicklerObject *self)
4516{
4517 PyObject *args = NULL;
4518 PyObject *clsraw = NULL;
4519 PyTypeObject *cls; /* clsraw cast to its true type */
4520 PyObject *obj;
4521
4522 /* Stack is ... cls argtuple, and we want to call
4523 * cls.__new__(cls, *argtuple).
4524 */
4525 PDATA_POP(self->stack, args);
4526 if (args == NULL)
4527 goto error;
4528 if (!PyTuple_Check(args)) {
4529 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4530 goto error;
4531 }
4532
4533 PDATA_POP(self->stack, clsraw);
4534 cls = (PyTypeObject *)clsraw;
4535 if (cls == NULL)
4536 goto error;
4537 if (!PyType_Check(cls)) {
4538 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4539 "isn't a type object");
4540 goto error;
4541 }
4542 if (cls->tp_new == NULL) {
4543 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4544 "has NULL tp_new");
4545 goto error;
4546 }
4547
4548 /* Call __new__. */
4549 obj = cls->tp_new(cls, args, NULL);
4550 if (obj == NULL)
4551 goto error;
4552
4553 Py_DECREF(args);
4554 Py_DECREF(clsraw);
4555 PDATA_PUSH(self->stack, obj, -1);
4556 return 0;
4557
4558 error:
4559 Py_XDECREF(args);
4560 Py_XDECREF(clsraw);
4561 return -1;
4562}
4563
4564static int
4565load_global(UnpicklerObject *self)
4566{
4567 PyObject *global = NULL;
4568 PyObject *module_name;
4569 PyObject *global_name;
4570 Py_ssize_t len;
4571 char *s;
4572
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004573 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004574 return -1;
4575 if (len < 2)
4576 return bad_readline();
4577 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4578 if (!module_name)
4579 return -1;
4580
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004581 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004582 if (len < 2) {
4583 Py_DECREF(module_name);
4584 return bad_readline();
4585 }
4586 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4587 if (global_name) {
4588 global = find_class(self, module_name, global_name);
4589 Py_DECREF(global_name);
4590 }
4591 }
4592 Py_DECREF(module_name);
4593
4594 if (global == NULL)
4595 return -1;
4596 PDATA_PUSH(self->stack, global, -1);
4597 return 0;
4598}
4599
4600static int
4601load_persid(UnpicklerObject *self)
4602{
4603 PyObject *pid;
4604 Py_ssize_t len;
4605 char *s;
4606
4607 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004608 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004609 return -1;
4610 if (len < 2)
4611 return bad_readline();
4612
4613 pid = PyBytes_FromStringAndSize(s, len - 1);
4614 if (pid == NULL)
4615 return -1;
4616
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004617 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004618 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004619 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004620 if (pid == NULL)
4621 return -1;
4622
4623 PDATA_PUSH(self->stack, pid, -1);
4624 return 0;
4625 }
4626 else {
4627 PyErr_SetString(UnpicklingError,
4628 "A load persistent id instruction was encountered,\n"
4629 "but no persistent_load function was specified.");
4630 return -1;
4631 }
4632}
4633
4634static int
4635load_binpersid(UnpicklerObject *self)
4636{
4637 PyObject *pid;
4638
4639 if (self->pers_func) {
4640 PDATA_POP(self->stack, pid);
4641 if (pid == NULL)
4642 return -1;
4643
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004644 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004645 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004646 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004647 if (pid == NULL)
4648 return -1;
4649
4650 PDATA_PUSH(self->stack, pid, -1);
4651 return 0;
4652 }
4653 else {
4654 PyErr_SetString(UnpicklingError,
4655 "A load persistent id instruction was encountered,\n"
4656 "but no persistent_load function was specified.");
4657 return -1;
4658 }
4659}
4660
4661static int
4662load_pop(UnpicklerObject *self)
4663{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004664 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004665
4666 /* Note that we split the (pickle.py) stack into two stacks,
4667 * an object stack and a mark stack. We have to be clever and
4668 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004669 * mark stack first, and only signalling a stack underflow if
4670 * the object stack is empty and the mark stack doesn't match
4671 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004672 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004673 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004674 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004675 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004676 len--;
4677 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004678 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004679 } else {
4680 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004681 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004682 return 0;
4683}
4684
4685static int
4686load_pop_mark(UnpicklerObject *self)
4687{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004688 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004689
4690 if ((i = marker(self)) < 0)
4691 return -1;
4692
4693 Pdata_clear(self->stack, i);
4694
4695 return 0;
4696}
4697
4698static int
4699load_dup(UnpicklerObject *self)
4700{
4701 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004702 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004703
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004704 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004705 return stack_underflow();
4706 last = self->stack->data[len - 1];
4707 PDATA_APPEND(self->stack, last, -1);
4708 return 0;
4709}
4710
4711static int
4712load_get(UnpicklerObject *self)
4713{
4714 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004715 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004716 Py_ssize_t len;
4717 char *s;
4718
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004719 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004720 return -1;
4721 if (len < 2)
4722 return bad_readline();
4723
4724 key = PyLong_FromString(s, NULL, 10);
4725 if (key == NULL)
4726 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004727 idx = PyLong_AsSsize_t(key);
4728 if (idx == -1 && PyErr_Occurred()) {
4729 Py_DECREF(key);
4730 return -1;
4731 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004732
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004733 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004734 if (value == NULL) {
4735 if (!PyErr_Occurred())
4736 PyErr_SetObject(PyExc_KeyError, key);
4737 Py_DECREF(key);
4738 return -1;
4739 }
4740 Py_DECREF(key);
4741
4742 PDATA_APPEND(self->stack, value, -1);
4743 return 0;
4744}
4745
4746static int
4747load_binget(UnpicklerObject *self)
4748{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004749 PyObject *value;
4750 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004751 char *s;
4752
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004753 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004754 return -1;
4755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004756 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004757
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004758 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004760 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004761 if (!PyErr_Occurred())
4762 PyErr_SetObject(PyExc_KeyError, key);
4763 Py_DECREF(key);
4764 return -1;
4765 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004766
4767 PDATA_APPEND(self->stack, value, -1);
4768 return 0;
4769}
4770
4771static int
4772load_long_binget(UnpicklerObject *self)
4773{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004774 PyObject *value;
4775 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004776 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004777
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004778 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779 return -1;
4780
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004781 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004782
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004783 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004784 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004785 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004786 if (!PyErr_Occurred())
4787 PyErr_SetObject(PyExc_KeyError, key);
4788 Py_DECREF(key);
4789 return -1;
4790 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004791
4792 PDATA_APPEND(self->stack, value, -1);
4793 return 0;
4794}
4795
4796/* Push an object from the extension registry (EXT[124]). nbytes is
4797 * the number of bytes following the opcode, holding the index (code) value.
4798 */
4799static int
4800load_extension(UnpicklerObject *self, int nbytes)
4801{
4802 char *codebytes; /* the nbytes bytes after the opcode */
4803 long code; /* calc_binint returns long */
4804 PyObject *py_code; /* code as a Python int */
4805 PyObject *obj; /* the object to push */
4806 PyObject *pair; /* (module_name, class_name) */
4807 PyObject *module_name, *class_name;
4808
4809 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004810 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004811 return -1;
4812 code = calc_binint(codebytes, nbytes);
4813 if (code <= 0) { /* note that 0 is forbidden */
4814 /* Corrupt or hostile pickle. */
4815 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4816 return -1;
4817 }
4818
4819 /* Look for the code in the cache. */
4820 py_code = PyLong_FromLong(code);
4821 if (py_code == NULL)
4822 return -1;
4823 obj = PyDict_GetItem(extension_cache, py_code);
4824 if (obj != NULL) {
4825 /* Bingo. */
4826 Py_DECREF(py_code);
4827 PDATA_APPEND(self->stack, obj, -1);
4828 return 0;
4829 }
4830
4831 /* Look up the (module_name, class_name) pair. */
4832 pair = PyDict_GetItem(inverted_registry, py_code);
4833 if (pair == NULL) {
4834 Py_DECREF(py_code);
4835 PyErr_Format(PyExc_ValueError, "unregistered extension "
4836 "code %ld", code);
4837 return -1;
4838 }
4839 /* Since the extension registry is manipulable via Python code,
4840 * confirm that pair is really a 2-tuple of strings.
4841 */
4842 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4843 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4844 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4845 Py_DECREF(py_code);
4846 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4847 "isn't a 2-tuple of strings", code);
4848 return -1;
4849 }
4850 /* Load the object. */
4851 obj = find_class(self, module_name, class_name);
4852 if (obj == NULL) {
4853 Py_DECREF(py_code);
4854 return -1;
4855 }
4856 /* Cache code -> obj. */
4857 code = PyDict_SetItem(extension_cache, py_code, obj);
4858 Py_DECREF(py_code);
4859 if (code < 0) {
4860 Py_DECREF(obj);
4861 return -1;
4862 }
4863 PDATA_PUSH(self->stack, obj, -1);
4864 return 0;
4865}
4866
4867static int
4868load_put(UnpicklerObject *self)
4869{
4870 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004871 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872 Py_ssize_t len;
4873 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004875 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004876 return -1;
4877 if (len < 2)
4878 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004879 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004880 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004881 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004882
4883 key = PyLong_FromString(s, NULL, 10);
4884 if (key == NULL)
4885 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004886 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004887 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004888 if (idx < 0) {
4889 if (!PyErr_Occurred())
4890 PyErr_SetString(PyExc_ValueError,
4891 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004892 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004893 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004894
4895 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004896}
4897
4898static int
4899load_binput(UnpicklerObject *self)
4900{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004901 PyObject *value;
4902 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004903 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004904
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004905 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004906 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004907
4908 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004909 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004910 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004911
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004912 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004913
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004914 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004915}
4916
4917static int
4918load_long_binput(UnpicklerObject *self)
4919{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004920 PyObject *value;
4921 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004922 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004923
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004924 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004925 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004926
4927 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004928 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004929 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004930
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004931 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004932 if (idx < 0) {
4933 PyErr_SetString(PyExc_ValueError,
4934 "negative LONG_BINPUT argument");
4935 return -1;
4936 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004937
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004938 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004939}
4940
4941static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004942do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004943{
4944 PyObject *value;
4945 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004946 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004947
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004948 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004949 if (x > len || x <= 0)
4950 return stack_underflow();
4951 if (len == x) /* nothing to do */
4952 return 0;
4953
4954 list = self->stack->data[x - 1];
4955
4956 if (PyList_Check(list)) {
4957 PyObject *slice;
4958 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004959 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004960
4961 slice = Pdata_poplist(self->stack, x);
4962 if (!slice)
4963 return -1;
4964 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004965 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004966 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004967 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004968 }
4969 else {
4970 PyObject *append_func;
4971
4972 append_func = PyObject_GetAttrString(list, "append");
4973 if (append_func == NULL)
4974 return -1;
4975 for (i = x; i < len; i++) {
4976 PyObject *result;
4977
4978 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004979 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004980 if (result == NULL) {
4981 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004982 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004983 return -1;
4984 }
4985 Py_DECREF(result);
4986 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004987 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004988 }
4989
4990 return 0;
4991}
4992
4993static int
4994load_append(UnpicklerObject *self)
4995{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004996 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004997}
4998
4999static int
5000load_appends(UnpicklerObject *self)
5001{
5002 return do_append(self, marker(self));
5003}
5004
5005static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005006do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005007{
5008 PyObject *value, *key;
5009 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005010 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005011 int status = 0;
5012
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005013 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005014 if (x > len || x <= 0)
5015 return stack_underflow();
5016 if (len == x) /* nothing to do */
5017 return 0;
5018 if ((len - x) % 2 != 0) {
5019 /* Currupt or hostile pickle -- we never write one like this. */
5020 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
5021 return -1;
5022 }
5023
5024 /* Here, dict does not actually need to be a PyDict; it could be anything
5025 that supports the __setitem__ attribute. */
5026 dict = self->stack->data[x - 1];
5027
5028 for (i = x + 1; i < len; i += 2) {
5029 key = self->stack->data[i - 1];
5030 value = self->stack->data[i];
5031 if (PyObject_SetItem(dict, key, value) < 0) {
5032 status = -1;
5033 break;
5034 }
5035 }
5036
5037 Pdata_clear(self->stack, x);
5038 return status;
5039}
5040
5041static int
5042load_setitem(UnpicklerObject *self)
5043{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005044 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005045}
5046
5047static int
5048load_setitems(UnpicklerObject *self)
5049{
5050 return do_setitems(self, marker(self));
5051}
5052
5053static int
5054load_build(UnpicklerObject *self)
5055{
5056 PyObject *state, *inst, *slotstate;
5057 PyObject *setstate;
5058 int status = 0;
5059
5060 /* Stack is ... instance, state. We want to leave instance at
5061 * the stack top, possibly mutated via instance.__setstate__(state).
5062 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005063 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005064 return stack_underflow();
5065
5066 PDATA_POP(self->stack, state);
5067 if (state == NULL)
5068 return -1;
5069
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005070 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005071
5072 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005073 if (setstate == NULL) {
5074 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5075 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005076 else {
5077 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005078 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005079 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005080 }
5081 else {
5082 PyObject *result;
5083
5084 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005085 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005086 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005087 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005088 Py_DECREF(setstate);
5089 if (result == NULL)
5090 return -1;
5091 Py_DECREF(result);
5092 return 0;
5093 }
5094
5095 /* A default __setstate__. First see whether state embeds a
5096 * slot state dict too (a proto 2 addition).
5097 */
5098 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5099 PyObject *tmp = state;
5100
5101 state = PyTuple_GET_ITEM(tmp, 0);
5102 slotstate = PyTuple_GET_ITEM(tmp, 1);
5103 Py_INCREF(state);
5104 Py_INCREF(slotstate);
5105 Py_DECREF(tmp);
5106 }
5107 else
5108 slotstate = NULL;
5109
5110 /* Set inst.__dict__ from the state dict (if any). */
5111 if (state != Py_None) {
5112 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005113 PyObject *d_key, *d_value;
5114 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005115
5116 if (!PyDict_Check(state)) {
5117 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5118 goto error;
5119 }
5120 dict = PyObject_GetAttrString(inst, "__dict__");
5121 if (dict == NULL)
5122 goto error;
5123
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005124 i = 0;
5125 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5126 /* normally the keys for instance attributes are
5127 interned. we should try to do that here. */
5128 Py_INCREF(d_key);
5129 if (PyUnicode_CheckExact(d_key))
5130 PyUnicode_InternInPlace(&d_key);
5131 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5132 Py_DECREF(d_key);
5133 goto error;
5134 }
5135 Py_DECREF(d_key);
5136 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005137 Py_DECREF(dict);
5138 }
5139
5140 /* Also set instance attributes from the slotstate dict (if any). */
5141 if (slotstate != NULL) {
5142 PyObject *d_key, *d_value;
5143 Py_ssize_t i;
5144
5145 if (!PyDict_Check(slotstate)) {
5146 PyErr_SetString(UnpicklingError,
5147 "slot state is not a dictionary");
5148 goto error;
5149 }
5150 i = 0;
5151 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5152 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5153 goto error;
5154 }
5155 }
5156
5157 if (0) {
5158 error:
5159 status = -1;
5160 }
5161
5162 Py_DECREF(state);
5163 Py_XDECREF(slotstate);
5164 return status;
5165}
5166
5167static int
5168load_mark(UnpicklerObject *self)
5169{
5170
5171 /* Note that we split the (pickle.py) stack into two stacks, an
5172 * object stack and a mark stack. Here we push a mark onto the
5173 * mark stack.
5174 */
5175
5176 if ((self->num_marks + 1) >= self->marks_size) {
5177 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005178 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005179
5180 /* Use the size_t type to check for overflow. */
5181 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005182 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005183 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005184 PyErr_NoMemory();
5185 return -1;
5186 }
5187
5188 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005189 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005190 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005191 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5192 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005193 if (marks == NULL) {
5194 PyErr_NoMemory();
5195 return -1;
5196 }
5197 self->marks = marks;
5198 self->marks_size = (Py_ssize_t)alloc;
5199 }
5200
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005201 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005202
5203 return 0;
5204}
5205
5206static int
5207load_reduce(UnpicklerObject *self)
5208{
5209 PyObject *callable = NULL;
5210 PyObject *argtup = NULL;
5211 PyObject *obj = NULL;
5212
5213 PDATA_POP(self->stack, argtup);
5214 if (argtup == NULL)
5215 return -1;
5216 PDATA_POP(self->stack, callable);
5217 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005218 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005219 Py_DECREF(callable);
5220 }
5221 Py_DECREF(argtup);
5222
5223 if (obj == NULL)
5224 return -1;
5225
5226 PDATA_PUSH(self->stack, obj, -1);
5227 return 0;
5228}
5229
5230/* Just raises an error if we don't know the protocol specified. PROTO
5231 * is the first opcode for protocols >= 2.
5232 */
5233static int
5234load_proto(UnpicklerObject *self)
5235{
5236 char *s;
5237 int i;
5238
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005239 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005240 return -1;
5241
5242 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005243 if (i <= HIGHEST_PROTOCOL) {
5244 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005245 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005246 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005247
5248 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5249 return -1;
5250}
5251
5252static PyObject *
5253load(UnpicklerObject *self)
5254{
5255 PyObject *err;
5256 PyObject *value = NULL;
5257 char *s;
5258
5259 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005260 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005261 Pdata_clear(self->stack, 0);
5262
5263 /* Convenient macros for the dispatch while-switch loop just below. */
5264#define OP(opcode, load_func) \
5265 case opcode: if (load_func(self) < 0) break; continue;
5266
5267#define OP_ARG(opcode, load_func, arg) \
5268 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5269
5270 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005271 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005272 break;
5273
5274 switch ((enum opcode)s[0]) {
5275 OP(NONE, load_none)
5276 OP(BININT, load_binint)
5277 OP(BININT1, load_binint1)
5278 OP(BININT2, load_binint2)
5279 OP(INT, load_int)
5280 OP(LONG, load_long)
5281 OP_ARG(LONG1, load_counted_long, 1)
5282 OP_ARG(LONG4, load_counted_long, 4)
5283 OP(FLOAT, load_float)
5284 OP(BINFLOAT, load_binfloat)
5285 OP(BINBYTES, load_binbytes)
5286 OP(SHORT_BINBYTES, load_short_binbytes)
5287 OP(BINSTRING, load_binstring)
5288 OP(SHORT_BINSTRING, load_short_binstring)
5289 OP(STRING, load_string)
5290 OP(UNICODE, load_unicode)
5291 OP(BINUNICODE, load_binunicode)
5292 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5293 OP_ARG(TUPLE1, load_counted_tuple, 1)
5294 OP_ARG(TUPLE2, load_counted_tuple, 2)
5295 OP_ARG(TUPLE3, load_counted_tuple, 3)
5296 OP(TUPLE, load_tuple)
5297 OP(EMPTY_LIST, load_empty_list)
5298 OP(LIST, load_list)
5299 OP(EMPTY_DICT, load_empty_dict)
5300 OP(DICT, load_dict)
5301 OP(OBJ, load_obj)
5302 OP(INST, load_inst)
5303 OP(NEWOBJ, load_newobj)
5304 OP(GLOBAL, load_global)
5305 OP(APPEND, load_append)
5306 OP(APPENDS, load_appends)
5307 OP(BUILD, load_build)
5308 OP(DUP, load_dup)
5309 OP(BINGET, load_binget)
5310 OP(LONG_BINGET, load_long_binget)
5311 OP(GET, load_get)
5312 OP(MARK, load_mark)
5313 OP(BINPUT, load_binput)
5314 OP(LONG_BINPUT, load_long_binput)
5315 OP(PUT, load_put)
5316 OP(POP, load_pop)
5317 OP(POP_MARK, load_pop_mark)
5318 OP(SETITEM, load_setitem)
5319 OP(SETITEMS, load_setitems)
5320 OP(PERSID, load_persid)
5321 OP(BINPERSID, load_binpersid)
5322 OP(REDUCE, load_reduce)
5323 OP(PROTO, load_proto)
5324 OP_ARG(EXT1, load_extension, 1)
5325 OP_ARG(EXT2, load_extension, 2)
5326 OP_ARG(EXT4, load_extension, 4)
5327 OP_ARG(NEWTRUE, load_bool, Py_True)
5328 OP_ARG(NEWFALSE, load_bool, Py_False)
5329
5330 case STOP:
5331 break;
5332
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005333 default:
Benjamin Petersonadde86d2011-09-23 13:41:41 -04005334 if (s[0] == '\0')
5335 PyErr_SetNone(PyExc_EOFError);
5336 else
5337 PyErr_Format(UnpicklingError,
5338 "invalid load key, '%c'.", s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005339 return NULL;
5340 }
5341
5342 break; /* and we are done! */
5343 }
5344
Antoine Pitrou04248a82010-10-12 20:51:21 +00005345 if (_Unpickler_SkipConsumed(self) < 0)
5346 return NULL;
5347
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005348 /* XXX: It is not clear what this is actually for. */
5349 if ((err = PyErr_Occurred())) {
5350 if (err == PyExc_EOFError) {
5351 PyErr_SetNone(PyExc_EOFError);
5352 }
5353 return NULL;
5354 }
5355
5356 PDATA_POP(self->stack, value);
5357 return value;
5358}
5359
5360PyDoc_STRVAR(Unpickler_load_doc,
5361"load() -> object. Load a pickle."
5362"\n"
5363"Read a pickled object representation from the open file object given in\n"
5364"the constructor, and return the reconstituted object hierarchy specified\n"
5365"therein.\n");
5366
5367static PyObject *
5368Unpickler_load(UnpicklerObject *self)
5369{
5370 /* Check whether the Unpickler was initialized correctly. This prevents
5371 segfaulting if a subclass overridden __init__ with a function that does
5372 not call Unpickler.__init__(). Here, we simply ensure that self->read
5373 is not NULL. */
5374 if (self->read == NULL) {
5375 PyErr_Format(UnpicklingError,
5376 "Unpickler.__init__() was not called by %s.__init__()",
5377 Py_TYPE(self)->tp_name);
5378 return NULL;
5379 }
5380
5381 return load(self);
5382}
5383
5384/* The name of find_class() is misleading. In newer pickle protocols, this
5385 function is used for loading any global (i.e., functions), not just
5386 classes. The name is kept only for backward compatibility. */
5387
5388PyDoc_STRVAR(Unpickler_find_class_doc,
5389"find_class(module_name, global_name) -> object.\n"
5390"\n"
5391"Return an object from a specified module, importing the module if\n"
5392"necessary. Subclasses may override this method (e.g. to restrict\n"
5393"unpickling of arbitrary classes and functions).\n"
5394"\n"
5395"This method is called whenever a class or a function object is\n"
5396"needed. Both arguments passed are str objects.\n");
5397
5398static PyObject *
5399Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5400{
5401 PyObject *global;
5402 PyObject *modules_dict;
5403 PyObject *module;
5404 PyObject *module_name, *global_name;
5405
5406 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5407 &module_name, &global_name))
5408 return NULL;
5409
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005410 /* Try to map the old names used in Python 2.x to the new ones used in
5411 Python 3.x. We do this only with old pickle protocols and when the
5412 user has not disabled the feature. */
5413 if (self->proto < 3 && self->fix_imports) {
5414 PyObject *key;
5415 PyObject *item;
5416
5417 /* Check if the global (i.e., a function or a class) was renamed
5418 or moved to another module. */
5419 key = PyTuple_Pack(2, module_name, global_name);
5420 if (key == NULL)
5421 return NULL;
5422 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5423 Py_DECREF(key);
5424 if (item) {
5425 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5426 PyErr_Format(PyExc_RuntimeError,
5427 "_compat_pickle.NAME_MAPPING values should be "
5428 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5429 return NULL;
5430 }
5431 module_name = PyTuple_GET_ITEM(item, 0);
5432 global_name = PyTuple_GET_ITEM(item, 1);
5433 if (!PyUnicode_Check(module_name) ||
5434 !PyUnicode_Check(global_name)) {
5435 PyErr_Format(PyExc_RuntimeError,
5436 "_compat_pickle.NAME_MAPPING values should be "
5437 "pairs of str, not (%.200s, %.200s)",
5438 Py_TYPE(module_name)->tp_name,
5439 Py_TYPE(global_name)->tp_name);
5440 return NULL;
5441 }
5442 }
5443 else if (PyErr_Occurred()) {
5444 return NULL;
5445 }
5446
5447 /* Check if the module was renamed. */
5448 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5449 if (item) {
5450 if (!PyUnicode_Check(item)) {
5451 PyErr_Format(PyExc_RuntimeError,
5452 "_compat_pickle.IMPORT_MAPPING values should be "
5453 "strings, not %.200s", Py_TYPE(item)->tp_name);
5454 return NULL;
5455 }
5456 module_name = item;
5457 }
5458 else if (PyErr_Occurred()) {
5459 return NULL;
5460 }
5461 }
5462
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005463 modules_dict = PySys_GetObject("modules");
5464 if (modules_dict == NULL)
5465 return NULL;
5466
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005467 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005468 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005469 if (PyErr_Occurred())
5470 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005471 module = PyImport_Import(module_name);
5472 if (module == NULL)
5473 return NULL;
5474 global = PyObject_GetAttr(module, global_name);
5475 Py_DECREF(module);
5476 }
5477 else {
5478 global = PyObject_GetAttr(module, global_name);
5479 }
5480 return global;
5481}
5482
5483static struct PyMethodDef Unpickler_methods[] = {
5484 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5485 Unpickler_load_doc},
5486 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5487 Unpickler_find_class_doc},
5488 {NULL, NULL} /* sentinel */
5489};
5490
5491static void
5492Unpickler_dealloc(UnpicklerObject *self)
5493{
5494 PyObject_GC_UnTrack((PyObject *)self);
5495 Py_XDECREF(self->readline);
5496 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005497 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005498 Py_XDECREF(self->stack);
5499 Py_XDECREF(self->pers_func);
5500 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005501 if (self->buffer.buf != NULL) {
5502 PyBuffer_Release(&self->buffer);
5503 self->buffer.buf = NULL;
5504 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005505
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005506 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005507 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005508 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509 free(self->encoding);
5510 free(self->errors);
5511
5512 Py_TYPE(self)->tp_free((PyObject *)self);
5513}
5514
5515static int
5516Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5517{
5518 Py_VISIT(self->readline);
5519 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005520 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005521 Py_VISIT(self->stack);
5522 Py_VISIT(self->pers_func);
5523 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005524 return 0;
5525}
5526
5527static int
5528Unpickler_clear(UnpicklerObject *self)
5529{
5530 Py_CLEAR(self->readline);
5531 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005532 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005533 Py_CLEAR(self->stack);
5534 Py_CLEAR(self->pers_func);
5535 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005536 if (self->buffer.buf != NULL) {
5537 PyBuffer_Release(&self->buffer);
5538 self->buffer.buf = NULL;
5539 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005540
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005541 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005542 PyMem_Free(self->marks);
5543 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005544 PyMem_Free(self->input_line);
5545 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005546 free(self->encoding);
5547 self->encoding = NULL;
5548 free(self->errors);
5549 self->errors = NULL;
5550
5551 return 0;
5552}
5553
5554PyDoc_STRVAR(Unpickler_doc,
5555"Unpickler(file, *, encoding='ASCII', errors='strict')"
5556"\n"
5557"This takes a binary file for reading a pickle data stream.\n"
5558"\n"
5559"The protocol version of the pickle is detected automatically, so no\n"
5560"proto argument is needed.\n"
5561"\n"
5562"The file-like object must have two methods, a read() method\n"
5563"that takes an integer argument, and a readline() method that\n"
5564"requires no arguments. Both methods should return bytes.\n"
5565"Thus file-like object can be a binary file object opened for\n"
5566"reading, a BytesIO object, or any other custom object that\n"
5567"meets this interface.\n"
5568"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005569"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5570"which are used to control compatiblity support for pickle stream\n"
5571"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5572"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5573"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5574"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5575"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005576
5577static int
5578Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5579{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005580 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005581 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005582 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005583 char *encoding = NULL;
5584 char *errors = NULL;
5585
5586 /* XXX: That is an horrible error message. But, I don't know how to do
5587 better... */
5588 if (Py_SIZE(args) != 1) {
5589 PyErr_Format(PyExc_TypeError,
5590 "%s takes exactly one positional argument (%zd given)",
5591 Py_TYPE(self)->tp_name, Py_SIZE(args));
5592 return -1;
5593 }
5594
5595 /* Arguments parsing needs to be done in the __init__() method to allow
5596 subclasses to define their own __init__() method, which may (or may
5597 not) support Unpickler arguments. However, this means we need to be
5598 extra careful in the other Unpickler methods, since a subclass could
5599 forget to call Unpickler.__init__() thus breaking our internal
5600 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005601 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005602 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005603 return -1;
5604
5605 /* In case of multiple __init__() calls, clear previous content. */
5606 if (self->read != NULL)
5607 (void)Unpickler_clear(self);
5608
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005609 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005610 return -1;
5611
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005612 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005613 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005614
5615 self->fix_imports = PyObject_IsTrue(fix_imports);
5616 if (self->fix_imports == -1)
5617 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005618
5619 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5620 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5621 "persistent_load");
5622 if (self->pers_func == NULL)
5623 return -1;
5624 }
5625 else {
5626 self->pers_func = NULL;
5627 }
5628
5629 self->stack = (Pdata *)Pdata_New();
5630 if (self->stack == NULL)
5631 return -1;
5632
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005633 self->memo_size = 32;
5634 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005635 if (self->memo == NULL)
5636 return -1;
5637
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005638 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005639 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005640
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005641 return 0;
5642}
5643
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005644/* Define a proxy object for the Unpickler's internal memo object. This is to
5645 * avoid breaking code like:
5646 * unpickler.memo.clear()
5647 * and
5648 * unpickler.memo = saved_memo
5649 * Is this a good idea? Not really, but we don't want to break code that uses
5650 * it. Note that we don't implement the entire mapping API here. This is
5651 * intentional, as these should be treated as black-box implementation details.
5652 *
5653 * We do, however, have to implement pickling/unpickling support because of
5654 * real-world code like cvs2svn.
5655 */
5656
5657typedef struct {
5658 PyObject_HEAD
5659 UnpicklerObject *unpickler;
5660} UnpicklerMemoProxyObject;
5661
5662PyDoc_STRVAR(ump_clear_doc,
5663"memo.clear() -> None. Remove all items from memo.");
5664
5665static PyObject *
5666ump_clear(UnpicklerMemoProxyObject *self)
5667{
5668 _Unpickler_MemoCleanup(self->unpickler);
5669 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5670 if (self->unpickler->memo == NULL)
5671 return NULL;
5672 Py_RETURN_NONE;
5673}
5674
5675PyDoc_STRVAR(ump_copy_doc,
5676"memo.copy() -> new_memo. Copy the memo to a new object.");
5677
5678static PyObject *
5679ump_copy(UnpicklerMemoProxyObject *self)
5680{
5681 Py_ssize_t i;
5682 PyObject *new_memo = PyDict_New();
5683 if (new_memo == NULL)
5684 return NULL;
5685
5686 for (i = 0; i < self->unpickler->memo_size; i++) {
5687 int status;
5688 PyObject *key, *value;
5689
5690 value = self->unpickler->memo[i];
5691 if (value == NULL)
5692 continue;
5693
5694 key = PyLong_FromSsize_t(i);
5695 if (key == NULL)
5696 goto error;
5697 status = PyDict_SetItem(new_memo, key, value);
5698 Py_DECREF(key);
5699 if (status < 0)
5700 goto error;
5701 }
5702 return new_memo;
5703
5704error:
5705 Py_DECREF(new_memo);
5706 return NULL;
5707}
5708
5709PyDoc_STRVAR(ump_reduce_doc,
5710"memo.__reduce__(). Pickling support.");
5711
5712static PyObject *
5713ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5714{
5715 PyObject *reduce_value;
5716 PyObject *constructor_args;
5717 PyObject *contents = ump_copy(self);
5718 if (contents == NULL)
5719 return NULL;
5720
5721 reduce_value = PyTuple_New(2);
5722 if (reduce_value == NULL) {
5723 Py_DECREF(contents);
5724 return NULL;
5725 }
5726 constructor_args = PyTuple_New(1);
5727 if (constructor_args == NULL) {
5728 Py_DECREF(contents);
5729 Py_DECREF(reduce_value);
5730 return NULL;
5731 }
5732 PyTuple_SET_ITEM(constructor_args, 0, contents);
5733 Py_INCREF((PyObject *)&PyDict_Type);
5734 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5735 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5736 return reduce_value;
5737}
5738
5739static PyMethodDef unpicklerproxy_methods[] = {
5740 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5741 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5742 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5743 {NULL, NULL} /* sentinel */
5744};
5745
5746static void
5747UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5748{
5749 PyObject_GC_UnTrack(self);
5750 Py_XDECREF(self->unpickler);
5751 PyObject_GC_Del((PyObject *)self);
5752}
5753
5754static int
5755UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5756 visitproc visit, void *arg)
5757{
5758 Py_VISIT(self->unpickler);
5759 return 0;
5760}
5761
5762static int
5763UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5764{
5765 Py_CLEAR(self->unpickler);
5766 return 0;
5767}
5768
5769static PyTypeObject UnpicklerMemoProxyType = {
5770 PyVarObject_HEAD_INIT(NULL, 0)
5771 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5772 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5773 0,
5774 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5775 0, /* tp_print */
5776 0, /* tp_getattr */
5777 0, /* tp_setattr */
5778 0, /* tp_compare */
5779 0, /* tp_repr */
5780 0, /* tp_as_number */
5781 0, /* tp_as_sequence */
5782 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005783 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005784 0, /* tp_call */
5785 0, /* tp_str */
5786 PyObject_GenericGetAttr, /* tp_getattro */
5787 PyObject_GenericSetAttr, /* tp_setattro */
5788 0, /* tp_as_buffer */
5789 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5790 0, /* tp_doc */
5791 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5792 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5793 0, /* tp_richcompare */
5794 0, /* tp_weaklistoffset */
5795 0, /* tp_iter */
5796 0, /* tp_iternext */
5797 unpicklerproxy_methods, /* tp_methods */
5798};
5799
5800static PyObject *
5801UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5802{
5803 UnpicklerMemoProxyObject *self;
5804
5805 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5806 &UnpicklerMemoProxyType);
5807 if (self == NULL)
5808 return NULL;
5809 Py_INCREF(unpickler);
5810 self->unpickler = unpickler;
5811 PyObject_GC_Track(self);
5812 return (PyObject *)self;
5813}
5814
5815/*****************************************************************************/
5816
5817
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005818static PyObject *
5819Unpickler_get_memo(UnpicklerObject *self)
5820{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005821 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005822}
5823
5824static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005825Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005826{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005827 PyObject **new_memo;
5828 Py_ssize_t new_memo_size = 0;
5829 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005830
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005831 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005832 PyErr_SetString(PyExc_TypeError,
5833 "attribute deletion is not supported");
5834 return -1;
5835 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005836
5837 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5838 UnpicklerObject *unpickler =
5839 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5840
5841 new_memo_size = unpickler->memo_size;
5842 new_memo = _Unpickler_NewMemo(new_memo_size);
5843 if (new_memo == NULL)
5844 return -1;
5845
5846 for (i = 0; i < new_memo_size; i++) {
5847 Py_XINCREF(unpickler->memo[i]);
5848 new_memo[i] = unpickler->memo[i];
5849 }
5850 }
5851 else if (PyDict_Check(obj)) {
5852 Py_ssize_t i = 0;
5853 PyObject *key, *value;
5854
5855 new_memo_size = PyDict_Size(obj);
5856 new_memo = _Unpickler_NewMemo(new_memo_size);
5857 if (new_memo == NULL)
5858 return -1;
5859
5860 while (PyDict_Next(obj, &i, &key, &value)) {
5861 Py_ssize_t idx;
5862 if (!PyLong_Check(key)) {
5863 PyErr_SetString(PyExc_TypeError,
5864 "memo key must be integers");
5865 goto error;
5866 }
5867 idx = PyLong_AsSsize_t(key);
5868 if (idx == -1 && PyErr_Occurred())
5869 goto error;
5870 if (_Unpickler_MemoPut(self, idx, value) < 0)
5871 goto error;
5872 }
5873 }
5874 else {
5875 PyErr_Format(PyExc_TypeError,
5876 "'memo' attribute must be an UnpicklerMemoProxy object"
5877 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005878 return -1;
5879 }
5880
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005881 _Unpickler_MemoCleanup(self);
5882 self->memo_size = new_memo_size;
5883 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005884
5885 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005886
5887 error:
5888 if (new_memo_size) {
5889 i = new_memo_size;
5890 while (--i >= 0) {
5891 Py_XDECREF(new_memo[i]);
5892 }
5893 PyMem_FREE(new_memo);
5894 }
5895 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005896}
5897
5898static PyObject *
5899Unpickler_get_persload(UnpicklerObject *self)
5900{
5901 if (self->pers_func == NULL)
5902 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5903 else
5904 Py_INCREF(self->pers_func);
5905 return self->pers_func;
5906}
5907
5908static int
5909Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5910{
5911 PyObject *tmp;
5912
5913 if (value == NULL) {
5914 PyErr_SetString(PyExc_TypeError,
5915 "attribute deletion is not supported");
5916 return -1;
5917 }
5918 if (!PyCallable_Check(value)) {
5919 PyErr_SetString(PyExc_TypeError,
5920 "persistent_load must be a callable taking "
5921 "one argument");
5922 return -1;
5923 }
5924
5925 tmp = self->pers_func;
5926 Py_INCREF(value);
5927 self->pers_func = value;
5928 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5929
5930 return 0;
5931}
5932
5933static PyGetSetDef Unpickler_getsets[] = {
5934 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5935 {"persistent_load", (getter)Unpickler_get_persload,
5936 (setter)Unpickler_set_persload},
5937 {NULL}
5938};
5939
5940static PyTypeObject Unpickler_Type = {
5941 PyVarObject_HEAD_INIT(NULL, 0)
5942 "_pickle.Unpickler", /*tp_name*/
5943 sizeof(UnpicklerObject), /*tp_basicsize*/
5944 0, /*tp_itemsize*/
5945 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5946 0, /*tp_print*/
5947 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005948 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005949 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005950 0, /*tp_repr*/
5951 0, /*tp_as_number*/
5952 0, /*tp_as_sequence*/
5953 0, /*tp_as_mapping*/
5954 0, /*tp_hash*/
5955 0, /*tp_call*/
5956 0, /*tp_str*/
5957 0, /*tp_getattro*/
5958 0, /*tp_setattro*/
5959 0, /*tp_as_buffer*/
5960 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5961 Unpickler_doc, /*tp_doc*/
5962 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5963 (inquiry)Unpickler_clear, /*tp_clear*/
5964 0, /*tp_richcompare*/
5965 0, /*tp_weaklistoffset*/
5966 0, /*tp_iter*/
5967 0, /*tp_iternext*/
5968 Unpickler_methods, /*tp_methods*/
5969 0, /*tp_members*/
5970 Unpickler_getsets, /*tp_getset*/
5971 0, /*tp_base*/
5972 0, /*tp_dict*/
5973 0, /*tp_descr_get*/
5974 0, /*tp_descr_set*/
5975 0, /*tp_dictoffset*/
5976 (initproc)Unpickler_init, /*tp_init*/
5977 PyType_GenericAlloc, /*tp_alloc*/
5978 PyType_GenericNew, /*tp_new*/
5979 PyObject_GC_Del, /*tp_free*/
5980 0, /*tp_is_gc*/
5981};
5982
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005983PyDoc_STRVAR(pickle_dump_doc,
5984"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5985"\n"
5986"Write a pickled representation of obj to the open file object file. This\n"
5987"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5988"efficient.\n"
5989"\n"
5990"The optional protocol argument tells the pickler to use the given protocol;\n"
5991"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5992"backward-incompatible protocol designed for Python 3.0.\n"
5993"\n"
5994"Specifying a negative protocol version selects the highest protocol version\n"
5995"supported. The higher the protocol used, the more recent the version of\n"
5996"Python needed to read the pickle produced.\n"
5997"\n"
5998"The file argument must have a write() method that accepts a single bytes\n"
5999"argument. It can thus be a file object opened for binary writing, a\n"
6000"io.BytesIO instance, or any other custom object that meets this interface.\n"
6001"\n"
6002"If fix_imports is True and protocol is less than 3, pickle will try to\n"
6003"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6004"so that the pickle data stream is readable with Python 2.x.\n");
6005
6006static PyObject *
6007pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
6008{
6009 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
6010 PyObject *obj;
6011 PyObject *file;
6012 PyObject *proto = NULL;
6013 PyObject *fix_imports = Py_True;
6014 PicklerObject *pickler;
6015
6016 /* fix_imports is a keyword-only argument. */
6017 if (Py_SIZE(args) > 3) {
6018 PyErr_Format(PyExc_TypeError,
6019 "pickle.dump() takes at most 3 positional "
6020 "argument (%zd given)", Py_SIZE(args));
6021 return NULL;
6022 }
6023
6024 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
6025 &obj, &file, &proto, &fix_imports))
6026 return NULL;
6027
6028 pickler = _Pickler_New();
6029 if (pickler == NULL)
6030 return NULL;
6031
6032 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6033 goto error;
6034
6035 if (_Pickler_SetOutputStream(pickler, file) < 0)
6036 goto error;
6037
6038 if (dump(pickler, obj) < 0)
6039 goto error;
6040
6041 if (_Pickler_FlushToFile(pickler) < 0)
6042 goto error;
6043
6044 Py_DECREF(pickler);
6045 Py_RETURN_NONE;
6046
6047 error:
6048 Py_XDECREF(pickler);
6049 return NULL;
6050}
6051
6052PyDoc_STRVAR(pickle_dumps_doc,
6053"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6054"\n"
6055"Return the pickled representation of the object as a bytes\n"
6056"object, instead of writing it to a file.\n"
6057"\n"
6058"The optional protocol argument tells the pickler to use the given protocol;\n"
6059"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6060"backward-incompatible protocol designed for Python 3.0.\n"
6061"\n"
6062"Specifying a negative protocol version selects the highest protocol version\n"
6063"supported. The higher the protocol used, the more recent the version of\n"
6064"Python needed to read the pickle produced.\n"
6065"\n"
6066"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6067"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6068"so that the pickle data stream is readable with Python 2.x.\n");
6069
6070static PyObject *
6071pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6072{
6073 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6074 PyObject *obj;
6075 PyObject *proto = NULL;
6076 PyObject *result;
6077 PyObject *fix_imports = Py_True;
6078 PicklerObject *pickler;
6079
6080 /* fix_imports is a keyword-only argument. */
6081 if (Py_SIZE(args) > 2) {
6082 PyErr_Format(PyExc_TypeError,
6083 "pickle.dumps() takes at most 2 positional "
6084 "argument (%zd given)", Py_SIZE(args));
6085 return NULL;
6086 }
6087
6088 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6089 &obj, &proto, &fix_imports))
6090 return NULL;
6091
6092 pickler = _Pickler_New();
6093 if (pickler == NULL)
6094 return NULL;
6095
6096 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6097 goto error;
6098
6099 if (dump(pickler, obj) < 0)
6100 goto error;
6101
6102 result = _Pickler_GetString(pickler);
6103 Py_DECREF(pickler);
6104 return result;
6105
6106 error:
6107 Py_XDECREF(pickler);
6108 return NULL;
6109}
6110
6111PyDoc_STRVAR(pickle_load_doc,
6112"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6113"\n"
6114"Read a pickled object representation from the open file object file and\n"
6115"return the reconstituted object hierarchy specified therein. This is\n"
6116"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6117"\n"
6118"The protocol version of the pickle is detected automatically, so no protocol\n"
6119"argument is needed. Bytes past the pickled object's representation are\n"
6120"ignored.\n"
6121"\n"
6122"The argument file must have two methods, a read() method that takes an\n"
6123"integer argument, and a readline() method that requires no arguments. Both\n"
6124"methods should return bytes. Thus *file* can be a binary file object opened\n"
6125"for reading, a BytesIO object, or any other custom object that meets this\n"
6126"interface.\n"
6127"\n"
6128"Optional keyword arguments are fix_imports, encoding and errors,\n"
6129"which are used to control compatiblity support for pickle stream generated\n"
6130"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6131"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6132"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6133"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6134
6135static PyObject *
6136pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6137{
6138 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6139 PyObject *file;
6140 PyObject *fix_imports = Py_True;
6141 PyObject *result;
6142 char *encoding = NULL;
6143 char *errors = NULL;
6144 UnpicklerObject *unpickler;
6145
6146 /* fix_imports, encoding and errors are a keyword-only argument. */
6147 if (Py_SIZE(args) != 1) {
6148 PyErr_Format(PyExc_TypeError,
6149 "pickle.load() takes exactly one positional "
6150 "argument (%zd given)", Py_SIZE(args));
6151 return NULL;
6152 }
6153
6154 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6155 &file, &fix_imports, &encoding, &errors))
6156 return NULL;
6157
6158 unpickler = _Unpickler_New();
6159 if (unpickler == NULL)
6160 return NULL;
6161
6162 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6163 goto error;
6164
6165 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6166 goto error;
6167
6168 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6169 if (unpickler->fix_imports == -1)
6170 goto error;
6171
6172 result = load(unpickler);
6173 Py_DECREF(unpickler);
6174 return result;
6175
6176 error:
6177 Py_XDECREF(unpickler);
6178 return NULL;
6179}
6180
6181PyDoc_STRVAR(pickle_loads_doc,
6182"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6183"\n"
6184"Read a pickled object hierarchy from a bytes object and return the\n"
6185"reconstituted object hierarchy specified therein\n"
6186"\n"
6187"The protocol version of the pickle is detected automatically, so no protocol\n"
6188"argument is needed. Bytes past the pickled object's representation are\n"
6189"ignored.\n"
6190"\n"
6191"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6192"are used to control compatiblity support for pickle stream generated\n"
6193"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6194"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6195"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6196"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6197
6198static PyObject *
6199pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6200{
6201 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6202 PyObject *input;
6203 PyObject *fix_imports = Py_True;
6204 PyObject *result;
6205 char *encoding = NULL;
6206 char *errors = NULL;
6207 UnpicklerObject *unpickler;
6208
6209 /* fix_imports, encoding and errors are a keyword-only argument. */
6210 if (Py_SIZE(args) != 1) {
6211 PyErr_Format(PyExc_TypeError,
6212 "pickle.loads() takes exactly one positional "
6213 "argument (%zd given)", Py_SIZE(args));
6214 return NULL;
6215 }
6216
6217 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6218 &input, &fix_imports, &encoding, &errors))
6219 return NULL;
6220
6221 unpickler = _Unpickler_New();
6222 if (unpickler == NULL)
6223 return NULL;
6224
6225 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6226 goto error;
6227
6228 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6229 goto error;
6230
6231 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6232 if (unpickler->fix_imports == -1)
6233 goto error;
6234
6235 result = load(unpickler);
6236 Py_DECREF(unpickler);
6237 return result;
6238
6239 error:
6240 Py_XDECREF(unpickler);
6241 return NULL;
6242}
6243
6244
6245static struct PyMethodDef pickle_methods[] = {
6246 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6247 pickle_dump_doc},
6248 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6249 pickle_dumps_doc},
6250 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6251 pickle_load_doc},
6252 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6253 pickle_loads_doc},
6254 {NULL, NULL} /* sentinel */
6255};
6256
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006257static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006258initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006259{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006260 PyObject *copyreg = NULL;
6261 PyObject *compat_pickle = NULL;
6262
6263 /* XXX: We should ensure that the types of the dictionaries imported are
6264 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6265 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006266
6267 copyreg = PyImport_ImportModule("copyreg");
6268 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006269 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006270 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6271 if (!dispatch_table)
6272 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006273 extension_registry = \
6274 PyObject_GetAttrString(copyreg, "_extension_registry");
6275 if (!extension_registry)
6276 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006277 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6278 if (!inverted_registry)
6279 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006280 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6281 if (!extension_cache)
6282 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006283 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006284
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006285 /* Load the 2.x -> 3.x stdlib module mapping tables */
6286 compat_pickle = PyImport_ImportModule("_compat_pickle");
6287 if (!compat_pickle)
6288 goto error;
6289 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6290 if (!name_mapping_2to3)
6291 goto error;
6292 if (!PyDict_CheckExact(name_mapping_2to3)) {
6293 PyErr_Format(PyExc_RuntimeError,
6294 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6295 Py_TYPE(name_mapping_2to3)->tp_name);
6296 goto error;
6297 }
6298 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6299 "IMPORT_MAPPING");
6300 if (!import_mapping_2to3)
6301 goto error;
6302 if (!PyDict_CheckExact(import_mapping_2to3)) {
6303 PyErr_Format(PyExc_RuntimeError,
6304 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6305 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6306 goto error;
6307 }
6308 /* ... and the 3.x -> 2.x mapping tables */
6309 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6310 "REVERSE_NAME_MAPPING");
6311 if (!name_mapping_3to2)
6312 goto error;
6313 if (!PyDict_CheckExact(name_mapping_3to2)) {
6314 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006315 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006316 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6317 goto error;
6318 }
6319 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6320 "REVERSE_IMPORT_MAPPING");
6321 if (!import_mapping_3to2)
6322 goto error;
6323 if (!PyDict_CheckExact(import_mapping_3to2)) {
6324 PyErr_Format(PyExc_RuntimeError,
6325 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6326 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6327 goto error;
6328 }
6329 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006330
6331 empty_tuple = PyTuple_New(0);
6332 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006333 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006334 two_tuple = PyTuple_New(2);
6335 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006336 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006337 /* We use this temp container with no regard to refcounts, or to
6338 * keeping containees alive. Exempt from GC, because we don't
6339 * want anything looking at two_tuple() by magic.
6340 */
6341 PyObject_GC_UnTrack(two_tuple);
6342
6343 return 0;
6344
6345 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006346 Py_CLEAR(copyreg);
6347 Py_CLEAR(dispatch_table);
6348 Py_CLEAR(extension_registry);
6349 Py_CLEAR(inverted_registry);
6350 Py_CLEAR(extension_cache);
6351 Py_CLEAR(compat_pickle);
6352 Py_CLEAR(name_mapping_2to3);
6353 Py_CLEAR(import_mapping_2to3);
6354 Py_CLEAR(name_mapping_3to2);
6355 Py_CLEAR(import_mapping_3to2);
6356 Py_CLEAR(empty_tuple);
6357 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006358 return -1;
6359}
6360
6361static struct PyModuleDef _picklemodule = {
6362 PyModuleDef_HEAD_INIT,
6363 "_pickle",
6364 pickle_module_doc,
6365 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006366 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006367 NULL,
6368 NULL,
6369 NULL,
6370 NULL
6371};
6372
6373PyMODINIT_FUNC
6374PyInit__pickle(void)
6375{
6376 PyObject *m;
6377
6378 if (PyType_Ready(&Unpickler_Type) < 0)
6379 return NULL;
6380 if (PyType_Ready(&Pickler_Type) < 0)
6381 return NULL;
6382 if (PyType_Ready(&Pdata_Type) < 0)
6383 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006384 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6385 return NULL;
6386 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6387 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006388
6389 /* Create the module and add the functions. */
6390 m = PyModule_Create(&_picklemodule);
6391 if (m == NULL)
6392 return NULL;
6393
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006394 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006395 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6396 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006397 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006398 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6399 return NULL;
6400
6401 /* Initialize the exceptions. */
6402 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6403 if (PickleError == NULL)
6404 return NULL;
6405 PicklingError = \
6406 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6407 if (PicklingError == NULL)
6408 return NULL;
6409 UnpicklingError = \
6410 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6411 if (UnpicklingError == NULL)
6412 return NULL;
6413
6414 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6415 return NULL;
6416 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6417 return NULL;
6418 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6419 return NULL;
6420
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006421 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006422 return NULL;
6423
6424 return m;
6425}