blob: 20ee30299afbe99ada2d7ba129d68e43e947b157 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions understood by this module.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* largest protocol number accepted by
                               _Pickler_SetProtocol() */
    DEFAULT_PROTOCOL = 3    /* used when no protocol (or None) is passed */
};
12
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is a single character constant.
   NOTE(review): on platforms where plain char is signed, the constants
   '\x80'..'\x8b' have negative int values -- confirm that the sites
   comparing stream bytes against them account for this. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C'
};
76
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each value is a complete text-mode INT record: the 'I' opcode character,
 * the digits "01" or "00", and a terminating newline.  Any earlier
 * definition of TRUE/FALSE is discarded first.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
86
/* Tuning constants for the pickler and unpickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size of the write buffer of Pickler (in bytes; this is the
       initial value of PicklerObject.max_output_len). */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size of the write buffer of Pickler when pickling to a
       stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,

    /* Prefetch size when unpickling (disabled on unpeekable streams) */
    PREFETCH = 8192 * 16
};
108
/* Exception classes for pickle.  These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000138
139static int
140stack_underflow(void)
141{
142 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
143 return -1;
144}
145
/* Internal data type used as the unpickling stack.
   The number of live entries is kept in the variable-size header
   (read and written through Py_SIZE()). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;        /* array of stack slots; entries below Py_SIZE()
                               hold references owned by the stack */
    Py_ssize_t allocated;   /* number of slots in data allocated */
} Pdata;
152
153static void
154Pdata_dealloc(Pdata *self)
155{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200156 Py_ssize_t i = Py_SIZE(self);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 while (--i >= 0) {
158 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000159 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000160 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000161 PyObject_Del(self);
162}
163
/* Type object for Pdata.  Only the name, the instance size and the
   deallocator are given; all remaining slots are left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
171
172static PyObject *
173Pdata_New(void)
174{
175 Pdata *self;
176
177 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
178 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000179 Py_SIZE(self) = 0;
180 self->allocated = 8;
181 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000182 if (self->data)
183 return (PyObject *)self;
184 Py_DECREF(self);
185 return PyErr_NoMemory();
186}
187
188
189/* Retain only the initial clearto items. If clearto >= the current
190 * number of items, this is a (non-erroneous) NOP.
191 */
192static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200193Pdata_clear(Pdata *self, Py_ssize_t clearto)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000194{
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200195 Py_ssize_t i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000196
197 if (clearto < 0)
198 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000200 return 0;
201
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 while (--i >= clearto) {
203 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000204 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000205 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000206 return 0;
207}
208
209static int
210Pdata_grow(Pdata *self)
211{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000212 PyObject **data = self->data;
213 Py_ssize_t allocated = self->allocated;
214 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000216 new_allocated = (allocated >> 3) + 6;
217 /* check for integer overflow */
218 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 new_allocated += allocated;
221 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
224 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000225 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000226
227 self->data = data;
228 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000229 return 0;
230
231 nomemory:
232 PyErr_NoMemory();
233 return -1;
234}
235
236/* D is a Pdata*. Pop the topmost element and store it into V, which
237 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
238 * is raised and V is set to NULL.
239 */
240static PyObject *
241Pdata_pop(Pdata *self)
242{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000243 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000244 PyErr_SetString(UnpicklingError, "bad pickle data");
245 return NULL;
246 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000247 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000248}
249#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
250
251static int
252Pdata_push(Pdata *self, PyObject *obj)
253{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return -1;
256 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000257 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000258 return 0;
259}
260
/* Push an object on stack, transferring its ownership to the stack.
   D is a Pdata*, O the object, ER the value the *enclosing function*
   returns when the push fails.
   NOTE(review): when Pdata_push() fails, O is neither stored nor released
   before returning, so its reference appears to leak -- confirm against
   the callers. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   O is evaluated twice, so it must be free of side effects.
   NOTE(review): the reference added here is not dropped when Pdata_push()
   fails -- confirm whether that leak on the error path is acceptable. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
269
270static PyObject *
271Pdata_poptuple(Pdata *self, Py_ssize_t start)
272{
273 PyObject *tuple;
274 Py_ssize_t len, i, j;
275
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000276 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000277 tuple = PyTuple_New(len);
278 if (tuple == NULL)
279 return NULL;
280 for (i = start, j = 0; j < len; i++, j++)
281 PyTuple_SET_ITEM(tuple, j, self->data[i]);
282
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000283 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000284 return tuple;
285}
286
287static PyObject *
288Pdata_poplist(Pdata *self, Py_ssize_t start)
289{
290 PyObject *list;
291 Py_ssize_t len, i, j;
292
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000293 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000294 list = PyList_New(len);
295 if (list == NULL)
296 return NULL;
297 for (i = start, j = 0; j < len; i++, j++)
298 PyList_SET_ITEM(list, j, self->data[i]);
299
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000300 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000301 return list;
302}
303
/* One slot of the pickler memo table. */
typedef struct {
    PyObject *me_key;       /* memoized object (a reference is held);
                               NULL marks an unused slot */
    Py_ssize_t me_value;    /* value associated with me_key */
} PyMemoEntry;
308
/* Open-addressing hash table used as the pickler memo. */
typedef struct {
    Py_ssize_t mt_mask;         /* mt_allocated - 1; masks a hash to a slot */
    Py_ssize_t mt_used;         /* number of slots holding a key */
    Py_ssize_t mt_allocated;    /* number of slots in mt_table (power of 2) */
    PyMemoEntry *mt_table;      /* the slot array */
} PyMemoTable;
315
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keep track of the seen
                                   objects to support self-referential objects
                                   pickling. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached 1-tuple reused by
                                   _Pickler_FastCall(); NULL until first
                                   use. */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytes buffer before
                                   flushing to the stream. */
    Py_ssize_t output_len;      /* Number of bytes of output_buffer in use. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    Py_ssize_t buf_size;        /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* presumably the current nesting depth in
                                   fast mode (see FAST_NESTING_LIMIT) --
                                   TODO confirm */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;        /* NULL on creation; presumably used for the
                                   fast-mode self-reference check -- TODO
                                   confirm */
} PicklerObject;
343
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    Py_ssize_t memo_size;       /* presumably the number of slots in memo --
                                   TODO confirm */

    PyObject *arg;              /* Cached 1-tuple reused by
                                   _Unpickler_FastCall(); NULL until first
                                   use. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */

    Py_buffer buffer;           /* Buffer view acquired by
                                   _Unpickler_SetStringInput(). */
    char *input_buffer;         /* Start of the data currently being read. */
    char *input_line;
    Py_ssize_t input_len;       /* Number of bytes in input_buffer. */
    Py_ssize_t next_read_idx;   /* Index of the next byte to consume. */
    Py_ssize_t prefetched_idx;  /* index of first prefetched byte */
    PyObject *read;             /* read() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */
    PyObject *peek;             /* peek() method of the input stream, or NULL */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of errors handling scheme to use when
                                   decoding strings. The default value is
                                   "strict". */
    Py_ssize_t *marks;          /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
380
/* Forward declarations: these are referenced by code that appears before
   their definitions. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
386
387
/*************************************************************************
 A custom hashtable mapping PyObject* keys (compared by pointer identity) to
 Py_ssize_t values.  This is used by the pickler for memoization.  Using a
 custom hashtable rather than PyDict allows us to skip a bunch of unnecessary
 object creation.  This makes a huge performance difference. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000393
/* Number of slots in a freshly created memo table; also the smallest size
   _PyMemoTable_ResizeTable() will pick. */
#define MT_MINSIZE 8
/* Number of bits the probe perturbation is shifted right after each
   collision in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
396
397
398static PyMemoTable *
399PyMemoTable_New(void)
400{
401 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
402 if (memo == NULL) {
403 PyErr_NoMemory();
404 return NULL;
405 }
406
407 memo->mt_used = 0;
408 memo->mt_allocated = MT_MINSIZE;
409 memo->mt_mask = MT_MINSIZE - 1;
410 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
411 if (memo->mt_table == NULL) {
412 PyMem_FREE(memo);
413 PyErr_NoMemory();
414 return NULL;
415 }
416 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
417
418 return memo;
419}
420
421static PyMemoTable *
422PyMemoTable_Copy(PyMemoTable *self)
423{
424 Py_ssize_t i;
425 PyMemoTable *new = PyMemoTable_New();
426 if (new == NULL)
427 return NULL;
428
429 new->mt_used = self->mt_used;
430 new->mt_allocated = self->mt_allocated;
431 new->mt_mask = self->mt_mask;
432 /* The table we get from _New() is probably smaller than we wanted.
433 Free it and allocate one that's the right size. */
434 PyMem_FREE(new->mt_table);
435 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
436 if (new->mt_table == NULL) {
437 PyMem_FREE(new);
438 return NULL;
439 }
440 for (i = 0; i < self->mt_allocated; i++) {
441 Py_XINCREF(self->mt_table[i].me_key);
442 }
443 memcpy(new->mt_table, self->mt_table,
444 sizeof(PyMemoEntry) * self->mt_allocated);
445
446 return new;
447}
448
449static Py_ssize_t
450PyMemoTable_Size(PyMemoTable *self)
451{
452 return self->mt_used;
453}
454
455static int
456PyMemoTable_Clear(PyMemoTable *self)
457{
458 Py_ssize_t i = self->mt_allocated;
459
460 while (--i >= 0) {
461 Py_XDECREF(self->mt_table[i].me_key);
462 }
463 self->mt_used = 0;
464 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
465 return 0;
466}
467
468static void
469PyMemoTable_Del(PyMemoTable *self)
470{
471 if (self == NULL)
472 return;
473 PyMemoTable_Clear(self);
474
475 PyMem_FREE(self->mt_table);
476 PyMem_FREE(self);
477}
478
479/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
480 can be considerably simpler than dictobject.c's lookdict(). */
481static PyMemoEntry *
482_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
483{
484 size_t i;
485 size_t perturb;
486 size_t mask = (size_t)self->mt_mask;
487 PyMemoEntry *table = self->mt_table;
488 PyMemoEntry *entry;
Benjamin Peterson8f67d082010-10-17 20:54:53 +0000489 Py_hash_t hash = (Py_hash_t)key >> 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000490
491 i = hash & mask;
492 entry = &table[i];
493 if (entry->me_key == NULL || entry->me_key == key)
494 return entry;
495
496 for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
497 i = (i << 2) + i + perturb + 1;
498 entry = &table[i & mask];
499 if (entry->me_key == NULL || entry->me_key == key)
500 return entry;
501 }
502 assert(0); /* Never reached */
503 return NULL;
504}
505
506/* Returns -1 on failure, 0 on success. */
507static int
508_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
509{
510 PyMemoEntry *oldtable = NULL;
511 PyMemoEntry *oldentry, *newentry;
512 Py_ssize_t new_size = MT_MINSIZE;
513 Py_ssize_t to_process;
514
515 assert(min_size > 0);
516
517 /* Find the smallest valid table size >= min_size. */
518 while (new_size < min_size && new_size > 0)
519 new_size <<= 1;
520 if (new_size <= 0) {
521 PyErr_NoMemory();
522 return -1;
523 }
524 /* new_size needs to be a power of two. */
525 assert((new_size & (new_size - 1)) == 0);
526
527 /* Allocate new table. */
528 oldtable = self->mt_table;
529 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
530 if (self->mt_table == NULL) {
531 PyMem_FREE(oldtable);
532 PyErr_NoMemory();
533 return -1;
534 }
535 self->mt_allocated = new_size;
536 self->mt_mask = new_size - 1;
537 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
538
539 /* Copy entries from the old table. */
540 to_process = self->mt_used;
541 for (oldentry = oldtable; to_process > 0; oldentry++) {
542 if (oldentry->me_key != NULL) {
543 to_process--;
544 /* newentry is a pointer to a chunk of the new
545 mt_table, so we're setting the key:value pair
546 in-place. */
547 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
548 newentry->me_key = oldentry->me_key;
549 newentry->me_value = oldentry->me_value;
550 }
551 }
552
553 /* Deallocate the old table. */
554 PyMem_FREE(oldtable);
555 return 0;
556}
557
558/* Returns NULL on failure, a pointer to the value otherwise. */
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200559static Py_ssize_t *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000560PyMemoTable_Get(PyMemoTable *self, PyObject *key)
561{
562 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
563 if (entry->me_key == NULL)
564 return NULL;
565 return &entry->me_value;
566}
567
568/* Returns -1 on failure, 0 on success. */
569static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200570PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000571{
572 PyMemoEntry *entry;
573
574 assert(key != NULL);
575
576 entry = _PyMemoTable_Lookup(self, key);
577 if (entry->me_key != NULL) {
578 entry->me_value = value;
579 return 0;
580 }
581 Py_INCREF(key);
582 entry->me_key = key;
583 entry->me_value = value;
584 self->mt_used++;
585
586 /* If we added a key, we can safely resize. Otherwise just return!
587 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
588 *
589 * Quadrupling the size improves average table sparseness
590 * (reducing collisions) at the cost of some memory. It also halves
591 * the number of expensive resize operations in a growing memo table.
592 *
593 * Very large memo tables (over 50K items) use doubling instead.
594 * This may help applications with severe memory constraints.
595 */
596 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
597 return 0;
598 return _PyMemoTable_ResizeTable(self,
599 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
600}
601
602#undef MT_MINSIZE
603#undef PERTURB_SHIFT
604
605/*************************************************************************/
606
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj): make sure self->arg holds a 1-tuple (creating it on
   first use), release whatever object was previously stored in slot 0 and
   store `obj` there.  The reference to `obj` is consumed in every case: it
   is either handed to the tuple or, when the tuple cannot be created,
   released -- self->arg is then left NULL with an exception set.

   FREE_ARG_TUP(self): drop the cached tuple when something else still holds
   a reference to it (refcount > 1), so that the next call starts with a
   fresh tuple.  Otherwise the tuple, including the last argument stored in
   it, stays cached.  self->arg must not be NULL.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall().  */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
626
627/* A temporary cleaner API for fast single argument function call.
628
629 XXX: Does caching the argument tuple provides any real performance benefits?
630
631 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
632 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
633 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
634 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
635 (i.e, call PyTuple_New() and store the returned value in an array), to save
636 one second (wall clock time). Either ways, the loading time a pickle stream
637 large enough to generate this number of calls would be massively
638 overwhelmed by other factors, like I/O throughput, the GC traversal and
639 object allocation overhead. So, I really doubt these functions provide any
640 real benefits.
641
642 On the other hand, oprofile reports that pickle spends a lot of time in
643 these functions. But, that is probably more related to the function call
644 overhead, than the argument tuple allocation.
645
646 XXX: And, what is the reference behavior of these? Steal, borrow? At first
647 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000648 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000649static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000650_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000651{
652 PyObject *result = NULL;
653
654 ARG_TUP(self, arg);
655 if (self->arg) {
656 result = PyObject_Call(func, self->arg, NULL);
657 FREE_ARG_TUP(self);
658 }
659 return result;
660}
661
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000662static int
663_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000664{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 Py_CLEAR(self->output_buffer);
666 self->output_buffer =
667 PyBytes_FromStringAndSize(NULL, self->max_output_len);
668 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000669 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000670 self->output_len = 0;
671 return 0;
672}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000673
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000674static PyObject *
675_Pickler_GetString(PicklerObject *self)
676{
677 PyObject *output_buffer = self->output_buffer;
678
679 assert(self->output_buffer != NULL);
680 self->output_buffer = NULL;
681 /* Resize down to exact size */
682 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
683 return NULL;
684 return output_buffer;
685}
686
687static int
688_Pickler_FlushToFile(PicklerObject *self)
689{
690 PyObject *output, *result;
691
692 assert(self->write != NULL);
693
694 output = _Pickler_GetString(self);
695 if (output == NULL)
696 return -1;
697
698 result = _Pickler_FastCall(self, self->write, output);
699 Py_XDECREF(result);
700 return (result == NULL) ? -1 : 0;
701}
702
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200703static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000704_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
705{
706 Py_ssize_t i, required;
707 char *buffer;
708
709 assert(s != NULL);
710
711 required = self->output_len + n;
712 if (required > self->max_output_len) {
713 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
714 /* XXX This reallocates a new buffer every time, which is a bit
715 wasteful. */
716 if (_Pickler_FlushToFile(self) < 0)
717 return -1;
718 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000719 return -1;
720 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000721 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
722 /* we already flushed above, so the buffer is empty */
723 PyObject *result;
724 /* XXX we could spare an intermediate copy and pass
725 a memoryview instead */
726 PyObject *output = PyBytes_FromStringAndSize(s, n);
727 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000728 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 result = _Pickler_FastCall(self, self->write, output);
730 Py_XDECREF(result);
731 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 }
733 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000734 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
735 PyErr_NoMemory();
736 return -1;
737 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +0200738 self->max_output_len = (self->output_len + n) / 2 * 3;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000739 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
740 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000741 }
742 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000743 buffer = PyBytes_AS_STRING(self->output_buffer);
744 if (n < 8) {
745 /* This is faster than memcpy when the string is short. */
746 for (i = 0; i < n; i++) {
747 buffer[self->output_len + i] = s[i];
748 }
749 }
750 else {
751 memcpy(buffer + self->output_len, s, n);
752 }
753 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754 return n;
755}
756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757static PicklerObject *
758_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000759{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000760 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000761
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000762 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
763 if (self == NULL)
764 return NULL;
765
766 self->pers_func = NULL;
767 self->arg = NULL;
768 self->write = NULL;
769 self->proto = 0;
770 self->bin = 0;
771 self->fast = 0;
772 self->fast_nesting = 0;
773 self->fix_imports = 0;
774 self->fast_memo = NULL;
775
776 self->memo = PyMemoTable_New();
777 if (self->memo == NULL) {
778 Py_DECREF(self);
779 return NULL;
780 }
781 self->max_output_len = WRITE_BUF_SIZE;
782 self->output_len = 0;
783 self->output_buffer = PyBytes_FromStringAndSize(NULL,
784 self->max_output_len);
785 if (self->output_buffer == NULL) {
786 Py_DECREF(self);
787 return NULL;
788 }
789 return self;
790}
791
792static int
793_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
794 PyObject *fix_imports_obj)
795{
796 long proto = 0;
797 int fix_imports;
798
799 if (proto_obj == NULL || proto_obj == Py_None)
800 proto = DEFAULT_PROTOCOL;
801 else {
802 proto = PyLong_AsLong(proto_obj);
803 if (proto == -1 && PyErr_Occurred())
804 return -1;
805 }
806 if (proto < 0)
807 proto = HIGHEST_PROTOCOL;
808 if (proto > HIGHEST_PROTOCOL) {
809 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
810 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000811 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000812 }
813 fix_imports = PyObject_IsTrue(fix_imports_obj);
814 if (fix_imports == -1)
815 return -1;
816
817 self->proto = proto;
818 self->bin = proto > 0;
819 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000821 return 0;
822}
823
824/* Returns -1 (with an exception set) on failure, 0 on success. This may
825 be called once on a freshly created Pickler. */
826static int
827_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
828{
829 assert(file != NULL);
830 self->write = PyObject_GetAttrString(file, "write");
831 if (self->write == NULL) {
832 if (PyErr_ExceptionMatches(PyExc_AttributeError))
833 PyErr_SetString(PyExc_TypeError,
834 "file must have a 'write' attribute");
835 return -1;
836 }
837
838 return 0;
839}
840
841/* See documentation for _Pickler_FastCall(). */
842static PyObject *
843_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
844{
845 PyObject *result = NULL;
846
847 ARG_TUP(self, arg);
848 if (self->arg) {
849 result = PyObject_Call(func, self->arg, NULL);
850 FREE_ARG_TUP(self);
851 }
852 return result;
853}
854
855/* Returns the size of the input on success, -1 on failure. This takes its
856 own reference to `input`. */
857static Py_ssize_t
858_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
859{
860 if (self->buffer.buf != NULL)
861 PyBuffer_Release(&self->buffer);
862 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
863 return -1;
864 self->input_buffer = self->buffer.buf;
865 self->input_len = self->buffer.len;
866 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000867 self->prefetched_idx = self->input_len;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000868 return self->input_len;
869}
870
Antoine Pitrou04248a82010-10-12 20:51:21 +0000871static int
872_Unpickler_SkipConsumed(UnpicklerObject *self)
873{
874 Py_ssize_t consumed = self->next_read_idx - self->prefetched_idx;
875
876 if (consumed > 0) {
877 PyObject *r;
878 assert(self->peek); /* otherwise we did something wrong */
879 /* This makes an useless copy... */
880 r = PyObject_CallFunction(self->read, "n", consumed);
881 if (r == NULL)
882 return -1;
883 Py_DECREF(r);
884 self->prefetched_idx = self->next_read_idx;
885 }
886 return 0;
887}
888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000889static const Py_ssize_t READ_WHOLE_LINE = -1;
890
891/* If reading from a file, we need to only pull the bytes we need, since there
892 may be multiple pickle objects arranged contiguously in the same input
893 buffer.
894
895 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
896 bytes from the input stream/buffer.
897
898 Update the unpickler's input buffer with the newly-read data. Returns -1 on
899 failure; on success, returns the number of bytes read from the file.
900
901 On success, self->input_len will be 0; this is intentional so that when
902 unpickling from a file, the "we've run out of data" code paths will trigger,
903 causing the Unpickler to go back to the file for more data. Use the returned
904 size to tell you how much data you can process. */
905static Py_ssize_t
906_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
907{
908 PyObject *data;
Antoine Pitrou04248a82010-10-12 20:51:21 +0000909 Py_ssize_t read_size, prefetched_size = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000910
911 assert(self->read != NULL);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000912
913 if (_Unpickler_SkipConsumed(self) < 0)
914 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000915
916 if (n == READ_WHOLE_LINE)
917 data = PyObject_Call(self->readline, empty_tuple, NULL);
918 else {
919 PyObject *len = PyLong_FromSsize_t(n);
920 if (len == NULL)
921 return -1;
922 data = _Unpickler_FastCall(self, self->read, len);
923 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000924 if (data == NULL)
925 return -1;
926
Antoine Pitrou04248a82010-10-12 20:51:21 +0000927 /* Prefetch some data without advancing the file pointer, if possible */
928 if (self->peek) {
929 PyObject *len, *prefetched;
930 len = PyLong_FromSsize_t(PREFETCH);
931 if (len == NULL) {
932 Py_DECREF(data);
933 return -1;
934 }
935 prefetched = _Unpickler_FastCall(self, self->peek, len);
936 if (prefetched == NULL) {
937 if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
938 /* peek() is probably not supported by the given file object */
939 PyErr_Clear();
940 Py_CLEAR(self->peek);
941 }
942 else {
943 Py_DECREF(data);
944 return -1;
945 }
946 }
947 else {
948 assert(PyBytes_Check(prefetched));
949 prefetched_size = PyBytes_GET_SIZE(prefetched);
950 PyBytes_ConcatAndDel(&data, prefetched);
951 if (data == NULL)
952 return -1;
953 }
954 }
955
956 read_size = _Unpickler_SetStringInput(self, data) - prefetched_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000957 Py_DECREF(data);
Antoine Pitrou04248a82010-10-12 20:51:21 +0000958 self->prefetched_idx = read_size;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 return read_size;
960}
961
962/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
963
964 This should be used for all data reads, rather than accessing the unpickler's
965 input buffer directly. This method deals correctly with reading from input
966 streams, which the input buffer doesn't deal with.
967
968 Note that when reading from a file-like object, self->next_read_idx won't
969 be updated (it should remain at 0 for the entire unpickling process). You
970 should use this function's return value to know how many bytes you can
971 consume.
972
973 Returns -1 (with an exception set) on failure. On success, return the
974 number of chars read. */
975static Py_ssize_t
976_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
977{
Antoine Pitrou04248a82010-10-12 20:51:21 +0000978 Py_ssize_t num_read;
979
Antoine Pitrou04248a82010-10-12 20:51:21 +0000980 if (self->next_read_idx + n <= self->input_len) {
981 *s = self->input_buffer + self->next_read_idx;
982 self->next_read_idx += n;
983 return n;
984 }
985 if (!self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000986 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000987 return -1;
988 }
Antoine Pitrou04248a82010-10-12 20:51:21 +0000989 num_read = _Unpickler_ReadFromFile(self, n);
990 if (num_read < 0)
991 return -1;
992 if (num_read < n) {
993 PyErr_Format(PyExc_EOFError, "Ran out of input");
994 return -1;
995 }
996 *s = self->input_buffer;
997 self->next_read_idx = n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000998 return n;
999}
1000
1001static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001002_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
1003 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001004{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001005 char *input_line = PyMem_Realloc(self->input_line, len + 1);
1006 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001007 return -1;
1008
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001009 memcpy(input_line, line, len);
1010 input_line[len] = '\0';
1011 self->input_line = input_line;
1012 *result = self->input_line;
1013 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001014}
1015
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001016/* Read a line from the input stream/buffer. If we run off the end of the input
1017 before hitting \n, return the data we found.
1018
1019 Returns the number of chars read, or -1 on failure. */
1020static Py_ssize_t
1021_Unpickler_Readline(UnpicklerObject *self, char **result)
1022{
1023 Py_ssize_t i, num_read;
1024
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001025 for (i = self->next_read_idx; i < self->input_len; i++) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001026 if (self->input_buffer[i] == '\n') {
1027 char *line_start = self->input_buffer + self->next_read_idx;
1028 num_read = i - self->next_read_idx + 1;
1029 self->next_read_idx = i + 1;
1030 return _Unpickler_CopyLine(self, line_start, num_read, result);
1031 }
1032 }
1033 if (self->read) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001034 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
1035 if (num_read < 0)
1036 return -1;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001037 self->next_read_idx = num_read;
Antoine Pitrouf6c7a852011-08-11 21:04:02 +02001038 return _Unpickler_CopyLine(self, self->input_buffer, num_read, result);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001039 }
1040
1041 /* If we get here, we've run off the end of the input string. Return the
1042 remaining string and let the caller figure it out. */
1043 *result = self->input_buffer + self->next_read_idx;
1044 num_read = i - self->next_read_idx;
1045 self->next_read_idx = i;
1046 return num_read;
1047}
1048
1049/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1050 will be modified in place. */
1051static int
1052_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1053{
1054 Py_ssize_t i;
1055 PyObject **memo;
1056
1057 assert(new_size > self->memo_size);
1058
1059 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1060 if (memo == NULL) {
1061 PyErr_NoMemory();
1062 return -1;
1063 }
1064 self->memo = memo;
1065 for (i = self->memo_size; i < new_size; i++)
1066 self->memo[i] = NULL;
1067 self->memo_size = new_size;
1068 return 0;
1069}
1070
1071/* Returns NULL if idx is out of bounds. */
1072static PyObject *
1073_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1074{
1075 if (idx < 0 || idx >= self->memo_size)
1076 return NULL;
1077
1078 return self->memo[idx];
1079}
1080
1081/* Returns -1 (with an exception set) on failure, 0 on success.
1082 This takes its own reference to `value`. */
1083static int
1084_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1085{
1086 PyObject *old_item;
1087
1088 if (idx >= self->memo_size) {
1089 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1090 return -1;
1091 assert(idx < self->memo_size);
1092 }
1093 Py_INCREF(value);
1094 old_item = self->memo[idx];
1095 self->memo[idx] = value;
1096 Py_XDECREF(old_item);
1097 return 0;
1098}
1099
1100static PyObject **
1101_Unpickler_NewMemo(Py_ssize_t new_size)
1102{
1103 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1104 if (memo == NULL)
1105 return NULL;
1106 memset(memo, 0, new_size * sizeof(PyObject *));
1107 return memo;
1108}
1109
1110/* Free the unpickler's memo, taking care to decref any items left in it. */
1111static void
1112_Unpickler_MemoCleanup(UnpicklerObject *self)
1113{
1114 Py_ssize_t i;
1115 PyObject **memo = self->memo;
1116
1117 if (self->memo == NULL)
1118 return;
1119 self->memo = NULL;
1120 i = self->memo_size;
1121 while (--i >= 0) {
1122 Py_XDECREF(memo[i]);
1123 }
1124 PyMem_FREE(memo);
1125}
1126
1127static UnpicklerObject *
1128_Unpickler_New(void)
1129{
1130 UnpicklerObject *self;
1131
1132 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1133 if (self == NULL)
1134 return NULL;
1135
1136 self->stack = (Pdata *)Pdata_New();
1137 if (self->stack == NULL) {
1138 Py_DECREF(self);
1139 return NULL;
1140 }
1141 memset(&self->buffer, 0, sizeof(Py_buffer));
1142
1143 self->memo_size = 32;
1144 self->memo = _Unpickler_NewMemo(self->memo_size);
1145 if (self->memo == NULL) {
1146 Py_DECREF(self);
1147 return NULL;
1148 }
1149
1150 self->arg = NULL;
1151 self->pers_func = NULL;
1152 self->input_buffer = NULL;
1153 self->input_line = NULL;
1154 self->input_len = 0;
1155 self->next_read_idx = 0;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001156 self->prefetched_idx = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001157 self->read = NULL;
1158 self->readline = NULL;
Antoine Pitrou04248a82010-10-12 20:51:21 +00001159 self->peek = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001160 self->encoding = NULL;
1161 self->errors = NULL;
1162 self->marks = NULL;
1163 self->num_marks = 0;
1164 self->marks_size = 0;
1165 self->proto = 0;
1166 self->fix_imports = 0;
1167
1168 return self;
1169}
1170
1171/* Returns -1 (with an exception set) on failure, 0 on success. This may
1172 be called once on a freshly created Pickler. */
1173static int
1174_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1175{
Antoine Pitrou04248a82010-10-12 20:51:21 +00001176 self->peek = PyObject_GetAttrString(file, "peek");
1177 if (self->peek == NULL) {
1178 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1179 PyErr_Clear();
1180 else
1181 return -1;
1182 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 self->read = PyObject_GetAttrString(file, "read");
1184 self->readline = PyObject_GetAttrString(file, "readline");
1185 if (self->readline == NULL || self->read == NULL) {
1186 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1187 PyErr_SetString(PyExc_TypeError,
1188 "file must have 'read' and 'readline' attributes");
1189 Py_CLEAR(self->read);
1190 Py_CLEAR(self->readline);
Antoine Pitrou04248a82010-10-12 20:51:21 +00001191 Py_CLEAR(self->peek);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001192 return -1;
1193 }
1194 return 0;
1195}
1196
1197/* Returns -1 (with an exception set) on failure, 0 on success. This may
1198 be called once on a freshly created Pickler. */
1199static int
1200_Unpickler_SetInputEncoding(UnpicklerObject *self,
1201 const char *encoding,
1202 const char *errors)
1203{
1204 if (encoding == NULL)
1205 encoding = "ASCII";
1206 if (errors == NULL)
1207 errors = "strict";
1208
1209 self->encoding = strdup(encoding);
1210 self->errors = strdup(errors);
1211 if (self->encoding == NULL || self->errors == NULL) {
1212 PyErr_NoMemory();
1213 return -1;
1214 }
1215 return 0;
1216}
1217
1218/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001219static int
1220memo_get(PicklerObject *self, PyObject *key)
1221{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001222 Py_ssize_t *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001223 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001224 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001225
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001226 value = PyMemoTable_Get(self->memo, key);
1227 if (value == NULL) {
1228 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001229 return -1;
1230 }
1231
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001232 if (!self->bin) {
1233 pdata[0] = GET;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001234 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1235 "%" PY_FORMAT_SIZE_T "d\n", *value);
1236 len = strlen(pdata);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001237 }
1238 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001239 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001240 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001241 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001242 len = 2;
1243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001244 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001245 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001246 pdata[1] = (unsigned char)(*value & 0xff);
1247 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1248 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1249 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001250 len = 5;
1251 }
1252 else { /* unlikely */
1253 PyErr_SetString(PicklingError,
1254 "memo id too large for LONG_BINGET");
1255 return -1;
1256 }
1257 }
1258
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001259 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001260 return -1;
1261
1262 return 0;
1263}
1264
1265/* Store an object in the memo, assign it a new unique ID based on the number
1266 of objects currently stored in the memo and generate a PUT opcode. */
1267static int
1268memo_put(PicklerObject *self, PyObject *obj)
1269{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001270 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001271 char pdata[30];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001272 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001273 int status = 0;
1274
1275 if (self->fast)
1276 return 0;
1277
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001278 x = PyMemoTable_Size(self->memo);
1279 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001280 goto error;
1281
1282 if (!self->bin) {
1283 pdata[0] = PUT;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001284 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1,
1285 "%" PY_FORMAT_SIZE_T "d\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 len = strlen(pdata);
1287 }
1288 else {
1289 if (x < 256) {
1290 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001291 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001292 len = 2;
1293 }
1294 else if (x <= 0xffffffffL) {
1295 pdata[0] = LONG_BINPUT;
1296 pdata[1] = (unsigned char)(x & 0xff);
1297 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1298 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1299 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1300 len = 5;
1301 }
1302 else { /* unlikely */
1303 PyErr_SetString(PicklingError,
1304 "memo id too large for LONG_BINPUT");
1305 return -1;
1306 }
1307 }
1308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001309 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001310 goto error;
1311
1312 if (0) {
1313 error:
1314 status = -1;
1315 }
1316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001317 return status;
1318}
1319
1320static PyObject *
1321whichmodule(PyObject *global, PyObject *global_name)
1322{
1323 Py_ssize_t i, j;
1324 static PyObject *module_str = NULL;
1325 static PyObject *main_str = NULL;
1326 PyObject *module_name;
1327 PyObject *modules_dict;
1328 PyObject *module;
1329 PyObject *obj;
1330
1331 if (module_str == NULL) {
1332 module_str = PyUnicode_InternFromString("__module__");
1333 if (module_str == NULL)
1334 return NULL;
1335 main_str = PyUnicode_InternFromString("__main__");
1336 if (main_str == NULL)
1337 return NULL;
1338 }
1339
1340 module_name = PyObject_GetAttr(global, module_str);
1341
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001342 /* In some rare cases (e.g., bound methods of extension types),
1343 __module__ can be None. If it is so, then search sys.modules
1344 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001345 if (module_name == Py_None) {
1346 Py_DECREF(module_name);
1347 goto search;
1348 }
1349
1350 if (module_name) {
1351 return module_name;
1352 }
1353 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1354 PyErr_Clear();
1355 else
1356 return NULL;
1357
1358 search:
1359 modules_dict = PySys_GetObject("modules");
1360 if (modules_dict == NULL)
1361 return NULL;
1362
1363 i = 0;
1364 module_name = NULL;
1365 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001366 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001367 continue;
1368
1369 obj = PyObject_GetAttr(module, global_name);
1370 if (obj == NULL) {
1371 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1372 PyErr_Clear();
1373 else
1374 return NULL;
1375 continue;
1376 }
1377
1378 if (obj != global) {
1379 Py_DECREF(obj);
1380 continue;
1381 }
1382
1383 Py_DECREF(obj);
1384 break;
1385 }
1386
1387 /* If no module is found, use __main__. */
1388 if (!j) {
1389 module_name = main_str;
1390 }
1391
1392 Py_INCREF(module_name);
1393 return module_name;
1394}
1395
1396/* fast_save_enter() and fast_save_leave() are guards against recursive
1397 objects when Pickler is used with the "fast mode" (i.e., with object
1398 memoization disabled). If the nesting of a list or dict object exceed
1399 FAST_NESTING_LIMIT, these guards will start keeping an internal
1400 reference to the seen list or dict objects and check whether these objects
1401 are recursive. These are not strictly necessary, since save() has a
1402 hard-coded recursion limit, but they give a nicer error message than the
1403 typical RuntimeError. */
1404static int
1405fast_save_enter(PicklerObject *self, PyObject *obj)
1406{
1407 /* if fast_nesting < 0, we're doing an error exit. */
1408 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1409 PyObject *key = NULL;
1410 if (self->fast_memo == NULL) {
1411 self->fast_memo = PyDict_New();
1412 if (self->fast_memo == NULL) {
1413 self->fast_nesting = -1;
1414 return 0;
1415 }
1416 }
1417 key = PyLong_FromVoidPtr(obj);
1418 if (key == NULL)
1419 return 0;
1420 if (PyDict_GetItem(self->fast_memo, key)) {
1421 Py_DECREF(key);
1422 PyErr_Format(PyExc_ValueError,
1423 "fast mode: can't pickle cyclic objects "
1424 "including object type %.200s at %p",
1425 obj->ob_type->tp_name, obj);
1426 self->fast_nesting = -1;
1427 return 0;
1428 }
1429 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1430 Py_DECREF(key);
1431 self->fast_nesting = -1;
1432 return 0;
1433 }
1434 Py_DECREF(key);
1435 }
1436 return 1;
1437}
1438
1439static int
1440fast_save_leave(PicklerObject *self, PyObject *obj)
1441{
1442 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1443 PyObject *key = PyLong_FromVoidPtr(obj);
1444 if (key == NULL)
1445 return 0;
1446 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1447 Py_DECREF(key);
1448 return 0;
1449 }
1450 Py_DECREF(key);
1451 }
1452 return 1;
1453}
1454
1455static int
1456save_none(PicklerObject *self, PyObject *obj)
1457{
1458 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001459 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461
1462 return 0;
1463}
1464
1465static int
1466save_bool(PicklerObject *self, PyObject *obj)
1467{
1468 static const char *buf[2] = { FALSE, TRUE };
1469 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1470 int p = (obj == Py_True);
1471
1472 if (self->proto >= 2) {
1473 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001474 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001475 return -1;
1476 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001477 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001478 return -1;
1479
1480 return 0;
1481}
1482
1483static int
1484save_int(PicklerObject *self, long x)
1485{
1486 char pdata[32];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001487 Py_ssize_t len = 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488
1489 if (!self->bin
1490#if SIZEOF_LONG > 4
1491 || x > 0x7fffffffL || x < -0x80000000L
1492#endif
1493 ) {
1494 /* Text-mode pickle, or long too big to fit in the 4-byte
1495 * signed BININT format: store as a string.
1496 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001497 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1498 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001499 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001500 return -1;
1501 }
1502 else {
1503 /* Binary pickle and x fits in a signed 4-byte int. */
1504 pdata[1] = (unsigned char)(x & 0xff);
1505 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1506 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1507 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1508
1509 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1510 if (pdata[2] == 0) {
1511 pdata[0] = BININT1;
1512 len = 2;
1513 }
1514 else {
1515 pdata[0] = BININT2;
1516 len = 3;
1517 }
1518 }
1519 else {
1520 pdata[0] = BININT;
1521 len = 5;
1522 }
1523
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001524 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001525 return -1;
1526 }
1527
1528 return 0;
1529}
1530
1531static int
1532save_long(PicklerObject *self, PyObject *obj)
1533{
1534 PyObject *repr = NULL;
1535 Py_ssize_t size;
1536 long val = PyLong_AsLong(obj);
1537 int status = 0;
1538
1539 const char long_op = LONG;
1540
1541 if (val == -1 && PyErr_Occurred()) {
1542 /* out of range for int pickling */
1543 PyErr_Clear();
1544 }
1545 else
1546 return save_int(self, val);
1547
1548 if (self->proto >= 2) {
1549 /* Linear-time pickling. */
1550 size_t nbits;
1551 size_t nbytes;
1552 unsigned char *pdata;
1553 char header[5];
1554 int i;
1555 int sign = _PyLong_Sign(obj);
1556
1557 if (sign == 0) {
1558 header[0] = LONG1;
1559 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001560 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001561 goto error;
1562 return 0;
1563 }
1564 nbits = _PyLong_NumBits(obj);
1565 if (nbits == (size_t)-1 && PyErr_Occurred())
1566 goto error;
1567 /* How many bytes do we need? There are nbits >> 3 full
1568 * bytes of data, and nbits & 7 leftover bits. If there
1569 * are any leftover bits, then we clearly need another
1570 * byte. Wnat's not so obvious is that we *probably*
1571 * need another byte even if there aren't any leftovers:
1572 * the most-significant bit of the most-significant byte
1573 * acts like a sign bit, and it's usually got a sense
1574 * opposite of the one we need. The exception is longs
1575 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1576 * its own 256's-complement, so has the right sign bit
1577 * even without the extra byte. That's a pain to check
1578 * for in advance, though, so we always grab an extra
1579 * byte at the start, and cut it back later if possible.
1580 */
1581 nbytes = (nbits >> 3) + 1;
1582 if (nbytes > INT_MAX) {
1583 PyErr_SetString(PyExc_OverflowError,
1584 "long too large to pickle");
1585 goto error;
1586 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001587 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001588 if (repr == NULL)
1589 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001590 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001591 i = _PyLong_AsByteArray((PyLongObject *)obj,
1592 pdata, nbytes,
1593 1 /* little endian */ , 1 /* signed */ );
1594 if (i < 0)
1595 goto error;
1596 /* If the long is negative, this may be a byte more than
1597 * needed. This is so iff the MSB is all redundant sign
1598 * bits.
1599 */
1600 if (sign < 0 &&
1601 nbytes > 1 &&
1602 pdata[nbytes - 1] == 0xff &&
1603 (pdata[nbytes - 2] & 0x80) != 0) {
1604 nbytes--;
1605 }
1606
1607 if (nbytes < 256) {
1608 header[0] = LONG1;
1609 header[1] = (unsigned char)nbytes;
1610 size = 2;
1611 }
1612 else {
1613 header[0] = LONG4;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001614 size = (Py_ssize_t) nbytes;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001615 for (i = 1; i < 5; i++) {
1616 header[i] = (unsigned char)(size & 0xff);
1617 size >>= 8;
1618 }
1619 size = 5;
1620 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001621 if (_Pickler_Write(self, header, size) < 0 ||
1622 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 goto error;
1624 }
1625 else {
1626 char *string;
1627
Mark Dickinson8dd05142009-01-20 20:43:58 +00001628 /* proto < 2: write the repr and newline. This is quadratic-time (in
1629 the number of digits), in both directions. We add a trailing 'L'
1630 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001631
1632 repr = PyObject_Repr(obj);
1633 if (repr == NULL)
1634 goto error;
1635
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001636 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001637 if (string == NULL)
1638 goto error;
1639
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001640 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1641 _Pickler_Write(self, string, size) < 0 ||
1642 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001643 goto error;
1644 }
1645
1646 if (0) {
1647 error:
1648 status = -1;
1649 }
1650 Py_XDECREF(repr);
1651
1652 return status;
1653}
1654
1655static int
1656save_float(PicklerObject *self, PyObject *obj)
1657{
1658 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1659
1660 if (self->bin) {
1661 char pdata[9];
1662 pdata[0] = BINFLOAT;
1663 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1664 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001665 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001666 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001667 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001668 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001669 int result = -1;
1670 char *buf = NULL;
1671 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001672
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001673 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001674 goto done;
1675
Mark Dickinson3e09f432009-04-17 08:41:23 +00001676 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001677 if (!buf) {
1678 PyErr_NoMemory();
1679 goto done;
1680 }
1681
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001682 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001683 goto done;
1684
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001685 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001686 goto done;
1687
1688 result = 0;
1689done:
1690 PyMem_Free(buf);
1691 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001692 }
1693
1694 return 0;
1695}
1696
1697static int
1698save_bytes(PicklerObject *self, PyObject *obj)
1699{
1700 if (self->proto < 3) {
1701 /* Older pickle protocols do not have an opcode for pickling bytes
1702 objects. Therefore, we need to fake the copy protocol (i.e.,
1703 the __reduce__ method) to permit bytes object unpickling. */
1704 PyObject *reduce_value = NULL;
1705 PyObject *bytelist = NULL;
1706 int status;
1707
1708 bytelist = PySequence_List(obj);
1709 if (bytelist == NULL)
1710 return -1;
1711
1712 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1713 bytelist);
1714 if (reduce_value == NULL) {
1715 Py_DECREF(bytelist);
1716 return -1;
1717 }
1718
1719 /* save_reduce() will memoize the object automatically. */
1720 status = save_reduce(self, reduce_value, obj);
1721 Py_DECREF(reduce_value);
1722 Py_DECREF(bytelist);
1723 return status;
1724 }
1725 else {
1726 Py_ssize_t size;
1727 char header[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001728 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001729
1730 size = PyBytes_Size(obj);
1731 if (size < 0)
1732 return -1;
1733
1734 if (size < 256) {
1735 header[0] = SHORT_BINBYTES;
1736 header[1] = (unsigned char)size;
1737 len = 2;
1738 }
1739 else if (size <= 0xffffffffL) {
1740 header[0] = BINBYTES;
1741 header[1] = (unsigned char)(size & 0xff);
1742 header[2] = (unsigned char)((size >> 8) & 0xff);
1743 header[3] = (unsigned char)((size >> 16) & 0xff);
1744 header[4] = (unsigned char)((size >> 24) & 0xff);
1745 len = 5;
1746 }
1747 else {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001748 PyErr_SetString(PyExc_OverflowError,
1749 "cannot serialize a bytes object larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001750 return -1; /* string too large */
1751 }
1752
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001753 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001754 return -1;
1755
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001756 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001757 return -1;
1758
1759 if (memo_put(self, obj) < 0)
1760 return -1;
1761
1762 return 0;
1763 }
1764}
1765
1766/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1767 backslash and newline characters to \uXXXX escapes. */
1768static PyObject *
1769raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1770{
1771 PyObject *repr, *result;
1772 char *p;
1773 char *q;
1774
1775 static const char *hexdigits = "0123456789abcdef";
1776
1777#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001778 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001779#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001780 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001781#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001782
1783 if (size > PY_SSIZE_T_MAX / expandsize)
1784 return PyErr_NoMemory();
1785
1786 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001787 if (repr == NULL)
1788 return NULL;
1789 if (size == 0)
1790 goto done;
1791
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001792 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001793 while (size-- > 0) {
1794 Py_UNICODE ch = *s++;
1795#ifdef Py_UNICODE_WIDE
1796 /* Map 32-bit characters to '\Uxxxxxxxx' */
1797 if (ch >= 0x10000) {
1798 *p++ = '\\';
1799 *p++ = 'U';
1800 *p++ = hexdigits[(ch >> 28) & 0xf];
1801 *p++ = hexdigits[(ch >> 24) & 0xf];
1802 *p++ = hexdigits[(ch >> 20) & 0xf];
1803 *p++ = hexdigits[(ch >> 16) & 0xf];
1804 *p++ = hexdigits[(ch >> 12) & 0xf];
1805 *p++ = hexdigits[(ch >> 8) & 0xf];
1806 *p++ = hexdigits[(ch >> 4) & 0xf];
1807 *p++ = hexdigits[ch & 15];
1808 }
1809 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001810#else
1811 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1812 if (ch >= 0xD800 && ch < 0xDC00) {
1813 Py_UNICODE ch2;
1814 Py_UCS4 ucs;
1815
1816 ch2 = *s++;
1817 size--;
1818 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1819 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1820 *p++ = '\\';
1821 *p++ = 'U';
1822 *p++ = hexdigits[(ucs >> 28) & 0xf];
1823 *p++ = hexdigits[(ucs >> 24) & 0xf];
1824 *p++ = hexdigits[(ucs >> 20) & 0xf];
1825 *p++ = hexdigits[(ucs >> 16) & 0xf];
1826 *p++ = hexdigits[(ucs >> 12) & 0xf];
1827 *p++ = hexdigits[(ucs >> 8) & 0xf];
1828 *p++ = hexdigits[(ucs >> 4) & 0xf];
1829 *p++ = hexdigits[ucs & 0xf];
1830 continue;
1831 }
1832 /* Fall through: isolated surrogates are copied as-is */
1833 s--;
1834 size++;
1835 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001836#endif
1837 /* Map 16-bit characters to '\uxxxx' */
1838 if (ch >= 256 || ch == '\\' || ch == '\n') {
1839 *p++ = '\\';
1840 *p++ = 'u';
1841 *p++ = hexdigits[(ch >> 12) & 0xf];
1842 *p++ = hexdigits[(ch >> 8) & 0xf];
1843 *p++ = hexdigits[(ch >> 4) & 0xf];
1844 *p++ = hexdigits[ch & 15];
1845 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001846 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001847 else
1848 *p++ = (char) ch;
1849 }
1850 size = p - q;
1851
1852 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001853 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001854 Py_DECREF(repr);
1855 return result;
1856}
1857
1858static int
1859save_unicode(PicklerObject *self, PyObject *obj)
1860{
1861 Py_ssize_t size;
1862 PyObject *encoded = NULL;
1863
1864 if (self->bin) {
1865 char pdata[5];
1866
Victor Stinner485fb562010-04-13 11:07:24 +00001867 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1868 PyUnicode_GET_SIZE(obj),
1869 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001870 if (encoded == NULL)
1871 goto error;
1872
1873 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001874 if (size > 0xffffffffL) {
1875 PyErr_SetString(PyExc_OverflowError,
1876 "cannot serialize a string larger than 4GB");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001877 goto error; /* string too large */
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001878 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001879
1880 pdata[0] = BINUNICODE;
1881 pdata[1] = (unsigned char)(size & 0xff);
1882 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1883 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1884 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1885
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001886 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001887 goto error;
1888
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001889 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001890 goto error;
1891 }
1892 else {
1893 const char unicode_op = UNICODE;
1894
1895 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1896 PyUnicode_GET_SIZE(obj));
1897 if (encoded == NULL)
1898 goto error;
1899
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001900 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001901 goto error;
1902
1903 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001904 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001905 goto error;
1906
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001907 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001908 goto error;
1909 }
1910 if (memo_put(self, obj) < 0)
1911 goto error;
1912
1913 Py_DECREF(encoded);
1914 return 0;
1915
1916 error:
1917 Py_XDECREF(encoded);
1918 return -1;
1919}
1920
1921/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1922static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001923store_tuple_elements(PicklerObject *self, PyObject *t, Py_ssize_t len)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001924{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001925 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001926
1927 assert(PyTuple_Size(t) == len);
1928
1929 for (i = 0; i < len; i++) {
1930 PyObject *element = PyTuple_GET_ITEM(t, i);
1931
1932 if (element == NULL)
1933 return -1;
1934 if (save(self, element, 0) < 0)
1935 return -1;
1936 }
1937
1938 return 0;
1939}
1940
1941/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1942 * used across protocols to minimize the space needed to pickle them.
1943 * Tuples are also the only builtin immutable type that can be recursive
1944 * (a tuple can be reached from itself), and that requires some subtle
1945 * magic so that it works in all cases. IOW, this is a long routine.
1946 */
1947static int
1948save_tuple(PicklerObject *self, PyObject *obj)
1949{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02001950 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001951
1952 const char mark_op = MARK;
1953 const char tuple_op = TUPLE;
1954 const char pop_op = POP;
1955 const char pop_mark_op = POP_MARK;
1956 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1957
1958 if ((len = PyTuple_Size(obj)) < 0)
1959 return -1;
1960
1961 if (len == 0) {
1962 char pdata[2];
1963
1964 if (self->proto) {
1965 pdata[0] = EMPTY_TUPLE;
1966 len = 1;
1967 }
1968 else {
1969 pdata[0] = MARK;
1970 pdata[1] = TUPLE;
1971 len = 2;
1972 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001973 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974 return -1;
1975 return 0;
1976 }
1977
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001978 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001979 * saving the tuple elements, the tuple must be recursive, in
1980 * which case we'll pop everything we put on the stack, and fetch
1981 * its value from the memo.
1982 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001983 if (len <= 3 && self->proto >= 2) {
1984 /* Use TUPLE{1,2,3} opcodes. */
1985 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001986 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001987
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001988 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001989 /* pop the len elements */
1990 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001991 if (_Pickler_Write(self, &pop_op, 1) < 0)
1992 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001993 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001994 if (memo_get(self, obj) < 0)
1995 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001996
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001997 return 0;
1998 }
1999 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002000 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
2001 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002002 }
2003 goto memoize;
2004 }
2005
2006 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
2007 * Generate MARK e1 e2 ... TUPLE
2008 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002009 if (_Pickler_Write(self, &mark_op, 1) < 0)
2010 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002011
2012 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002013 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002014
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002015 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002016 /* pop the stack stuff we pushed */
2017 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002018 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
2019 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002020 }
2021 else {
2022 /* Note that we pop one more than len, to remove
2023 * the MARK too.
2024 */
2025 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002026 if (_Pickler_Write(self, &pop_op, 1) < 0)
2027 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002028 }
2029 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002030 if (memo_get(self, obj) < 0)
2031 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002033 return 0;
2034 }
2035 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002036 if (_Pickler_Write(self, &tuple_op, 1) < 0)
2037 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 }
2039
2040 memoize:
2041 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002042 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002043
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002044 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002045}
2046
2047/* iter is an iterator giving items, and we batch up chunks of
2048 * MARK item item ... item APPENDS
2049 * opcode sequences. Calling code should have arranged to first create an
2050 * empty list, or list-like object, for the APPENDS to operate on.
2051 * Returns 0 on success, <0 on error.
2052 */
2053static int
2054batch_list(PicklerObject *self, PyObject *iter)
2055{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002056 PyObject *obj = NULL;
2057 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002058 int i, n;
2059
2060 const char mark_op = MARK;
2061 const char append_op = APPEND;
2062 const char appends_op = APPENDS;
2063
2064 assert(iter != NULL);
2065
2066 /* XXX: I think this function could be made faster by avoiding the
2067 iterator interface and fetching objects directly from list using
2068 PyList_GET_ITEM.
2069 */
2070
2071 if (self->proto == 0) {
2072 /* APPENDS isn't available; do one at a time. */
2073 for (;;) {
2074 obj = PyIter_Next(iter);
2075 if (obj == NULL) {
2076 if (PyErr_Occurred())
2077 return -1;
2078 break;
2079 }
2080 i = save(self, obj, 0);
2081 Py_DECREF(obj);
2082 if (i < 0)
2083 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002084 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002085 return -1;
2086 }
2087 return 0;
2088 }
2089
2090 /* proto > 0: write in batches of BATCHSIZE. */
2091 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002092 /* Get first item */
2093 firstitem = PyIter_Next(iter);
2094 if (firstitem == NULL) {
2095 if (PyErr_Occurred())
2096 goto error;
2097
2098 /* nothing more to add */
2099 break;
2100 }
2101
2102 /* Try to get a second item */
2103 obj = PyIter_Next(iter);
2104 if (obj == NULL) {
2105 if (PyErr_Occurred())
2106 goto error;
2107
2108 /* Only one item to write */
2109 if (save(self, firstitem, 0) < 0)
2110 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002111 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002112 goto error;
2113 Py_CLEAR(firstitem);
2114 break;
2115 }
2116
2117 /* More than one item to write */
2118
2119 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002120 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002121 goto error;
2122
2123 if (save(self, firstitem, 0) < 0)
2124 goto error;
2125 Py_CLEAR(firstitem);
2126 n = 1;
2127
2128 /* Fetch and save up to BATCHSIZE items */
2129 while (obj) {
2130 if (save(self, obj, 0) < 0)
2131 goto error;
2132 Py_CLEAR(obj);
2133 n += 1;
2134
2135 if (n == BATCHSIZE)
2136 break;
2137
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002138 obj = PyIter_Next(iter);
2139 if (obj == NULL) {
2140 if (PyErr_Occurred())
2141 goto error;
2142 break;
2143 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002144 }
2145
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002146 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002147 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002148
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002149 } while (n == BATCHSIZE);
2150 return 0;
2151
2152 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002153 Py_XDECREF(firstitem);
2154 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002155 return -1;
2156}
2157
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002158/* This is a variant of batch_list() above, specialized for lists (with no
2159 * support for list subclasses). Like batch_list(), we batch up chunks of
2160 * MARK item item ... item APPENDS
2161 * opcode sequences. Calling code should have arranged to first create an
2162 * empty list, or list-like object, for the APPENDS to operate on.
2163 * Returns 0 on success, -1 on error.
2164 *
2165 * This version is considerably faster than batch_list(), if less general.
2166 *
2167 * Note that this only works for protocols > 0.
2168 */
2169static int
2170batch_list_exact(PicklerObject *self, PyObject *obj)
2171{
2172 PyObject *item = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002173 Py_ssize_t this_batch, total;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002174
2175 const char append_op = APPEND;
2176 const char appends_op = APPENDS;
2177 const char mark_op = MARK;
2178
2179 assert(obj != NULL);
2180 assert(self->proto > 0);
2181 assert(PyList_CheckExact(obj));
2182
2183 if (PyList_GET_SIZE(obj) == 1) {
2184 item = PyList_GET_ITEM(obj, 0);
2185 if (save(self, item, 0) < 0)
2186 return -1;
2187 if (_Pickler_Write(self, &append_op, 1) < 0)
2188 return -1;
2189 return 0;
2190 }
2191
2192 /* Write in batches of BATCHSIZE. */
2193 total = 0;
2194 do {
2195 this_batch = 0;
2196 if (_Pickler_Write(self, &mark_op, 1) < 0)
2197 return -1;
2198 while (total < PyList_GET_SIZE(obj)) {
2199 item = PyList_GET_ITEM(obj, total);
2200 if (save(self, item, 0) < 0)
2201 return -1;
2202 total++;
2203 if (++this_batch == BATCHSIZE)
2204 break;
2205 }
2206 if (_Pickler_Write(self, &appends_op, 1) < 0)
2207 return -1;
2208
2209 } while (total < PyList_GET_SIZE(obj));
2210
2211 return 0;
2212}
2213
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002214static int
2215save_list(PicklerObject *self, PyObject *obj)
2216{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002217 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002218 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002219 int status = 0;
2220
2221 if (self->fast && !fast_save_enter(self, obj))
2222 goto error;
2223
2224 /* Create an empty list. */
2225 if (self->bin) {
2226 header[0] = EMPTY_LIST;
2227 len = 1;
2228 }
2229 else {
2230 header[0] = MARK;
2231 header[1] = LIST;
2232 len = 2;
2233 }
2234
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002235 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002236 goto error;
2237
2238 /* Get list length, and bow out early if empty. */
2239 if ((len = PyList_Size(obj)) < 0)
2240 goto error;
2241
2242 if (memo_put(self, obj) < 0)
2243 goto error;
2244
2245 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002246 /* Materialize the list elements. */
2247 if (PyList_CheckExact(obj) && self->proto > 0) {
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002248 if (Py_EnterRecursiveCall(" while pickling an object"))
2249 goto error;
2250 status = batch_list_exact(self, obj);
2251 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002252 } else {
2253 PyObject *iter = PyObject_GetIter(obj);
2254 if (iter == NULL)
2255 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002256
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002257 if (Py_EnterRecursiveCall(" while pickling an object")) {
2258 Py_DECREF(iter);
2259 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002260 }
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002261 status = batch_list(self, iter);
2262 Py_LeaveRecursiveCall();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002263 Py_DECREF(iter);
2264 }
2265 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002266 if (0) {
2267 error:
2268 status = -1;
2269 }
2270
2271 if (self->fast && !fast_save_leave(self, obj))
2272 status = -1;
2273
2274 return status;
2275}
2276
2277/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2278 * MARK key value ... key value SETITEMS
2279 * opcode sequences. Calling code should have arranged to first create an
2280 * empty dict, or dict-like object, for the SETITEMS to operate on.
2281 * Returns 0 on success, <0 on error.
2282 *
2283 * This is very much like batch_list(). The difference between saving
2284 * elements directly, and picking apart two-tuples, is so long-winded at
2285 * the C level, though, that attempts to combine these routines were too
2286 * ugly to bear.
2287 */
2288static int
2289batch_dict(PicklerObject *self, PyObject *iter)
2290{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002291 PyObject *obj = NULL;
2292 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002293 int i, n;
2294
2295 const char mark_op = MARK;
2296 const char setitem_op = SETITEM;
2297 const char setitems_op = SETITEMS;
2298
2299 assert(iter != NULL);
2300
2301 if (self->proto == 0) {
2302 /* SETITEMS isn't available; do one at a time. */
2303 for (;;) {
2304 obj = PyIter_Next(iter);
2305 if (obj == NULL) {
2306 if (PyErr_Occurred())
2307 return -1;
2308 break;
2309 }
2310 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2311 PyErr_SetString(PyExc_TypeError, "dict items "
2312 "iterator must return 2-tuples");
2313 return -1;
2314 }
2315 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2316 if (i >= 0)
2317 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2318 Py_DECREF(obj);
2319 if (i < 0)
2320 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002321 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002322 return -1;
2323 }
2324 return 0;
2325 }
2326
2327 /* proto > 0: write in batches of BATCHSIZE. */
2328 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002329 /* Get first item */
2330 firstitem = PyIter_Next(iter);
2331 if (firstitem == NULL) {
2332 if (PyErr_Occurred())
2333 goto error;
2334
2335 /* nothing more to add */
2336 break;
2337 }
2338 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2339 PyErr_SetString(PyExc_TypeError, "dict items "
2340 "iterator must return 2-tuples");
2341 goto error;
2342 }
2343
2344 /* Try to get a second item */
2345 obj = PyIter_Next(iter);
2346 if (obj == NULL) {
2347 if (PyErr_Occurred())
2348 goto error;
2349
2350 /* Only one item to write */
2351 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2352 goto error;
2353 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2354 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002355 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002356 goto error;
2357 Py_CLEAR(firstitem);
2358 break;
2359 }
2360
2361 /* More than one item to write */
2362
2363 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002364 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002365 goto error;
2366
2367 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2368 goto error;
2369 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2370 goto error;
2371 Py_CLEAR(firstitem);
2372 n = 1;
2373
2374 /* Fetch and save up to BATCHSIZE items */
2375 while (obj) {
2376 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2377 PyErr_SetString(PyExc_TypeError, "dict items "
2378 "iterator must return 2-tuples");
2379 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002380 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002381 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2382 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2383 goto error;
2384 Py_CLEAR(obj);
2385 n += 1;
2386
2387 if (n == BATCHSIZE)
2388 break;
2389
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002390 obj = PyIter_Next(iter);
2391 if (obj == NULL) {
2392 if (PyErr_Occurred())
2393 goto error;
2394 break;
2395 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002396 }
2397
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002398 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002399 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002400
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002401 } while (n == BATCHSIZE);
2402 return 0;
2403
2404 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002405 Py_XDECREF(firstitem);
2406 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002407 return -1;
2408}
2409
Collin Winter5c9b02d2009-05-25 05:43:30 +00002410/* This is a variant of batch_dict() above that specializes for dicts, with no
2411 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2412 * MARK key value ... key value SETITEMS
2413 * opcode sequences. Calling code should have arranged to first create an
2414 * empty dict, or dict-like object, for the SETITEMS to operate on.
2415 * Returns 0 on success, -1 on error.
2416 *
2417 * Note that this currently doesn't work for protocol 0.
2418 */
2419static int
2420batch_dict_exact(PicklerObject *self, PyObject *obj)
2421{
2422 PyObject *key = NULL, *value = NULL;
2423 int i;
2424 Py_ssize_t dict_size, ppos = 0;
2425
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002426 const char mark_op = MARK;
2427 const char setitem_op = SETITEM;
2428 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002429
2430 assert(obj != NULL);
2431 assert(self->proto > 0);
2432
2433 dict_size = PyDict_Size(obj);
2434
2435 /* Special-case len(d) == 1 to save space. */
2436 if (dict_size == 1) {
2437 PyDict_Next(obj, &ppos, &key, &value);
2438 if (save(self, key, 0) < 0)
2439 return -1;
2440 if (save(self, value, 0) < 0)
2441 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002442 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002443 return -1;
2444 return 0;
2445 }
2446
2447 /* Write in batches of BATCHSIZE. */
2448 do {
2449 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002450 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002451 return -1;
2452 while (PyDict_Next(obj, &ppos, &key, &value)) {
2453 if (save(self, key, 0) < 0)
2454 return -1;
2455 if (save(self, value, 0) < 0)
2456 return -1;
2457 if (++i == BATCHSIZE)
2458 break;
2459 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002460 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002461 return -1;
2462 if (PyDict_Size(obj) != dict_size) {
2463 PyErr_Format(
2464 PyExc_RuntimeError,
2465 "dictionary changed size during iteration");
2466 return -1;
2467 }
2468
2469 } while (i == BATCHSIZE);
2470 return 0;
2471}
2472
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002473static int
2474save_dict(PicklerObject *self, PyObject *obj)
2475{
2476 PyObject *items, *iter;
2477 char header[3];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002478 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002479 int status = 0;
2480
2481 if (self->fast && !fast_save_enter(self, obj))
2482 goto error;
2483
2484 /* Create an empty dict. */
2485 if (self->bin) {
2486 header[0] = EMPTY_DICT;
2487 len = 1;
2488 }
2489 else {
2490 header[0] = MARK;
2491 header[1] = DICT;
2492 len = 2;
2493 }
2494
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002495 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002496 goto error;
2497
2498 /* Get dict size, and bow out early if empty. */
2499 if ((len = PyDict_Size(obj)) < 0)
2500 goto error;
2501
2502 if (memo_put(self, obj) < 0)
2503 goto error;
2504
2505 if (len != 0) {
2506 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002507 if (PyDict_CheckExact(obj) && self->proto > 0) {
2508 /* We can take certain shortcuts if we know this is a dict and
2509 not a dict subclass. */
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002510 if (Py_EnterRecursiveCall(" while pickling an object"))
2511 goto error;
2512 status = batch_dict_exact(self, obj);
2513 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002514 } else {
2515 items = PyObject_CallMethod(obj, "items", "()");
2516 if (items == NULL)
2517 goto error;
2518 iter = PyObject_GetIter(items);
2519 Py_DECREF(items);
2520 if (iter == NULL)
2521 goto error;
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002522 if (Py_EnterRecursiveCall(" while pickling an object")) {
2523 Py_DECREF(iter);
2524 goto error;
2525 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00002526 status = batch_dict(self, iter);
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00002527 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00002528 Py_DECREF(iter);
2529 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002530 }
2531
2532 if (0) {
2533 error:
2534 status = -1;
2535 }
2536
2537 if (self->fast && !fast_save_leave(self, obj))
2538 status = -1;
2539
2540 return status;
2541}
2542
2543static int
2544save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2545{
2546 static PyObject *name_str = NULL;
2547 PyObject *global_name = NULL;
2548 PyObject *module_name = NULL;
2549 PyObject *module = NULL;
2550 PyObject *cls;
2551 int status = 0;
2552
2553 const char global_op = GLOBAL;
2554
2555 if (name_str == NULL) {
2556 name_str = PyUnicode_InternFromString("__name__");
2557 if (name_str == NULL)
2558 goto error;
2559 }
2560
2561 if (name) {
2562 global_name = name;
2563 Py_INCREF(global_name);
2564 }
2565 else {
2566 global_name = PyObject_GetAttr(obj, name_str);
2567 if (global_name == NULL)
2568 goto error;
2569 }
2570
2571 module_name = whichmodule(obj, global_name);
2572 if (module_name == NULL)
2573 goto error;
2574
2575 /* XXX: Change to use the import C API directly with level=0 to disallow
2576 relative imports.
2577
2578 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2579 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2580 custom import functions (IMHO, this would be a nice security
2581 feature). The import C API would need to be extended to support the
2582 extra parameters of __import__ to fix that. */
2583 module = PyImport_Import(module_name);
2584 if (module == NULL) {
2585 PyErr_Format(PicklingError,
2586 "Can't pickle %R: import of module %R failed",
2587 obj, module_name);
2588 goto error;
2589 }
2590 cls = PyObject_GetAttr(module, global_name);
2591 if (cls == NULL) {
2592 PyErr_Format(PicklingError,
2593 "Can't pickle %R: attribute lookup %S.%S failed",
2594 obj, module_name, global_name);
2595 goto error;
2596 }
2597 if (cls != obj) {
2598 Py_DECREF(cls);
2599 PyErr_Format(PicklingError,
2600 "Can't pickle %R: it's not the same object as %S.%S",
2601 obj, module_name, global_name);
2602 goto error;
2603 }
2604 Py_DECREF(cls);
2605
2606 if (self->proto >= 2) {
2607 /* See whether this is in the extension registry, and if
2608 * so generate an EXT opcode.
2609 */
2610 PyObject *code_obj; /* extension code as Python object */
2611 long code; /* extension code as C value */
2612 char pdata[5];
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002613 Py_ssize_t n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002614
2615 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2616 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2617 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2618 /* The object is not registered in the extension registry.
2619 This is the most likely code path. */
2620 if (code_obj == NULL)
2621 goto gen_global;
2622
2623 /* XXX: pickle.py doesn't check neither the type, nor the range
2624 of the value returned by the extension_registry. It should for
2625 consistency. */
2626
2627 /* Verify code_obj has the right type and value. */
2628 if (!PyLong_Check(code_obj)) {
2629 PyErr_Format(PicklingError,
2630 "Can't pickle %R: extension code %R isn't an integer",
2631 obj, code_obj);
2632 goto error;
2633 }
2634 code = PyLong_AS_LONG(code_obj);
2635 if (code <= 0 || code > 0x7fffffffL) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02002636 if (!PyErr_Occurred())
2637 PyErr_Format(PicklingError,
2638 "Can't pickle %R: extension code %ld is out of range",
2639 obj, code);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002640 goto error;
2641 }
2642
2643 /* Generate an EXT opcode. */
2644 if (code <= 0xff) {
2645 pdata[0] = EXT1;
2646 pdata[1] = (unsigned char)code;
2647 n = 2;
2648 }
2649 else if (code <= 0xffff) {
2650 pdata[0] = EXT2;
2651 pdata[1] = (unsigned char)(code & 0xff);
2652 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2653 n = 3;
2654 }
2655 else {
2656 pdata[0] = EXT4;
2657 pdata[1] = (unsigned char)(code & 0xff);
2658 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2659 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2660 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2661 n = 5;
2662 }
2663
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002664 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002665 goto error;
2666 }
2667 else {
2668 /* Generate a normal global opcode if we are using a pickle
2669 protocol <= 2, or if the object is not registered in the
2670 extension registry. */
2671 PyObject *encoded;
2672 PyObject *(*unicode_encoder)(PyObject *);
2673
2674 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002675 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002676 goto error;
2677
2678 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2679 the module name and the global name using UTF-8. We do so only when
2680 we are using the pickle protocol newer than version 3. This is to
2681 ensure compatibility with older Unpickler running on Python 2.x. */
2682 if (self->proto >= 3) {
2683 unicode_encoder = PyUnicode_AsUTF8String;
2684 }
2685 else {
2686 unicode_encoder = PyUnicode_AsASCIIString;
2687 }
2688
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002689 /* For protocol < 3 and if the user didn't request against doing so,
2690 we convert module names to the old 2.x module names. */
2691 if (self->fix_imports) {
2692 PyObject *key;
2693 PyObject *item;
2694
2695 key = PyTuple_Pack(2, module_name, global_name);
2696 if (key == NULL)
2697 goto error;
2698 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2699 Py_DECREF(key);
2700 if (item) {
2701 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2702 PyErr_Format(PyExc_RuntimeError,
2703 "_compat_pickle.REVERSE_NAME_MAPPING values "
2704 "should be 2-tuples, not %.200s",
2705 Py_TYPE(item)->tp_name);
2706 goto error;
2707 }
2708 Py_CLEAR(module_name);
2709 Py_CLEAR(global_name);
2710 module_name = PyTuple_GET_ITEM(item, 0);
2711 global_name = PyTuple_GET_ITEM(item, 1);
2712 if (!PyUnicode_Check(module_name) ||
2713 !PyUnicode_Check(global_name)) {
2714 PyErr_Format(PyExc_RuntimeError,
2715 "_compat_pickle.REVERSE_NAME_MAPPING values "
2716 "should be pairs of str, not (%.200s, %.200s)",
2717 Py_TYPE(module_name)->tp_name,
2718 Py_TYPE(global_name)->tp_name);
2719 goto error;
2720 }
2721 Py_INCREF(module_name);
2722 Py_INCREF(global_name);
2723 }
2724 else if (PyErr_Occurred()) {
2725 goto error;
2726 }
2727
2728 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2729 if (item) {
2730 if (!PyUnicode_Check(item)) {
2731 PyErr_Format(PyExc_RuntimeError,
2732 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2733 "should be strings, not %.200s",
2734 Py_TYPE(item)->tp_name);
2735 goto error;
2736 }
2737 Py_CLEAR(module_name);
2738 module_name = item;
2739 Py_INCREF(module_name);
2740 }
2741 else if (PyErr_Occurred()) {
2742 goto error;
2743 }
2744 }
2745
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002746 /* Save the name of the module. */
2747 encoded = unicode_encoder(module_name);
2748 if (encoded == NULL) {
2749 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2750 PyErr_Format(PicklingError,
2751 "can't pickle module identifier '%S' using "
2752 "pickle protocol %i", module_name, self->proto);
2753 goto error;
2754 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002755 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002756 PyBytes_GET_SIZE(encoded)) < 0) {
2757 Py_DECREF(encoded);
2758 goto error;
2759 }
2760 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002761 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002762 goto error;
2763
2764 /* Save the name of the module. */
2765 encoded = unicode_encoder(global_name);
2766 if (encoded == NULL) {
2767 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2768 PyErr_Format(PicklingError,
2769 "can't pickle global identifier '%S' using "
2770 "pickle protocol %i", global_name, self->proto);
2771 goto error;
2772 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002773 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002774 PyBytes_GET_SIZE(encoded)) < 0) {
2775 Py_DECREF(encoded);
2776 goto error;
2777 }
2778 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002779 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002780 goto error;
2781
2782 /* Memoize the object. */
2783 if (memo_put(self, obj) < 0)
2784 goto error;
2785 }
2786
2787 if (0) {
2788 error:
2789 status = -1;
2790 }
2791 Py_XDECREF(module_name);
2792 Py_XDECREF(global_name);
2793 Py_XDECREF(module);
2794
2795 return status;
2796}
2797
2798static int
2799save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2800{
2801 PyObject *pid = NULL;
2802 int status = 0;
2803
2804 const char persid_op = PERSID;
2805 const char binpersid_op = BINPERSID;
2806
2807 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002808 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002809 if (pid == NULL)
2810 return -1;
2811
2812 if (pid != Py_None) {
2813 if (self->bin) {
2814 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002815 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002816 goto error;
2817 }
2818 else {
2819 PyObject *pid_str = NULL;
2820 char *pid_ascii_bytes;
2821 Py_ssize_t size;
2822
2823 pid_str = PyObject_Str(pid);
2824 if (pid_str == NULL)
2825 goto error;
2826
2827 /* XXX: Should it check whether the persistent id only contains
2828 ASCII characters? And what if the pid contains embedded
2829 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002830 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002831 Py_DECREF(pid_str);
2832 if (pid_ascii_bytes == NULL)
2833 goto error;
2834
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002835 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2836 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2837 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002838 goto error;
2839 }
2840 status = 1;
2841 }
2842
2843 if (0) {
2844 error:
2845 status = -1;
2846 }
2847 Py_XDECREF(pid);
2848
2849 return status;
2850}
2851
2852/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2853 * appropriate __reduce__ method for obj.
2854 */
2855static int
2856save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2857{
2858 PyObject *callable;
2859 PyObject *argtup;
2860 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002861 PyObject *listitems = Py_None;
2862 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002863 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002864
2865 int use_newobj = self->proto >= 2;
2866
2867 const char reduce_op = REDUCE;
2868 const char build_op = BUILD;
2869 const char newobj_op = NEWOBJ;
2870
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002871 size = PyTuple_Size(args);
2872 if (size < 2 || size > 5) {
2873 PyErr_SetString(PicklingError, "tuple returned by "
2874 "__reduce__ must contain 2 through 5 elements");
2875 return -1;
2876 }
2877
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002878 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2879 &callable, &argtup, &state, &listitems, &dictitems))
2880 return -1;
2881
2882 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002883 PyErr_SetString(PicklingError, "first item of the tuple "
2884 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 return -1;
2886 }
2887 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002888 PyErr_SetString(PicklingError, "second item of the tuple "
2889 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002890 return -1;
2891 }
2892
2893 if (state == Py_None)
2894 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002895
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002896 if (listitems == Py_None)
2897 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002898 else if (!PyIter_Check(listitems)) {
2899 PyErr_Format(PicklingError, "Fourth element of tuple"
2900 "returned by __reduce__ must be an iterator, not %s",
2901 Py_TYPE(listitems)->tp_name);
2902 return -1;
2903 }
2904
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002905 if (dictitems == Py_None)
2906 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002907 else if (!PyIter_Check(dictitems)) {
2908 PyErr_Format(PicklingError, "Fifth element of tuple"
2909 "returned by __reduce__ must be an iterator, not %s",
2910 Py_TYPE(dictitems)->tp_name);
2911 return -1;
2912 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002913
2914 /* Protocol 2 special case: if callable's name is __newobj__, use
2915 NEWOBJ. */
2916 if (use_newobj) {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002917 static PyObject *newobj_str = NULL;
2918 PyObject *name_str;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002919
2920 if (newobj_str == NULL) {
2921 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrouff150f22010-10-22 21:41:05 +00002922 if (newobj_str == NULL)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002923 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002924 }
2925
Antoine Pitrouff150f22010-10-22 21:41:05 +00002926 name_str = PyObject_GetAttrString(callable, "__name__");
2927 if (name_str == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002928 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2929 PyErr_Clear();
2930 else
2931 return -1;
2932 use_newobj = 0;
2933 }
2934 else {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002935 use_newobj = PyUnicode_Check(name_str) &&
2936 PyUnicode_Compare(name_str, newobj_str) == 0;
2937 Py_DECREF(name_str);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002938 }
2939 }
2940 if (use_newobj) {
2941 PyObject *cls;
2942 PyObject *newargtup;
2943 PyObject *obj_class;
2944 int p;
2945
2946 /* Sanity checks. */
2947 if (Py_SIZE(argtup) < 1) {
2948 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2949 return -1;
2950 }
2951
2952 cls = PyTuple_GET_ITEM(argtup, 0);
Antoine Pitrouff150f22010-10-22 21:41:05 +00002953 if (!PyObject_HasAttrString(cls, "__new__")) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002954 PyErr_SetString(PicklingError, "args[0] from "
Antoine Pitrouff150f22010-10-22 21:41:05 +00002955 "__newobj__ args has no __new__");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002956 return -1;
2957 }
2958
2959 if (obj != NULL) {
Antoine Pitrouff150f22010-10-22 21:41:05 +00002960 obj_class = PyObject_GetAttrString(obj, "__class__");
2961 if (obj_class == NULL) {
2962 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2963 PyErr_Clear();
2964 else
2965 return -1;
2966 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002967 p = obj_class != cls; /* true iff a problem */
2968 Py_DECREF(obj_class);
2969 if (p) {
2970 PyErr_SetString(PicklingError, "args[0] from "
2971 "__newobj__ args has the wrong class");
2972 return -1;
2973 }
2974 }
2975 /* XXX: These calls save() are prone to infinite recursion. Imagine
2976 what happen if the value returned by the __reduce__() method of
2977 some extension type contains another object of the same type. Ouch!
2978
2979 Here is a quick example, that I ran into, to illustrate what I
2980 mean:
2981
2982 >>> import pickle, copyreg
2983 >>> copyreg.dispatch_table.pop(complex)
2984 >>> pickle.dumps(1+2j)
2985 Traceback (most recent call last):
2986 ...
2987 RuntimeError: maximum recursion depth exceeded
2988
2989 Removing the complex class from copyreg.dispatch_table made the
2990 __reduce_ex__() method emit another complex object:
2991
2992 >>> (1+1j).__reduce_ex__(2)
2993 (<function __newobj__ at 0xb7b71c3c>,
2994 (<class 'complex'>, (1+1j)), None, None, None)
2995
2996 Thus when save() was called on newargstup (the 2nd item) recursion
2997 ensued. Of course, the bug was in the complex class which had a
2998 broken __getnewargs__() that emitted another complex object. But,
2999 the point, here, is it is quite easy to end up with a broken reduce
3000 function. */
3001
3002 /* Save the class and its __new__ arguments. */
3003 if (save(self, cls, 0) < 0)
3004 return -1;
3005
3006 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
3007 if (newargtup == NULL)
3008 return -1;
3009
3010 p = save(self, newargtup, 0);
3011 Py_DECREF(newargtup);
3012 if (p < 0)
3013 return -1;
3014
3015 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003016 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003017 return -1;
3018 }
3019 else { /* Not using NEWOBJ. */
3020 if (save(self, callable, 0) < 0 ||
3021 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003022 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003023 return -1;
3024 }
3025
3026 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
3027 the caller do not want to memoize the object. Not particularly useful,
3028 but that is to mimic the behavior save_reduce() in pickle.py when
3029 obj is None. */
3030 if (obj && memo_put(self, obj) < 0)
3031 return -1;
3032
3033 if (listitems && batch_list(self, listitems) < 0)
3034 return -1;
3035
3036 if (dictitems && batch_dict(self, dictitems) < 0)
3037 return -1;
3038
3039 if (state) {
3040 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003041 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003042 return -1;
3043 }
3044
3045 return 0;
3046}
3047
3048static int
3049save(PicklerObject *self, PyObject *obj, int pers_save)
3050{
3051 PyTypeObject *type;
3052 PyObject *reduce_func = NULL;
3053 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003054 int status = 0;
3055
Antoine Pitroue6d4c5b2011-01-23 17:12:25 +00003056 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003057 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003058
3059 /* The extra pers_save argument is necessary to avoid calling save_pers()
3060 on its returned object. */
3061 if (!pers_save && self->pers_func) {
3062 /* save_pers() returns:
3063 -1 to signal an error;
3064 0 if it did nothing successfully;
3065 1 if a persistent id was saved.
3066 */
3067 if ((status = save_pers(self, obj, self->pers_func)) != 0)
3068 goto done;
3069 }
3070
3071 type = Py_TYPE(obj);
3072
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003073 /* The old cPickle had an optimization that used switch-case statement
3074 dispatching on the first letter of the type name. This has was removed
3075 since benchmarks shown that this optimization was actually slowing
3076 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003077
3078 /* Atom types; these aren't memoized, so don't check the memo. */
3079
3080 if (obj == Py_None) {
3081 status = save_none(self, obj);
3082 goto done;
3083 }
3084 else if (obj == Py_False || obj == Py_True) {
3085 status = save_bool(self, obj);
3086 goto done;
3087 }
3088 else if (type == &PyLong_Type) {
3089 status = save_long(self, obj);
3090 goto done;
3091 }
3092 else if (type == &PyFloat_Type) {
3093 status = save_float(self, obj);
3094 goto done;
3095 }
3096
3097 /* Check the memo to see if it has the object. If so, generate
3098 a GET (or BINGET) opcode, instead of pickling the object
3099 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003100 if (PyMemoTable_Get(self->memo, obj)) {
3101 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003102 goto error;
3103 goto done;
3104 }
3105
3106 if (type == &PyBytes_Type) {
3107 status = save_bytes(self, obj);
3108 goto done;
3109 }
3110 else if (type == &PyUnicode_Type) {
3111 status = save_unicode(self, obj);
3112 goto done;
3113 }
3114 else if (type == &PyDict_Type) {
3115 status = save_dict(self, obj);
3116 goto done;
3117 }
3118 else if (type == &PyList_Type) {
3119 status = save_list(self, obj);
3120 goto done;
3121 }
3122 else if (type == &PyTuple_Type) {
3123 status = save_tuple(self, obj);
3124 goto done;
3125 }
3126 else if (type == &PyType_Type) {
3127 status = save_global(self, obj, NULL);
3128 goto done;
3129 }
3130 else if (type == &PyFunction_Type) {
3131 status = save_global(self, obj, NULL);
3132 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3133 /* fall back to reduce */
3134 PyErr_Clear();
3135 }
3136 else {
3137 goto done;
3138 }
3139 }
3140 else if (type == &PyCFunction_Type) {
3141 status = save_global(self, obj, NULL);
3142 goto done;
3143 }
3144 else if (PyType_IsSubtype(type, &PyType_Type)) {
3145 status = save_global(self, obj, NULL);
3146 goto done;
3147 }
3148
3149 /* XXX: This part needs some unit tests. */
3150
3151 /* Get a reduction callable, and call it. This may come from
3152 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3153 * or the object's __reduce__ method.
3154 */
3155 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3156 if (reduce_func != NULL) {
3157 /* Here, the reference count of the reduce_func object returned by
3158 PyDict_GetItem needs to be increased to be consistent with the one
3159 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3160 reduce_func at the end of the save() routine.
3161 */
3162 Py_INCREF(reduce_func);
3163 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003164 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003165 }
3166 else {
3167 static PyObject *reduce_str = NULL;
3168 static PyObject *reduce_ex_str = NULL;
3169
3170 /* Cache the name of the reduce methods. */
3171 if (reduce_str == NULL) {
3172 reduce_str = PyUnicode_InternFromString("__reduce__");
3173 if (reduce_str == NULL)
3174 goto error;
3175 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3176 if (reduce_ex_str == NULL)
3177 goto error;
3178 }
3179
3180 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3181 automatically defined as __reduce__. While this is convenient, this
3182 make it impossible to know which method was actually called. Of
3183 course, this is not a big deal. But still, it would be nice to let
3184 the user know which method was called when something go
3185 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3186 don't actually have to check for a __reduce__ method. */
3187
3188 /* Check for a __reduce_ex__ method. */
3189 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3190 if (reduce_func != NULL) {
3191 PyObject *proto;
3192 proto = PyLong_FromLong(self->proto);
3193 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003194 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003195 }
3196 }
3197 else {
3198 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3199 PyErr_Clear();
3200 else
3201 goto error;
3202 /* Check for a __reduce__ method. */
3203 reduce_func = PyObject_GetAttr(obj, reduce_str);
3204 if (reduce_func != NULL) {
3205 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3206 }
3207 else {
3208 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3209 type->tp_name, obj);
3210 goto error;
3211 }
3212 }
3213 }
3214
3215 if (reduce_value == NULL)
3216 goto error;
3217
3218 if (PyUnicode_Check(reduce_value)) {
3219 status = save_global(self, obj, reduce_value);
3220 goto done;
3221 }
3222
3223 if (!PyTuple_Check(reduce_value)) {
3224 PyErr_SetString(PicklingError,
3225 "__reduce__ must return a string or tuple");
3226 goto error;
3227 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003228
3229 status = save_reduce(self, reduce_value, obj);
3230
3231 if (0) {
3232 error:
3233 status = -1;
3234 }
3235 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003236 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003237 Py_XDECREF(reduce_func);
3238 Py_XDECREF(reduce_value);
3239
3240 return status;
3241}
3242
3243static int
3244dump(PicklerObject *self, PyObject *obj)
3245{
3246 const char stop_op = STOP;
3247
3248 if (self->proto >= 2) {
3249 char header[2];
3250
3251 header[0] = PROTO;
3252 assert(self->proto >= 0 && self->proto < 256);
3253 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003254 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003255 return -1;
3256 }
3257
3258 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003259 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003260 return -1;
3261
3262 return 0;
3263}
3264
3265PyDoc_STRVAR(Pickler_clear_memo_doc,
3266"clear_memo() -> None. Clears the pickler's \"memo\"."
3267"\n"
3268"The memo is the data structure that remembers which objects the\n"
3269"pickler has already seen, so that shared or recursive objects are\n"
3270"pickled by reference and not by value. This method is useful when\n"
3271"re-using picklers.");
3272
3273static PyObject *
3274Pickler_clear_memo(PicklerObject *self)
3275{
3276 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003277 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003278
3279 Py_RETURN_NONE;
3280}
3281
3282PyDoc_STRVAR(Pickler_dump_doc,
3283"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3284
3285static PyObject *
3286Pickler_dump(PicklerObject *self, PyObject *args)
3287{
3288 PyObject *obj;
3289
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003290 /* Check whether the Pickler was initialized correctly (issue3664).
3291 Developers often forget to call __init__() in their subclasses, which
3292 would trigger a segfault without this check. */
3293 if (self->write == NULL) {
3294 PyErr_Format(PicklingError,
3295 "Pickler.__init__() was not called by %s.__init__()",
3296 Py_TYPE(self)->tp_name);
3297 return NULL;
3298 }
3299
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003300 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3301 return NULL;
3302
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003303 if (_Pickler_ClearBuffer(self) < 0)
3304 return NULL;
3305
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003306 if (dump(self, obj) < 0)
3307 return NULL;
3308
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003309 if (_Pickler_FlushToFile(self) < 0)
3310 return NULL;
3311
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003312 Py_RETURN_NONE;
3313}
3314
3315static struct PyMethodDef Pickler_methods[] = {
3316 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
3317 Pickler_dump_doc},
3318 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
3319 Pickler_clear_memo_doc},
3320 {NULL, NULL} /* sentinel */
3321};
3322
3323static void
3324Pickler_dealloc(PicklerObject *self)
3325{
3326 PyObject_GC_UnTrack(self);
3327
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003328 Py_XDECREF(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003329 Py_XDECREF(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003330 Py_XDECREF(self->pers_func);
3331 Py_XDECREF(self->arg);
3332 Py_XDECREF(self->fast_memo);
3333
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003334 PyMemoTable_Del(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003335
3336 Py_TYPE(self)->tp_free((PyObject *)self);
3337}
3338
3339static int
3340Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
3341{
3342 Py_VISIT(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003343 Py_VISIT(self->pers_func);
3344 Py_VISIT(self->arg);
3345 Py_VISIT(self->fast_memo);
3346 return 0;
3347}
3348
3349static int
3350Pickler_clear(PicklerObject *self)
3351{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003352 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003353 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003354 Py_CLEAR(self->pers_func);
3355 Py_CLEAR(self->arg);
3356 Py_CLEAR(self->fast_memo);
3357
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003358 if (self->memo != NULL) {
3359 PyMemoTable *memo = self->memo;
3360 self->memo = NULL;
3361 PyMemoTable_Del(memo);
3362 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003363 return 0;
3364}
3365
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003366
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367PyDoc_STRVAR(Pickler_doc,
3368"Pickler(file, protocol=None)"
3369"\n"
3370"This takes a binary file for writing a pickle data stream.\n"
3371"\n"
3372"The optional protocol argument tells the pickler to use the\n"
3373"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3374"protocol is 3; a backward-incompatible protocol designed for\n"
3375"Python 3.0.\n"
3376"\n"
3377"Specifying a negative protocol version selects the highest\n"
3378"protocol version supported. The higher the protocol used, the\n"
3379"more recent the version of Python needed to read the pickle\n"
3380"produced.\n"
3381"\n"
3382"The file argument must have a write() method that accepts a single\n"
3383"bytes argument. It can thus be a file object opened for binary\n"
3384"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003385"meets this interface.\n"
3386"\n"
3387"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3388"map the new Python 3.x names to the old module names used in Python\n"
3389"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003390
3391static int
3392Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3393{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003394 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003395 PyObject *file;
3396 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003397 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003398
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003399 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003400 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003401 return -1;
3402
3403 /* In case of multiple __init__() calls, clear previous content. */
3404 if (self->write != NULL)
3405 (void)Pickler_clear(self);
3406
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003407 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3408 return -1;
3409
3410 if (_Pickler_SetOutputStream(self, file) < 0)
3411 return -1;
3412
3413 /* memo and output_buffer may have already been created in _Pickler_New */
3414 if (self->memo == NULL) {
3415 self->memo = PyMemoTable_New();
3416 if (self->memo == NULL)
3417 return -1;
3418 }
3419 self->output_len = 0;
3420 if (self->output_buffer == NULL) {
3421 self->max_output_len = WRITE_BUF_SIZE;
3422 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3423 self->max_output_len);
3424 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003425 return -1;
3426 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003427
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003428 self->arg = NULL;
3429 self->fast = 0;
3430 self->fast_nesting = 0;
3431 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003432 self->pers_func = NULL;
3433 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3434 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3435 "persistent_id");
3436 if (self->pers_func == NULL)
3437 return -1;
3438 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003439 return 0;
3440}
3441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003442/* Define a proxy object for the Pickler's internal memo object. This is to
3443 * avoid breaking code like:
3444 * pickler.memo.clear()
3445 * and
3446 * pickler.memo = saved_memo
3447 * Is this a good idea? Not really, but we don't want to break code that uses
3448 * it. Note that we don't implement the entire mapping API here. This is
3449 * intentional, as these should be treated as black-box implementation details.
3450 */
3451
3452typedef struct {
3453 PyObject_HEAD
3454 PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
3455} PicklerMemoProxyObject;
3456
3457PyDoc_STRVAR(pmp_clear_doc,
3458"memo.clear() -> None. Remove all items from memo.");
3459
3460static PyObject *
3461pmp_clear(PicklerMemoProxyObject *self)
3462{
3463 if (self->pickler->memo)
3464 PyMemoTable_Clear(self->pickler->memo);
3465 Py_RETURN_NONE;
3466}
3467
3468PyDoc_STRVAR(pmp_copy_doc,
3469"memo.copy() -> new_memo. Copy the memo to a new object.");
3470
3471static PyObject *
3472pmp_copy(PicklerMemoProxyObject *self)
3473{
3474 Py_ssize_t i;
3475 PyMemoTable *memo;
3476 PyObject *new_memo = PyDict_New();
3477 if (new_memo == NULL)
3478 return NULL;
3479
3480 memo = self->pickler->memo;
3481 for (i = 0; i < memo->mt_allocated; ++i) {
3482 PyMemoEntry entry = memo->mt_table[i];
3483 if (entry.me_key != NULL) {
3484 int status;
3485 PyObject *key, *value;
3486
3487 key = PyLong_FromVoidPtr(entry.me_key);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003488 value = Py_BuildValue("nO", entry.me_value, entry.me_key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003489
3490 if (key == NULL || value == NULL) {
3491 Py_XDECREF(key);
3492 Py_XDECREF(value);
3493 goto error;
3494 }
3495 status = PyDict_SetItem(new_memo, key, value);
3496 Py_DECREF(key);
3497 Py_DECREF(value);
3498 if (status < 0)
3499 goto error;
3500 }
3501 }
3502 return new_memo;
3503
3504 error:
3505 Py_XDECREF(new_memo);
3506 return NULL;
3507}
3508
3509PyDoc_STRVAR(pmp_reduce_doc,
3510"memo.__reduce__(). Pickling support.");
3511
3512static PyObject *
3513pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3514{
3515 PyObject *reduce_value, *dict_args;
3516 PyObject *contents = pmp_copy(self);
3517 if (contents == NULL)
3518 return NULL;
3519
3520 reduce_value = PyTuple_New(2);
3521 if (reduce_value == NULL) {
3522 Py_DECREF(contents);
3523 return NULL;
3524 }
3525 dict_args = PyTuple_New(1);
3526 if (dict_args == NULL) {
3527 Py_DECREF(contents);
3528 Py_DECREF(reduce_value);
3529 return NULL;
3530 }
3531 PyTuple_SET_ITEM(dict_args, 0, contents);
3532 Py_INCREF((PyObject *)&PyDict_Type);
3533 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3534 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3535 return reduce_value;
3536}
3537
3538static PyMethodDef picklerproxy_methods[] = {
3539 {"clear", (PyCFunction)pmp_clear, METH_NOARGS, pmp_clear_doc},
3540 {"copy", (PyCFunction)pmp_copy, METH_NOARGS, pmp_copy_doc},
3541 {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
3542 {NULL, NULL} /* sentinel */
3543};
3544
3545static void
3546PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
3547{
3548 PyObject_GC_UnTrack(self);
3549 Py_XDECREF(self->pickler);
3550 PyObject_GC_Del((PyObject *)self);
3551}
3552
3553static int
3554PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
3555 visitproc visit, void *arg)
3556{
3557 Py_VISIT(self->pickler);
3558 return 0;
3559}
3560
3561static int
3562PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
3563{
3564 Py_CLEAR(self->pickler);
3565 return 0;
3566}
3567
3568static PyTypeObject PicklerMemoProxyType = {
3569 PyVarObject_HEAD_INIT(NULL, 0)
3570 "_pickle.PicklerMemoProxy", /*tp_name*/
3571 sizeof(PicklerMemoProxyObject), /*tp_basicsize*/
3572 0,
3573 (destructor)PicklerMemoProxy_dealloc, /* tp_dealloc */
3574 0, /* tp_print */
3575 0, /* tp_getattr */
3576 0, /* tp_setattr */
3577 0, /* tp_compare */
3578 0, /* tp_repr */
3579 0, /* tp_as_number */
3580 0, /* tp_as_sequence */
3581 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00003582 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003583 0, /* tp_call */
3584 0, /* tp_str */
3585 PyObject_GenericGetAttr, /* tp_getattro */
3586 PyObject_GenericSetAttr, /* tp_setattro */
3587 0, /* tp_as_buffer */
3588 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
3589 0, /* tp_doc */
3590 (traverseproc)PicklerMemoProxy_traverse, /* tp_traverse */
3591 (inquiry)PicklerMemoProxy_clear, /* tp_clear */
3592 0, /* tp_richcompare */
3593 0, /* tp_weaklistoffset */
3594 0, /* tp_iter */
3595 0, /* tp_iternext */
3596 picklerproxy_methods, /* tp_methods */
3597};
3598
3599static PyObject *
3600PicklerMemoProxy_New(PicklerObject *pickler)
3601{
3602 PicklerMemoProxyObject *self;
3603
3604 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3605 if (self == NULL)
3606 return NULL;
3607 Py_INCREF(pickler);
3608 self->pickler = pickler;
3609 PyObject_GC_Track(self);
3610 return (PyObject *)self;
3611}
3612
3613/*****************************************************************************/
3614
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003615static PyObject *
3616Pickler_get_memo(PicklerObject *self)
3617{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003618 return PicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003619}
3620
3621static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003622Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003623{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003624 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003625
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003626 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003627 PyErr_SetString(PyExc_TypeError,
3628 "attribute deletion is not supported");
3629 return -1;
3630 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003631
3632 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3633 PicklerObject *pickler =
3634 ((PicklerMemoProxyObject *)obj)->pickler;
3635
3636 new_memo = PyMemoTable_Copy(pickler->memo);
3637 if (new_memo == NULL)
3638 return -1;
3639 }
3640 else if (PyDict_Check(obj)) {
3641 Py_ssize_t i = 0;
3642 PyObject *key, *value;
3643
3644 new_memo = PyMemoTable_New();
3645 if (new_memo == NULL)
3646 return -1;
3647
3648 while (PyDict_Next(obj, &i, &key, &value)) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003649 Py_ssize_t memo_id;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003650 PyObject *memo_obj;
3651
3652 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3653 PyErr_SetString(PyExc_TypeError,
3654 "'memo' values must be 2-item tuples");
3655 goto error;
3656 }
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003657 memo_id = PyLong_AsSsize_t(PyTuple_GET_ITEM(value, 0));
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003658 if (memo_id == -1 && PyErr_Occurred())
3659 goto error;
3660 memo_obj = PyTuple_GET_ITEM(value, 1);
3661 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3662 goto error;
3663 }
3664 }
3665 else {
3666 PyErr_Format(PyExc_TypeError,
3667 "'memo' attribute must be an PicklerMemoProxy object"
3668 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003669 return -1;
3670 }
3671
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003672 PyMemoTable_Del(self->memo);
3673 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003674
3675 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003676
3677 error:
3678 if (new_memo)
3679 PyMemoTable_Del(new_memo);
3680 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003681}
3682
3683static PyObject *
3684Pickler_get_persid(PicklerObject *self)
3685{
3686 if (self->pers_func == NULL)
3687 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3688 else
3689 Py_INCREF(self->pers_func);
3690 return self->pers_func;
3691}
3692
3693static int
3694Pickler_set_persid(PicklerObject *self, PyObject *value)
3695{
3696 PyObject *tmp;
3697
3698 if (value == NULL) {
3699 PyErr_SetString(PyExc_TypeError,
3700 "attribute deletion is not supported");
3701 return -1;
3702 }
3703 if (!PyCallable_Check(value)) {
3704 PyErr_SetString(PyExc_TypeError,
3705 "persistent_id must be a callable taking one argument");
3706 return -1;
3707 }
3708
3709 tmp = self->pers_func;
3710 Py_INCREF(value);
3711 self->pers_func = value;
3712 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3713
3714 return 0;
3715}
3716
/* Member table for Pickler: exposes the `bin` and `fast` fields of
   PicklerObject as T_INT attributes. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}  /* sentinel terminating the table */
};
3722
/* Computed attributes of Pickler: `memo` and `persistent_id`, each wired to
   its getter/setter pair defined above. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel terminating the table */
};
3730
/* Type object for _pickle.Pickler.
 *
 * Slots are filled positionally, so the order of the entries below must not
 * change.  The type is subclassable and GC-aware (see the flags line), and
 * uses the generic alloc/new helpers with Pickler_init as its initializer.
 */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3773
3774/* Temporary helper for calling self.find_class().
3775
3776 XXX: It would be nice to able to avoid Python function call overhead, by
3777 using directly the C version of find_class(), when find_class() is not
3778 overridden by a subclass. Although, this could become rather hackish. A
3779 simpler optimization would be to call the C function when self is not a
3780 subclass instance. */
3781static PyObject *
3782find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3783{
3784 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3785 module_name, global_name);
3786}
3787
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003788static Py_ssize_t
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003789marker(UnpicklerObject *self)
3790{
3791 if (self->num_marks < 1) {
3792 PyErr_SetString(UnpicklingError, "could not find MARK");
3793 return -1;
3794 }
3795
3796 return self->marks[--self->num_marks];
3797}
3798
3799static int
3800load_none(UnpicklerObject *self)
3801{
3802 PDATA_APPEND(self->stack, Py_None, -1);
3803 return 0;
3804}
3805
3806static int
3807bad_readline(void)
3808{
3809 PyErr_SetString(UnpicklingError, "pickle data was truncated");
3810 return -1;
3811}
3812
3813static int
3814load_int(UnpicklerObject *self)
3815{
3816 PyObject *value;
3817 char *endptr, *s;
3818 Py_ssize_t len;
3819 long x;
3820
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003821 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003822 return -1;
3823 if (len < 2)
3824 return bad_readline();
3825
3826 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003827 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3828 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003829 x = strtol(s, &endptr, 0);
3830
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003831 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003832 /* Hm, maybe we've got something long. Let's try reading
3833 * it as a Python long object. */
3834 errno = 0;
3835 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003836 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003837 if (value == NULL) {
3838 PyErr_SetString(PyExc_ValueError,
3839 "could not convert string to int");
3840 return -1;
3841 }
3842 }
3843 else {
3844 if (len == 3 && (x == 0 || x == 1)) {
3845 if ((value = PyBool_FromLong(x)) == NULL)
3846 return -1;
3847 }
3848 else {
3849 if ((value = PyLong_FromLong(x)) == NULL)
3850 return -1;
3851 }
3852 }
3853
3854 PDATA_PUSH(self->stack, value, -1);
3855 return 0;
3856}
3857
3858static int
3859load_bool(UnpicklerObject *self, PyObject *boolean)
3860{
3861 assert(boolean == Py_True || boolean == Py_False);
3862 PDATA_APPEND(self->stack, boolean, -1);
3863 return 0;
3864}
3865
Antoine Pitrou82be19f2011-08-29 23:09:33 +02003866/* s contains x bytes of an unsigned little-endian integer. Return its value
3867 * as a C Py_ssize_t, or -1 if it's higher than PY_SSIZE_T_MAX.
3868 */
3869static Py_ssize_t
3870calc_binsize(char *bytes, int size)
3871{
3872 unsigned char *s = (unsigned char *)bytes;
3873 size_t x = 0;
3874
3875 assert(size == 4);
3876
3877 x = (size_t) s[0];
3878 x |= (size_t) s[1] << 8;
3879 x |= (size_t) s[2] << 16;
3880 x |= (size_t) s[3] << 24;
3881
3882 if (x > PY_SSIZE_T_MAX)
3883 return -1;
3884 else
3885 return (Py_ssize_t) x;
3886}
3887
/* `bytes` points at `size` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when `size` is 1 or 2, this is an unsigned
 * little-endian int, but when `size` is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is assembled in an unsigned long so that no shift ever moves a
 * bit into the sign position of a signed type (which would be undefined
 * where long is 32 bits wide).  The sign of a 4-byte value is then applied
 * arithmetically, giving the same result whatever the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: bit 31 set means the value is ux - 2**32.  Compute that
     * as (low 31 bits) - 2**31 without overflowing a 32-bit long.
     */
    if (size == 4 && (ux & 0x80000000UL) != 0) {
        return (long)(ux & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)ux;
}
3914
3915static int
3916load_binintx(UnpicklerObject *self, char *s, int size)
3917{
3918 PyObject *value;
3919 long x;
3920
3921 x = calc_binint(s, size);
3922
3923 if ((value = PyLong_FromLong(x)) == NULL)
3924 return -1;
3925
3926 PDATA_PUSH(self->stack, value, -1);
3927 return 0;
3928}
3929
3930static int
3931load_binint(UnpicklerObject *self)
3932{
3933 char *s;
3934
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003935 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003936 return -1;
3937
3938 return load_binintx(self, s, 4);
3939}
3940
3941static int
3942load_binint1(UnpicklerObject *self)
3943{
3944 char *s;
3945
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003946 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003947 return -1;
3948
3949 return load_binintx(self, s, 1);
3950}
3951
3952static int
3953load_binint2(UnpicklerObject *self)
3954{
3955 char *s;
3956
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003957 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003958 return -1;
3959
3960 return load_binintx(self, s, 2);
3961}
3962
3963static int
3964load_long(UnpicklerObject *self)
3965{
3966 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003967 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003968 Py_ssize_t len;
3969
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003970 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003971 return -1;
3972 if (len < 2)
3973 return bad_readline();
3974
Mark Dickinson8dd05142009-01-20 20:43:58 +00003975 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3976 the 'L' before calling PyLong_FromString. In order to maintain
3977 compatibility with Python 3.0.0, we don't actually *require*
3978 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003979 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003980 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003981 /* XXX: Should the base argument explicitly set to 10? */
3982 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003983 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003984 return -1;
3985
3986 PDATA_PUSH(self->stack, value, -1);
3987 return 0;
3988}
3989
3990/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3991 * data following.
3992 */
3993static int
3994load_counted_long(UnpicklerObject *self, int size)
3995{
3996 PyObject *value;
3997 char *nbytes;
3998 char *pdata;
3999
4000 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004001 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004002 return -1;
4003
4004 size = calc_binint(nbytes, size);
4005 if (size < 0) {
4006 /* Corrupt or hostile pickle -- we never write one like this */
4007 PyErr_SetString(UnpicklingError,
4008 "LONG pickle has negative byte count");
4009 return -1;
4010 }
4011
4012 if (size == 0)
4013 value = PyLong_FromLong(0L);
4014 else {
4015 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004016 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004017 return -1;
4018 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
4019 1 /* little endian */ , 1 /* signed */ );
4020 }
4021 if (value == NULL)
4022 return -1;
4023 PDATA_PUSH(self->stack, value, -1);
4024 return 0;
4025}
4026
4027static int
4028load_float(UnpicklerObject *self)
4029{
4030 PyObject *value;
4031 char *endptr, *s;
4032 Py_ssize_t len;
4033 double d;
4034
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004035 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004036 return -1;
4037 if (len < 2)
4038 return bad_readline();
4039
4040 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00004041 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
4042 if (d == -1.0 && PyErr_Occurred())
4043 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004044 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004045 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
4046 return -1;
4047 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00004048 value = PyFloat_FromDouble(d);
4049 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004050 return -1;
4051
4052 PDATA_PUSH(self->stack, value, -1);
4053 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004054}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004055
4056static int
4057load_binfloat(UnpicklerObject *self)
4058{
4059 PyObject *value;
4060 double x;
4061 char *s;
4062
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004063 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004064 return -1;
4065
4066 x = _PyFloat_Unpack8((unsigned char *)s, 0);
4067 if (x == -1.0 && PyErr_Occurred())
4068 return -1;
4069
4070 if ((value = PyFloat_FromDouble(x)) == NULL)
4071 return -1;
4072
4073 PDATA_PUSH(self->stack, value, -1);
4074 return 0;
4075}
4076
4077static int
4078load_string(UnpicklerObject *self)
4079{
4080 PyObject *bytes;
4081 PyObject *str = NULL;
4082 Py_ssize_t len;
4083 char *s, *p;
4084
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004085 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004086 return -1;
4087 if (len < 3)
4088 return bad_readline();
4089 if ((s = strdup(s)) == NULL) {
4090 PyErr_NoMemory();
4091 return -1;
4092 }
4093
4094 /* Strip outermost quotes */
4095 while (s[len - 1] <= ' ')
4096 len--;
4097 if (s[0] == '"' && s[len - 1] == '"') {
4098 s[len - 1] = '\0';
4099 p = s + 1;
4100 len -= 2;
4101 }
4102 else if (s[0] == '\'' && s[len - 1] == '\'') {
4103 s[len - 1] = '\0';
4104 p = s + 1;
4105 len -= 2;
4106 }
4107 else {
4108 free(s);
4109 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4110 return -1;
4111 }
4112
4113 /* Use the PyBytes API to decode the string, since that is what is used
4114 to encode, and then coerce the result to Unicode. */
4115 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4116 free(s);
4117 if (bytes == NULL)
4118 return -1;
4119 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4120 Py_DECREF(bytes);
4121 if (str == NULL)
4122 return -1;
4123
4124 PDATA_PUSH(self->stack, str, -1);
4125 return 0;
4126}
4127
4128static int
4129load_binbytes(UnpicklerObject *self)
4130{
4131 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004132 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004133 char *s;
4134
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004135 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004136 return -1;
4137
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004138 x = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004139 if (x < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004140 PyErr_Format(PyExc_OverflowError,
4141 "BINBYTES exceeds system's maximum size of %zd bytes",
4142 PY_SSIZE_T_MAX
4143 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004144 return -1;
4145 }
4146
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004147 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004148 return -1;
4149 bytes = PyBytes_FromStringAndSize(s, x);
4150 if (bytes == NULL)
4151 return -1;
4152
4153 PDATA_PUSH(self->stack, bytes, -1);
4154 return 0;
4155}
4156
4157static int
4158load_short_binbytes(UnpicklerObject *self)
4159{
4160 PyObject *bytes;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004161 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004162 char *s;
4163
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004164 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004165 return -1;
4166
4167 x = (unsigned char)s[0];
4168
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004169 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004170 return -1;
4171
4172 bytes = PyBytes_FromStringAndSize(s, x);
4173 if (bytes == NULL)
4174 return -1;
4175
4176 PDATA_PUSH(self->stack, bytes, -1);
4177 return 0;
4178}
4179
4180static int
4181load_binstring(UnpicklerObject *self)
4182{
4183 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004184 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004185 char *s;
4186
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004187 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004188 return -1;
4189
4190 x = calc_binint(s, 4);
4191 if (x < 0) {
4192 PyErr_SetString(UnpicklingError,
4193 "BINSTRING pickle has negative byte count");
4194 return -1;
4195 }
4196
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004197 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004198 return -1;
4199
4200 /* Convert Python 2.x strings to unicode. */
4201 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4202 if (str == NULL)
4203 return -1;
4204
4205 PDATA_PUSH(self->stack, str, -1);
4206 return 0;
4207}
4208
4209static int
4210load_short_binstring(UnpicklerObject *self)
4211{
4212 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004213 Py_ssize_t x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004214 char *s;
4215
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004216 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004217 return -1;
4218
4219 x = (unsigned char)s[0];
4220
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004221 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004222 return -1;
4223
4224 /* Convert Python 2.x strings to unicode. */
4225 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4226 if (str == NULL)
4227 return -1;
4228
4229 PDATA_PUSH(self->stack, str, -1);
4230 return 0;
4231}
4232
4233static int
4234load_unicode(UnpicklerObject *self)
4235{
4236 PyObject *str;
4237 Py_ssize_t len;
4238 char *s;
4239
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004240 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004241 return -1;
4242 if (len < 1)
4243 return bad_readline();
4244
4245 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4246 if (str == NULL)
4247 return -1;
4248
4249 PDATA_PUSH(self->stack, str, -1);
4250 return 0;
4251}
4252
4253static int
4254load_binunicode(UnpicklerObject *self)
4255{
4256 PyObject *str;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004257 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004258 char *s;
4259
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004260 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004261 return -1;
4262
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004263 size = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004264 if (size < 0) {
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004265 PyErr_Format(PyExc_OverflowError,
4266 "BINUNICODE exceeds system's maximum size of %zd bytes",
4267 PY_SSIZE_T_MAX
4268 );
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004269 return -1;
4270 }
4271
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004272
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004273 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004274 return -1;
4275
Victor Stinner485fb562010-04-13 11:07:24 +00004276 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004277 if (str == NULL)
4278 return -1;
4279
4280 PDATA_PUSH(self->stack, str, -1);
4281 return 0;
4282}
4283
4284static int
4285load_tuple(UnpicklerObject *self)
4286{
4287 PyObject *tuple;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004288 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004289
4290 if ((i = marker(self)) < 0)
4291 return -1;
4292
4293 tuple = Pdata_poptuple(self->stack, i);
4294 if (tuple == NULL)
4295 return -1;
4296 PDATA_PUSH(self->stack, tuple, -1);
4297 return 0;
4298}
4299
4300static int
4301load_counted_tuple(UnpicklerObject *self, int len)
4302{
4303 PyObject *tuple;
4304
4305 tuple = PyTuple_New(len);
4306 if (tuple == NULL)
4307 return -1;
4308
4309 while (--len >= 0) {
4310 PyObject *item;
4311
4312 PDATA_POP(self->stack, item);
4313 if (item == NULL)
4314 return -1;
4315 PyTuple_SET_ITEM(tuple, len, item);
4316 }
4317 PDATA_PUSH(self->stack, tuple, -1);
4318 return 0;
4319}
4320
4321static int
4322load_empty_list(UnpicklerObject *self)
4323{
4324 PyObject *list;
4325
4326 if ((list = PyList_New(0)) == NULL)
4327 return -1;
4328 PDATA_PUSH(self->stack, list, -1);
4329 return 0;
4330}
4331
4332static int
4333load_empty_dict(UnpicklerObject *self)
4334{
4335 PyObject *dict;
4336
4337 if ((dict = PyDict_New()) == NULL)
4338 return -1;
4339 PDATA_PUSH(self->stack, dict, -1);
4340 return 0;
4341}
4342
4343static int
4344load_list(UnpicklerObject *self)
4345{
4346 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004347 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004348
4349 if ((i = marker(self)) < 0)
4350 return -1;
4351
4352 list = Pdata_poplist(self->stack, i);
4353 if (list == NULL)
4354 return -1;
4355 PDATA_PUSH(self->stack, list, -1);
4356 return 0;
4357}
4358
4359static int
4360load_dict(UnpicklerObject *self)
4361{
4362 PyObject *dict, *key, *value;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004363 Py_ssize_t i, j, k;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004364
4365 if ((i = marker(self)) < 0)
4366 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004367 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004368
4369 if ((dict = PyDict_New()) == NULL)
4370 return -1;
4371
4372 for (k = i + 1; k < j; k += 2) {
4373 key = self->stack->data[k - 1];
4374 value = self->stack->data[k];
4375 if (PyDict_SetItem(dict, key, value) < 0) {
4376 Py_DECREF(dict);
4377 return -1;
4378 }
4379 }
4380 Pdata_clear(self->stack, i);
4381 PDATA_PUSH(self->stack, dict, -1);
4382 return 0;
4383}
4384
4385static PyObject *
4386instantiate(PyObject *cls, PyObject *args)
4387{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004388 PyObject *result = NULL;
4389 /* Caller must assure args are a tuple. Normally, args come from
4390 Pdata_poptuple which packs objects from the top of the stack
4391 into a newly created tuple. */
4392 assert(PyTuple_Check(args));
4393 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4394 PyObject_HasAttrString(cls, "__getinitargs__")) {
4395 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004396 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004397 else {
4398 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4399 }
4400 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004401}
4402
4403static int
4404load_obj(UnpicklerObject *self)
4405{
4406 PyObject *cls, *args, *obj = NULL;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004407 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004408
4409 if ((i = marker(self)) < 0)
4410 return -1;
4411
4412 args = Pdata_poptuple(self->stack, i + 1);
4413 if (args == NULL)
4414 return -1;
4415
4416 PDATA_POP(self->stack, cls);
4417 if (cls) {
4418 obj = instantiate(cls, args);
4419 Py_DECREF(cls);
4420 }
4421 Py_DECREF(args);
4422 if (obj == NULL)
4423 return -1;
4424
4425 PDATA_PUSH(self->stack, obj, -1);
4426 return 0;
4427}
4428
4429static int
4430load_inst(UnpicklerObject *self)
4431{
4432 PyObject *cls = NULL;
4433 PyObject *args = NULL;
4434 PyObject *obj = NULL;
4435 PyObject *module_name;
4436 PyObject *class_name;
4437 Py_ssize_t len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004438 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004439 char *s;
4440
4441 if ((i = marker(self)) < 0)
4442 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004443 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004444 return -1;
4445 if (len < 2)
4446 return bad_readline();
4447
4448 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4449 identifiers are permitted in Python 3.0, since the INST opcode is only
4450 supported by older protocols on Python 2.x. */
4451 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4452 if (module_name == NULL)
4453 return -1;
4454
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004455 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004456 if (len < 2)
4457 return bad_readline();
4458 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004459 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004460 cls = find_class(self, module_name, class_name);
4461 Py_DECREF(class_name);
4462 }
4463 }
4464 Py_DECREF(module_name);
4465
4466 if (cls == NULL)
4467 return -1;
4468
4469 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4470 obj = instantiate(cls, args);
4471 Py_DECREF(args);
4472 }
4473 Py_DECREF(cls);
4474
4475 if (obj == NULL)
4476 return -1;
4477
4478 PDATA_PUSH(self->stack, obj, -1);
4479 return 0;
4480}
4481
4482static int
4483load_newobj(UnpicklerObject *self)
4484{
4485 PyObject *args = NULL;
4486 PyObject *clsraw = NULL;
4487 PyTypeObject *cls; /* clsraw cast to its true type */
4488 PyObject *obj;
4489
4490 /* Stack is ... cls argtuple, and we want to call
4491 * cls.__new__(cls, *argtuple).
4492 */
4493 PDATA_POP(self->stack, args);
4494 if (args == NULL)
4495 goto error;
4496 if (!PyTuple_Check(args)) {
4497 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4498 goto error;
4499 }
4500
4501 PDATA_POP(self->stack, clsraw);
4502 cls = (PyTypeObject *)clsraw;
4503 if (cls == NULL)
4504 goto error;
4505 if (!PyType_Check(cls)) {
4506 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4507 "isn't a type object");
4508 goto error;
4509 }
4510 if (cls->tp_new == NULL) {
4511 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4512 "has NULL tp_new");
4513 goto error;
4514 }
4515
4516 /* Call __new__. */
4517 obj = cls->tp_new(cls, args, NULL);
4518 if (obj == NULL)
4519 goto error;
4520
4521 Py_DECREF(args);
4522 Py_DECREF(clsraw);
4523 PDATA_PUSH(self->stack, obj, -1);
4524 return 0;
4525
4526 error:
4527 Py_XDECREF(args);
4528 Py_XDECREF(clsraw);
4529 return -1;
4530}
4531
4532static int
4533load_global(UnpicklerObject *self)
4534{
4535 PyObject *global = NULL;
4536 PyObject *module_name;
4537 PyObject *global_name;
4538 Py_ssize_t len;
4539 char *s;
4540
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004541 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004542 return -1;
4543 if (len < 2)
4544 return bad_readline();
4545 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4546 if (!module_name)
4547 return -1;
4548
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004549 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004550 if (len < 2) {
4551 Py_DECREF(module_name);
4552 return bad_readline();
4553 }
4554 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4555 if (global_name) {
4556 global = find_class(self, module_name, global_name);
4557 Py_DECREF(global_name);
4558 }
4559 }
4560 Py_DECREF(module_name);
4561
4562 if (global == NULL)
4563 return -1;
4564 PDATA_PUSH(self->stack, global, -1);
4565 return 0;
4566}
4567
4568static int
4569load_persid(UnpicklerObject *self)
4570{
4571 PyObject *pid;
4572 Py_ssize_t len;
4573 char *s;
4574
4575 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004576 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004577 return -1;
4578 if (len < 2)
4579 return bad_readline();
4580
4581 pid = PyBytes_FromStringAndSize(s, len - 1);
4582 if (pid == NULL)
4583 return -1;
4584
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004585 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004586 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004587 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004588 if (pid == NULL)
4589 return -1;
4590
4591 PDATA_PUSH(self->stack, pid, -1);
4592 return 0;
4593 }
4594 else {
4595 PyErr_SetString(UnpicklingError,
4596 "A load persistent id instruction was encountered,\n"
4597 "but no persistent_load function was specified.");
4598 return -1;
4599 }
4600}
4601
4602static int
4603load_binpersid(UnpicklerObject *self)
4604{
4605 PyObject *pid;
4606
4607 if (self->pers_func) {
4608 PDATA_POP(self->stack, pid);
4609 if (pid == NULL)
4610 return -1;
4611
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004612 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004613 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004614 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004615 if (pid == NULL)
4616 return -1;
4617
4618 PDATA_PUSH(self->stack, pid, -1);
4619 return 0;
4620 }
4621 else {
4622 PyErr_SetString(UnpicklingError,
4623 "A load persistent id instruction was encountered,\n"
4624 "but no persistent_load function was specified.");
4625 return -1;
4626 }
4627}
4628
4629static int
4630load_pop(UnpicklerObject *self)
4631{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004632 Py_ssize_t len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004633
4634 /* Note that we split the (pickle.py) stack into two stacks,
4635 * an object stack and a mark stack. We have to be clever and
4636 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004637 * mark stack first, and only signalling a stack underflow if
4638 * the object stack is empty and the mark stack doesn't match
4639 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004640 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004641 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004642 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004643 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004644 len--;
4645 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004646 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004647 } else {
4648 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004649 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004650 return 0;
4651}
4652
4653static int
4654load_pop_mark(UnpicklerObject *self)
4655{
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004656 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004657
4658 if ((i = marker(self)) < 0)
4659 return -1;
4660
4661 Pdata_clear(self->stack, i);
4662
4663 return 0;
4664}
4665
4666static int
4667load_dup(UnpicklerObject *self)
4668{
4669 PyObject *last;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004670 Py_ssize_t len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004671
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004672 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004673 return stack_underflow();
4674 last = self->stack->data[len - 1];
4675 PDATA_APPEND(self->stack, last, -1);
4676 return 0;
4677}
4678
4679static int
4680load_get(UnpicklerObject *self)
4681{
4682 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004683 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004684 Py_ssize_t len;
4685 char *s;
4686
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004687 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004688 return -1;
4689 if (len < 2)
4690 return bad_readline();
4691
4692 key = PyLong_FromString(s, NULL, 10);
4693 if (key == NULL)
4694 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004695 idx = PyLong_AsSsize_t(key);
4696 if (idx == -1 && PyErr_Occurred()) {
4697 Py_DECREF(key);
4698 return -1;
4699 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004700
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004701 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004702 if (value == NULL) {
4703 if (!PyErr_Occurred())
4704 PyErr_SetObject(PyExc_KeyError, key);
4705 Py_DECREF(key);
4706 return -1;
4707 }
4708 Py_DECREF(key);
4709
4710 PDATA_APPEND(self->stack, value, -1);
4711 return 0;
4712}
4713
4714static int
4715load_binget(UnpicklerObject *self)
4716{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004717 PyObject *value;
4718 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004719 char *s;
4720
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004721 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004722 return -1;
4723
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004724 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004725
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004726 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004727 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004728 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004729 if (!PyErr_Occurred())
4730 PyErr_SetObject(PyExc_KeyError, key);
4731 Py_DECREF(key);
4732 return -1;
4733 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004734
4735 PDATA_APPEND(self->stack, value, -1);
4736 return 0;
4737}
4738
4739static int
4740load_long_binget(UnpicklerObject *self)
4741{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004742 PyObject *value;
4743 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004745
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004746 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004747 return -1;
4748
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004749 idx = calc_binsize(s, 4);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004750
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004751 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004753 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004754 if (!PyErr_Occurred())
4755 PyErr_SetObject(PyExc_KeyError, key);
4756 Py_DECREF(key);
4757 return -1;
4758 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759
4760 PDATA_APPEND(self->stack, value, -1);
4761 return 0;
4762}
4763
4764/* Push an object from the extension registry (EXT[124]). nbytes is
4765 * the number of bytes following the opcode, holding the index (code) value.
4766 */
4767static int
4768load_extension(UnpicklerObject *self, int nbytes)
4769{
4770 char *codebytes; /* the nbytes bytes after the opcode */
4771 long code; /* calc_binint returns long */
4772 PyObject *py_code; /* code as a Python int */
4773 PyObject *obj; /* the object to push */
4774 PyObject *pair; /* (module_name, class_name) */
4775 PyObject *module_name, *class_name;
4776
4777 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004778 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779 return -1;
4780 code = calc_binint(codebytes, nbytes);
4781 if (code <= 0) { /* note that 0 is forbidden */
4782 /* Corrupt or hostile pickle. */
4783 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4784 return -1;
4785 }
4786
4787 /* Look for the code in the cache. */
4788 py_code = PyLong_FromLong(code);
4789 if (py_code == NULL)
4790 return -1;
4791 obj = PyDict_GetItem(extension_cache, py_code);
4792 if (obj != NULL) {
4793 /* Bingo. */
4794 Py_DECREF(py_code);
4795 PDATA_APPEND(self->stack, obj, -1);
4796 return 0;
4797 }
4798
4799 /* Look up the (module_name, class_name) pair. */
4800 pair = PyDict_GetItem(inverted_registry, py_code);
4801 if (pair == NULL) {
4802 Py_DECREF(py_code);
4803 PyErr_Format(PyExc_ValueError, "unregistered extension "
4804 "code %ld", code);
4805 return -1;
4806 }
4807 /* Since the extension registry is manipulable via Python code,
4808 * confirm that pair is really a 2-tuple of strings.
4809 */
4810 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4811 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4812 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4813 Py_DECREF(py_code);
4814 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4815 "isn't a 2-tuple of strings", code);
4816 return -1;
4817 }
4818 /* Load the object. */
4819 obj = find_class(self, module_name, class_name);
4820 if (obj == NULL) {
4821 Py_DECREF(py_code);
4822 return -1;
4823 }
4824 /* Cache code -> obj. */
4825 code = PyDict_SetItem(extension_cache, py_code, obj);
4826 Py_DECREF(py_code);
4827 if (code < 0) {
4828 Py_DECREF(obj);
4829 return -1;
4830 }
4831 PDATA_PUSH(self->stack, obj, -1);
4832 return 0;
4833}
4834
4835static int
4836load_put(UnpicklerObject *self)
4837{
4838 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004839 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004840 Py_ssize_t len;
4841 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004842
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004843 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004844 return -1;
4845 if (len < 2)
4846 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004847 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004849 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004850
4851 key = PyLong_FromString(s, NULL, 10);
4852 if (key == NULL)
4853 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004854 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004855 Py_DECREF(key);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004856 if (idx < 0) {
4857 if (!PyErr_Occurred())
4858 PyErr_SetString(PyExc_ValueError,
4859 "negative PUT argument");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004860 return -1;
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004861 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004862
4863 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004864}
4865
4866static int
4867load_binput(UnpicklerObject *self)
4868{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004869 PyObject *value;
4870 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004871 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004872
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004873 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004874 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004875
4876 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004877 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004879
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004880 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004881
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004882 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004883}
4884
4885static int
4886load_long_binput(UnpicklerObject *self)
4887{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004888 PyObject *value;
4889 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004890 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004891
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004892 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004893 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004894
4895 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004896 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004897 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004898
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004899 idx = calc_binsize(s, 4);
Antoine Pitrou55549ec2011-08-30 00:27:10 +02004900 if (idx < 0) {
4901 PyErr_SetString(PyExc_ValueError,
4902 "negative LONG_BINPUT argument");
4903 return -1;
4904 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004905
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004906 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004907}
4908
4909static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004910do_append(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004911{
4912 PyObject *value;
4913 PyObject *list;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004914 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004915
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004916 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004917 if (x > len || x <= 0)
4918 return stack_underflow();
4919 if (len == x) /* nothing to do */
4920 return 0;
4921
4922 list = self->stack->data[x - 1];
4923
4924 if (PyList_Check(list)) {
4925 PyObject *slice;
4926 Py_ssize_t list_len;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004927 int ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004928
4929 slice = Pdata_poplist(self->stack, x);
4930 if (!slice)
4931 return -1;
4932 list_len = PyList_GET_SIZE(list);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004933 ret = PyList_SetSlice(list, list_len, list_len, slice);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004934 Py_DECREF(slice);
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004935 return ret;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004936 }
4937 else {
4938 PyObject *append_func;
4939
4940 append_func = PyObject_GetAttrString(list, "append");
4941 if (append_func == NULL)
4942 return -1;
4943 for (i = x; i < len; i++) {
4944 PyObject *result;
4945
4946 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004947 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004948 if (result == NULL) {
4949 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004950 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004951 return -1;
4952 }
4953 Py_DECREF(result);
4954 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004955 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004956 }
4957
4958 return 0;
4959}
4960
4961static int
4962load_append(UnpicklerObject *self)
4963{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004964 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004965}
4966
4967static int
4968load_appends(UnpicklerObject *self)
4969{
4970 return do_append(self, marker(self));
4971}
4972
4973static int
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004974do_setitems(UnpicklerObject *self, Py_ssize_t x)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004975{
4976 PyObject *value, *key;
4977 PyObject *dict;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02004978 Py_ssize_t len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004979 int status = 0;
4980
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004981 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004982 if (x > len || x <= 0)
4983 return stack_underflow();
4984 if (len == x) /* nothing to do */
4985 return 0;
4986 if ((len - x) % 2 != 0) {
4987 /* Currupt or hostile pickle -- we never write one like this. */
4988 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4989 return -1;
4990 }
4991
4992 /* Here, dict does not actually need to be a PyDict; it could be anything
4993 that supports the __setitem__ attribute. */
4994 dict = self->stack->data[x - 1];
4995
4996 for (i = x + 1; i < len; i += 2) {
4997 key = self->stack->data[i - 1];
4998 value = self->stack->data[i];
4999 if (PyObject_SetItem(dict, key, value) < 0) {
5000 status = -1;
5001 break;
5002 }
5003 }
5004
5005 Pdata_clear(self->stack, x);
5006 return status;
5007}
5008
5009static int
5010load_setitem(UnpicklerObject *self)
5011{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005012 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005013}
5014
5015static int
5016load_setitems(UnpicklerObject *self)
5017{
5018 return do_setitems(self, marker(self));
5019}
5020
5021static int
5022load_build(UnpicklerObject *self)
5023{
5024 PyObject *state, *inst, *slotstate;
5025 PyObject *setstate;
5026 int status = 0;
5027
5028 /* Stack is ... instance, state. We want to leave instance at
5029 * the stack top, possibly mutated via instance.__setstate__(state).
5030 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005031 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005032 return stack_underflow();
5033
5034 PDATA_POP(self->stack, state);
5035 if (state == NULL)
5036 return -1;
5037
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005038 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005039
5040 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005041 if (setstate == NULL) {
5042 if (PyErr_ExceptionMatches(PyExc_AttributeError))
5043 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00005044 else {
5045 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00005046 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00005047 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005048 }
5049 else {
5050 PyObject *result;
5051
5052 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005053 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00005054 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005055 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005056 Py_DECREF(setstate);
5057 if (result == NULL)
5058 return -1;
5059 Py_DECREF(result);
5060 return 0;
5061 }
5062
5063 /* A default __setstate__. First see whether state embeds a
5064 * slot state dict too (a proto 2 addition).
5065 */
5066 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
5067 PyObject *tmp = state;
5068
5069 state = PyTuple_GET_ITEM(tmp, 0);
5070 slotstate = PyTuple_GET_ITEM(tmp, 1);
5071 Py_INCREF(state);
5072 Py_INCREF(slotstate);
5073 Py_DECREF(tmp);
5074 }
5075 else
5076 slotstate = NULL;
5077
5078 /* Set inst.__dict__ from the state dict (if any). */
5079 if (state != Py_None) {
5080 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005081 PyObject *d_key, *d_value;
5082 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005083
5084 if (!PyDict_Check(state)) {
5085 PyErr_SetString(UnpicklingError, "state is not a dictionary");
5086 goto error;
5087 }
5088 dict = PyObject_GetAttrString(inst, "__dict__");
5089 if (dict == NULL)
5090 goto error;
5091
Antoine Pitroua9f48a02009-05-02 21:41:14 +00005092 i = 0;
5093 while (PyDict_Next(state, &i, &d_key, &d_value)) {
5094 /* normally the keys for instance attributes are
5095 interned. we should try to do that here. */
5096 Py_INCREF(d_key);
5097 if (PyUnicode_CheckExact(d_key))
5098 PyUnicode_InternInPlace(&d_key);
5099 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
5100 Py_DECREF(d_key);
5101 goto error;
5102 }
5103 Py_DECREF(d_key);
5104 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005105 Py_DECREF(dict);
5106 }
5107
5108 /* Also set instance attributes from the slotstate dict (if any). */
5109 if (slotstate != NULL) {
5110 PyObject *d_key, *d_value;
5111 Py_ssize_t i;
5112
5113 if (!PyDict_Check(slotstate)) {
5114 PyErr_SetString(UnpicklingError,
5115 "slot state is not a dictionary");
5116 goto error;
5117 }
5118 i = 0;
5119 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5120 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5121 goto error;
5122 }
5123 }
5124
5125 if (0) {
5126 error:
5127 status = -1;
5128 }
5129
5130 Py_DECREF(state);
5131 Py_XDECREF(slotstate);
5132 return status;
5133}
5134
5135static int
5136load_mark(UnpicklerObject *self)
5137{
5138
5139 /* Note that we split the (pickle.py) stack into two stacks, an
5140 * object stack and a mark stack. Here we push a mark onto the
5141 * mark stack.
5142 */
5143
5144 if ((self->num_marks + 1) >= self->marks_size) {
5145 size_t alloc;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005146 Py_ssize_t *marks;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005147
5148 /* Use the size_t type to check for overflow. */
5149 alloc = ((size_t)self->num_marks << 1) + 20;
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005150 if (alloc > (PY_SSIZE_T_MAX / sizeof(Py_ssize_t)) ||
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005151 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005152 PyErr_NoMemory();
5153 return -1;
5154 }
5155
5156 if (self->marks == NULL)
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005157 marks = (Py_ssize_t *) PyMem_Malloc(alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005158 else
Antoine Pitrou82be19f2011-08-29 23:09:33 +02005159 marks = (Py_ssize_t *) PyMem_Realloc(self->marks,
5160 alloc * sizeof(Py_ssize_t));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005161 if (marks == NULL) {
5162 PyErr_NoMemory();
5163 return -1;
5164 }
5165 self->marks = marks;
5166 self->marks_size = (Py_ssize_t)alloc;
5167 }
5168
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005169 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005170
5171 return 0;
5172}
5173
5174static int
5175load_reduce(UnpicklerObject *self)
5176{
5177 PyObject *callable = NULL;
5178 PyObject *argtup = NULL;
5179 PyObject *obj = NULL;
5180
5181 PDATA_POP(self->stack, argtup);
5182 if (argtup == NULL)
5183 return -1;
5184 PDATA_POP(self->stack, callable);
5185 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005186 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005187 Py_DECREF(callable);
5188 }
5189 Py_DECREF(argtup);
5190
5191 if (obj == NULL)
5192 return -1;
5193
5194 PDATA_PUSH(self->stack, obj, -1);
5195 return 0;
5196}
5197
5198/* Just raises an error if we don't know the protocol specified. PROTO
5199 * is the first opcode for protocols >= 2.
5200 */
5201static int
5202load_proto(UnpicklerObject *self)
5203{
5204 char *s;
5205 int i;
5206
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005207 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005208 return -1;
5209
5210 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005211 if (i <= HIGHEST_PROTOCOL) {
5212 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005213 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005214 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005215
5216 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5217 return -1;
5218}
5219
5220static PyObject *
5221load(UnpicklerObject *self)
5222{
5223 PyObject *err;
5224 PyObject *value = NULL;
5225 char *s;
5226
5227 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005228 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005229 Pdata_clear(self->stack, 0);
5230
5231 /* Convenient macros for the dispatch while-switch loop just below. */
5232#define OP(opcode, load_func) \
5233 case opcode: if (load_func(self) < 0) break; continue;
5234
5235#define OP_ARG(opcode, load_func, arg) \
5236 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5237
5238 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005239 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005240 break;
5241
5242 switch ((enum opcode)s[0]) {
5243 OP(NONE, load_none)
5244 OP(BININT, load_binint)
5245 OP(BININT1, load_binint1)
5246 OP(BININT2, load_binint2)
5247 OP(INT, load_int)
5248 OP(LONG, load_long)
5249 OP_ARG(LONG1, load_counted_long, 1)
5250 OP_ARG(LONG4, load_counted_long, 4)
5251 OP(FLOAT, load_float)
5252 OP(BINFLOAT, load_binfloat)
5253 OP(BINBYTES, load_binbytes)
5254 OP(SHORT_BINBYTES, load_short_binbytes)
5255 OP(BINSTRING, load_binstring)
5256 OP(SHORT_BINSTRING, load_short_binstring)
5257 OP(STRING, load_string)
5258 OP(UNICODE, load_unicode)
5259 OP(BINUNICODE, load_binunicode)
5260 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5261 OP_ARG(TUPLE1, load_counted_tuple, 1)
5262 OP_ARG(TUPLE2, load_counted_tuple, 2)
5263 OP_ARG(TUPLE3, load_counted_tuple, 3)
5264 OP(TUPLE, load_tuple)
5265 OP(EMPTY_LIST, load_empty_list)
5266 OP(LIST, load_list)
5267 OP(EMPTY_DICT, load_empty_dict)
5268 OP(DICT, load_dict)
5269 OP(OBJ, load_obj)
5270 OP(INST, load_inst)
5271 OP(NEWOBJ, load_newobj)
5272 OP(GLOBAL, load_global)
5273 OP(APPEND, load_append)
5274 OP(APPENDS, load_appends)
5275 OP(BUILD, load_build)
5276 OP(DUP, load_dup)
5277 OP(BINGET, load_binget)
5278 OP(LONG_BINGET, load_long_binget)
5279 OP(GET, load_get)
5280 OP(MARK, load_mark)
5281 OP(BINPUT, load_binput)
5282 OP(LONG_BINPUT, load_long_binput)
5283 OP(PUT, load_put)
5284 OP(POP, load_pop)
5285 OP(POP_MARK, load_pop_mark)
5286 OP(SETITEM, load_setitem)
5287 OP(SETITEMS, load_setitems)
5288 OP(PERSID, load_persid)
5289 OP(BINPERSID, load_binpersid)
5290 OP(REDUCE, load_reduce)
5291 OP(PROTO, load_proto)
5292 OP_ARG(EXT1, load_extension, 1)
5293 OP_ARG(EXT2, load_extension, 2)
5294 OP_ARG(EXT4, load_extension, 4)
5295 OP_ARG(NEWTRUE, load_bool, Py_True)
5296 OP_ARG(NEWFALSE, load_bool, Py_False)
5297
5298 case STOP:
5299 break;
5300
5301 case '\0':
5302 PyErr_SetNone(PyExc_EOFError);
5303 return NULL;
5304
5305 default:
5306 PyErr_Format(UnpicklingError,
5307 "invalid load key, '%c'.", s[0]);
5308 return NULL;
5309 }
5310
5311 break; /* and we are done! */
5312 }
5313
Antoine Pitrou04248a82010-10-12 20:51:21 +00005314 if (_Unpickler_SkipConsumed(self) < 0)
5315 return NULL;
5316
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005317 /* XXX: It is not clear what this is actually for. */
5318 if ((err = PyErr_Occurred())) {
5319 if (err == PyExc_EOFError) {
5320 PyErr_SetNone(PyExc_EOFError);
5321 }
5322 return NULL;
5323 }
5324
5325 PDATA_POP(self->stack, value);
5326 return value;
5327}
5328
5329PyDoc_STRVAR(Unpickler_load_doc,
5330"load() -> object. Load a pickle."
5331"\n"
5332"Read a pickled object representation from the open file object given in\n"
5333"the constructor, and return the reconstituted object hierarchy specified\n"
5334"therein.\n");
5335
5336static PyObject *
5337Unpickler_load(UnpicklerObject *self)
5338{
5339 /* Check whether the Unpickler was initialized correctly. This prevents
5340 segfaulting if a subclass overridden __init__ with a function that does
5341 not call Unpickler.__init__(). Here, we simply ensure that self->read
5342 is not NULL. */
5343 if (self->read == NULL) {
5344 PyErr_Format(UnpicklingError,
5345 "Unpickler.__init__() was not called by %s.__init__()",
5346 Py_TYPE(self)->tp_name);
5347 return NULL;
5348 }
5349
5350 return load(self);
5351}
5352
5353/* The name of find_class() is misleading. In newer pickle protocols, this
5354 function is used for loading any global (i.e., functions), not just
5355 classes. The name is kept only for backward compatibility. */
5356
5357PyDoc_STRVAR(Unpickler_find_class_doc,
5358"find_class(module_name, global_name) -> object.\n"
5359"\n"
5360"Return an object from a specified module, importing the module if\n"
5361"necessary. Subclasses may override this method (e.g. to restrict\n"
5362"unpickling of arbitrary classes and functions).\n"
5363"\n"
5364"This method is called whenever a class or a function object is\n"
5365"needed. Both arguments passed are str objects.\n");
5366
5367static PyObject *
5368Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5369{
5370 PyObject *global;
5371 PyObject *modules_dict;
5372 PyObject *module;
5373 PyObject *module_name, *global_name;
5374
5375 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5376 &module_name, &global_name))
5377 return NULL;
5378
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005379 /* Try to map the old names used in Python 2.x to the new ones used in
5380 Python 3.x. We do this only with old pickle protocols and when the
5381 user has not disabled the feature. */
5382 if (self->proto < 3 && self->fix_imports) {
5383 PyObject *key;
5384 PyObject *item;
5385
5386 /* Check if the global (i.e., a function or a class) was renamed
5387 or moved to another module. */
5388 key = PyTuple_Pack(2, module_name, global_name);
5389 if (key == NULL)
5390 return NULL;
5391 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5392 Py_DECREF(key);
5393 if (item) {
5394 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5395 PyErr_Format(PyExc_RuntimeError,
5396 "_compat_pickle.NAME_MAPPING values should be "
5397 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5398 return NULL;
5399 }
5400 module_name = PyTuple_GET_ITEM(item, 0);
5401 global_name = PyTuple_GET_ITEM(item, 1);
5402 if (!PyUnicode_Check(module_name) ||
5403 !PyUnicode_Check(global_name)) {
5404 PyErr_Format(PyExc_RuntimeError,
5405 "_compat_pickle.NAME_MAPPING values should be "
5406 "pairs of str, not (%.200s, %.200s)",
5407 Py_TYPE(module_name)->tp_name,
5408 Py_TYPE(global_name)->tp_name);
5409 return NULL;
5410 }
5411 }
5412 else if (PyErr_Occurred()) {
5413 return NULL;
5414 }
5415
5416 /* Check if the module was renamed. */
5417 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5418 if (item) {
5419 if (!PyUnicode_Check(item)) {
5420 PyErr_Format(PyExc_RuntimeError,
5421 "_compat_pickle.IMPORT_MAPPING values should be "
5422 "strings, not %.200s", Py_TYPE(item)->tp_name);
5423 return NULL;
5424 }
5425 module_name = item;
5426 }
5427 else if (PyErr_Occurred()) {
5428 return NULL;
5429 }
5430 }
5431
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005432 modules_dict = PySys_GetObject("modules");
5433 if (modules_dict == NULL)
5434 return NULL;
5435
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005436 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005437 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005438 if (PyErr_Occurred())
5439 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005440 module = PyImport_Import(module_name);
5441 if (module == NULL)
5442 return NULL;
5443 global = PyObject_GetAttr(module, global_name);
5444 Py_DECREF(module);
5445 }
5446 else {
5447 global = PyObject_GetAttr(module, global_name);
5448 }
5449 return global;
5450}
5451
5452static struct PyMethodDef Unpickler_methods[] = {
5453 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5454 Unpickler_load_doc},
5455 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5456 Unpickler_find_class_doc},
5457 {NULL, NULL} /* sentinel */
5458};
5459
5460static void
5461Unpickler_dealloc(UnpicklerObject *self)
5462{
5463 PyObject_GC_UnTrack((PyObject *)self);
5464 Py_XDECREF(self->readline);
5465 Py_XDECREF(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005466 Py_XDECREF(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005467 Py_XDECREF(self->stack);
5468 Py_XDECREF(self->pers_func);
5469 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005470 if (self->buffer.buf != NULL) {
5471 PyBuffer_Release(&self->buffer);
5472 self->buffer.buf = NULL;
5473 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005474
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005475 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005476 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005477 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005478 free(self->encoding);
5479 free(self->errors);
5480
5481 Py_TYPE(self)->tp_free((PyObject *)self);
5482}
5483
5484static int
5485Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5486{
5487 Py_VISIT(self->readline);
5488 Py_VISIT(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005489 Py_VISIT(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005490 Py_VISIT(self->stack);
5491 Py_VISIT(self->pers_func);
5492 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005493 return 0;
5494}
5495
5496static int
5497Unpickler_clear(UnpicklerObject *self)
5498{
5499 Py_CLEAR(self->readline);
5500 Py_CLEAR(self->read);
Antoine Pitrou04248a82010-10-12 20:51:21 +00005501 Py_CLEAR(self->peek);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005502 Py_CLEAR(self->stack);
5503 Py_CLEAR(self->pers_func);
5504 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005505 if (self->buffer.buf != NULL) {
5506 PyBuffer_Release(&self->buffer);
5507 self->buffer.buf = NULL;
5508 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005509
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005510 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005511 PyMem_Free(self->marks);
5512 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005513 PyMem_Free(self->input_line);
5514 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005515 free(self->encoding);
5516 self->encoding = NULL;
5517 free(self->errors);
5518 self->errors = NULL;
5519
5520 return 0;
5521}
5522
5523PyDoc_STRVAR(Unpickler_doc,
5524"Unpickler(file, *, encoding='ASCII', errors='strict')"
5525"\n"
5526"This takes a binary file for reading a pickle data stream.\n"
5527"\n"
5528"The protocol version of the pickle is detected automatically, so no\n"
5529"proto argument is needed.\n"
5530"\n"
5531"The file-like object must have two methods, a read() method\n"
5532"that takes an integer argument, and a readline() method that\n"
5533"requires no arguments. Both methods should return bytes.\n"
5534"Thus file-like object can be a binary file object opened for\n"
5535"reading, a BytesIO object, or any other custom object that\n"
5536"meets this interface.\n"
5537"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005538"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5539"which are used to control compatiblity support for pickle stream\n"
5540"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5541"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5542"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5543"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5544"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005545
5546static int
5547Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5548{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005549 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005550 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005551 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005552 char *encoding = NULL;
5553 char *errors = NULL;
5554
5555 /* XXX: That is an horrible error message. But, I don't know how to do
5556 better... */
5557 if (Py_SIZE(args) != 1) {
5558 PyErr_Format(PyExc_TypeError,
5559 "%s takes exactly one positional argument (%zd given)",
5560 Py_TYPE(self)->tp_name, Py_SIZE(args));
5561 return -1;
5562 }
5563
5564 /* Arguments parsing needs to be done in the __init__() method to allow
5565 subclasses to define their own __init__() method, which may (or may
5566 not) support Unpickler arguments. However, this means we need to be
5567 extra careful in the other Unpickler methods, since a subclass could
5568 forget to call Unpickler.__init__() thus breaking our internal
5569 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005570 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005571 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005572 return -1;
5573
5574 /* In case of multiple __init__() calls, clear previous content. */
5575 if (self->read != NULL)
5576 (void)Unpickler_clear(self);
5577
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005578 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005579 return -1;
5580
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005581 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005582 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005583
5584 self->fix_imports = PyObject_IsTrue(fix_imports);
5585 if (self->fix_imports == -1)
5586 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005587
5588 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5589 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5590 "persistent_load");
5591 if (self->pers_func == NULL)
5592 return -1;
5593 }
5594 else {
5595 self->pers_func = NULL;
5596 }
5597
5598 self->stack = (Pdata *)Pdata_New();
5599 if (self->stack == NULL)
5600 return -1;
5601
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005602 self->memo_size = 32;
5603 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005604 if (self->memo == NULL)
5605 return -1;
5606
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005607 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005608 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005609
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005610 return 0;
5611}
5612
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005613/* Define a proxy object for the Unpickler's internal memo object. This is to
5614 * avoid breaking code like:
5615 * unpickler.memo.clear()
5616 * and
5617 * unpickler.memo = saved_memo
5618 * Is this a good idea? Not really, but we don't want to break code that uses
5619 * it. Note that we don't implement the entire mapping API here. This is
5620 * intentional, as these should be treated as black-box implementation details.
5621 *
5622 * We do, however, have to implement pickling/unpickling support because of
5623 * real-world code like cvs2svn.
5624 */
5625
5626typedef struct {
5627 PyObject_HEAD
5628 UnpicklerObject *unpickler;
5629} UnpicklerMemoProxyObject;
5630
5631PyDoc_STRVAR(ump_clear_doc,
5632"memo.clear() -> None. Remove all items from memo.");
5633
5634static PyObject *
5635ump_clear(UnpicklerMemoProxyObject *self)
5636{
5637 _Unpickler_MemoCleanup(self->unpickler);
5638 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5639 if (self->unpickler->memo == NULL)
5640 return NULL;
5641 Py_RETURN_NONE;
5642}
5643
5644PyDoc_STRVAR(ump_copy_doc,
5645"memo.copy() -> new_memo. Copy the memo to a new object.");
5646
5647static PyObject *
5648ump_copy(UnpicklerMemoProxyObject *self)
5649{
5650 Py_ssize_t i;
5651 PyObject *new_memo = PyDict_New();
5652 if (new_memo == NULL)
5653 return NULL;
5654
5655 for (i = 0; i < self->unpickler->memo_size; i++) {
5656 int status;
5657 PyObject *key, *value;
5658
5659 value = self->unpickler->memo[i];
5660 if (value == NULL)
5661 continue;
5662
5663 key = PyLong_FromSsize_t(i);
5664 if (key == NULL)
5665 goto error;
5666 status = PyDict_SetItem(new_memo, key, value);
5667 Py_DECREF(key);
5668 if (status < 0)
5669 goto error;
5670 }
5671 return new_memo;
5672
5673error:
5674 Py_DECREF(new_memo);
5675 return NULL;
5676}
5677
5678PyDoc_STRVAR(ump_reduce_doc,
5679"memo.__reduce__(). Pickling support.");
5680
5681static PyObject *
5682ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5683{
5684 PyObject *reduce_value;
5685 PyObject *constructor_args;
5686 PyObject *contents = ump_copy(self);
5687 if (contents == NULL)
5688 return NULL;
5689
5690 reduce_value = PyTuple_New(2);
5691 if (reduce_value == NULL) {
5692 Py_DECREF(contents);
5693 return NULL;
5694 }
5695 constructor_args = PyTuple_New(1);
5696 if (constructor_args == NULL) {
5697 Py_DECREF(contents);
5698 Py_DECREF(reduce_value);
5699 return NULL;
5700 }
5701 PyTuple_SET_ITEM(constructor_args, 0, contents);
5702 Py_INCREF((PyObject *)&PyDict_Type);
5703 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5704 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5705 return reduce_value;
5706}
5707
5708static PyMethodDef unpicklerproxy_methods[] = {
5709 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5710 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5711 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5712 {NULL, NULL} /* sentinel */
5713};
5714
5715static void
5716UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5717{
5718 PyObject_GC_UnTrack(self);
5719 Py_XDECREF(self->unpickler);
5720 PyObject_GC_Del((PyObject *)self);
5721}
5722
5723static int
5724UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5725 visitproc visit, void *arg)
5726{
5727 Py_VISIT(self->unpickler);
5728 return 0;
5729}
5730
5731static int
5732UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5733{
5734 Py_CLEAR(self->unpickler);
5735 return 0;
5736}
5737
5738static PyTypeObject UnpicklerMemoProxyType = {
5739 PyVarObject_HEAD_INIT(NULL, 0)
5740 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5741 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5742 0,
5743 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5744 0, /* tp_print */
5745 0, /* tp_getattr */
5746 0, /* tp_setattr */
5747 0, /* tp_compare */
5748 0, /* tp_repr */
5749 0, /* tp_as_number */
5750 0, /* tp_as_sequence */
5751 0, /* tp_as_mapping */
Georg Brandlf038b322010-10-18 07:35:09 +00005752 PyObject_HashNotImplemented, /* tp_hash */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005753 0, /* tp_call */
5754 0, /* tp_str */
5755 PyObject_GenericGetAttr, /* tp_getattro */
5756 PyObject_GenericSetAttr, /* tp_setattro */
5757 0, /* tp_as_buffer */
5758 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5759 0, /* tp_doc */
5760 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5761 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5762 0, /* tp_richcompare */
5763 0, /* tp_weaklistoffset */
5764 0, /* tp_iter */
5765 0, /* tp_iternext */
5766 unpicklerproxy_methods, /* tp_methods */
5767};
5768
5769static PyObject *
5770UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5771{
5772 UnpicklerMemoProxyObject *self;
5773
5774 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5775 &UnpicklerMemoProxyType);
5776 if (self == NULL)
5777 return NULL;
5778 Py_INCREF(unpickler);
5779 self->unpickler = unpickler;
5780 PyObject_GC_Track(self);
5781 return (PyObject *)self;
5782}
5783
5784/*****************************************************************************/
5785
5786
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005787static PyObject *
5788Unpickler_get_memo(UnpicklerObject *self)
5789{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005790 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005791}
5792
5793static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005794Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005795{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005796 PyObject **new_memo;
5797 Py_ssize_t new_memo_size = 0;
5798 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005799
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005800 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005801 PyErr_SetString(PyExc_TypeError,
5802 "attribute deletion is not supported");
5803 return -1;
5804 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005805
5806 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5807 UnpicklerObject *unpickler =
5808 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5809
5810 new_memo_size = unpickler->memo_size;
5811 new_memo = _Unpickler_NewMemo(new_memo_size);
5812 if (new_memo == NULL)
5813 return -1;
5814
5815 for (i = 0; i < new_memo_size; i++) {
5816 Py_XINCREF(unpickler->memo[i]);
5817 new_memo[i] = unpickler->memo[i];
5818 }
5819 }
5820 else if (PyDict_Check(obj)) {
5821 Py_ssize_t i = 0;
5822 PyObject *key, *value;
5823
5824 new_memo_size = PyDict_Size(obj);
5825 new_memo = _Unpickler_NewMemo(new_memo_size);
5826 if (new_memo == NULL)
5827 return -1;
5828
5829 while (PyDict_Next(obj, &i, &key, &value)) {
5830 Py_ssize_t idx;
5831 if (!PyLong_Check(key)) {
5832 PyErr_SetString(PyExc_TypeError,
5833 "memo key must be integers");
5834 goto error;
5835 }
5836 idx = PyLong_AsSsize_t(key);
5837 if (idx == -1 && PyErr_Occurred())
5838 goto error;
5839 if (_Unpickler_MemoPut(self, idx, value) < 0)
5840 goto error;
5841 }
5842 }
5843 else {
5844 PyErr_Format(PyExc_TypeError,
5845 "'memo' attribute must be an UnpicklerMemoProxy object"
5846 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005847 return -1;
5848 }
5849
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005850 _Unpickler_MemoCleanup(self);
5851 self->memo_size = new_memo_size;
5852 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005853
5854 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005855
5856 error:
5857 if (new_memo_size) {
5858 i = new_memo_size;
5859 while (--i >= 0) {
5860 Py_XDECREF(new_memo[i]);
5861 }
5862 PyMem_FREE(new_memo);
5863 }
5864 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005865}
5866
5867static PyObject *
5868Unpickler_get_persload(UnpicklerObject *self)
5869{
5870 if (self->pers_func == NULL)
5871 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5872 else
5873 Py_INCREF(self->pers_func);
5874 return self->pers_func;
5875}
5876
5877static int
5878Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5879{
5880 PyObject *tmp;
5881
5882 if (value == NULL) {
5883 PyErr_SetString(PyExc_TypeError,
5884 "attribute deletion is not supported");
5885 return -1;
5886 }
5887 if (!PyCallable_Check(value)) {
5888 PyErr_SetString(PyExc_TypeError,
5889 "persistent_load must be a callable taking "
5890 "one argument");
5891 return -1;
5892 }
5893
5894 tmp = self->pers_func;
5895 Py_INCREF(value);
5896 self->pers_func = value;
5897 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5898
5899 return 0;
5900}
5901
5902static PyGetSetDef Unpickler_getsets[] = {
5903 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
5904 {"persistent_load", (getter)Unpickler_get_persload,
5905 (setter)Unpickler_set_persload},
5906 {NULL}
5907};
5908
5909static PyTypeObject Unpickler_Type = {
5910 PyVarObject_HEAD_INIT(NULL, 0)
5911 "_pickle.Unpickler", /*tp_name*/
5912 sizeof(UnpicklerObject), /*tp_basicsize*/
5913 0, /*tp_itemsize*/
5914 (destructor)Unpickler_dealloc, /*tp_dealloc*/
5915 0, /*tp_print*/
5916 0, /*tp_getattr*/
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005917 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00005918 0, /*tp_reserved*/
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005919 0, /*tp_repr*/
5920 0, /*tp_as_number*/
5921 0, /*tp_as_sequence*/
5922 0, /*tp_as_mapping*/
5923 0, /*tp_hash*/
5924 0, /*tp_call*/
5925 0, /*tp_str*/
5926 0, /*tp_getattro*/
5927 0, /*tp_setattro*/
5928 0, /*tp_as_buffer*/
5929 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5930 Unpickler_doc, /*tp_doc*/
5931 (traverseproc)Unpickler_traverse, /*tp_traverse*/
5932 (inquiry)Unpickler_clear, /*tp_clear*/
5933 0, /*tp_richcompare*/
5934 0, /*tp_weaklistoffset*/
5935 0, /*tp_iter*/
5936 0, /*tp_iternext*/
5937 Unpickler_methods, /*tp_methods*/
5938 0, /*tp_members*/
5939 Unpickler_getsets, /*tp_getset*/
5940 0, /*tp_base*/
5941 0, /*tp_dict*/
5942 0, /*tp_descr_get*/
5943 0, /*tp_descr_set*/
5944 0, /*tp_dictoffset*/
5945 (initproc)Unpickler_init, /*tp_init*/
5946 PyType_GenericAlloc, /*tp_alloc*/
5947 PyType_GenericNew, /*tp_new*/
5948 PyObject_GC_Del, /*tp_free*/
5949 0, /*tp_is_gc*/
5950};
5951
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005952PyDoc_STRVAR(pickle_dump_doc,
5953"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5954"\n"
5955"Write a pickled representation of obj to the open file object file. This\n"
5956"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5957"efficient.\n"
5958"\n"
5959"The optional protocol argument tells the pickler to use the given protocol;\n"
5960"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5961"backward-incompatible protocol designed for Python 3.0.\n"
5962"\n"
5963"Specifying a negative protocol version selects the highest protocol version\n"
5964"supported. The higher the protocol used, the more recent the version of\n"
5965"Python needed to read the pickle produced.\n"
5966"\n"
5967"The file argument must have a write() method that accepts a single bytes\n"
5968"argument. It can thus be a file object opened for binary writing, a\n"
5969"io.BytesIO instance, or any other custom object that meets this interface.\n"
5970"\n"
5971"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5972"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5973"so that the pickle data stream is readable with Python 2.x.\n");
5974
5975static PyObject *
5976pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5977{
5978 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5979 PyObject *obj;
5980 PyObject *file;
5981 PyObject *proto = NULL;
5982 PyObject *fix_imports = Py_True;
5983 PicklerObject *pickler;
5984
5985 /* fix_imports is a keyword-only argument. */
5986 if (Py_SIZE(args) > 3) {
5987 PyErr_Format(PyExc_TypeError,
5988 "pickle.dump() takes at most 3 positional "
5989 "argument (%zd given)", Py_SIZE(args));
5990 return NULL;
5991 }
5992
5993 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5994 &obj, &file, &proto, &fix_imports))
5995 return NULL;
5996
5997 pickler = _Pickler_New();
5998 if (pickler == NULL)
5999 return NULL;
6000
6001 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6002 goto error;
6003
6004 if (_Pickler_SetOutputStream(pickler, file) < 0)
6005 goto error;
6006
6007 if (dump(pickler, obj) < 0)
6008 goto error;
6009
6010 if (_Pickler_FlushToFile(pickler) < 0)
6011 goto error;
6012
6013 Py_DECREF(pickler);
6014 Py_RETURN_NONE;
6015
6016 error:
6017 Py_XDECREF(pickler);
6018 return NULL;
6019}
6020
6021PyDoc_STRVAR(pickle_dumps_doc,
6022"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
6023"\n"
6024"Return the pickled representation of the object as a bytes\n"
6025"object, instead of writing it to a file.\n"
6026"\n"
6027"The optional protocol argument tells the pickler to use the given protocol;\n"
6028"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
6029"backward-incompatible protocol designed for Python 3.0.\n"
6030"\n"
6031"Specifying a negative protocol version selects the highest protocol version\n"
6032"supported. The higher the protocol used, the more recent the version of\n"
6033"Python needed to read the pickle produced.\n"
6034"\n"
6035"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
6036"map the new Python 3.x names to the old module names used in Python 2.x,\n"
6037"so that the pickle data stream is readable with Python 2.x.\n");
6038
6039static PyObject *
6040pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
6041{
6042 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
6043 PyObject *obj;
6044 PyObject *proto = NULL;
6045 PyObject *result;
6046 PyObject *fix_imports = Py_True;
6047 PicklerObject *pickler;
6048
6049 /* fix_imports is a keyword-only argument. */
6050 if (Py_SIZE(args) > 2) {
6051 PyErr_Format(PyExc_TypeError,
6052 "pickle.dumps() takes at most 2 positional "
6053 "argument (%zd given)", Py_SIZE(args));
6054 return NULL;
6055 }
6056
6057 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
6058 &obj, &proto, &fix_imports))
6059 return NULL;
6060
6061 pickler = _Pickler_New();
6062 if (pickler == NULL)
6063 return NULL;
6064
6065 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
6066 goto error;
6067
6068 if (dump(pickler, obj) < 0)
6069 goto error;
6070
6071 result = _Pickler_GetString(pickler);
6072 Py_DECREF(pickler);
6073 return result;
6074
6075 error:
6076 Py_XDECREF(pickler);
6077 return NULL;
6078}
6079
6080PyDoc_STRVAR(pickle_load_doc,
6081"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6082"\n"
6083"Read a pickled object representation from the open file object file and\n"
6084"return the reconstituted object hierarchy specified therein. This is\n"
6085"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
6086"\n"
6087"The protocol version of the pickle is detected automatically, so no protocol\n"
6088"argument is needed. Bytes past the pickled object's representation are\n"
6089"ignored.\n"
6090"\n"
6091"The argument file must have two methods, a read() method that takes an\n"
6092"integer argument, and a readline() method that requires no arguments. Both\n"
6093"methods should return bytes. Thus *file* can be a binary file object opened\n"
6094"for reading, a BytesIO object, or any other custom object that meets this\n"
6095"interface.\n"
6096"\n"
6097"Optional keyword arguments are fix_imports, encoding and errors,\n"
6098"which are used to control compatiblity support for pickle stream generated\n"
6099"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6100"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6101"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6102"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6103
6104static PyObject *
6105pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
6106{
6107 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
6108 PyObject *file;
6109 PyObject *fix_imports = Py_True;
6110 PyObject *result;
6111 char *encoding = NULL;
6112 char *errors = NULL;
6113 UnpicklerObject *unpickler;
6114
6115 /* fix_imports, encoding and errors are a keyword-only argument. */
6116 if (Py_SIZE(args) != 1) {
6117 PyErr_Format(PyExc_TypeError,
6118 "pickle.load() takes exactly one positional "
6119 "argument (%zd given)", Py_SIZE(args));
6120 return NULL;
6121 }
6122
6123 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6124 &file, &fix_imports, &encoding, &errors))
6125 return NULL;
6126
6127 unpickler = _Unpickler_New();
6128 if (unpickler == NULL)
6129 return NULL;
6130
6131 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6132 goto error;
6133
6134 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6135 goto error;
6136
6137 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6138 if (unpickler->fix_imports == -1)
6139 goto error;
6140
6141 result = load(unpickler);
6142 Py_DECREF(unpickler);
6143 return result;
6144
6145 error:
6146 Py_XDECREF(unpickler);
6147 return NULL;
6148}
6149
6150PyDoc_STRVAR(pickle_loads_doc,
6151"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6152"\n"
6153"Read a pickled object hierarchy from a bytes object and return the\n"
6154"reconstituted object hierarchy specified therein\n"
6155"\n"
6156"The protocol version of the pickle is detected automatically, so no protocol\n"
6157"argument is needed. Bytes past the pickled object's representation are\n"
6158"ignored.\n"
6159"\n"
6160"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6161"are used to control compatiblity support for pickle stream generated\n"
6162"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6163"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6164"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6165"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6166
6167static PyObject *
6168pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6169{
6170 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6171 PyObject *input;
6172 PyObject *fix_imports = Py_True;
6173 PyObject *result;
6174 char *encoding = NULL;
6175 char *errors = NULL;
6176 UnpicklerObject *unpickler;
6177
6178 /* fix_imports, encoding and errors are a keyword-only argument. */
6179 if (Py_SIZE(args) != 1) {
6180 PyErr_Format(PyExc_TypeError,
6181 "pickle.loads() takes exactly one positional "
6182 "argument (%zd given)", Py_SIZE(args));
6183 return NULL;
6184 }
6185
6186 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6187 &input, &fix_imports, &encoding, &errors))
6188 return NULL;
6189
6190 unpickler = _Unpickler_New();
6191 if (unpickler == NULL)
6192 return NULL;
6193
6194 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6195 goto error;
6196
6197 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6198 goto error;
6199
6200 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6201 if (unpickler->fix_imports == -1)
6202 goto error;
6203
6204 result = load(unpickler);
6205 Py_DECREF(unpickler);
6206 return result;
6207
6208 error:
6209 Py_XDECREF(unpickler);
6210 return NULL;
6211}
6212
6213
6214static struct PyMethodDef pickle_methods[] = {
6215 {"dump", (PyCFunction)pickle_dump, METH_VARARGS|METH_KEYWORDS,
6216 pickle_dump_doc},
6217 {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
6218 pickle_dumps_doc},
6219 {"load", (PyCFunction)pickle_load, METH_VARARGS|METH_KEYWORDS,
6220 pickle_load_doc},
6221 {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
6222 pickle_loads_doc},
6223 {NULL, NULL} /* sentinel */
6224};
6225
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006226static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006227initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006228{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006229 PyObject *copyreg = NULL;
6230 PyObject *compat_pickle = NULL;
6231
6232 /* XXX: We should ensure that the types of the dictionaries imported are
6233 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6234 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006235
6236 copyreg = PyImport_ImportModule("copyreg");
6237 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006238 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006239 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6240 if (!dispatch_table)
6241 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006242 extension_registry = \
6243 PyObject_GetAttrString(copyreg, "_extension_registry");
6244 if (!extension_registry)
6245 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006246 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6247 if (!inverted_registry)
6248 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006249 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6250 if (!extension_cache)
6251 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006252 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006253
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006254 /* Load the 2.x -> 3.x stdlib module mapping tables */
6255 compat_pickle = PyImport_ImportModule("_compat_pickle");
6256 if (!compat_pickle)
6257 goto error;
6258 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6259 if (!name_mapping_2to3)
6260 goto error;
6261 if (!PyDict_CheckExact(name_mapping_2to3)) {
6262 PyErr_Format(PyExc_RuntimeError,
6263 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6264 Py_TYPE(name_mapping_2to3)->tp_name);
6265 goto error;
6266 }
6267 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6268 "IMPORT_MAPPING");
6269 if (!import_mapping_2to3)
6270 goto error;
6271 if (!PyDict_CheckExact(import_mapping_2to3)) {
6272 PyErr_Format(PyExc_RuntimeError,
6273 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6274 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6275 goto error;
6276 }
6277 /* ... and the 3.x -> 2.x mapping tables */
6278 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6279 "REVERSE_NAME_MAPPING");
6280 if (!name_mapping_3to2)
6281 goto error;
6282 if (!PyDict_CheckExact(name_mapping_3to2)) {
6283 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02006284 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006285 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6286 goto error;
6287 }
6288 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6289 "REVERSE_IMPORT_MAPPING");
6290 if (!import_mapping_3to2)
6291 goto error;
6292 if (!PyDict_CheckExact(import_mapping_3to2)) {
6293 PyErr_Format(PyExc_RuntimeError,
6294 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6295 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6296 goto error;
6297 }
6298 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006299
6300 empty_tuple = PyTuple_New(0);
6301 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006302 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006303 two_tuple = PyTuple_New(2);
6304 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006305 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006306 /* We use this temp container with no regard to refcounts, or to
6307 * keeping containees alive. Exempt from GC, because we don't
6308 * want anything looking at two_tuple() by magic.
6309 */
6310 PyObject_GC_UnTrack(two_tuple);
6311
6312 return 0;
6313
6314 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006315 Py_CLEAR(copyreg);
6316 Py_CLEAR(dispatch_table);
6317 Py_CLEAR(extension_registry);
6318 Py_CLEAR(inverted_registry);
6319 Py_CLEAR(extension_cache);
6320 Py_CLEAR(compat_pickle);
6321 Py_CLEAR(name_mapping_2to3);
6322 Py_CLEAR(import_mapping_2to3);
6323 Py_CLEAR(name_mapping_3to2);
6324 Py_CLEAR(import_mapping_3to2);
6325 Py_CLEAR(empty_tuple);
6326 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006327 return -1;
6328}
6329
6330static struct PyModuleDef _picklemodule = {
6331 PyModuleDef_HEAD_INIT,
6332 "_pickle",
6333 pickle_module_doc,
6334 -1,
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006335 pickle_methods,
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006336 NULL,
6337 NULL,
6338 NULL,
6339 NULL
6340};
6341
6342PyMODINIT_FUNC
6343PyInit__pickle(void)
6344{
6345 PyObject *m;
6346
6347 if (PyType_Ready(&Unpickler_Type) < 0)
6348 return NULL;
6349 if (PyType_Ready(&Pickler_Type) < 0)
6350 return NULL;
6351 if (PyType_Ready(&Pdata_Type) < 0)
6352 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006353 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6354 return NULL;
6355 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6356 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006357
6358 /* Create the module and add the functions. */
6359 m = PyModule_Create(&_picklemodule);
6360 if (m == NULL)
6361 return NULL;
6362
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006363 Py_INCREF(&Pickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006364 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6365 return NULL;
Antoine Pitrou8391cf42011-07-15 21:01:21 +02006366 Py_INCREF(&Unpickler_Type);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006367 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6368 return NULL;
6369
6370 /* Initialize the exceptions. */
6371 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6372 if (PickleError == NULL)
6373 return NULL;
6374 PicklingError = \
6375 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6376 if (PicklingError == NULL)
6377 return NULL;
6378 UnpicklingError = \
6379 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6380 if (UnpicklingError == NULL)
6381 return NULL;
6382
6383 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6384 return NULL;
6385 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6386 return NULL;
6387 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6388 return NULL;
6389
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006390 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006391 return NULL;
6392
6393 return m;
6394}