blob: f9dc0456d19545161c208ac5ec6d88858e8498a4 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
/* Docstring text for this module, declared under the name
   pickle_module_doc. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions known to this module.  Bump this when new
   opcodes are added to the pickle protocol.

   _Pickler_SetProtocol() falls back to DEFAULT_PROTOCOL when no protocol is
   given, maps negative values to HIGHEST_PROTOCOL and rejects anything
   greater than HIGHEST_PROTOCOL. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
/* Pickle opcodes. These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   The value of each enumerator is the single character that identifies the
   opcode, so the values must never be changed. */
enum opcode {
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',
};
76
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each string is a complete INT ('I') opcode line, newline included.  Any
 * earlier TRUE/FALSE macro definitions are discarded first.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
86
/* Tuning constants for the pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Initial size, in bytes, of the write buffer of Pickler. */
    WRITE_BUF_SIZE = 4096,

    /* Maximum size, in bytes, of the write buffer of Pickler when pickling
       to a stream.  This is ignored for in-memory pickling. */
    MAX_WRITE_BUF_SIZE = 64 * 1024,
};
105
/* Module-level object references.  All of them start out as NULL here. */

/* Exception classes for pickle. These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError = NULL;
static PyObject *PicklingError = NULL;
static PyObject *UnpicklingError = NULL;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table = NULL;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry = NULL;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry = NULL;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache = NULL;

/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
static PyObject *name_mapping_2to3 = NULL;
/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
static PyObject *import_mapping_2to3 = NULL;
/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
static PyObject *name_mapping_3to2 = NULL;
static PyObject *import_mapping_3to2 = NULL;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple = NULL;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000135
136static int
137stack_underflow(void)
138{
139 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
140 return -1;
141}
142
/* Internal data type used as the unpickling stack.

   The number of items currently on the stack is kept in the variable-size
   object header (read and written through Py_SIZE()).  `data` is a
   separately allocated array; the stack owns a reference to each item it
   holds (Pdata_dealloc() releases them). */
typedef struct {
    PyObject_VAR_HEAD
    PyObject **data;
    Py_ssize_t allocated;  /* number of slots in data allocated */
} Pdata;
149
150static void
151Pdata_dealloc(Pdata *self)
152{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000153 int i = Py_SIZE(self);
154 while (--i >= 0) {
155 Py_DECREF(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000156 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000157 PyMem_FREE(self->data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000158 PyObject_Del(self);
159}
160
/* Type object for Pdata.  Only the name, the basic size, the item size and
   the deallocator are given; the remaining slots are left to their
   zero-initialized defaults. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
168
169static PyObject *
170Pdata_New(void)
171{
172 Pdata *self;
173
174 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
175 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000176 Py_SIZE(self) = 0;
177 self->allocated = 8;
178 self->data = PyMem_MALLOC(self->allocated * sizeof(PyObject *));
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000179 if (self->data)
180 return (PyObject *)self;
181 Py_DECREF(self);
182 return PyErr_NoMemory();
183}
184
185
186/* Retain only the initial clearto items. If clearto >= the current
187 * number of items, this is a (non-erroneous) NOP.
188 */
189static int
190Pdata_clear(Pdata *self, int clearto)
191{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000192 int i = Py_SIZE(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000193
194 if (clearto < 0)
195 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000196 if (clearto >= i)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000197 return 0;
198
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000199 while (--i >= clearto) {
200 Py_CLEAR(self->data[i]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000201 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000202 Py_SIZE(self) = clearto;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000203 return 0;
204}
205
206static int
207Pdata_grow(Pdata *self)
208{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000209 PyObject **data = self->data;
210 Py_ssize_t allocated = self->allocated;
211 Py_ssize_t new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000212
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000213 new_allocated = (allocated >> 3) + 6;
214 /* check for integer overflow */
215 if (new_allocated > PY_SSIZE_T_MAX - allocated)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000216 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000217 new_allocated += allocated;
218 if (new_allocated > (PY_SSIZE_T_MAX / sizeof(PyObject *)))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000219 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000220 data = PyMem_REALLOC(data, new_allocated * sizeof(PyObject *));
221 if (data == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000222 goto nomemory;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000223
224 self->data = data;
225 self->allocated = new_allocated;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000226 return 0;
227
228 nomemory:
229 PyErr_NoMemory();
230 return -1;
231}
232
233/* D is a Pdata*. Pop the topmost element and store it into V, which
234 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
235 * is raised and V is set to NULL.
236 */
237static PyObject *
238Pdata_pop(Pdata *self)
239{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000240 if (Py_SIZE(self) == 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000241 PyErr_SetString(UnpicklingError, "bad pickle data");
242 return NULL;
243 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000244 return self->data[--Py_SIZE(self)];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000245}
246#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
247
248static int
249Pdata_push(Pdata *self, PyObject *obj)
250{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000251 if (Py_SIZE(self) == self->allocated && Pdata_grow(self) < 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000252 return -1;
253 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000254 self->data[Py_SIZE(self)++] = obj;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000255 return 0;
256}
257
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER.
   NOTE(review): on that failure path the reference to O is not released by
   this macro -- confirm how callers are expected to deal with it. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference taken here is dropped again before the
   enclosing function returns ER, so a failed append does not leak it.
   O is evaluated more than once; pass an expression without side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        } } while(0)
266
267static PyObject *
268Pdata_poptuple(Pdata *self, Py_ssize_t start)
269{
270 PyObject *tuple;
271 Py_ssize_t len, i, j;
272
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000273 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000274 tuple = PyTuple_New(len);
275 if (tuple == NULL)
276 return NULL;
277 for (i = start, j = 0; j < len; i++, j++)
278 PyTuple_SET_ITEM(tuple, j, self->data[i]);
279
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000280 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000281 return tuple;
282}
283
284static PyObject *
285Pdata_poplist(Pdata *self, Py_ssize_t start)
286{
287 PyObject *list;
288 Py_ssize_t len, i, j;
289
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000290 len = Py_SIZE(self) - start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000291 list = PyList_New(len);
292 if (list == NULL)
293 return NULL;
294 for (i = start, j = 0; j < len; i++, j++)
295 PyList_SET_ITEM(list, j, self->data[i]);
296
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000297 Py_SIZE(self) = start;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000298 return list;
299}
300
/* One slot of the pickler memo table.  A slot is unused while me_key is
   NULL; a used slot holds a reference to its key. */
typedef struct {
    PyObject *me_key;
    long me_value;
} PyMemoEntry;

/* Hash table used as the pickler memo; collisions are resolved by probing
   other slots of the same array (see _PyMemoTable_Lookup()). */
typedef struct {
    Py_ssize_t mt_mask;         /* mt_allocated - 1, masks probe indices */
    Py_ssize_t mt_used;         /* number of slots in use */
    Py_ssize_t mt_allocated;    /* number of slots in mt_table */
    PyMemoEntry *mt_table;      /* the slot array */
} PyMemoTable;
312
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyMemoTable *memo;          /* Memo table, keeps track of the objects
                                   already seen in order to support pickling
                                   of self-referential objects. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached one-element argument tuple managed
                                   by ARG_TUP()/FREE_ARG_TUP(); can be NULL */

    PyObject *write;            /* write() method of the output stream. */
    PyObject *output_buffer;    /* Write into a local bytes object buffer
                                   before flushing to the stream. */
    Py_ssize_t output_len;      /* Length of output_buffer. */
    Py_ssize_t max_output_len;  /* Allocation size of output_buffer. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes. It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* NOTE(review): presumably the current
                                   nesting depth checked against
                                   FAST_NESTING_LIMIT -- confirm. */
    int fix_imports;            /* Indicate whether Pickler should fix
                                   the name of globals for Python 2.x. */
    PyObject *fast_memo;
} PicklerObject;
340
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */

    /* The unpickler memo is just an array of PyObject *s. Using a dict
       is unnecessary, since the keys are contiguous ints. */
    PyObject **memo;
    Py_ssize_t memo_size;

    PyObject *arg;              /* Cached one-element argument tuple managed
                                   by ARG_TUP()/FREE_ARG_TUP(); can be NULL */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */

    Py_buffer buffer;           /* Buffer view acquired on the current input
                                   by _Unpickler_SetStringInput(). */
    char *input_buffer;         /* Start of the data exposed by `buffer`. */
    char *input_line;           /* Scratch copy of a line, (re)allocated by
                                   _Unpickler_CopyLine(). */
    Py_ssize_t input_len;       /* Number of bytes available in input_buffer;
                                   reset to 0 after each read from a file. */
    Py_ssize_t next_read_idx;   /* Offset in input_buffer of the next byte to
                                   hand out. */
    PyObject *read;             /* read() method of the input stream. */
    PyObject *readline;         /* readline() method of the input stream. */

    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
    int proto;                  /* Protocol of the pickle loaded. */
    int fix_imports;            /* Indicate whether Unpickler should fix
                                   the name of globals pickled by Python 2.x. */
} UnpicklerObject;
375
/* Forward declarations of functions and type objects that are referenced
   before the point where they are defined. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
381
382
/*************************************************************************
 A custom hashtable mapping object pointers (PyObject *) to longs. This is
 used by the pickler for memoization. Using a custom hashtable rather than
 PyDict allows us to skip a bunch of unnecessary object creation. This makes
 a huge performance difference. */

/* Number of slots in a freshly created table; also the smallest size the
   table is ever resized to. */
#define MT_MINSIZE 8
/* Number of bits the perturbation value is shifted right by on each probe
   in _PyMemoTable_Lookup(). */
#define PERTURB_SHIFT 5
391
392
393static PyMemoTable *
394PyMemoTable_New(void)
395{
396 PyMemoTable *memo = PyMem_MALLOC(sizeof(PyMemoTable));
397 if (memo == NULL) {
398 PyErr_NoMemory();
399 return NULL;
400 }
401
402 memo->mt_used = 0;
403 memo->mt_allocated = MT_MINSIZE;
404 memo->mt_mask = MT_MINSIZE - 1;
405 memo->mt_table = PyMem_MALLOC(MT_MINSIZE * sizeof(PyMemoEntry));
406 if (memo->mt_table == NULL) {
407 PyMem_FREE(memo);
408 PyErr_NoMemory();
409 return NULL;
410 }
411 memset(memo->mt_table, 0, MT_MINSIZE * sizeof(PyMemoEntry));
412
413 return memo;
414}
415
416static PyMemoTable *
417PyMemoTable_Copy(PyMemoTable *self)
418{
419 Py_ssize_t i;
420 PyMemoTable *new = PyMemoTable_New();
421 if (new == NULL)
422 return NULL;
423
424 new->mt_used = self->mt_used;
425 new->mt_allocated = self->mt_allocated;
426 new->mt_mask = self->mt_mask;
427 /* The table we get from _New() is probably smaller than we wanted.
428 Free it and allocate one that's the right size. */
429 PyMem_FREE(new->mt_table);
430 new->mt_table = PyMem_MALLOC(self->mt_allocated * sizeof(PyMemoEntry));
431 if (new->mt_table == NULL) {
432 PyMem_FREE(new);
433 return NULL;
434 }
435 for (i = 0; i < self->mt_allocated; i++) {
436 Py_XINCREF(self->mt_table[i].me_key);
437 }
438 memcpy(new->mt_table, self->mt_table,
439 sizeof(PyMemoEntry) * self->mt_allocated);
440
441 return new;
442}
443
444static Py_ssize_t
445PyMemoTable_Size(PyMemoTable *self)
446{
447 return self->mt_used;
448}
449
450static int
451PyMemoTable_Clear(PyMemoTable *self)
452{
453 Py_ssize_t i = self->mt_allocated;
454
455 while (--i >= 0) {
456 Py_XDECREF(self->mt_table[i].me_key);
457 }
458 self->mt_used = 0;
459 memset(self->mt_table, 0, self->mt_allocated * sizeof(PyMemoEntry));
460 return 0;
461}
462
463static void
464PyMemoTable_Del(PyMemoTable *self)
465{
466 if (self == NULL)
467 return;
468 PyMemoTable_Clear(self);
469
470 PyMem_FREE(self->mt_table);
471 PyMem_FREE(self);
472}
473
/* Since entries cannot be deleted from this hashtable, _PyMemoTable_Lookup()
   can be considerably simpler than dictobject.c's lookdict().

   Returns the slot for `key`: either the slot that already holds `key`, or
   the first unused slot (me_key == NULL) met along the probe sequence.
   Keys are compared by pointer identity.  The probe loop only ends when
   such a slot is found, so the table must always contain at least one
   unused slot. */
static PyMemoEntry *
_PyMemoTable_Lookup(PyMemoTable *self, PyObject *key)
{
    size_t i;
    size_t perturb;
    size_t mask = (size_t)self->mt_mask;
    PyMemoEntry *table = self->mt_table;
    PyMemoEntry *entry;
    /* The hash is the key's address with its three lowest bits dropped. */
    long hash = (long)key >> 3;

    i = hash & mask;
    entry = &table[i];
    if (entry->me_key == NULL || entry->me_key == key)
        return entry;

    /* First slot taken by another key: keep probing.  perturb feeds the
       remaining hash bits into the sequence and is shifted down on every
       round. */
    for (perturb = hash; ; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        entry = &table[i & mask];
        if (entry->me_key == NULL || entry->me_key == key)
            return entry;
    }
    assert(0);                  /* Never reached */
    return NULL;
}
500
501/* Returns -1 on failure, 0 on success. */
502static int
503_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size)
504{
505 PyMemoEntry *oldtable = NULL;
506 PyMemoEntry *oldentry, *newentry;
507 Py_ssize_t new_size = MT_MINSIZE;
508 Py_ssize_t to_process;
509
510 assert(min_size > 0);
511
512 /* Find the smallest valid table size >= min_size. */
513 while (new_size < min_size && new_size > 0)
514 new_size <<= 1;
515 if (new_size <= 0) {
516 PyErr_NoMemory();
517 return -1;
518 }
519 /* new_size needs to be a power of two. */
520 assert((new_size & (new_size - 1)) == 0);
521
522 /* Allocate new table. */
523 oldtable = self->mt_table;
524 self->mt_table = PyMem_MALLOC(new_size * sizeof(PyMemoEntry));
525 if (self->mt_table == NULL) {
526 PyMem_FREE(oldtable);
527 PyErr_NoMemory();
528 return -1;
529 }
530 self->mt_allocated = new_size;
531 self->mt_mask = new_size - 1;
532 memset(self->mt_table, 0, sizeof(PyMemoEntry) * new_size);
533
534 /* Copy entries from the old table. */
535 to_process = self->mt_used;
536 for (oldentry = oldtable; to_process > 0; oldentry++) {
537 if (oldentry->me_key != NULL) {
538 to_process--;
539 /* newentry is a pointer to a chunk of the new
540 mt_table, so we're setting the key:value pair
541 in-place. */
542 newentry = _PyMemoTable_Lookup(self, oldentry->me_key);
543 newentry->me_key = oldentry->me_key;
544 newentry->me_value = oldentry->me_value;
545 }
546 }
547
548 /* Deallocate the old table. */
549 PyMem_FREE(oldtable);
550 return 0;
551}
552
553/* Returns NULL on failure, a pointer to the value otherwise. */
554static long *
555PyMemoTable_Get(PyMemoTable *self, PyObject *key)
556{
557 PyMemoEntry *entry = _PyMemoTable_Lookup(self, key);
558 if (entry->me_key == NULL)
559 return NULL;
560 return &entry->me_value;
561}
562
563/* Returns -1 on failure, 0 on success. */
564static int
565PyMemoTable_Set(PyMemoTable *self, PyObject *key, long value)
566{
567 PyMemoEntry *entry;
568
569 assert(key != NULL);
570
571 entry = _PyMemoTable_Lookup(self, key);
572 if (entry->me_key != NULL) {
573 entry->me_value = value;
574 return 0;
575 }
576 Py_INCREF(key);
577 entry->me_key = key;
578 entry->me_value = value;
579 self->mt_used++;
580
581 /* If we added a key, we can safely resize. Otherwise just return!
582 * If used >= 2/3 size, adjust size. Normally, this quaduples the size.
583 *
584 * Quadrupling the size improves average table sparseness
585 * (reducing collisions) at the cost of some memory. It also halves
586 * the number of expensive resize operations in a growing memo table.
587 *
588 * Very large memo tables (over 50K items) use doubling instead.
589 * This may help applications with severe memory constraints.
590 */
591 if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2))
592 return 0;
593 return _PyMemoTable_ResizeTable(self,
594 (self->mt_used > 50000 ? 2 : 4) * self->mt_used);
595}
596
/* The memo table tuning macros are not needed past this point. */
#undef MT_MINSIZE
#undef PERTURB_SHIFT
599
600/*************************************************************************/
601
/* Helpers for creating the argument tuple passed to functions. This has the
   performance advantage of calling PyTuple_New() only once.

   XXX(avassalotti): Inline directly in _Pickler_FastCall() and
   _Unpickler_FastCall(). */

/* Store obj as the only item of self->arg, creating the one-element tuple
   first if there is none yet.  The reference to obj is consumed either way:
   it is placed in the tuple (after releasing the item previously stored
   there, if any), or it is released when the tuple cannot be created, in
   which case self->arg stays NULL. */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* Give up the cached tuple when its reference count is above 1, i.e. when
   something other than self still refers to it; the next ARG_TUP() then
   creates a new tuple.  self->arg must not be NULL when this is used. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
621
622/* A temporary cleaner API for fast single argument function call.
623
624 XXX: Does caching the argument tuple provides any real performance benefits?
625
626 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
627 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
628 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
629 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
630 (i.e, call PyTuple_New() and store the returned value in an array), to save
631 one second (wall clock time). Either ways, the loading time a pickle stream
632 large enough to generate this number of calls would be massively
633 overwhelmed by other factors, like I/O throughput, the GC traversal and
634 object allocation overhead. So, I really doubt these functions provide any
635 real benefits.
636
637 On the other hand, oprofile reports that pickle spends a lot of time in
638 these functions. But, that is probably more related to the function call
639 overhead, than the argument tuple allocation.
640
641 XXX: And, what is the reference behavior of these? Steal, borrow? At first
642 glance, it seems to steal the reference of 'arg' and borrow the reference
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000643 of 'func'. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000644static PyObject *
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000645_Pickler_FastCall(PicklerObject *self, PyObject *func, PyObject *arg)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000646{
647 PyObject *result = NULL;
648
649 ARG_TUP(self, arg);
650 if (self->arg) {
651 result = PyObject_Call(func, self->arg, NULL);
652 FREE_ARG_TUP(self);
653 }
654 return result;
655}
656
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000657static int
658_Pickler_ClearBuffer(PicklerObject *self)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000659{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000660 Py_CLEAR(self->output_buffer);
661 self->output_buffer =
662 PyBytes_FromStringAndSize(NULL, self->max_output_len);
663 if (self->output_buffer == NULL)
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000664 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000665 self->output_len = 0;
666 return 0;
667}
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000668
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000669static PyObject *
670_Pickler_GetString(PicklerObject *self)
671{
672 PyObject *output_buffer = self->output_buffer;
673
674 assert(self->output_buffer != NULL);
675 self->output_buffer = NULL;
676 /* Resize down to exact size */
677 if (_PyBytes_Resize(&output_buffer, self->output_len) < 0)
678 return NULL;
679 return output_buffer;
680}
681
682static int
683_Pickler_FlushToFile(PicklerObject *self)
684{
685 PyObject *output, *result;
686
687 assert(self->write != NULL);
688
689 output = _Pickler_GetString(self);
690 if (output == NULL)
691 return -1;
692
693 result = _Pickler_FastCall(self, self->write, output);
694 Py_XDECREF(result);
695 return (result == NULL) ? -1 : 0;
696}
697
698static int
699_Pickler_Write(PicklerObject *self, const char *s, Py_ssize_t n)
700{
701 Py_ssize_t i, required;
702 char *buffer;
703
704 assert(s != NULL);
705
706 required = self->output_len + n;
707 if (required > self->max_output_len) {
708 if (self->write != NULL && required > MAX_WRITE_BUF_SIZE) {
709 /* XXX This reallocates a new buffer every time, which is a bit
710 wasteful. */
711 if (_Pickler_FlushToFile(self) < 0)
712 return -1;
713 if (_Pickler_ClearBuffer(self) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000714 return -1;
715 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000716 if (self->write != NULL && n > MAX_WRITE_BUF_SIZE) {
717 /* we already flushed above, so the buffer is empty */
718 PyObject *result;
719 /* XXX we could spare an intermediate copy and pass
720 a memoryview instead */
721 PyObject *output = PyBytes_FromStringAndSize(s, n);
722 if (s == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000723 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000724 result = _Pickler_FastCall(self, self->write, output);
725 Py_XDECREF(result);
726 return (result == NULL) ? -1 : 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000727 }
728 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000729 if (self->output_len >= PY_SSIZE_T_MAX / 2 - n) {
730 PyErr_NoMemory();
731 return -1;
732 }
733 self->max_output_len = (self->output_len + n) * 2;
734 if (_PyBytes_Resize(&self->output_buffer, self->max_output_len) < 0)
735 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000736 }
737 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000738 buffer = PyBytes_AS_STRING(self->output_buffer);
739 if (n < 8) {
740 /* This is faster than memcpy when the string is short. */
741 for (i = 0; i < n; i++) {
742 buffer[self->output_len + i] = s[i];
743 }
744 }
745 else {
746 memcpy(buffer + self->output_len, s, n);
747 }
748 self->output_len += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000749 return n;
750}
751
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000752static PicklerObject *
753_Pickler_New(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000754{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000755 PicklerObject *self;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000756
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000757 self = PyObject_GC_New(PicklerObject, &Pickler_Type);
758 if (self == NULL)
759 return NULL;
760
761 self->pers_func = NULL;
762 self->arg = NULL;
763 self->write = NULL;
764 self->proto = 0;
765 self->bin = 0;
766 self->fast = 0;
767 self->fast_nesting = 0;
768 self->fix_imports = 0;
769 self->fast_memo = NULL;
770
771 self->memo = PyMemoTable_New();
772 if (self->memo == NULL) {
773 Py_DECREF(self);
774 return NULL;
775 }
776 self->max_output_len = WRITE_BUF_SIZE;
777 self->output_len = 0;
778 self->output_buffer = PyBytes_FromStringAndSize(NULL,
779 self->max_output_len);
780 if (self->output_buffer == NULL) {
781 Py_DECREF(self);
782 return NULL;
783 }
784 return self;
785}
786
787static int
788_Pickler_SetProtocol(PicklerObject *self, PyObject *proto_obj,
789 PyObject *fix_imports_obj)
790{
791 long proto = 0;
792 int fix_imports;
793
794 if (proto_obj == NULL || proto_obj == Py_None)
795 proto = DEFAULT_PROTOCOL;
796 else {
797 proto = PyLong_AsLong(proto_obj);
798 if (proto == -1 && PyErr_Occurred())
799 return -1;
800 }
801 if (proto < 0)
802 proto = HIGHEST_PROTOCOL;
803 if (proto > HIGHEST_PROTOCOL) {
804 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
805 HIGHEST_PROTOCOL);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000806 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000807 }
808 fix_imports = PyObject_IsTrue(fix_imports_obj);
809 if (fix_imports == -1)
810 return -1;
811
812 self->proto = proto;
813 self->bin = proto > 0;
814 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000815
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000816 return 0;
817}
818
819/* Returns -1 (with an exception set) on failure, 0 on success. This may
820 be called once on a freshly created Pickler. */
821static int
822_Pickler_SetOutputStream(PicklerObject *self, PyObject *file)
823{
824 assert(file != NULL);
825 self->write = PyObject_GetAttrString(file, "write");
826 if (self->write == NULL) {
827 if (PyErr_ExceptionMatches(PyExc_AttributeError))
828 PyErr_SetString(PyExc_TypeError,
829 "file must have a 'write' attribute");
830 return -1;
831 }
832
833 return 0;
834}
835
836/* See documentation for _Pickler_FastCall(). */
837static PyObject *
838_Unpickler_FastCall(UnpicklerObject *self, PyObject *func, PyObject *arg)
839{
840 PyObject *result = NULL;
841
842 ARG_TUP(self, arg);
843 if (self->arg) {
844 result = PyObject_Call(func, self->arg, NULL);
845 FREE_ARG_TUP(self);
846 }
847 return result;
848}
849
850/* Returns the size of the input on success, -1 on failure. This takes its
851 own reference to `input`. */
852static Py_ssize_t
853_Unpickler_SetStringInput(UnpicklerObject *self, PyObject *input)
854{
855 if (self->buffer.buf != NULL)
856 PyBuffer_Release(&self->buffer);
857 if (PyObject_GetBuffer(input, &self->buffer, PyBUF_CONTIG_RO) < 0)
858 return -1;
859 self->input_buffer = self->buffer.buf;
860 self->input_len = self->buffer.len;
861 self->next_read_idx = 0;
862 return self->input_len;
863}
864
/* Special byte count understood by _Unpickler_ReadFromFile(): read one
   whole line with readline() instead of a fixed number of bytes. */
static const Py_ssize_t READ_WHOLE_LINE = -1;
866
867/* If reading from a file, we need to only pull the bytes we need, since there
868 may be multiple pickle objects arranged contiguously in the same input
869 buffer.
870
871 If `n` is READ_WHOLE_LINE, read a whole line. Otherwise, read up to `n`
872 bytes from the input stream/buffer.
873
874 Update the unpickler's input buffer with the newly-read data. Returns -1 on
875 failure; on success, returns the number of bytes read from the file.
876
877 On success, self->input_len will be 0; this is intentional so that when
878 unpickling from a file, the "we've run out of data" code paths will trigger,
879 causing the Unpickler to go back to the file for more data. Use the returned
880 size to tell you how much data you can process. */
881static Py_ssize_t
882_Unpickler_ReadFromFile(UnpicklerObject *self, Py_ssize_t n)
883{
884 PyObject *data;
885 Py_ssize_t read_size;
886
887 assert(self->read != NULL);
888 assert(self->next_read_idx == 0);
889
890 if (n == READ_WHOLE_LINE)
891 data = PyObject_Call(self->readline, empty_tuple, NULL);
892 else {
893 PyObject *len = PyLong_FromSsize_t(n);
894 if (len == NULL)
895 return -1;
896 data = _Unpickler_FastCall(self, self->read, len);
897 }
898
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000899 if (data == NULL)
900 return -1;
901
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000902 read_size = _Unpickler_SetStringInput(self, data);
903 self->input_len = 0;
904 Py_DECREF(data);
905 return read_size;
906}
907
908/* Read `n` bytes from the unpickler's data source, storing the result in `*s`.
909
910 This should be used for all data reads, rather than accessing the unpickler's
911 input buffer directly. This method deals correctly with reading from input
912 streams, which the input buffer doesn't deal with.
913
914 Note that when reading from a file-like object, self->next_read_idx won't
915 be updated (it should remain at 0 for the entire unpickling process). You
916 should use this function's return value to know how many bytes you can
917 consume.
918
919 Returns -1 (with an exception set) on failure. On success, return the
920 number of chars read. */
921static Py_ssize_t
922_Unpickler_Read(UnpicklerObject *self, char **s, Py_ssize_t n)
923{
924 if (n == 0) {
925 *s = NULL;
926 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000927 }
928
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000929 /* This condition will always be true if self->read. */
930 if (self->next_read_idx + n > self->input_len) {
931 if (self->read) {
932 Py_ssize_t num_read;
933 assert(self->next_read_idx == self->input_len);
934 num_read = _Unpickler_ReadFromFile(self, n);
935 if (n < 0)
936 return -1;
937 if (num_read == n) {
938 *s = self->input_buffer;
939 return num_read;
940 }
941 }
942 PyErr_Format(PyExc_EOFError, "Ran out of input");
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000943 return -1;
944 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000945 assert(self->read == NULL);
946 *s = self->input_buffer + self->next_read_idx;
947 self->next_read_idx += n;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000948 return n;
949}
950
951static Py_ssize_t
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000952_Unpickler_CopyLine(UnpicklerObject *self, char *line, Py_ssize_t len,
953 char **result)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000954{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000955 char *input_line = PyMem_Realloc(self->input_line, len + 1);
956 if (input_line == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000957 return -1;
958
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000959 memcpy(input_line, line, len);
960 input_line[len] = '\0';
961 self->input_line = input_line;
962 *result = self->input_line;
963 return len;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000964}
965
Antoine Pitrouea99c5c2010-09-09 18:33:21 +0000966/* Read a line from the input stream/buffer. If we run off the end of the input
967 before hitting \n, return the data we found.
968
969 Returns the number of chars read, or -1 on failure. */
970static Py_ssize_t
971_Unpickler_Readline(UnpicklerObject *self, char **result)
972{
973 Py_ssize_t i, num_read;
974
975 /* This loop will never be entered if self->read is not NULL. */
976 for (i = self->next_read_idx; i < self->input_len; i++) {
977 assert(self->read == NULL);
978 if (self->input_buffer[i] == '\n') {
979 char *line_start = self->input_buffer + self->next_read_idx;
980 num_read = i - self->next_read_idx + 1;
981 self->next_read_idx = i + 1;
982 return _Unpickler_CopyLine(self, line_start, num_read, result);
983 }
984 }
985 if (self->read) {
986 assert(self->next_read_idx == self->input_len);
987 num_read = _Unpickler_ReadFromFile(self, READ_WHOLE_LINE);
988 if (num_read < 0)
989 return -1;
990 *result = self->input_buffer;
991 return num_read;
992 }
993
994 /* If we get here, we've run off the end of the input string. Return the
995 remaining string and let the caller figure it out. */
996 *result = self->input_buffer + self->next_read_idx;
997 num_read = i - self->next_read_idx;
998 self->next_read_idx = i;
999 return num_read;
1000}
1001
1002/* Returns -1 (with an exception set) on failure, 0 on success. The memo array
1003 will be modified in place. */
1004static int
1005_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size)
1006{
1007 Py_ssize_t i;
1008 PyObject **memo;
1009
1010 assert(new_size > self->memo_size);
1011
1012 memo = PyMem_REALLOC(self->memo, new_size * sizeof(PyObject *));
1013 if (memo == NULL) {
1014 PyErr_NoMemory();
1015 return -1;
1016 }
1017 self->memo = memo;
1018 for (i = self->memo_size; i < new_size; i++)
1019 self->memo[i] = NULL;
1020 self->memo_size = new_size;
1021 return 0;
1022}
1023
1024/* Returns NULL if idx is out of bounds. */
1025static PyObject *
1026_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx)
1027{
1028 if (idx < 0 || idx >= self->memo_size)
1029 return NULL;
1030
1031 return self->memo[idx];
1032}
1033
1034/* Returns -1 (with an exception set) on failure, 0 on success.
1035 This takes its own reference to `value`. */
1036static int
1037_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value)
1038{
1039 PyObject *old_item;
1040
1041 if (idx >= self->memo_size) {
1042 if (_Unpickler_ResizeMemoList(self, idx * 2) < 0)
1043 return -1;
1044 assert(idx < self->memo_size);
1045 }
1046 Py_INCREF(value);
1047 old_item = self->memo[idx];
1048 self->memo[idx] = value;
1049 Py_XDECREF(old_item);
1050 return 0;
1051}
1052
1053static PyObject **
1054_Unpickler_NewMemo(Py_ssize_t new_size)
1055{
1056 PyObject **memo = PyMem_MALLOC(new_size * sizeof(PyObject *));
1057 if (memo == NULL)
1058 return NULL;
1059 memset(memo, 0, new_size * sizeof(PyObject *));
1060 return memo;
1061}
1062
1063/* Free the unpickler's memo, taking care to decref any items left in it. */
1064static void
1065_Unpickler_MemoCleanup(UnpicklerObject *self)
1066{
1067 Py_ssize_t i;
1068 PyObject **memo = self->memo;
1069
1070 if (self->memo == NULL)
1071 return;
1072 self->memo = NULL;
1073 i = self->memo_size;
1074 while (--i >= 0) {
1075 Py_XDECREF(memo[i]);
1076 }
1077 PyMem_FREE(memo);
1078}
1079
1080static UnpicklerObject *
1081_Unpickler_New(void)
1082{
1083 UnpicklerObject *self;
1084
1085 self = PyObject_GC_New(UnpicklerObject, &Unpickler_Type);
1086 if (self == NULL)
1087 return NULL;
1088
1089 self->stack = (Pdata *)Pdata_New();
1090 if (self->stack == NULL) {
1091 Py_DECREF(self);
1092 return NULL;
1093 }
1094 memset(&self->buffer, 0, sizeof(Py_buffer));
1095
1096 self->memo_size = 32;
1097 self->memo = _Unpickler_NewMemo(self->memo_size);
1098 if (self->memo == NULL) {
1099 Py_DECREF(self);
1100 return NULL;
1101 }
1102
1103 self->arg = NULL;
1104 self->pers_func = NULL;
1105 self->input_buffer = NULL;
1106 self->input_line = NULL;
1107 self->input_len = 0;
1108 self->next_read_idx = 0;
1109 self->read = NULL;
1110 self->readline = NULL;
1111 self->encoding = NULL;
1112 self->errors = NULL;
1113 self->marks = NULL;
1114 self->num_marks = 0;
1115 self->marks_size = 0;
1116 self->proto = 0;
1117 self->fix_imports = 0;
1118
1119 return self;
1120}
1121
1122/* Returns -1 (with an exception set) on failure, 0 on success. This may
1123 be called once on a freshly created Pickler. */
1124static int
1125_Unpickler_SetInputStream(UnpicklerObject *self, PyObject *file)
1126{
1127 self->read = PyObject_GetAttrString(file, "read");
1128 self->readline = PyObject_GetAttrString(file, "readline");
1129 if (self->readline == NULL || self->read == NULL) {
1130 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1131 PyErr_SetString(PyExc_TypeError,
1132 "file must have 'read' and 'readline' attributes");
1133 Py_CLEAR(self->read);
1134 Py_CLEAR(self->readline);
1135 return -1;
1136 }
1137 return 0;
1138}
1139
1140/* Returns -1 (with an exception set) on failure, 0 on success. This may
1141 be called once on a freshly created Pickler. */
1142static int
1143_Unpickler_SetInputEncoding(UnpicklerObject *self,
1144 const char *encoding,
1145 const char *errors)
1146{
1147 if (encoding == NULL)
1148 encoding = "ASCII";
1149 if (errors == NULL)
1150 errors = "strict";
1151
1152 self->encoding = strdup(encoding);
1153 self->errors = strdup(errors);
1154 if (self->encoding == NULL || self->errors == NULL) {
1155 PyErr_NoMemory();
1156 return -1;
1157 }
1158 return 0;
1159}
1160
1161/* Generate a GET opcode for an object stored in the memo. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001162static int
1163memo_get(PicklerObject *self, PyObject *key)
1164{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001165 long *value;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001166 char pdata[30];
1167 int len;
1168
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001169 value = PyMemoTable_Get(self->memo, key);
1170 if (value == NULL) {
1171 PyErr_SetObject(PyExc_KeyError, key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001172 return -1;
1173 }
1174
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001175 if (!self->bin) {
1176 pdata[0] = GET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001177 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", *value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001178 len = (int)strlen(pdata);
1179 }
1180 else {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001181 if (*value < 256) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001182 pdata[0] = BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001183 pdata[1] = (unsigned char)(*value & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001184 len = 2;
1185 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001186 else if (*value <= 0xffffffffL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001187 pdata[0] = LONG_BINGET;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001188 pdata[1] = (unsigned char)(*value & 0xff);
1189 pdata[2] = (unsigned char)((*value >> 8) & 0xff);
1190 pdata[3] = (unsigned char)((*value >> 16) & 0xff);
1191 pdata[4] = (unsigned char)((*value >> 24) & 0xff);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001192 len = 5;
1193 }
1194 else { /* unlikely */
1195 PyErr_SetString(PicklingError,
1196 "memo id too large for LONG_BINGET");
1197 return -1;
1198 }
1199 }
1200
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001201 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001202 return -1;
1203
1204 return 0;
1205}
1206
1207/* Store an object in the memo, assign it a new unique ID based on the number
1208 of objects currently stored in the memo and generate a PUT opcode. */
1209static int
1210memo_put(PicklerObject *self, PyObject *obj)
1211{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001212 long x;
1213 char pdata[30];
1214 int len;
1215 int status = 0;
1216
1217 if (self->fast)
1218 return 0;
1219
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001220 x = PyMemoTable_Size(self->memo);
1221 if (PyMemoTable_Set(self->memo, obj, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001222 goto error;
1223
1224 if (!self->bin) {
1225 pdata[0] = PUT;
1226 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
1227 len = strlen(pdata);
1228 }
1229 else {
1230 if (x < 256) {
1231 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00001232 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 len = 2;
1234 }
1235 else if (x <= 0xffffffffL) {
1236 pdata[0] = LONG_BINPUT;
1237 pdata[1] = (unsigned char)(x & 0xff);
1238 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1239 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1240 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1241 len = 5;
1242 }
1243 else { /* unlikely */
1244 PyErr_SetString(PicklingError,
1245 "memo id too large for LONG_BINPUT");
1246 return -1;
1247 }
1248 }
1249
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001250 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001251 goto error;
1252
1253 if (0) {
1254 error:
1255 status = -1;
1256 }
1257
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001258 return status;
1259}
1260
1261static PyObject *
1262whichmodule(PyObject *global, PyObject *global_name)
1263{
1264 Py_ssize_t i, j;
1265 static PyObject *module_str = NULL;
1266 static PyObject *main_str = NULL;
1267 PyObject *module_name;
1268 PyObject *modules_dict;
1269 PyObject *module;
1270 PyObject *obj;
1271
1272 if (module_str == NULL) {
1273 module_str = PyUnicode_InternFromString("__module__");
1274 if (module_str == NULL)
1275 return NULL;
1276 main_str = PyUnicode_InternFromString("__main__");
1277 if (main_str == NULL)
1278 return NULL;
1279 }
1280
1281 module_name = PyObject_GetAttr(global, module_str);
1282
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00001283 /* In some rare cases (e.g., bound methods of extension types),
1284 __module__ can be None. If it is so, then search sys.modules
1285 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001286 if (module_name == Py_None) {
1287 Py_DECREF(module_name);
1288 goto search;
1289 }
1290
1291 if (module_name) {
1292 return module_name;
1293 }
1294 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1295 PyErr_Clear();
1296 else
1297 return NULL;
1298
1299 search:
1300 modules_dict = PySys_GetObject("modules");
1301 if (modules_dict == NULL)
1302 return NULL;
1303
1304 i = 0;
1305 module_name = NULL;
1306 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +00001307 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001308 continue;
1309
1310 obj = PyObject_GetAttr(module, global_name);
1311 if (obj == NULL) {
1312 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1313 PyErr_Clear();
1314 else
1315 return NULL;
1316 continue;
1317 }
1318
1319 if (obj != global) {
1320 Py_DECREF(obj);
1321 continue;
1322 }
1323
1324 Py_DECREF(obj);
1325 break;
1326 }
1327
1328 /* If no module is found, use __main__. */
1329 if (!j) {
1330 module_name = main_str;
1331 }
1332
1333 Py_INCREF(module_name);
1334 return module_name;
1335}
1336
1337/* fast_save_enter() and fast_save_leave() are guards against recursive
1338 objects when Pickler is used with the "fast mode" (i.e., with object
1339 memoization disabled). If the nesting of a list or dict object exceed
1340 FAST_NESTING_LIMIT, these guards will start keeping an internal
1341 reference to the seen list or dict objects and check whether these objects
1342 are recursive. These are not strictly necessary, since save() has a
1343 hard-coded recursion limit, but they give a nicer error message than the
1344 typical RuntimeError. */
1345static int
1346fast_save_enter(PicklerObject *self, PyObject *obj)
1347{
1348 /* if fast_nesting < 0, we're doing an error exit. */
1349 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
1350 PyObject *key = NULL;
1351 if (self->fast_memo == NULL) {
1352 self->fast_memo = PyDict_New();
1353 if (self->fast_memo == NULL) {
1354 self->fast_nesting = -1;
1355 return 0;
1356 }
1357 }
1358 key = PyLong_FromVoidPtr(obj);
1359 if (key == NULL)
1360 return 0;
1361 if (PyDict_GetItem(self->fast_memo, key)) {
1362 Py_DECREF(key);
1363 PyErr_Format(PyExc_ValueError,
1364 "fast mode: can't pickle cyclic objects "
1365 "including object type %.200s at %p",
1366 obj->ob_type->tp_name, obj);
1367 self->fast_nesting = -1;
1368 return 0;
1369 }
1370 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
1371 Py_DECREF(key);
1372 self->fast_nesting = -1;
1373 return 0;
1374 }
1375 Py_DECREF(key);
1376 }
1377 return 1;
1378}
1379
1380static int
1381fast_save_leave(PicklerObject *self, PyObject *obj)
1382{
1383 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
1384 PyObject *key = PyLong_FromVoidPtr(obj);
1385 if (key == NULL)
1386 return 0;
1387 if (PyDict_DelItem(self->fast_memo, key) < 0) {
1388 Py_DECREF(key);
1389 return 0;
1390 }
1391 Py_DECREF(key);
1392 }
1393 return 1;
1394}
1395
1396static int
1397save_none(PicklerObject *self, PyObject *obj)
1398{
1399 const char none_op = NONE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001400 if (_Pickler_Write(self, &none_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001401 return -1;
1402
1403 return 0;
1404}
1405
1406static int
1407save_bool(PicklerObject *self, PyObject *obj)
1408{
1409 static const char *buf[2] = { FALSE, TRUE };
1410 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
1411 int p = (obj == Py_True);
1412
1413 if (self->proto >= 2) {
1414 const char bool_op = p ? NEWTRUE : NEWFALSE;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001415 if (_Pickler_Write(self, &bool_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001416 return -1;
1417 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001418 else if (_Pickler_Write(self, buf[p], len[p]) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001419 return -1;
1420
1421 return 0;
1422}
1423
1424static int
1425save_int(PicklerObject *self, long x)
1426{
1427 char pdata[32];
1428 int len = 0;
1429
1430 if (!self->bin
1431#if SIZEOF_LONG > 4
1432 || x > 0x7fffffffL || x < -0x80000000L
1433#endif
1434 ) {
1435 /* Text-mode pickle, or long too big to fit in the 4-byte
1436 * signed BININT format: store as a string.
1437 */
Mark Dickinson8dd05142009-01-20 20:43:58 +00001438 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
1439 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001440 if (_Pickler_Write(self, pdata, strlen(pdata)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001441 return -1;
1442 }
1443 else {
1444 /* Binary pickle and x fits in a signed 4-byte int. */
1445 pdata[1] = (unsigned char)(x & 0xff);
1446 pdata[2] = (unsigned char)((x >> 8) & 0xff);
1447 pdata[3] = (unsigned char)((x >> 16) & 0xff);
1448 pdata[4] = (unsigned char)((x >> 24) & 0xff);
1449
1450 if ((pdata[4] == 0) && (pdata[3] == 0)) {
1451 if (pdata[2] == 0) {
1452 pdata[0] = BININT1;
1453 len = 2;
1454 }
1455 else {
1456 pdata[0] = BININT2;
1457 len = 3;
1458 }
1459 }
1460 else {
1461 pdata[0] = BININT;
1462 len = 5;
1463 }
1464
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001465 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001466 return -1;
1467 }
1468
1469 return 0;
1470}
1471
1472static int
1473save_long(PicklerObject *self, PyObject *obj)
1474{
1475 PyObject *repr = NULL;
1476 Py_ssize_t size;
1477 long val = PyLong_AsLong(obj);
1478 int status = 0;
1479
1480 const char long_op = LONG;
1481
1482 if (val == -1 && PyErr_Occurred()) {
1483 /* out of range for int pickling */
1484 PyErr_Clear();
1485 }
1486 else
1487 return save_int(self, val);
1488
1489 if (self->proto >= 2) {
1490 /* Linear-time pickling. */
1491 size_t nbits;
1492 size_t nbytes;
1493 unsigned char *pdata;
1494 char header[5];
1495 int i;
1496 int sign = _PyLong_Sign(obj);
1497
1498 if (sign == 0) {
1499 header[0] = LONG1;
1500 header[1] = 0; /* It's 0 -- an empty bytestring. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001501 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001502 goto error;
1503 return 0;
1504 }
1505 nbits = _PyLong_NumBits(obj);
1506 if (nbits == (size_t)-1 && PyErr_Occurred())
1507 goto error;
1508 /* How many bytes do we need? There are nbits >> 3 full
1509 * bytes of data, and nbits & 7 leftover bits. If there
1510 * are any leftover bits, then we clearly need another
1511 * byte. Wnat's not so obvious is that we *probably*
1512 * need another byte even if there aren't any leftovers:
1513 * the most-significant bit of the most-significant byte
1514 * acts like a sign bit, and it's usually got a sense
1515 * opposite of the one we need. The exception is longs
1516 * of the form -(2**(8*j-1)) for j > 0. Such a long is
1517 * its own 256's-complement, so has the right sign bit
1518 * even without the extra byte. That's a pain to check
1519 * for in advance, though, so we always grab an extra
1520 * byte at the start, and cut it back later if possible.
1521 */
1522 nbytes = (nbits >> 3) + 1;
1523 if (nbytes > INT_MAX) {
1524 PyErr_SetString(PyExc_OverflowError,
1525 "long too large to pickle");
1526 goto error;
1527 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001528 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001529 if (repr == NULL)
1530 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +00001531 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001532 i = _PyLong_AsByteArray((PyLongObject *)obj,
1533 pdata, nbytes,
1534 1 /* little endian */ , 1 /* signed */ );
1535 if (i < 0)
1536 goto error;
1537 /* If the long is negative, this may be a byte more than
1538 * needed. This is so iff the MSB is all redundant sign
1539 * bits.
1540 */
1541 if (sign < 0 &&
1542 nbytes > 1 &&
1543 pdata[nbytes - 1] == 0xff &&
1544 (pdata[nbytes - 2] & 0x80) != 0) {
1545 nbytes--;
1546 }
1547
1548 if (nbytes < 256) {
1549 header[0] = LONG1;
1550 header[1] = (unsigned char)nbytes;
1551 size = 2;
1552 }
1553 else {
1554 header[0] = LONG4;
1555 size = (int)nbytes;
1556 for (i = 1; i < 5; i++) {
1557 header[i] = (unsigned char)(size & 0xff);
1558 size >>= 8;
1559 }
1560 size = 5;
1561 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001562 if (_Pickler_Write(self, header, size) < 0 ||
1563 _Pickler_Write(self, (char *)pdata, (int)nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001564 goto error;
1565 }
1566 else {
1567 char *string;
1568
Mark Dickinson8dd05142009-01-20 20:43:58 +00001569 /* proto < 2: write the repr and newline. This is quadratic-time (in
1570 the number of digits), in both directions. We add a trailing 'L'
1571 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001572
1573 repr = PyObject_Repr(obj);
1574 if (repr == NULL)
1575 goto error;
1576
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001577 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001578 if (string == NULL)
1579 goto error;
1580
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001581 if (_Pickler_Write(self, &long_op, 1) < 0 ||
1582 _Pickler_Write(self, string, size) < 0 ||
1583 _Pickler_Write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001584 goto error;
1585 }
1586
1587 if (0) {
1588 error:
1589 status = -1;
1590 }
1591 Py_XDECREF(repr);
1592
1593 return status;
1594}
1595
1596static int
1597save_float(PicklerObject *self, PyObject *obj)
1598{
1599 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1600
1601 if (self->bin) {
1602 char pdata[9];
1603 pdata[0] = BINFLOAT;
1604 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1605 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001606 if (_Pickler_Write(self, pdata, 9) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001607 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001608 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001609 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001610 int result = -1;
1611 char *buf = NULL;
1612 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001613
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001614 if (_Pickler_Write(self, &op, 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001615 goto done;
1616
Mark Dickinson3e09f432009-04-17 08:41:23 +00001617 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001618 if (!buf) {
1619 PyErr_NoMemory();
1620 goto done;
1621 }
1622
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001623 if (_Pickler_Write(self, buf, strlen(buf)) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001624 goto done;
1625
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001626 if (_Pickler_Write(self, "\n", 1) < 0)
Eric Smith0923d1d2009-04-16 20:16:10 +00001627 goto done;
1628
1629 result = 0;
1630done:
1631 PyMem_Free(buf);
1632 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001633 }
1634
1635 return 0;
1636}
1637
1638static int
1639save_bytes(PicklerObject *self, PyObject *obj)
1640{
1641 if (self->proto < 3) {
1642 /* Older pickle protocols do not have an opcode for pickling bytes
1643 objects. Therefore, we need to fake the copy protocol (i.e.,
1644 the __reduce__ method) to permit bytes object unpickling. */
1645 PyObject *reduce_value = NULL;
1646 PyObject *bytelist = NULL;
1647 int status;
1648
1649 bytelist = PySequence_List(obj);
1650 if (bytelist == NULL)
1651 return -1;
1652
1653 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1654 bytelist);
1655 if (reduce_value == NULL) {
1656 Py_DECREF(bytelist);
1657 return -1;
1658 }
1659
1660 /* save_reduce() will memoize the object automatically. */
1661 status = save_reduce(self, reduce_value, obj);
1662 Py_DECREF(reduce_value);
1663 Py_DECREF(bytelist);
1664 return status;
1665 }
1666 else {
1667 Py_ssize_t size;
1668 char header[5];
1669 int len;
1670
1671 size = PyBytes_Size(obj);
1672 if (size < 0)
1673 return -1;
1674
1675 if (size < 256) {
1676 header[0] = SHORT_BINBYTES;
1677 header[1] = (unsigned char)size;
1678 len = 2;
1679 }
1680 else if (size <= 0xffffffffL) {
1681 header[0] = BINBYTES;
1682 header[1] = (unsigned char)(size & 0xff);
1683 header[2] = (unsigned char)((size >> 8) & 0xff);
1684 header[3] = (unsigned char)((size >> 16) & 0xff);
1685 header[4] = (unsigned char)((size >> 24) & 0xff);
1686 len = 5;
1687 }
1688 else {
1689 return -1; /* string too large */
1690 }
1691
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001692 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001693 return -1;
1694
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001695 if (_Pickler_Write(self, PyBytes_AS_STRING(obj), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001696 return -1;
1697
1698 if (memo_put(self, obj) < 0)
1699 return -1;
1700
1701 return 0;
1702 }
1703}
1704
1705/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1706 backslash and newline characters to \uXXXX escapes. */
1707static PyObject *
1708raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1709{
1710 PyObject *repr, *result;
1711 char *p;
1712 char *q;
1713
1714 static const char *hexdigits = "0123456789abcdef";
1715
1716#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001717 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001718#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001719 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001720#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001721
1722 if (size > PY_SSIZE_T_MAX / expandsize)
1723 return PyErr_NoMemory();
1724
1725 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001726 if (repr == NULL)
1727 return NULL;
1728 if (size == 0)
1729 goto done;
1730
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001731 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001732 while (size-- > 0) {
1733 Py_UNICODE ch = *s++;
1734#ifdef Py_UNICODE_WIDE
1735 /* Map 32-bit characters to '\Uxxxxxxxx' */
1736 if (ch >= 0x10000) {
1737 *p++ = '\\';
1738 *p++ = 'U';
1739 *p++ = hexdigits[(ch >> 28) & 0xf];
1740 *p++ = hexdigits[(ch >> 24) & 0xf];
1741 *p++ = hexdigits[(ch >> 20) & 0xf];
1742 *p++ = hexdigits[(ch >> 16) & 0xf];
1743 *p++ = hexdigits[(ch >> 12) & 0xf];
1744 *p++ = hexdigits[(ch >> 8) & 0xf];
1745 *p++ = hexdigits[(ch >> 4) & 0xf];
1746 *p++ = hexdigits[ch & 15];
1747 }
1748 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001749#else
1750 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1751 if (ch >= 0xD800 && ch < 0xDC00) {
1752 Py_UNICODE ch2;
1753 Py_UCS4 ucs;
1754
1755 ch2 = *s++;
1756 size--;
1757 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1758 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1759 *p++ = '\\';
1760 *p++ = 'U';
1761 *p++ = hexdigits[(ucs >> 28) & 0xf];
1762 *p++ = hexdigits[(ucs >> 24) & 0xf];
1763 *p++ = hexdigits[(ucs >> 20) & 0xf];
1764 *p++ = hexdigits[(ucs >> 16) & 0xf];
1765 *p++ = hexdigits[(ucs >> 12) & 0xf];
1766 *p++ = hexdigits[(ucs >> 8) & 0xf];
1767 *p++ = hexdigits[(ucs >> 4) & 0xf];
1768 *p++ = hexdigits[ucs & 0xf];
1769 continue;
1770 }
1771 /* Fall through: isolated surrogates are copied as-is */
1772 s--;
1773 size++;
1774 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001775#endif
1776 /* Map 16-bit characters to '\uxxxx' */
1777 if (ch >= 256 || ch == '\\' || ch == '\n') {
1778 *p++ = '\\';
1779 *p++ = 'u';
1780 *p++ = hexdigits[(ch >> 12) & 0xf];
1781 *p++ = hexdigits[(ch >> 8) & 0xf];
1782 *p++ = hexdigits[(ch >> 4) & 0xf];
1783 *p++ = hexdigits[ch & 15];
1784 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001785 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001786 else
1787 *p++ = (char) ch;
1788 }
1789 size = p - q;
1790
1791 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001792 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001793 Py_DECREF(repr);
1794 return result;
1795}
1796
1797static int
1798save_unicode(PicklerObject *self, PyObject *obj)
1799{
1800 Py_ssize_t size;
1801 PyObject *encoded = NULL;
1802
1803 if (self->bin) {
1804 char pdata[5];
1805
Victor Stinner485fb562010-04-13 11:07:24 +00001806 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1807 PyUnicode_GET_SIZE(obj),
1808 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001809 if (encoded == NULL)
1810 goto error;
1811
1812 size = PyBytes_GET_SIZE(encoded);
1813 if (size < 0 || size > 0xffffffffL)
1814 goto error; /* string too large */
1815
1816 pdata[0] = BINUNICODE;
1817 pdata[1] = (unsigned char)(size & 0xff);
1818 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1819 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1820 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1821
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001822 if (_Pickler_Write(self, pdata, 5) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001823 goto error;
1824
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001825 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001826 goto error;
1827 }
1828 else {
1829 const char unicode_op = UNICODE;
1830
1831 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1832 PyUnicode_GET_SIZE(obj));
1833 if (encoded == NULL)
1834 goto error;
1835
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001836 if (_Pickler_Write(self, &unicode_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001837 goto error;
1838
1839 size = PyBytes_GET_SIZE(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001840 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded), size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001841 goto error;
1842
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001843 if (_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001844 goto error;
1845 }
1846 if (memo_put(self, obj) < 0)
1847 goto error;
1848
1849 Py_DECREF(encoded);
1850 return 0;
1851
1852 error:
1853 Py_XDECREF(encoded);
1854 return -1;
1855}
1856
1857/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1858static int
1859store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1860{
1861 int i;
1862
1863 assert(PyTuple_Size(t) == len);
1864
1865 for (i = 0; i < len; i++) {
1866 PyObject *element = PyTuple_GET_ITEM(t, i);
1867
1868 if (element == NULL)
1869 return -1;
1870 if (save(self, element, 0) < 0)
1871 return -1;
1872 }
1873
1874 return 0;
1875}
1876
1877/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1878 * used across protocols to minimize the space needed to pickle them.
1879 * Tuples are also the only builtin immutable type that can be recursive
1880 * (a tuple can be reached from itself), and that requires some subtle
1881 * magic so that it works in all cases. IOW, this is a long routine.
1882 */
1883static int
1884save_tuple(PicklerObject *self, PyObject *obj)
1885{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001886 int len, i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001887
1888 const char mark_op = MARK;
1889 const char tuple_op = TUPLE;
1890 const char pop_op = POP;
1891 const char pop_mark_op = POP_MARK;
1892 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1893
1894 if ((len = PyTuple_Size(obj)) < 0)
1895 return -1;
1896
1897 if (len == 0) {
1898 char pdata[2];
1899
1900 if (self->proto) {
1901 pdata[0] = EMPTY_TUPLE;
1902 len = 1;
1903 }
1904 else {
1905 pdata[0] = MARK;
1906 pdata[1] = TUPLE;
1907 len = 2;
1908 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001909 if (_Pickler_Write(self, pdata, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001910 return -1;
1911 return 0;
1912 }
1913
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001914 /* The tuple isn't in the memo now. If it shows up there after
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001915 * saving the tuple elements, the tuple must be recursive, in
1916 * which case we'll pop everything we put on the stack, and fetch
1917 * its value from the memo.
1918 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001919 if (len <= 3 && self->proto >= 2) {
1920 /* Use TUPLE{1,2,3} opcodes. */
1921 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001922 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001923
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001924 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001925 /* pop the len elements */
1926 for (i = 0; i < len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001927 if (_Pickler_Write(self, &pop_op, 1) < 0)
1928 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001929 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001930 if (memo_get(self, obj) < 0)
1931 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001932
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001933 return 0;
1934 }
1935 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001936 if (_Pickler_Write(self, len2opcode + len, 1) < 0)
1937 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001938 }
1939 goto memoize;
1940 }
1941
1942 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1943 * Generate MARK e1 e2 ... TUPLE
1944 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001945 if (_Pickler_Write(self, &mark_op, 1) < 0)
1946 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001947
1948 if (store_tuple_elements(self, obj, len) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001949 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001950
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001951 if (PyMemoTable_Get(self->memo, obj)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001952 /* pop the stack stuff we pushed */
1953 if (self->bin) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001954 if (_Pickler_Write(self, &pop_mark_op, 1) < 0)
1955 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001956 }
1957 else {
1958 /* Note that we pop one more than len, to remove
1959 * the MARK too.
1960 */
1961 for (i = 0; i <= len; i++)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001962 if (_Pickler_Write(self, &pop_op, 1) < 0)
1963 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001964 }
1965 /* fetch from memo */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001966 if (memo_get(self, obj) < 0)
1967 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001968
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001969 return 0;
1970 }
1971 else { /* Not recursive. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001972 if (_Pickler_Write(self, &tuple_op, 1) < 0)
1973 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974 }
1975
1976 memoize:
1977 if (memo_put(self, obj) < 0)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001978 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001979
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00001980 return 0;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001981}
1982
1983/* iter is an iterator giving items, and we batch up chunks of
1984 * MARK item item ... item APPENDS
1985 * opcode sequences. Calling code should have arranged to first create an
1986 * empty list, or list-like object, for the APPENDS to operate on.
1987 * Returns 0 on success, <0 on error.
1988 */
1989static int
1990batch_list(PicklerObject *self, PyObject *iter)
1991{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001992 PyObject *obj = NULL;
1993 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001994 int i, n;
1995
1996 const char mark_op = MARK;
1997 const char append_op = APPEND;
1998 const char appends_op = APPENDS;
1999
2000 assert(iter != NULL);
2001
2002 /* XXX: I think this function could be made faster by avoiding the
2003 iterator interface and fetching objects directly from list using
2004 PyList_GET_ITEM.
2005 */
2006
2007 if (self->proto == 0) {
2008 /* APPENDS isn't available; do one at a time. */
2009 for (;;) {
2010 obj = PyIter_Next(iter);
2011 if (obj == NULL) {
2012 if (PyErr_Occurred())
2013 return -1;
2014 break;
2015 }
2016 i = save(self, obj, 0);
2017 Py_DECREF(obj);
2018 if (i < 0)
2019 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002020 if (_Pickler_Write(self, &append_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021 return -1;
2022 }
2023 return 0;
2024 }
2025
2026 /* proto > 0: write in batches of BATCHSIZE. */
2027 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002028 /* Get first item */
2029 firstitem = PyIter_Next(iter);
2030 if (firstitem == NULL) {
2031 if (PyErr_Occurred())
2032 goto error;
2033
2034 /* nothing more to add */
2035 break;
2036 }
2037
2038 /* Try to get a second item */
2039 obj = PyIter_Next(iter);
2040 if (obj == NULL) {
2041 if (PyErr_Occurred())
2042 goto error;
2043
2044 /* Only one item to write */
2045 if (save(self, firstitem, 0) < 0)
2046 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002047 if (_Pickler_Write(self, &append_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002048 goto error;
2049 Py_CLEAR(firstitem);
2050 break;
2051 }
2052
2053 /* More than one item to write */
2054
2055 /* Pump out MARK, items, APPENDS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002056 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002057 goto error;
2058
2059 if (save(self, firstitem, 0) < 0)
2060 goto error;
2061 Py_CLEAR(firstitem);
2062 n = 1;
2063
2064 /* Fetch and save up to BATCHSIZE items */
2065 while (obj) {
2066 if (save(self, obj, 0) < 0)
2067 goto error;
2068 Py_CLEAR(obj);
2069 n += 1;
2070
2071 if (n == BATCHSIZE)
2072 break;
2073
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002074 obj = PyIter_Next(iter);
2075 if (obj == NULL) {
2076 if (PyErr_Occurred())
2077 goto error;
2078 break;
2079 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002080 }
2081
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002082 if (_Pickler_Write(self, &appends_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002083 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002084
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002085 } while (n == BATCHSIZE);
2086 return 0;
2087
2088 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002089 Py_XDECREF(firstitem);
2090 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002091 return -1;
2092}
2093
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002094/* This is a variant of batch_list() above, specialized for lists (with no
2095 * support for list subclasses). Like batch_list(), we batch up chunks of
2096 * MARK item item ... item APPENDS
2097 * opcode sequences. Calling code should have arranged to first create an
2098 * empty list, or list-like object, for the APPENDS to operate on.
2099 * Returns 0 on success, -1 on error.
2100 *
2101 * This version is considerably faster than batch_list(), if less general.
2102 *
2103 * Note that this only works for protocols > 0.
2104 */
2105static int
2106batch_list_exact(PicklerObject *self, PyObject *obj)
2107{
2108 PyObject *item = NULL;
2109 int this_batch, total;
2110
2111 const char append_op = APPEND;
2112 const char appends_op = APPENDS;
2113 const char mark_op = MARK;
2114
2115 assert(obj != NULL);
2116 assert(self->proto > 0);
2117 assert(PyList_CheckExact(obj));
2118
2119 if (PyList_GET_SIZE(obj) == 1) {
2120 item = PyList_GET_ITEM(obj, 0);
2121 if (save(self, item, 0) < 0)
2122 return -1;
2123 if (_Pickler_Write(self, &append_op, 1) < 0)
2124 return -1;
2125 return 0;
2126 }
2127
2128 /* Write in batches of BATCHSIZE. */
2129 total = 0;
2130 do {
2131 this_batch = 0;
2132 if (_Pickler_Write(self, &mark_op, 1) < 0)
2133 return -1;
2134 while (total < PyList_GET_SIZE(obj)) {
2135 item = PyList_GET_ITEM(obj, total);
2136 if (save(self, item, 0) < 0)
2137 return -1;
2138 total++;
2139 if (++this_batch == BATCHSIZE)
2140 break;
2141 }
2142 if (_Pickler_Write(self, &appends_op, 1) < 0)
2143 return -1;
2144
2145 } while (total < PyList_GET_SIZE(obj));
2146
2147 return 0;
2148}
2149
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002150static int
2151save_list(PicklerObject *self, PyObject *obj)
2152{
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002153 char header[3];
2154 int len;
2155 int status = 0;
2156
2157 if (self->fast && !fast_save_enter(self, obj))
2158 goto error;
2159
2160 /* Create an empty list. */
2161 if (self->bin) {
2162 header[0] = EMPTY_LIST;
2163 len = 1;
2164 }
2165 else {
2166 header[0] = MARK;
2167 header[1] = LIST;
2168 len = 2;
2169 }
2170
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002171 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002172 goto error;
2173
2174 /* Get list length, and bow out early if empty. */
2175 if ((len = PyList_Size(obj)) < 0)
2176 goto error;
2177
2178 if (memo_put(self, obj) < 0)
2179 goto error;
2180
2181 if (len != 0) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002182 /* Materialize the list elements. */
2183 if (PyList_CheckExact(obj) && self->proto > 0) {
2184 if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
2185 status = batch_list_exact(self, obj);
2186 Py_LeaveRecursiveCall();
2187 }
2188 } else {
2189 PyObject *iter = PyObject_GetIter(obj);
2190 if (iter == NULL)
2191 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002192
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002193 if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
2194 status = batch_list(self, iter);
2195 Py_LeaveRecursiveCall();
2196 }
2197 Py_DECREF(iter);
2198 }
2199 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002200 if (0) {
2201 error:
2202 status = -1;
2203 }
2204
2205 if (self->fast && !fast_save_leave(self, obj))
2206 status = -1;
2207
2208 return status;
2209}
2210
2211/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
2212 * MARK key value ... key value SETITEMS
2213 * opcode sequences. Calling code should have arranged to first create an
2214 * empty dict, or dict-like object, for the SETITEMS to operate on.
2215 * Returns 0 on success, <0 on error.
2216 *
2217 * This is very much like batch_list(). The difference between saving
2218 * elements directly, and picking apart two-tuples, is so long-winded at
2219 * the C level, though, that attempts to combine these routines were too
2220 * ugly to bear.
2221 */
2222static int
2223batch_dict(PicklerObject *self, PyObject *iter)
2224{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002225 PyObject *obj = NULL;
2226 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002227 int i, n;
2228
2229 const char mark_op = MARK;
2230 const char setitem_op = SETITEM;
2231 const char setitems_op = SETITEMS;
2232
2233 assert(iter != NULL);
2234
2235 if (self->proto == 0) {
2236 /* SETITEMS isn't available; do one at a time. */
2237 for (;;) {
2238 obj = PyIter_Next(iter);
2239 if (obj == NULL) {
2240 if (PyErr_Occurred())
2241 return -1;
2242 break;
2243 }
2244 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2245 PyErr_SetString(PyExc_TypeError, "dict items "
2246 "iterator must return 2-tuples");
2247 return -1;
2248 }
2249 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
2250 if (i >= 0)
2251 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
2252 Py_DECREF(obj);
2253 if (i < 0)
2254 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002255 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002256 return -1;
2257 }
2258 return 0;
2259 }
2260
2261 /* proto > 0: write in batches of BATCHSIZE. */
2262 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002263 /* Get first item */
2264 firstitem = PyIter_Next(iter);
2265 if (firstitem == NULL) {
2266 if (PyErr_Occurred())
2267 goto error;
2268
2269 /* nothing more to add */
2270 break;
2271 }
2272 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
2273 PyErr_SetString(PyExc_TypeError, "dict items "
2274 "iterator must return 2-tuples");
2275 goto error;
2276 }
2277
2278 /* Try to get a second item */
2279 obj = PyIter_Next(iter);
2280 if (obj == NULL) {
2281 if (PyErr_Occurred())
2282 goto error;
2283
2284 /* Only one item to write */
2285 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2286 goto error;
2287 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2288 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002289 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002290 goto error;
2291 Py_CLEAR(firstitem);
2292 break;
2293 }
2294
2295 /* More than one item to write */
2296
2297 /* Pump out MARK, items, SETITEMS. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002298 if (_Pickler_Write(self, &mark_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002299 goto error;
2300
2301 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
2302 goto error;
2303 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
2304 goto error;
2305 Py_CLEAR(firstitem);
2306 n = 1;
2307
2308 /* Fetch and save up to BATCHSIZE items */
2309 while (obj) {
2310 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
2311 PyErr_SetString(PyExc_TypeError, "dict items "
2312 "iterator must return 2-tuples");
2313 goto error;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002314 }
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002315 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
2316 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
2317 goto error;
2318 Py_CLEAR(obj);
2319 n += 1;
2320
2321 if (n == BATCHSIZE)
2322 break;
2323
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002324 obj = PyIter_Next(iter);
2325 if (obj == NULL) {
2326 if (PyErr_Occurred())
2327 goto error;
2328 break;
2329 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002330 }
2331
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002332 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002333 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002334
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002335 } while (n == BATCHSIZE);
2336 return 0;
2337
2338 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00002339 Py_XDECREF(firstitem);
2340 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002341 return -1;
2342}
2343
Collin Winter5c9b02d2009-05-25 05:43:30 +00002344/* This is a variant of batch_dict() above that specializes for dicts, with no
2345 * support for dict subclasses. Like batch_dict(), we batch up chunks of
2346 * MARK key value ... key value SETITEMS
2347 * opcode sequences. Calling code should have arranged to first create an
2348 * empty dict, or dict-like object, for the SETITEMS to operate on.
2349 * Returns 0 on success, -1 on error.
2350 *
2351 * Note that this currently doesn't work for protocol 0.
2352 */
2353static int
2354batch_dict_exact(PicklerObject *self, PyObject *obj)
2355{
2356 PyObject *key = NULL, *value = NULL;
2357 int i;
2358 Py_ssize_t dict_size, ppos = 0;
2359
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00002360 const char mark_op = MARK;
2361 const char setitem_op = SETITEM;
2362 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00002363
2364 assert(obj != NULL);
2365 assert(self->proto > 0);
2366
2367 dict_size = PyDict_Size(obj);
2368
2369 /* Special-case len(d) == 1 to save space. */
2370 if (dict_size == 1) {
2371 PyDict_Next(obj, &ppos, &key, &value);
2372 if (save(self, key, 0) < 0)
2373 return -1;
2374 if (save(self, value, 0) < 0)
2375 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002376 if (_Pickler_Write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002377 return -1;
2378 return 0;
2379 }
2380
2381 /* Write in batches of BATCHSIZE. */
2382 do {
2383 i = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002384 if (_Pickler_Write(self, &mark_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002385 return -1;
2386 while (PyDict_Next(obj, &ppos, &key, &value)) {
2387 if (save(self, key, 0) < 0)
2388 return -1;
2389 if (save(self, value, 0) < 0)
2390 return -1;
2391 if (++i == BATCHSIZE)
2392 break;
2393 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002394 if (_Pickler_Write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00002395 return -1;
2396 if (PyDict_Size(obj) != dict_size) {
2397 PyErr_Format(
2398 PyExc_RuntimeError,
2399 "dictionary changed size during iteration");
2400 return -1;
2401 }
2402
2403 } while (i == BATCHSIZE);
2404 return 0;
2405}
2406
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002407static int
2408save_dict(PicklerObject *self, PyObject *obj)
2409{
2410 PyObject *items, *iter;
2411 char header[3];
2412 int len;
2413 int status = 0;
2414
2415 if (self->fast && !fast_save_enter(self, obj))
2416 goto error;
2417
2418 /* Create an empty dict. */
2419 if (self->bin) {
2420 header[0] = EMPTY_DICT;
2421 len = 1;
2422 }
2423 else {
2424 header[0] = MARK;
2425 header[1] = DICT;
2426 len = 2;
2427 }
2428
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002429 if (_Pickler_Write(self, header, len) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002430 goto error;
2431
2432 /* Get dict size, and bow out early if empty. */
2433 if ((len = PyDict_Size(obj)) < 0)
2434 goto error;
2435
2436 if (memo_put(self, obj) < 0)
2437 goto error;
2438
2439 if (len != 0) {
2440 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00002441 if (PyDict_CheckExact(obj) && self->proto > 0) {
2442 /* We can take certain shortcuts if we know this is a dict and
2443 not a dict subclass. */
2444 if (Py_EnterRecursiveCall(" while pickling an object") == 0) {
2445 status = batch_dict_exact(self, obj);
2446 Py_LeaveRecursiveCall();
2447 }
2448 } else {
2449 items = PyObject_CallMethod(obj, "items", "()");
2450 if (items == NULL)
2451 goto error;
2452 iter = PyObject_GetIter(items);
2453 Py_DECREF(items);
2454 if (iter == NULL)
2455 goto error;
2456 status = batch_dict(self, iter);
2457 Py_DECREF(iter);
2458 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002459 }
2460
2461 if (0) {
2462 error:
2463 status = -1;
2464 }
2465
2466 if (self->fast && !fast_save_leave(self, obj))
2467 status = -1;
2468
2469 return status;
2470}
2471
2472static int
2473save_global(PicklerObject *self, PyObject *obj, PyObject *name)
2474{
2475 static PyObject *name_str = NULL;
2476 PyObject *global_name = NULL;
2477 PyObject *module_name = NULL;
2478 PyObject *module = NULL;
2479 PyObject *cls;
2480 int status = 0;
2481
2482 const char global_op = GLOBAL;
2483
2484 if (name_str == NULL) {
2485 name_str = PyUnicode_InternFromString("__name__");
2486 if (name_str == NULL)
2487 goto error;
2488 }
2489
2490 if (name) {
2491 global_name = name;
2492 Py_INCREF(global_name);
2493 }
2494 else {
2495 global_name = PyObject_GetAttr(obj, name_str);
2496 if (global_name == NULL)
2497 goto error;
2498 }
2499
2500 module_name = whichmodule(obj, global_name);
2501 if (module_name == NULL)
2502 goto error;
2503
2504 /* XXX: Change to use the import C API directly with level=0 to disallow
2505 relative imports.
2506
2507 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
2508 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
2509 custom import functions (IMHO, this would be a nice security
2510 feature). The import C API would need to be extended to support the
2511 extra parameters of __import__ to fix that. */
2512 module = PyImport_Import(module_name);
2513 if (module == NULL) {
2514 PyErr_Format(PicklingError,
2515 "Can't pickle %R: import of module %R failed",
2516 obj, module_name);
2517 goto error;
2518 }
2519 cls = PyObject_GetAttr(module, global_name);
2520 if (cls == NULL) {
2521 PyErr_Format(PicklingError,
2522 "Can't pickle %R: attribute lookup %S.%S failed",
2523 obj, module_name, global_name);
2524 goto error;
2525 }
2526 if (cls != obj) {
2527 Py_DECREF(cls);
2528 PyErr_Format(PicklingError,
2529 "Can't pickle %R: it's not the same object as %S.%S",
2530 obj, module_name, global_name);
2531 goto error;
2532 }
2533 Py_DECREF(cls);
2534
2535 if (self->proto >= 2) {
2536 /* See whether this is in the extension registry, and if
2537 * so generate an EXT opcode.
2538 */
2539 PyObject *code_obj; /* extension code as Python object */
2540 long code; /* extension code as C value */
2541 char pdata[5];
2542 int n;
2543
2544 PyTuple_SET_ITEM(two_tuple, 0, module_name);
2545 PyTuple_SET_ITEM(two_tuple, 1, global_name);
2546 code_obj = PyDict_GetItem(extension_registry, two_tuple);
2547 /* The object is not registered in the extension registry.
2548 This is the most likely code path. */
2549 if (code_obj == NULL)
2550 goto gen_global;
2551
2552 /* XXX: pickle.py doesn't check neither the type, nor the range
2553 of the value returned by the extension_registry. It should for
2554 consistency. */
2555
2556 /* Verify code_obj has the right type and value. */
2557 if (!PyLong_Check(code_obj)) {
2558 PyErr_Format(PicklingError,
2559 "Can't pickle %R: extension code %R isn't an integer",
2560 obj, code_obj);
2561 goto error;
2562 }
2563 code = PyLong_AS_LONG(code_obj);
2564 if (code <= 0 || code > 0x7fffffffL) {
2565 PyErr_Format(PicklingError,
2566 "Can't pickle %R: extension code %ld is out of range",
2567 obj, code);
2568 goto error;
2569 }
2570
2571 /* Generate an EXT opcode. */
2572 if (code <= 0xff) {
2573 pdata[0] = EXT1;
2574 pdata[1] = (unsigned char)code;
2575 n = 2;
2576 }
2577 else if (code <= 0xffff) {
2578 pdata[0] = EXT2;
2579 pdata[1] = (unsigned char)(code & 0xff);
2580 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2581 n = 3;
2582 }
2583 else {
2584 pdata[0] = EXT4;
2585 pdata[1] = (unsigned char)(code & 0xff);
2586 pdata[2] = (unsigned char)((code >> 8) & 0xff);
2587 pdata[3] = (unsigned char)((code >> 16) & 0xff);
2588 pdata[4] = (unsigned char)((code >> 24) & 0xff);
2589 n = 5;
2590 }
2591
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002592 if (_Pickler_Write(self, pdata, n) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002593 goto error;
2594 }
2595 else {
2596 /* Generate a normal global opcode if we are using a pickle
2597 protocol <= 2, or if the object is not registered in the
2598 extension registry. */
2599 PyObject *encoded;
2600 PyObject *(*unicode_encoder)(PyObject *);
2601
2602 gen_global:
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002603 if (_Pickler_Write(self, &global_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002604 goto error;
2605
2606 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
2607 the module name and the global name using UTF-8. We do so only when
2608 we are using the pickle protocol newer than version 3. This is to
2609 ensure compatibility with older Unpickler running on Python 2.x. */
2610 if (self->proto >= 3) {
2611 unicode_encoder = PyUnicode_AsUTF8String;
2612 }
2613 else {
2614 unicode_encoder = PyUnicode_AsASCIIString;
2615 }
2616
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002617 /* For protocol < 3 and if the user didn't request against doing so,
2618 we convert module names to the old 2.x module names. */
2619 if (self->fix_imports) {
2620 PyObject *key;
2621 PyObject *item;
2622
2623 key = PyTuple_Pack(2, module_name, global_name);
2624 if (key == NULL)
2625 goto error;
2626 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2627 Py_DECREF(key);
2628 if (item) {
2629 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2630 PyErr_Format(PyExc_RuntimeError,
2631 "_compat_pickle.REVERSE_NAME_MAPPING values "
2632 "should be 2-tuples, not %.200s",
2633 Py_TYPE(item)->tp_name);
2634 goto error;
2635 }
2636 Py_CLEAR(module_name);
2637 Py_CLEAR(global_name);
2638 module_name = PyTuple_GET_ITEM(item, 0);
2639 global_name = PyTuple_GET_ITEM(item, 1);
2640 if (!PyUnicode_Check(module_name) ||
2641 !PyUnicode_Check(global_name)) {
2642 PyErr_Format(PyExc_RuntimeError,
2643 "_compat_pickle.REVERSE_NAME_MAPPING values "
2644 "should be pairs of str, not (%.200s, %.200s)",
2645 Py_TYPE(module_name)->tp_name,
2646 Py_TYPE(global_name)->tp_name);
2647 goto error;
2648 }
2649 Py_INCREF(module_name);
2650 Py_INCREF(global_name);
2651 }
2652 else if (PyErr_Occurred()) {
2653 goto error;
2654 }
2655
2656 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2657 if (item) {
2658 if (!PyUnicode_Check(item)) {
2659 PyErr_Format(PyExc_RuntimeError,
2660 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2661 "should be strings, not %.200s",
2662 Py_TYPE(item)->tp_name);
2663 goto error;
2664 }
2665 Py_CLEAR(module_name);
2666 module_name = item;
2667 Py_INCREF(module_name);
2668 }
2669 else if (PyErr_Occurred()) {
2670 goto error;
2671 }
2672 }
2673
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002674 /* Save the name of the module. */
2675 encoded = unicode_encoder(module_name);
2676 if (encoded == NULL) {
2677 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2678 PyErr_Format(PicklingError,
2679 "can't pickle module identifier '%S' using "
2680 "pickle protocol %i", module_name, self->proto);
2681 goto error;
2682 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002683 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002684 PyBytes_GET_SIZE(encoded)) < 0) {
2685 Py_DECREF(encoded);
2686 goto error;
2687 }
2688 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002689 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002690 goto error;
2691
2692 /* Save the name of the module. */
2693 encoded = unicode_encoder(global_name);
2694 if (encoded == NULL) {
2695 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2696 PyErr_Format(PicklingError,
2697 "can't pickle global identifier '%S' using "
2698 "pickle protocol %i", global_name, self->proto);
2699 goto error;
2700 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002701 if (_Pickler_Write(self, PyBytes_AS_STRING(encoded),
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002702 PyBytes_GET_SIZE(encoded)) < 0) {
2703 Py_DECREF(encoded);
2704 goto error;
2705 }
2706 Py_DECREF(encoded);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002707 if(_Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002708 goto error;
2709
2710 /* Memoize the object. */
2711 if (memo_put(self, obj) < 0)
2712 goto error;
2713 }
2714
2715 if (0) {
2716 error:
2717 status = -1;
2718 }
2719 Py_XDECREF(module_name);
2720 Py_XDECREF(global_name);
2721 Py_XDECREF(module);
2722
2723 return status;
2724}
2725
2726static int
2727save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2728{
2729 PyObject *pid = NULL;
2730 int status = 0;
2731
2732 const char persid_op = PERSID;
2733 const char binpersid_op = BINPERSID;
2734
2735 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002736 pid = _Pickler_FastCall(self, func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002737 if (pid == NULL)
2738 return -1;
2739
2740 if (pid != Py_None) {
2741 if (self->bin) {
2742 if (save(self, pid, 1) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002743 _Pickler_Write(self, &binpersid_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002744 goto error;
2745 }
2746 else {
2747 PyObject *pid_str = NULL;
2748 char *pid_ascii_bytes;
2749 Py_ssize_t size;
2750
2751 pid_str = PyObject_Str(pid);
2752 if (pid_str == NULL)
2753 goto error;
2754
2755 /* XXX: Should it check whether the persistent id only contains
2756 ASCII characters? And what if the pid contains embedded
2757 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002758 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002759 Py_DECREF(pid_str);
2760 if (pid_ascii_bytes == NULL)
2761 goto error;
2762
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002763 if (_Pickler_Write(self, &persid_op, 1) < 0 ||
2764 _Pickler_Write(self, pid_ascii_bytes, size) < 0 ||
2765 _Pickler_Write(self, "\n", 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002766 goto error;
2767 }
2768 status = 1;
2769 }
2770
2771 if (0) {
2772 error:
2773 status = -1;
2774 }
2775 Py_XDECREF(pid);
2776
2777 return status;
2778}
2779
2780/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2781 * appropriate __reduce__ method for obj.
2782 */
2783static int
2784save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2785{
2786 PyObject *callable;
2787 PyObject *argtup;
2788 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002789 PyObject *listitems = Py_None;
2790 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002791 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002792
2793 int use_newobj = self->proto >= 2;
2794
2795 const char reduce_op = REDUCE;
2796 const char build_op = BUILD;
2797 const char newobj_op = NEWOBJ;
2798
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002799 size = PyTuple_Size(args);
2800 if (size < 2 || size > 5) {
2801 PyErr_SetString(PicklingError, "tuple returned by "
2802 "__reduce__ must contain 2 through 5 elements");
2803 return -1;
2804 }
2805
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002806 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2807 &callable, &argtup, &state, &listitems, &dictitems))
2808 return -1;
2809
2810 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002811 PyErr_SetString(PicklingError, "first item of the tuple "
2812 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002813 return -1;
2814 }
2815 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002816 PyErr_SetString(PicklingError, "second item of the tuple "
2817 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002818 return -1;
2819 }
2820
2821 if (state == Py_None)
2822 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002823
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002824 if (listitems == Py_None)
2825 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002826 else if (!PyIter_Check(listitems)) {
2827 PyErr_Format(PicklingError, "Fourth element of tuple"
2828 "returned by __reduce__ must be an iterator, not %s",
2829 Py_TYPE(listitems)->tp_name);
2830 return -1;
2831 }
2832
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002833 if (dictitems == Py_None)
2834 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002835 else if (!PyIter_Check(dictitems)) {
2836 PyErr_Format(PicklingError, "Fifth element of tuple"
2837 "returned by __reduce__ must be an iterator, not %s",
2838 Py_TYPE(dictitems)->tp_name);
2839 return -1;
2840 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002841
2842 /* Protocol 2 special case: if callable's name is __newobj__, use
2843 NEWOBJ. */
2844 if (use_newobj) {
2845 static PyObject *newobj_str = NULL;
2846 PyObject *name_str;
2847
2848 if (newobj_str == NULL) {
2849 newobj_str = PyUnicode_InternFromString("__newobj__");
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002850 if (newobj_str == NULL)
2851 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002852 }
2853
2854 name_str = PyObject_GetAttrString(callable, "__name__");
2855 if (name_str == NULL) {
2856 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2857 PyErr_Clear();
2858 else
2859 return -1;
2860 use_newobj = 0;
2861 }
2862 else {
2863 use_newobj = PyUnicode_Check(name_str) &&
2864 PyUnicode_Compare(name_str, newobj_str) == 0;
2865 Py_DECREF(name_str);
2866 }
2867 }
2868 if (use_newobj) {
2869 PyObject *cls;
2870 PyObject *newargtup;
2871 PyObject *obj_class;
2872 int p;
2873
2874 /* Sanity checks. */
2875 if (Py_SIZE(argtup) < 1) {
2876 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2877 return -1;
2878 }
2879
2880 cls = PyTuple_GET_ITEM(argtup, 0);
2881 if (!PyObject_HasAttrString(cls, "__new__")) {
2882 PyErr_SetString(PicklingError, "args[0] from "
2883 "__newobj__ args has no __new__");
2884 return -1;
2885 }
2886
2887 if (obj != NULL) {
2888 obj_class = PyObject_GetAttrString(obj, "__class__");
2889 if (obj_class == NULL) {
2890 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2891 PyErr_Clear();
2892 else
2893 return -1;
2894 }
2895 p = obj_class != cls; /* true iff a problem */
2896 Py_DECREF(obj_class);
2897 if (p) {
2898 PyErr_SetString(PicklingError, "args[0] from "
2899 "__newobj__ args has the wrong class");
2900 return -1;
2901 }
2902 }
2903 /* XXX: These calls save() are prone to infinite recursion. Imagine
2904 what happen if the value returned by the __reduce__() method of
2905 some extension type contains another object of the same type. Ouch!
2906
2907 Here is a quick example, that I ran into, to illustrate what I
2908 mean:
2909
2910 >>> import pickle, copyreg
2911 >>> copyreg.dispatch_table.pop(complex)
2912 >>> pickle.dumps(1+2j)
2913 Traceback (most recent call last):
2914 ...
2915 RuntimeError: maximum recursion depth exceeded
2916
2917 Removing the complex class from copyreg.dispatch_table made the
2918 __reduce_ex__() method emit another complex object:
2919
2920 >>> (1+1j).__reduce_ex__(2)
2921 (<function __newobj__ at 0xb7b71c3c>,
2922 (<class 'complex'>, (1+1j)), None, None, None)
2923
2924 Thus when save() was called on newargstup (the 2nd item) recursion
2925 ensued. Of course, the bug was in the complex class which had a
2926 broken __getnewargs__() that emitted another complex object. But,
2927 the point, here, is it is quite easy to end up with a broken reduce
2928 function. */
2929
2930 /* Save the class and its __new__ arguments. */
2931 if (save(self, cls, 0) < 0)
2932 return -1;
2933
2934 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2935 if (newargtup == NULL)
2936 return -1;
2937
2938 p = save(self, newargtup, 0);
2939 Py_DECREF(newargtup);
2940 if (p < 0)
2941 return -1;
2942
2943 /* Add NEWOBJ opcode. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002944 if (_Pickler_Write(self, &newobj_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002945 return -1;
2946 }
2947 else { /* Not using NEWOBJ. */
2948 if (save(self, callable, 0) < 0 ||
2949 save(self, argtup, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002950 _Pickler_Write(self, &reduce_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002951 return -1;
2952 }
2953
2954 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2955 the caller do not want to memoize the object. Not particularly useful,
2956 but that is to mimic the behavior save_reduce() in pickle.py when
2957 obj is None. */
2958 if (obj && memo_put(self, obj) < 0)
2959 return -1;
2960
2961 if (listitems && batch_list(self, listitems) < 0)
2962 return -1;
2963
2964 if (dictitems && batch_dict(self, dictitems) < 0)
2965 return -1;
2966
2967 if (state) {
2968 if (save(self, state, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00002969 _Pickler_Write(self, &build_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002970 return -1;
2971 }
2972
2973 return 0;
2974}
2975
2976static int
2977save(PicklerObject *self, PyObject *obj, int pers_save)
2978{
2979 PyTypeObject *type;
2980 PyObject *reduce_func = NULL;
2981 PyObject *reduce_value = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002982 int status = 0;
2983
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002984 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2985 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002986
2987 /* The extra pers_save argument is necessary to avoid calling save_pers()
2988 on its returned object. */
2989 if (!pers_save && self->pers_func) {
2990 /* save_pers() returns:
2991 -1 to signal an error;
2992 0 if it did nothing successfully;
2993 1 if a persistent id was saved.
2994 */
2995 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2996 goto done;
2997 }
2998
2999 type = Py_TYPE(obj);
3000
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003001 /* The old cPickle had an optimization that used switch-case statement
3002 dispatching on the first letter of the type name. This has was removed
3003 since benchmarks shown that this optimization was actually slowing
3004 things down. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003005
3006 /* Atom types; these aren't memoized, so don't check the memo. */
3007
3008 if (obj == Py_None) {
3009 status = save_none(self, obj);
3010 goto done;
3011 }
3012 else if (obj == Py_False || obj == Py_True) {
3013 status = save_bool(self, obj);
3014 goto done;
3015 }
3016 else if (type == &PyLong_Type) {
3017 status = save_long(self, obj);
3018 goto done;
3019 }
3020 else if (type == &PyFloat_Type) {
3021 status = save_float(self, obj);
3022 goto done;
3023 }
3024
3025 /* Check the memo to see if it has the object. If so, generate
3026 a GET (or BINGET) opcode, instead of pickling the object
3027 once again. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003028 if (PyMemoTable_Get(self->memo, obj)) {
3029 if (memo_get(self, obj) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003030 goto error;
3031 goto done;
3032 }
3033
3034 if (type == &PyBytes_Type) {
3035 status = save_bytes(self, obj);
3036 goto done;
3037 }
3038 else if (type == &PyUnicode_Type) {
3039 status = save_unicode(self, obj);
3040 goto done;
3041 }
3042 else if (type == &PyDict_Type) {
3043 status = save_dict(self, obj);
3044 goto done;
3045 }
3046 else if (type == &PyList_Type) {
3047 status = save_list(self, obj);
3048 goto done;
3049 }
3050 else if (type == &PyTuple_Type) {
3051 status = save_tuple(self, obj);
3052 goto done;
3053 }
3054 else if (type == &PyType_Type) {
3055 status = save_global(self, obj, NULL);
3056 goto done;
3057 }
3058 else if (type == &PyFunction_Type) {
3059 status = save_global(self, obj, NULL);
3060 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
3061 /* fall back to reduce */
3062 PyErr_Clear();
3063 }
3064 else {
3065 goto done;
3066 }
3067 }
3068 else if (type == &PyCFunction_Type) {
3069 status = save_global(self, obj, NULL);
3070 goto done;
3071 }
3072 else if (PyType_IsSubtype(type, &PyType_Type)) {
3073 status = save_global(self, obj, NULL);
3074 goto done;
3075 }
3076
3077 /* XXX: This part needs some unit tests. */
3078
3079 /* Get a reduction callable, and call it. This may come from
3080 * copyreg.dispatch_table, the object's __reduce_ex__ method,
3081 * or the object's __reduce__ method.
3082 */
3083 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
3084 if (reduce_func != NULL) {
3085 /* Here, the reference count of the reduce_func object returned by
3086 PyDict_GetItem needs to be increased to be consistent with the one
3087 returned by PyObject_GetAttr. This is allow us to blindly DECREF
3088 reduce_func at the end of the save() routine.
3089 */
3090 Py_INCREF(reduce_func);
3091 Py_INCREF(obj);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003092 reduce_value = _Pickler_FastCall(self, reduce_func, obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003093 }
3094 else {
3095 static PyObject *reduce_str = NULL;
3096 static PyObject *reduce_ex_str = NULL;
3097
3098 /* Cache the name of the reduce methods. */
3099 if (reduce_str == NULL) {
3100 reduce_str = PyUnicode_InternFromString("__reduce__");
3101 if (reduce_str == NULL)
3102 goto error;
3103 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
3104 if (reduce_ex_str == NULL)
3105 goto error;
3106 }
3107
3108 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
3109 automatically defined as __reduce__. While this is convenient, this
3110 make it impossible to know which method was actually called. Of
3111 course, this is not a big deal. But still, it would be nice to let
3112 the user know which method was called when something go
3113 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
3114 don't actually have to check for a __reduce__ method. */
3115
3116 /* Check for a __reduce_ex__ method. */
3117 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
3118 if (reduce_func != NULL) {
3119 PyObject *proto;
3120 proto = PyLong_FromLong(self->proto);
3121 if (proto != NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003122 reduce_value = _Pickler_FastCall(self, reduce_func, proto);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003123 }
3124 }
3125 else {
3126 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3127 PyErr_Clear();
3128 else
3129 goto error;
3130 /* Check for a __reduce__ method. */
3131 reduce_func = PyObject_GetAttr(obj, reduce_str);
3132 if (reduce_func != NULL) {
3133 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
3134 }
3135 else {
3136 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
3137 type->tp_name, obj);
3138 goto error;
3139 }
3140 }
3141 }
3142
3143 if (reduce_value == NULL)
3144 goto error;
3145
3146 if (PyUnicode_Check(reduce_value)) {
3147 status = save_global(self, obj, reduce_value);
3148 goto done;
3149 }
3150
3151 if (!PyTuple_Check(reduce_value)) {
3152 PyErr_SetString(PicklingError,
3153 "__reduce__ must return a string or tuple");
3154 goto error;
3155 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003156
3157 status = save_reduce(self, reduce_value, obj);
3158
3159 if (0) {
3160 error:
3161 status = -1;
3162 }
3163 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00003164 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003165 Py_XDECREF(reduce_func);
3166 Py_XDECREF(reduce_value);
3167
3168 return status;
3169}
3170
3171static int
3172dump(PicklerObject *self, PyObject *obj)
3173{
3174 const char stop_op = STOP;
3175
3176 if (self->proto >= 2) {
3177 char header[2];
3178
3179 header[0] = PROTO;
3180 assert(self->proto >= 0 && self->proto < 256);
3181 header[1] = (unsigned char)self->proto;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003182 if (_Pickler_Write(self, header, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003183 return -1;
3184 }
3185
3186 if (save(self, obj, 0) < 0 ||
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003187 _Pickler_Write(self, &stop_op, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003188 return -1;
3189
3190 return 0;
3191}
3192
3193PyDoc_STRVAR(Pickler_clear_memo_doc,
3194"clear_memo() -> None. Clears the pickler's \"memo\"."
3195"\n"
3196"The memo is the data structure that remembers which objects the\n"
3197"pickler has already seen, so that shared or recursive objects are\n"
3198"pickled by reference and not by value. This method is useful when\n"
3199"re-using picklers.");
3200
3201static PyObject *
3202Pickler_clear_memo(PicklerObject *self)
3203{
3204 if (self->memo)
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003205 PyMemoTable_Clear(self->memo);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003206
3207 Py_RETURN_NONE;
3208}
3209
3210PyDoc_STRVAR(Pickler_dump_doc,
3211"dump(obj) -> None. Write a pickled representation of obj to the open file.");
3212
3213static PyObject *
3214Pickler_dump(PicklerObject *self, PyObject *args)
3215{
3216 PyObject *obj;
3217
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00003218 /* Check whether the Pickler was initialized correctly (issue3664).
3219 Developers often forget to call __init__() in their subclasses, which
3220 would trigger a segfault without this check. */
3221 if (self->write == NULL) {
3222 PyErr_Format(PicklingError,
3223 "Pickler.__init__() was not called by %s.__init__()",
3224 Py_TYPE(self)->tp_name);
3225 return NULL;
3226 }
3227
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003228 if (!PyArg_ParseTuple(args, "O:dump", &obj))
3229 return NULL;
3230
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003231 if (_Pickler_ClearBuffer(self) < 0)
3232 return NULL;
3233
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003234 if (dump(self, obj) < 0)
3235 return NULL;
3236
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003237 if (_Pickler_FlushToFile(self) < 0)
3238 return NULL;
3239
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003240 Py_RETURN_NONE;
3241}
3242
/* Method table for Pickler objects: dump() takes positional arguments,
   clear_memo() takes none. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
3250
/* Deallocator for Pickler objects: untrack the object from the garbage
   collector, drop every owned reference, delete the memo table and finally
   release the object's memory through the type's tp_free. */
static void
Pickler_dealloc(PicklerObject *self)
{
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->output_buffer);
    Py_XDECREF(self->write);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    /* The memo is a PyMemoTable rather than a PyObject, so it is deleted
       explicitly instead of being DECREF'ed. */
    PyMemoTable_Del(self->memo);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
3266
/* GC traversal for Pickler objects: visits write, pers_func, arg and
   fast_memo.  output_buffer and the memo table are not visited here. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
3276
3277static int
3278Pickler_clear(PicklerObject *self)
3279{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003280 Py_CLEAR(self->output_buffer);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003281 Py_CLEAR(self->write);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003282 Py_CLEAR(self->pers_func);
3283 Py_CLEAR(self->arg);
3284 Py_CLEAR(self->fast_memo);
3285
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003286 if (self->memo != NULL) {
3287 PyMemoTable *memo = self->memo;
3288 self->memo = NULL;
3289 PyMemoTable_Del(memo);
3290 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003291 return 0;
3292}
3293
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003294
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003295PyDoc_STRVAR(Pickler_doc,
3296"Pickler(file, protocol=None)"
3297"\n"
3298"This takes a binary file for writing a pickle data stream.\n"
3299"\n"
3300"The optional protocol argument tells the pickler to use the\n"
3301"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
3302"protocol is 3; a backward-incompatible protocol designed for\n"
3303"Python 3.0.\n"
3304"\n"
3305"Specifying a negative protocol version selects the highest\n"
3306"protocol version supported. The higher the protocol used, the\n"
3307"more recent the version of Python needed to read the pickle\n"
3308"produced.\n"
3309"\n"
3310"The file argument must have a write() method that accepts a single\n"
3311"bytes argument. It can thus be a file object opened for binary\n"
3312"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003313"meets this interface.\n"
3314"\n"
3315"If fix_imports is True and protocol is less than 3, pickle will try to\n"
3316"map the new Python 3.x names to the old module names used in Python\n"
3317"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003318
3319static int
3320Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
3321{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003322 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003323 PyObject *file;
3324 PyObject *proto_obj = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003325 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003326
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003327 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:Pickler",
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003328 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003329 return -1;
3330
3331 /* In case of multiple __init__() calls, clear previous content. */
3332 if (self->write != NULL)
3333 (void)Pickler_clear(self);
3334
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003335 if (_Pickler_SetProtocol(self, proto_obj, fix_imports) < 0)
3336 return -1;
3337
3338 if (_Pickler_SetOutputStream(self, file) < 0)
3339 return -1;
3340
3341 /* memo and output_buffer may have already been created in _Pickler_New */
3342 if (self->memo == NULL) {
3343 self->memo = PyMemoTable_New();
3344 if (self->memo == NULL)
3345 return -1;
3346 }
3347 self->output_len = 0;
3348 if (self->output_buffer == NULL) {
3349 self->max_output_len = WRITE_BUF_SIZE;
3350 self->output_buffer = PyBytes_FromStringAndSize(NULL,
3351 self->max_output_len);
3352 if (self->output_buffer == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003353 return -1;
3354 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003355
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00003356 self->arg = NULL;
3357 self->fast = 0;
3358 self->fast_nesting = 0;
3359 self->fast_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003360 self->pers_func = NULL;
3361 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
3362 self->pers_func = PyObject_GetAttrString((PyObject *)self,
3363 "persistent_id");
3364 if (self->pers_func == NULL)
3365 return -1;
3366 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003367 return 0;
3368}
3369
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003370/* Define a proxy object for the Pickler's internal memo object. This is to
3371 * avoid breaking code like:
3372 * pickler.memo.clear()
3373 * and
3374 * pickler.memo = saved_memo
3375 * Is this a good idea? Not really, but we don't want to break code that uses
3376 * it. Note that we don't implement the entire mapping API here. This is
3377 * intentional, as these should be treated as black-box implementation details.
3378 */
3379
/* Instance layout of the memo proxy: a plain object header followed by a
   reference to the Pickler it proxies.  The reference is taken in
   PicklerMemoProxy_New() and released in the dealloc/clear functions. */
typedef struct {
    PyObject_HEAD
    PicklerObject *pickler; /* Pickler whose memo table we're proxying. */
} PicklerMemoProxyObject;
3384
3385PyDoc_STRVAR(pmp_clear_doc,
3386"memo.clear() -> None. Remove all items from memo.");
3387
3388static PyObject *
3389pmp_clear(PicklerMemoProxyObject *self)
3390{
3391 if (self->pickler->memo)
3392 PyMemoTable_Clear(self->pickler->memo);
3393 Py_RETURN_NONE;
3394}
3395
3396PyDoc_STRVAR(pmp_copy_doc,
3397"memo.copy() -> new_memo. Copy the memo to a new object.");
3398
3399static PyObject *
3400pmp_copy(PicklerMemoProxyObject *self)
3401{
3402 Py_ssize_t i;
3403 PyMemoTable *memo;
3404 PyObject *new_memo = PyDict_New();
3405 if (new_memo == NULL)
3406 return NULL;
3407
3408 memo = self->pickler->memo;
3409 for (i = 0; i < memo->mt_allocated; ++i) {
3410 PyMemoEntry entry = memo->mt_table[i];
3411 if (entry.me_key != NULL) {
3412 int status;
3413 PyObject *key, *value;
3414
3415 key = PyLong_FromVoidPtr(entry.me_key);
3416 value = Py_BuildValue("lO", entry.me_value, entry.me_key);
3417
3418 if (key == NULL || value == NULL) {
3419 Py_XDECREF(key);
3420 Py_XDECREF(value);
3421 goto error;
3422 }
3423 status = PyDict_SetItem(new_memo, key, value);
3424 Py_DECREF(key);
3425 Py_DECREF(value);
3426 if (status < 0)
3427 goto error;
3428 }
3429 }
3430 return new_memo;
3431
3432 error:
3433 Py_XDECREF(new_memo);
3434 return NULL;
3435}
3436
3437PyDoc_STRVAR(pmp_reduce_doc,
3438"memo.__reduce__(). Pickling support.");
3439
3440static PyObject *
3441pmp_reduce(PicklerMemoProxyObject *self, PyObject *args)
3442{
3443 PyObject *reduce_value, *dict_args;
3444 PyObject *contents = pmp_copy(self);
3445 if (contents == NULL)
3446 return NULL;
3447
3448 reduce_value = PyTuple_New(2);
3449 if (reduce_value == NULL) {
3450 Py_DECREF(contents);
3451 return NULL;
3452 }
3453 dict_args = PyTuple_New(1);
3454 if (dict_args == NULL) {
3455 Py_DECREF(contents);
3456 Py_DECREF(reduce_value);
3457 return NULL;
3458 }
3459 PyTuple_SET_ITEM(dict_args, 0, contents);
3460 Py_INCREF((PyObject *)&PyDict_Type);
3461 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
3462 PyTuple_SET_ITEM(reduce_value, 1, dict_args);
3463 return reduce_value;
3464}
3465
/* Method table of the memo proxy.  Only clear(), copy() and __reduce__()
   are provided. */
static PyMethodDef picklerproxy_methods[] = {
    {"clear",      (PyCFunction)pmp_clear,  METH_NOARGS,  pmp_clear_doc},
    {"copy",       (PyCFunction)pmp_copy,   METH_NOARGS,  pmp_copy_doc},
    {"__reduce__", (PyCFunction)pmp_reduce, METH_VARARGS, pmp_reduce_doc},
    {NULL, NULL} /* sentinel */
};
3472
/* Deallocator for the memo proxy: untrack it from the garbage collector,
   drop the reference to the proxied pickler and free the object. */
static void
PicklerMemoProxy_dealloc(PicklerMemoProxyObject *self)
{
    PyObject_GC_UnTrack(self);
    Py_XDECREF(self->pickler);
    PyObject_GC_Del((PyObject *)self);
}
3480
/* GC traversal for the memo proxy: its only reference is the pickler. */
static int
PicklerMemoProxy_traverse(PicklerMemoProxyObject *self,
                          visitproc visit, void *arg)
{
    Py_VISIT(self->pickler);
    return 0;
}
3488
/* tp_clear for the memo proxy: release the reference to the pickler and
   leave the field NULL. */
static int
PicklerMemoProxy_clear(PicklerMemoProxyObject *self)
{
    Py_CLEAR(self->pickler);
    return 0;
}
3495
/* Type object of the memo proxy returned by the Pickler "memo" attribute.
   It is GC-tracked, unhashable, and exposes only picklerproxy_methods. */
static PyTypeObject PicklerMemoProxyType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.PicklerMemoProxy",                 /* tp_name */
    sizeof(PicklerMemoProxyObject),             /* tp_basicsize */
    0,                                          /* tp_itemsize */
    (destructor)PicklerMemoProxy_dealloc,       /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved (formerly tp_compare) */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    (hashfunc)PyObject_HashNotImplemented,      /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    PyObject_GenericSetAttr,                    /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                          /* tp_doc */
    (traverseproc)PicklerMemoProxy_traverse,    /* tp_traverse */
    (inquiry)PicklerMemoProxy_clear,            /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    picklerproxy_methods,                       /* tp_methods */
};
3526
3527static PyObject *
3528PicklerMemoProxy_New(PicklerObject *pickler)
3529{
3530 PicklerMemoProxyObject *self;
3531
3532 self = PyObject_GC_New(PicklerMemoProxyObject, &PicklerMemoProxyType);
3533 if (self == NULL)
3534 return NULL;
3535 Py_INCREF(pickler);
3536 self->pickler = pickler;
3537 PyObject_GC_Track(self);
3538 return (PyObject *)self;
3539}
3540
3541/*****************************************************************************/
3542
/* Getter of the "memo" attribute: every access returns a new proxy object
   bound to this pickler rather than the memo table itself. */
static PyObject *
Pickler_get_memo(PicklerObject *self)
{
    return PicklerMemoProxy_New(self);
}
3548
3549static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003550Pickler_set_memo(PicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003551{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003552 PyMemoTable *new_memo = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003553
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003554 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003555 PyErr_SetString(PyExc_TypeError,
3556 "attribute deletion is not supported");
3557 return -1;
3558 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003559
3560 if (Py_TYPE(obj) == &PicklerMemoProxyType) {
3561 PicklerObject *pickler =
3562 ((PicklerMemoProxyObject *)obj)->pickler;
3563
3564 new_memo = PyMemoTable_Copy(pickler->memo);
3565 if (new_memo == NULL)
3566 return -1;
3567 }
3568 else if (PyDict_Check(obj)) {
3569 Py_ssize_t i = 0;
3570 PyObject *key, *value;
3571
3572 new_memo = PyMemoTable_New();
3573 if (new_memo == NULL)
3574 return -1;
3575
3576 while (PyDict_Next(obj, &i, &key, &value)) {
3577 long memo_id;
3578 PyObject *memo_obj;
3579
3580 if (!PyTuple_Check(value) || Py_SIZE(value) != 2) {
3581 PyErr_SetString(PyExc_TypeError,
3582 "'memo' values must be 2-item tuples");
3583 goto error;
3584 }
3585 memo_id = PyLong_AsLong(PyTuple_GET_ITEM(value, 0));
3586 if (memo_id == -1 && PyErr_Occurred())
3587 goto error;
3588 memo_obj = PyTuple_GET_ITEM(value, 1);
3589 if (PyMemoTable_Set(new_memo, memo_obj, memo_id) < 0)
3590 goto error;
3591 }
3592 }
3593 else {
3594 PyErr_Format(PyExc_TypeError,
3595 "'memo' attribute must be an PicklerMemoProxy object"
3596 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003597 return -1;
3598 }
3599
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003600 PyMemoTable_Del(self->memo);
3601 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003602
3603 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003604
3605 error:
3606 if (new_memo)
3607 PyMemoTable_Del(new_memo);
3608 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003609}
3610
3611static PyObject *
3612Pickler_get_persid(PicklerObject *self)
3613{
3614 if (self->pers_func == NULL)
3615 PyErr_SetString(PyExc_AttributeError, "persistent_id");
3616 else
3617 Py_INCREF(self->pers_func);
3618 return self->pers_func;
3619}
3620
3621static int
3622Pickler_set_persid(PicklerObject *self, PyObject *value)
3623{
3624 PyObject *tmp;
3625
3626 if (value == NULL) {
3627 PyErr_SetString(PyExc_TypeError,
3628 "attribute deletion is not supported");
3629 return -1;
3630 }
3631 if (!PyCallable_Check(value)) {
3632 PyErr_SetString(PyExc_TypeError,
3633 "persistent_id must be a callable taking one argument");
3634 return -1;
3635 }
3636
3637 tmp = self->pers_func;
3638 Py_INCREF(value);
3639 self->pers_func = value;
3640 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
3641
3642 return 0;
3643}
3644
/* Plain int fields of PicklerObject exposed directly as attributes. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}  /* sentinel */
};
3650
/* Computed attributes of Pickler objects, each with a getter and a
   setter: "memo" and "persistent_id". */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}  /* sentinel */
};
3658
/* Type object for _pickle.Pickler.  The type is subclassable and
   GC-tracked; instances are created by PyType_GenericNew and set up by
   Pickler_init. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
3701
3702/* Temporary helper for calling self.find_class().
3703
3704 XXX: It would be nice to able to avoid Python function call overhead, by
3705 using directly the C version of find_class(), when find_class() is not
3706 overridden by a subclass. Although, this could become rather hackish. A
3707 simpler optimization would be to call the C function when self is not a
3708 subclass instance. */
3709static PyObject *
3710find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
3711{
3712 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
3713 module_name, global_name);
3714}
3715
3716static int
3717marker(UnpicklerObject *self)
3718{
3719 if (self->num_marks < 1) {
3720 PyErr_SetString(UnpicklingError, "could not find MARK");
3721 return -1;
3722 }
3723
3724 return self->marks[--self->num_marks];
3725}
3726
/* Handle an opcode that loads None: append Py_None to the unpickler's
   stack.  Returns 0 on success. */
static int
load_none(UnpicklerObject *self)
{
    /* PDATA_APPEND is a macro defined elsewhere in this file; presumably it
       returns its third argument (-1) from this function on failure --
       confirm against the macro definition. */
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
3733
/* Report truncated pickle data: set UnpicklingError and return -1, so that
   callers can simply write "return bad_readline();". */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
3740
3741static int
3742load_int(UnpicklerObject *self)
3743{
3744 PyObject *value;
3745 char *endptr, *s;
3746 Py_ssize_t len;
3747 long x;
3748
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003749 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003750 return -1;
3751 if (len < 2)
3752 return bad_readline();
3753
3754 errno = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003755 /* XXX: Should the base argument of strtol() be explicitly set to 10?
3756 XXX(avassalotti): Should this uses PyOS_strtol()? */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003757 x = strtol(s, &endptr, 0);
3758
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003759 if (errno || (*endptr != '\n' && *endptr != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003760 /* Hm, maybe we've got something long. Let's try reading
3761 * it as a Python long object. */
3762 errno = 0;
3763 /* XXX: Same thing about the base here. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003764 value = PyLong_FromString(s, NULL, 0);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003765 if (value == NULL) {
3766 PyErr_SetString(PyExc_ValueError,
3767 "could not convert string to int");
3768 return -1;
3769 }
3770 }
3771 else {
3772 if (len == 3 && (x == 0 || x == 1)) {
3773 if ((value = PyBool_FromLong(x)) == NULL)
3774 return -1;
3775 }
3776 else {
3777 if ((value = PyLong_FromLong(x)) == NULL)
3778 return -1;
3779 }
3780 }
3781
3782 PDATA_PUSH(self->stack, value, -1);
3783 return 0;
3784}
3785
/* Append the given boolean singleton (Py_True or Py_False, checked by the
   assertion) to the unpickler's stack.  Returns 0 on success. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    /* PDATA_APPEND is a macro defined elsewhere in this file; presumably it
       returns its third argument (-1) from this function on failure --
       confirm against the macro definition. */
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
3793
/* bytes holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2 this is an unsigned
 * little-endian int, but when size is 4 it is a signed one.  This is a
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that a high byte of
 * 0x80 or more is never shifted into the sign bit of a signed type, which
 * would be undefined behaviour on platforms where long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;
    int i;

    for (i = 0; i < size; i++) {
        acc |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is acc - 2**32, computed
     * here as (low 31 bits) - 2**31 so that every intermediate result fits
     * in a long regardless of its width.
     */
    if (size == 4 && (acc & 0x80000000UL) != 0) {
        return (long)(acc & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)acc;
}
3820
3821static int
3822load_binintx(UnpicklerObject *self, char *s, int size)
3823{
3824 PyObject *value;
3825 long x;
3826
3827 x = calc_binint(s, size);
3828
3829 if ((value = PyLong_FromLong(x)) == NULL)
3830 return -1;
3831
3832 PDATA_PUSH(self->stack, value, -1);
3833 return 0;
3834}
3835
3836static int
3837load_binint(UnpicklerObject *self)
3838{
3839 char *s;
3840
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003841 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003842 return -1;
3843
3844 return load_binintx(self, s, 4);
3845}
3846
3847static int
3848load_binint1(UnpicklerObject *self)
3849{
3850 char *s;
3851
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003852 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003853 return -1;
3854
3855 return load_binintx(self, s, 1);
3856}
3857
3858static int
3859load_binint2(UnpicklerObject *self)
3860{
3861 char *s;
3862
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003863 if (_Unpickler_Read(self, &s, 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003864 return -1;
3865
3866 return load_binintx(self, s, 2);
3867}
3868
3869static int
3870load_long(UnpicklerObject *self)
3871{
3872 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003873 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003874 Py_ssize_t len;
3875
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003876 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003877 return -1;
3878 if (len < 2)
3879 return bad_readline();
3880
Mark Dickinson8dd05142009-01-20 20:43:58 +00003881 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3882 the 'L' before calling PyLong_FromString. In order to maintain
3883 compatibility with Python 3.0.0, we don't actually *require*
3884 the 'L' to be present. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003885 if (s[len-2] == 'L')
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003886 s[len-2] = '\0';
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003887 /* XXX: Should the base argument explicitly set to 10? */
3888 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003889 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003890 return -1;
3891
3892 PDATA_PUSH(self->stack, value, -1);
3893 return 0;
3894}
3895
3896/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3897 * data following.
3898 */
3899static int
3900load_counted_long(UnpicklerObject *self, int size)
3901{
3902 PyObject *value;
3903 char *nbytes;
3904 char *pdata;
3905
3906 assert(size == 1 || size == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003907 if (_Unpickler_Read(self, &nbytes, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003908 return -1;
3909
3910 size = calc_binint(nbytes, size);
3911 if (size < 0) {
3912 /* Corrupt or hostile pickle -- we never write one like this */
3913 PyErr_SetString(UnpicklingError,
3914 "LONG pickle has negative byte count");
3915 return -1;
3916 }
3917
3918 if (size == 0)
3919 value = PyLong_FromLong(0L);
3920 else {
3921 /* Read the raw little-endian bytes and convert. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003922 if (_Unpickler_Read(self, &pdata, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003923 return -1;
3924 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
3925 1 /* little endian */ , 1 /* signed */ );
3926 }
3927 if (value == NULL)
3928 return -1;
3929 PDATA_PUSH(self->stack, value, -1);
3930 return 0;
3931}
3932
3933static int
3934load_float(UnpicklerObject *self)
3935{
3936 PyObject *value;
3937 char *endptr, *s;
3938 Py_ssize_t len;
3939 double d;
3940
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003941 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003942 return -1;
3943 if (len < 2)
3944 return bad_readline();
3945
3946 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003947 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
3948 if (d == -1.0 && PyErr_Occurred())
3949 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003950 if ((endptr[0] != '\n') && (endptr[0] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003951 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
3952 return -1;
3953 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00003954 value = PyFloat_FromDouble(d);
3955 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003956 return -1;
3957
3958 PDATA_PUSH(self->stack, value, -1);
3959 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003960}
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003961
3962static int
3963load_binfloat(UnpicklerObject *self)
3964{
3965 PyObject *value;
3966 double x;
3967 char *s;
3968
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003969 if (_Unpickler_Read(self, &s, 8) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003970 return -1;
3971
3972 x = _PyFloat_Unpack8((unsigned char *)s, 0);
3973 if (x == -1.0 && PyErr_Occurred())
3974 return -1;
3975
3976 if ((value = PyFloat_FromDouble(x)) == NULL)
3977 return -1;
3978
3979 PDATA_PUSH(self->stack, value, -1);
3980 return 0;
3981}
3982
3983static int
3984load_string(UnpicklerObject *self)
3985{
3986 PyObject *bytes;
3987 PyObject *str = NULL;
3988 Py_ssize_t len;
3989 char *s, *p;
3990
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00003991 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003992 return -1;
3993 if (len < 3)
3994 return bad_readline();
3995 if ((s = strdup(s)) == NULL) {
3996 PyErr_NoMemory();
3997 return -1;
3998 }
3999
4000 /* Strip outermost quotes */
4001 while (s[len - 1] <= ' ')
4002 len--;
4003 if (s[0] == '"' && s[len - 1] == '"') {
4004 s[len - 1] = '\0';
4005 p = s + 1;
4006 len -= 2;
4007 }
4008 else if (s[0] == '\'' && s[len - 1] == '\'') {
4009 s[len - 1] = '\0';
4010 p = s + 1;
4011 len -= 2;
4012 }
4013 else {
4014 free(s);
4015 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
4016 return -1;
4017 }
4018
4019 /* Use the PyBytes API to decode the string, since that is what is used
4020 to encode, and then coerce the result to Unicode. */
4021 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
4022 free(s);
4023 if (bytes == NULL)
4024 return -1;
4025 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
4026 Py_DECREF(bytes);
4027 if (str == NULL)
4028 return -1;
4029
4030 PDATA_PUSH(self->stack, str, -1);
4031 return 0;
4032}
4033
4034static int
4035load_binbytes(UnpicklerObject *self)
4036{
4037 PyObject *bytes;
4038 long x;
4039 char *s;
4040
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004041 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004042 return -1;
4043
4044 x = calc_binint(s, 4);
4045 if (x < 0) {
4046 PyErr_SetString(UnpicklingError,
4047 "BINBYTES pickle has negative byte count");
4048 return -1;
4049 }
4050
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004051 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004052 return -1;
4053 bytes = PyBytes_FromStringAndSize(s, x);
4054 if (bytes == NULL)
4055 return -1;
4056
4057 PDATA_PUSH(self->stack, bytes, -1);
4058 return 0;
4059}
4060
4061static int
4062load_short_binbytes(UnpicklerObject *self)
4063{
4064 PyObject *bytes;
4065 unsigned char x;
4066 char *s;
4067
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004068 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004069 return -1;
4070
4071 x = (unsigned char)s[0];
4072
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004073 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004074 return -1;
4075
4076 bytes = PyBytes_FromStringAndSize(s, x);
4077 if (bytes == NULL)
4078 return -1;
4079
4080 PDATA_PUSH(self->stack, bytes, -1);
4081 return 0;
4082}
4083
4084static int
4085load_binstring(UnpicklerObject *self)
4086{
4087 PyObject *str;
4088 long x;
4089 char *s;
4090
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004091 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004092 return -1;
4093
4094 x = calc_binint(s, 4);
4095 if (x < 0) {
4096 PyErr_SetString(UnpicklingError,
4097 "BINSTRING pickle has negative byte count");
4098 return -1;
4099 }
4100
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004101 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004102 return -1;
4103
4104 /* Convert Python 2.x strings to unicode. */
4105 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4106 if (str == NULL)
4107 return -1;
4108
4109 PDATA_PUSH(self->stack, str, -1);
4110 return 0;
4111}
4112
4113static int
4114load_short_binstring(UnpicklerObject *self)
4115{
4116 PyObject *str;
4117 unsigned char x;
4118 char *s;
4119
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004120 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004121 return -1;
4122
4123 x = (unsigned char)s[0];
4124
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004125 if (_Unpickler_Read(self, &s, x) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004126 return -1;
4127
4128 /* Convert Python 2.x strings to unicode. */
4129 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
4130 if (str == NULL)
4131 return -1;
4132
4133 PDATA_PUSH(self->stack, str, -1);
4134 return 0;
4135}
4136
4137static int
4138load_unicode(UnpicklerObject *self)
4139{
4140 PyObject *str;
4141 Py_ssize_t len;
4142 char *s;
4143
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004144 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004145 return -1;
4146 if (len < 1)
4147 return bad_readline();
4148
4149 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
4150 if (str == NULL)
4151 return -1;
4152
4153 PDATA_PUSH(self->stack, str, -1);
4154 return 0;
4155}
4156
4157static int
4158load_binunicode(UnpicklerObject *self)
4159{
4160 PyObject *str;
4161 long size;
4162 char *s;
4163
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004164 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004165 return -1;
4166
4167 size = calc_binint(s, 4);
4168 if (size < 0) {
4169 PyErr_SetString(UnpicklingError,
4170 "BINUNICODE pickle has negative byte count");
4171 return -1;
4172 }
4173
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004174 if (_Unpickler_Read(self, &s, size) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004175 return -1;
4176
Victor Stinner485fb562010-04-13 11:07:24 +00004177 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004178 if (str == NULL)
4179 return -1;
4180
4181 PDATA_PUSH(self->stack, str, -1);
4182 return 0;
4183}
4184
4185static int
4186load_tuple(UnpicklerObject *self)
4187{
4188 PyObject *tuple;
4189 int i;
4190
4191 if ((i = marker(self)) < 0)
4192 return -1;
4193
4194 tuple = Pdata_poptuple(self->stack, i);
4195 if (tuple == NULL)
4196 return -1;
4197 PDATA_PUSH(self->stack, tuple, -1);
4198 return 0;
4199}
4200
4201static int
4202load_counted_tuple(UnpicklerObject *self, int len)
4203{
4204 PyObject *tuple;
4205
4206 tuple = PyTuple_New(len);
4207 if (tuple == NULL)
4208 return -1;
4209
4210 while (--len >= 0) {
4211 PyObject *item;
4212
4213 PDATA_POP(self->stack, item);
4214 if (item == NULL)
4215 return -1;
4216 PyTuple_SET_ITEM(tuple, len, item);
4217 }
4218 PDATA_PUSH(self->stack, tuple, -1);
4219 return 0;
4220}
4221
4222static int
4223load_empty_list(UnpicklerObject *self)
4224{
4225 PyObject *list;
4226
4227 if ((list = PyList_New(0)) == NULL)
4228 return -1;
4229 PDATA_PUSH(self->stack, list, -1);
4230 return 0;
4231}
4232
4233static int
4234load_empty_dict(UnpicklerObject *self)
4235{
4236 PyObject *dict;
4237
4238 if ((dict = PyDict_New()) == NULL)
4239 return -1;
4240 PDATA_PUSH(self->stack, dict, -1);
4241 return 0;
4242}
4243
4244static int
4245load_list(UnpicklerObject *self)
4246{
4247 PyObject *list;
4248 int i;
4249
4250 if ((i = marker(self)) < 0)
4251 return -1;
4252
4253 list = Pdata_poplist(self->stack, i);
4254 if (list == NULL)
4255 return -1;
4256 PDATA_PUSH(self->stack, list, -1);
4257 return 0;
4258}
4259
4260static int
4261load_dict(UnpicklerObject *self)
4262{
4263 PyObject *dict, *key, *value;
4264 int i, j, k;
4265
4266 if ((i = marker(self)) < 0)
4267 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004268 j = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004269
4270 if ((dict = PyDict_New()) == NULL)
4271 return -1;
4272
4273 for (k = i + 1; k < j; k += 2) {
4274 key = self->stack->data[k - 1];
4275 value = self->stack->data[k];
4276 if (PyDict_SetItem(dict, key, value) < 0) {
4277 Py_DECREF(dict);
4278 return -1;
4279 }
4280 }
4281 Pdata_clear(self->stack, i);
4282 PDATA_PUSH(self->stack, dict, -1);
4283 return 0;
4284}
4285
4286static PyObject *
4287instantiate(PyObject *cls, PyObject *args)
4288{
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004289 PyObject *result = NULL;
4290 /* Caller must assure args are a tuple. Normally, args come from
4291 Pdata_poptuple which packs objects from the top of the stack
4292 into a newly created tuple. */
4293 assert(PyTuple_Check(args));
4294 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
4295 PyObject_HasAttrString(cls, "__getinitargs__")) {
4296 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004297 }
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004298 else {
4299 result = PyObject_CallMethod(cls, "__new__", "O", cls);
4300 }
4301 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004302}
4303
4304static int
4305load_obj(UnpicklerObject *self)
4306{
4307 PyObject *cls, *args, *obj = NULL;
4308 int i;
4309
4310 if ((i = marker(self)) < 0)
4311 return -1;
4312
4313 args = Pdata_poptuple(self->stack, i + 1);
4314 if (args == NULL)
4315 return -1;
4316
4317 PDATA_POP(self->stack, cls);
4318 if (cls) {
4319 obj = instantiate(cls, args);
4320 Py_DECREF(cls);
4321 }
4322 Py_DECREF(args);
4323 if (obj == NULL)
4324 return -1;
4325
4326 PDATA_PUSH(self->stack, obj, -1);
4327 return 0;
4328}
4329
4330static int
4331load_inst(UnpicklerObject *self)
4332{
4333 PyObject *cls = NULL;
4334 PyObject *args = NULL;
4335 PyObject *obj = NULL;
4336 PyObject *module_name;
4337 PyObject *class_name;
4338 Py_ssize_t len;
4339 int i;
4340 char *s;
4341
4342 if ((i = marker(self)) < 0)
4343 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004344 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004345 return -1;
4346 if (len < 2)
4347 return bad_readline();
4348
4349 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
4350 identifiers are permitted in Python 3.0, since the INST opcode is only
4351 supported by older protocols on Python 2.x. */
4352 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
4353 if (module_name == NULL)
4354 return -1;
4355
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004356 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004357 if (len < 2)
4358 return bad_readline();
4359 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00004360 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004361 cls = find_class(self, module_name, class_name);
4362 Py_DECREF(class_name);
4363 }
4364 }
4365 Py_DECREF(module_name);
4366
4367 if (cls == NULL)
4368 return -1;
4369
4370 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
4371 obj = instantiate(cls, args);
4372 Py_DECREF(args);
4373 }
4374 Py_DECREF(cls);
4375
4376 if (obj == NULL)
4377 return -1;
4378
4379 PDATA_PUSH(self->stack, obj, -1);
4380 return 0;
4381}
4382
4383static int
4384load_newobj(UnpicklerObject *self)
4385{
4386 PyObject *args = NULL;
4387 PyObject *clsraw = NULL;
4388 PyTypeObject *cls; /* clsraw cast to its true type */
4389 PyObject *obj;
4390
4391 /* Stack is ... cls argtuple, and we want to call
4392 * cls.__new__(cls, *argtuple).
4393 */
4394 PDATA_POP(self->stack, args);
4395 if (args == NULL)
4396 goto error;
4397 if (!PyTuple_Check(args)) {
4398 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
4399 goto error;
4400 }
4401
4402 PDATA_POP(self->stack, clsraw);
4403 cls = (PyTypeObject *)clsraw;
4404 if (cls == NULL)
4405 goto error;
4406 if (!PyType_Check(cls)) {
4407 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4408 "isn't a type object");
4409 goto error;
4410 }
4411 if (cls->tp_new == NULL) {
4412 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
4413 "has NULL tp_new");
4414 goto error;
4415 }
4416
4417 /* Call __new__. */
4418 obj = cls->tp_new(cls, args, NULL);
4419 if (obj == NULL)
4420 goto error;
4421
4422 Py_DECREF(args);
4423 Py_DECREF(clsraw);
4424 PDATA_PUSH(self->stack, obj, -1);
4425 return 0;
4426
4427 error:
4428 Py_XDECREF(args);
4429 Py_XDECREF(clsraw);
4430 return -1;
4431}
4432
4433static int
4434load_global(UnpicklerObject *self)
4435{
4436 PyObject *global = NULL;
4437 PyObject *module_name;
4438 PyObject *global_name;
4439 Py_ssize_t len;
4440 char *s;
4441
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004442 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004443 return -1;
4444 if (len < 2)
4445 return bad_readline();
4446 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4447 if (!module_name)
4448 return -1;
4449
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004450 if ((len = _Unpickler_Readline(self, &s)) >= 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004451 if (len < 2) {
4452 Py_DECREF(module_name);
4453 return bad_readline();
4454 }
4455 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
4456 if (global_name) {
4457 global = find_class(self, module_name, global_name);
4458 Py_DECREF(global_name);
4459 }
4460 }
4461 Py_DECREF(module_name);
4462
4463 if (global == NULL)
4464 return -1;
4465 PDATA_PUSH(self->stack, global, -1);
4466 return 0;
4467}
4468
4469static int
4470load_persid(UnpicklerObject *self)
4471{
4472 PyObject *pid;
4473 Py_ssize_t len;
4474 char *s;
4475
4476 if (self->pers_func) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004477 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004478 return -1;
4479 if (len < 2)
4480 return bad_readline();
4481
4482 pid = PyBytes_FromStringAndSize(s, len - 1);
4483 if (pid == NULL)
4484 return -1;
4485
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004486 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004487 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004488 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004489 if (pid == NULL)
4490 return -1;
4491
4492 PDATA_PUSH(self->stack, pid, -1);
4493 return 0;
4494 }
4495 else {
4496 PyErr_SetString(UnpicklingError,
4497 "A load persistent id instruction was encountered,\n"
4498 "but no persistent_load function was specified.");
4499 return -1;
4500 }
4501}
4502
4503static int
4504load_binpersid(UnpicklerObject *self)
4505{
4506 PyObject *pid;
4507
4508 if (self->pers_func) {
4509 PDATA_POP(self->stack, pid);
4510 if (pid == NULL)
4511 return -1;
4512
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004513 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004514 reference to pid first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004515 pid = _Unpickler_FastCall(self, self->pers_func, pid);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004516 if (pid == NULL)
4517 return -1;
4518
4519 PDATA_PUSH(self->stack, pid, -1);
4520 return 0;
4521 }
4522 else {
4523 PyErr_SetString(UnpicklingError,
4524 "A load persistent id instruction was encountered,\n"
4525 "but no persistent_load function was specified.");
4526 return -1;
4527 }
4528}
4529
4530static int
4531load_pop(UnpicklerObject *self)
4532{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004533 int len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004534
4535 /* Note that we split the (pickle.py) stack into two stacks,
4536 * an object stack and a mark stack. We have to be clever and
4537 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00004538 * mark stack first, and only signalling a stack underflow if
4539 * the object stack is empty and the mark stack doesn't match
4540 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004541 */
Collin Winter8ca69de2009-05-26 16:53:41 +00004542 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004543 self->num_marks--;
Antoine Pitrou01a15ea2010-01-07 17:57:31 +00004544 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004545 len--;
4546 Py_DECREF(self->stack->data[len]);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004547 Py_SIZE(self->stack) = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00004548 } else {
4549 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004550 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004551 return 0;
4552}
4553
4554static int
4555load_pop_mark(UnpicklerObject *self)
4556{
4557 int i;
4558
4559 if ((i = marker(self)) < 0)
4560 return -1;
4561
4562 Pdata_clear(self->stack, i);
4563
4564 return 0;
4565}
4566
4567static int
4568load_dup(UnpicklerObject *self)
4569{
4570 PyObject *last;
4571 int len;
4572
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004573 if ((len = Py_SIZE(self->stack)) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004574 return stack_underflow();
4575 last = self->stack->data[len - 1];
4576 PDATA_APPEND(self->stack, last, -1);
4577 return 0;
4578}
4579
4580static int
4581load_get(UnpicklerObject *self)
4582{
4583 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004584 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004585 Py_ssize_t len;
4586 char *s;
4587
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004588 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004589 return -1;
4590 if (len < 2)
4591 return bad_readline();
4592
4593 key = PyLong_FromString(s, NULL, 10);
4594 if (key == NULL)
4595 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004596 idx = PyLong_AsSsize_t(key);
4597 if (idx == -1 && PyErr_Occurred()) {
4598 Py_DECREF(key);
4599 return -1;
4600 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004601
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004602 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004603 if (value == NULL) {
4604 if (!PyErr_Occurred())
4605 PyErr_SetObject(PyExc_KeyError, key);
4606 Py_DECREF(key);
4607 return -1;
4608 }
4609 Py_DECREF(key);
4610
4611 PDATA_APPEND(self->stack, value, -1);
4612 return 0;
4613}
4614
4615static int
4616load_binget(UnpicklerObject *self)
4617{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004618 PyObject *value;
4619 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004620 char *s;
4621
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004622 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004623 return -1;
4624
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004625 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004626
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004627 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004628 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004629 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004630 if (!PyErr_Occurred())
4631 PyErr_SetObject(PyExc_KeyError, key);
4632 Py_DECREF(key);
4633 return -1;
4634 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004635
4636 PDATA_APPEND(self->stack, value, -1);
4637 return 0;
4638}
4639
4640static int
4641load_long_binget(UnpicklerObject *self)
4642{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004643 PyObject *value;
4644 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004645 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004646
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004647 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004648 return -1;
4649
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004650 idx = (long)Py_CHARMASK(s[0]);
4651 idx |= (long)Py_CHARMASK(s[1]) << 8;
4652 idx |= (long)Py_CHARMASK(s[2]) << 16;
4653 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004654
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004655 value = _Unpickler_MemoGet(self, idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004656 if (value == NULL) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004657 PyObject *key = PyLong_FromSsize_t(idx);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004658 if (!PyErr_Occurred())
4659 PyErr_SetObject(PyExc_KeyError, key);
4660 Py_DECREF(key);
4661 return -1;
4662 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004663
4664 PDATA_APPEND(self->stack, value, -1);
4665 return 0;
4666}
4667
4668/* Push an object from the extension registry (EXT[124]). nbytes is
4669 * the number of bytes following the opcode, holding the index (code) value.
4670 */
4671static int
4672load_extension(UnpicklerObject *self, int nbytes)
4673{
4674 char *codebytes; /* the nbytes bytes after the opcode */
4675 long code; /* calc_binint returns long */
4676 PyObject *py_code; /* code as a Python int */
4677 PyObject *obj; /* the object to push */
4678 PyObject *pair; /* (module_name, class_name) */
4679 PyObject *module_name, *class_name;
4680
4681 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004682 if (_Unpickler_Read(self, &codebytes, nbytes) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004683 return -1;
4684 code = calc_binint(codebytes, nbytes);
4685 if (code <= 0) { /* note that 0 is forbidden */
4686 /* Corrupt or hostile pickle. */
4687 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
4688 return -1;
4689 }
4690
4691 /* Look for the code in the cache. */
4692 py_code = PyLong_FromLong(code);
4693 if (py_code == NULL)
4694 return -1;
4695 obj = PyDict_GetItem(extension_cache, py_code);
4696 if (obj != NULL) {
4697 /* Bingo. */
4698 Py_DECREF(py_code);
4699 PDATA_APPEND(self->stack, obj, -1);
4700 return 0;
4701 }
4702
4703 /* Look up the (module_name, class_name) pair. */
4704 pair = PyDict_GetItem(inverted_registry, py_code);
4705 if (pair == NULL) {
4706 Py_DECREF(py_code);
4707 PyErr_Format(PyExc_ValueError, "unregistered extension "
4708 "code %ld", code);
4709 return -1;
4710 }
4711 /* Since the extension registry is manipulable via Python code,
4712 * confirm that pair is really a 2-tuple of strings.
4713 */
4714 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
4715 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
4716 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
4717 Py_DECREF(py_code);
4718 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
4719 "isn't a 2-tuple of strings", code);
4720 return -1;
4721 }
4722 /* Load the object. */
4723 obj = find_class(self, module_name, class_name);
4724 if (obj == NULL) {
4725 Py_DECREF(py_code);
4726 return -1;
4727 }
4728 /* Cache code -> obj. */
4729 code = PyDict_SetItem(extension_cache, py_code, obj);
4730 Py_DECREF(py_code);
4731 if (code < 0) {
4732 Py_DECREF(obj);
4733 return -1;
4734 }
4735 PDATA_PUSH(self->stack, obj, -1);
4736 return 0;
4737}
4738
4739static int
4740load_put(UnpicklerObject *self)
4741{
4742 PyObject *key, *value;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004743 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004744 Py_ssize_t len;
4745 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004746
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004747 if ((len = _Unpickler_Readline(self, &s)) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004748 return -1;
4749 if (len < 2)
4750 return bad_readline();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004751 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004752 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004753 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004754
4755 key = PyLong_FromString(s, NULL, 10);
4756 if (key == NULL)
4757 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004758 idx = PyLong_AsSsize_t(key);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004759 Py_DECREF(key);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004760 if (idx == -1 && PyErr_Occurred())
4761 return -1;
4762
4763 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004764}
4765
4766static int
4767load_binput(UnpicklerObject *self)
4768{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004769 PyObject *value;
4770 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004771 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004772
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004773 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004774 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004775
4776 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004777 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004778 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004779
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004780 idx = Py_CHARMASK(s[0]);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004781
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004782 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004783}
4784
4785static int
4786load_long_binput(UnpicklerObject *self)
4787{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004788 PyObject *value;
4789 Py_ssize_t idx;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004790 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004791
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004792 if (_Unpickler_Read(self, &s, 4) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004793 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004794
4795 if (Py_SIZE(self->stack) <= 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004796 return stack_underflow();
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004797 value = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004798
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004799 idx = (long)Py_CHARMASK(s[0]);
4800 idx |= (long)Py_CHARMASK(s[1]) << 8;
4801 idx |= (long)Py_CHARMASK(s[2]) << 16;
4802 idx |= (long)Py_CHARMASK(s[3]) << 24;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004803
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004804 return _Unpickler_MemoPut(self, idx, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004805}
4806
4807static int
4808do_append(UnpicklerObject *self, int x)
4809{
4810 PyObject *value;
4811 PyObject *list;
4812 int len, i;
4813
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004814 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004815 if (x > len || x <= 0)
4816 return stack_underflow();
4817 if (len == x) /* nothing to do */
4818 return 0;
4819
4820 list = self->stack->data[x - 1];
4821
4822 if (PyList_Check(list)) {
4823 PyObject *slice;
4824 Py_ssize_t list_len;
4825
4826 slice = Pdata_poplist(self->stack, x);
4827 if (!slice)
4828 return -1;
4829 list_len = PyList_GET_SIZE(list);
4830 i = PyList_SetSlice(list, list_len, list_len, slice);
4831 Py_DECREF(slice);
4832 return i;
4833 }
4834 else {
4835 PyObject *append_func;
4836
4837 append_func = PyObject_GetAttrString(list, "append");
4838 if (append_func == NULL)
4839 return -1;
4840 for (i = x; i < len; i++) {
4841 PyObject *result;
4842
4843 value = self->stack->data[i];
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004844 result = _Unpickler_FastCall(self, append_func, value);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004845 if (result == NULL) {
4846 Pdata_clear(self->stack, i + 1);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004847 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004848 return -1;
4849 }
4850 Py_DECREF(result);
4851 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004852 Py_SIZE(self->stack) = x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004853 }
4854
4855 return 0;
4856}
4857
4858static int
4859load_append(UnpicklerObject *self)
4860{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004861 return do_append(self, Py_SIZE(self->stack) - 1);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004862}
4863
4864static int
4865load_appends(UnpicklerObject *self)
4866{
4867 return do_append(self, marker(self));
4868}
4869
4870static int
4871do_setitems(UnpicklerObject *self, int x)
4872{
4873 PyObject *value, *key;
4874 PyObject *dict;
4875 int len, i;
4876 int status = 0;
4877
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004878 len = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004879 if (x > len || x <= 0)
4880 return stack_underflow();
4881 if (len == x) /* nothing to do */
4882 return 0;
4883 if ((len - x) % 2 != 0) {
4884 /* Currupt or hostile pickle -- we never write one like this. */
4885 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4886 return -1;
4887 }
4888
4889 /* Here, dict does not actually need to be a PyDict; it could be anything
4890 that supports the __setitem__ attribute. */
4891 dict = self->stack->data[x - 1];
4892
4893 for (i = x + 1; i < len; i += 2) {
4894 key = self->stack->data[i - 1];
4895 value = self->stack->data[i];
4896 if (PyObject_SetItem(dict, key, value) < 0) {
4897 status = -1;
4898 break;
4899 }
4900 }
4901
4902 Pdata_clear(self->stack, x);
4903 return status;
4904}
4905
4906static int
4907load_setitem(UnpicklerObject *self)
4908{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004909 return do_setitems(self, Py_SIZE(self->stack) - 2);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004910}
4911
4912static int
4913load_setitems(UnpicklerObject *self)
4914{
4915 return do_setitems(self, marker(self));
4916}
4917
4918static int
4919load_build(UnpicklerObject *self)
4920{
4921 PyObject *state, *inst, *slotstate;
4922 PyObject *setstate;
4923 int status = 0;
4924
4925 /* Stack is ... instance, state. We want to leave instance at
4926 * the stack top, possibly mutated via instance.__setstate__(state).
4927 */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004928 if (Py_SIZE(self->stack) < 2)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004929 return stack_underflow();
4930
4931 PDATA_POP(self->stack, state);
4932 if (state == NULL)
4933 return -1;
4934
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004935 inst = self->stack->data[Py_SIZE(self->stack) - 1];
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004936
4937 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004938 if (setstate == NULL) {
4939 if (PyErr_ExceptionMatches(PyExc_AttributeError))
4940 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00004941 else {
4942 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004943 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00004944 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004945 }
4946 else {
4947 PyObject *result;
4948
4949 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004950 /* Ugh... this does not leak since _Unpickler_FastCall() steals the
Antoine Pitroud79dc622008-09-05 00:03:33 +00004951 reference to state first. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00004952 result = _Unpickler_FastCall(self, setstate, state);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004953 Py_DECREF(setstate);
4954 if (result == NULL)
4955 return -1;
4956 Py_DECREF(result);
4957 return 0;
4958 }
4959
4960 /* A default __setstate__. First see whether state embeds a
4961 * slot state dict too (a proto 2 addition).
4962 */
4963 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4964 PyObject *tmp = state;
4965
4966 state = PyTuple_GET_ITEM(tmp, 0);
4967 slotstate = PyTuple_GET_ITEM(tmp, 1);
4968 Py_INCREF(state);
4969 Py_INCREF(slotstate);
4970 Py_DECREF(tmp);
4971 }
4972 else
4973 slotstate = NULL;
4974
4975 /* Set inst.__dict__ from the state dict (if any). */
4976 if (state != Py_None) {
4977 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004978 PyObject *d_key, *d_value;
4979 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004980
4981 if (!PyDict_Check(state)) {
4982 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4983 goto error;
4984 }
4985 dict = PyObject_GetAttrString(inst, "__dict__");
4986 if (dict == NULL)
4987 goto error;
4988
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004989 i = 0;
4990 while (PyDict_Next(state, &i, &d_key, &d_value)) {
4991 /* normally the keys for instance attributes are
4992 interned. we should try to do that here. */
4993 Py_INCREF(d_key);
4994 if (PyUnicode_CheckExact(d_key))
4995 PyUnicode_InternInPlace(&d_key);
4996 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
4997 Py_DECREF(d_key);
4998 goto error;
4999 }
5000 Py_DECREF(d_key);
5001 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005002 Py_DECREF(dict);
5003 }
5004
5005 /* Also set instance attributes from the slotstate dict (if any). */
5006 if (slotstate != NULL) {
5007 PyObject *d_key, *d_value;
5008 Py_ssize_t i;
5009
5010 if (!PyDict_Check(slotstate)) {
5011 PyErr_SetString(UnpicklingError,
5012 "slot state is not a dictionary");
5013 goto error;
5014 }
5015 i = 0;
5016 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
5017 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
5018 goto error;
5019 }
5020 }
5021
5022 if (0) {
5023 error:
5024 status = -1;
5025 }
5026
5027 Py_DECREF(state);
5028 Py_XDECREF(slotstate);
5029 return status;
5030}
5031
5032static int
5033load_mark(UnpicklerObject *self)
5034{
5035
5036 /* Note that we split the (pickle.py) stack into two stacks, an
5037 * object stack and a mark stack. Here we push a mark onto the
5038 * mark stack.
5039 */
5040
5041 if ((self->num_marks + 1) >= self->marks_size) {
5042 size_t alloc;
5043 int *marks;
5044
5045 /* Use the size_t type to check for overflow. */
5046 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00005047 if (alloc > PY_SSIZE_T_MAX ||
5048 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005049 PyErr_NoMemory();
5050 return -1;
5051 }
5052
5053 if (self->marks == NULL)
5054 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
5055 else
5056 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
5057 if (marks == NULL) {
5058 PyErr_NoMemory();
5059 return -1;
5060 }
5061 self->marks = marks;
5062 self->marks_size = (Py_ssize_t)alloc;
5063 }
5064
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005065 self->marks[self->num_marks++] = Py_SIZE(self->stack);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005066
5067 return 0;
5068}
5069
5070static int
5071load_reduce(UnpicklerObject *self)
5072{
5073 PyObject *callable = NULL;
5074 PyObject *argtup = NULL;
5075 PyObject *obj = NULL;
5076
5077 PDATA_POP(self->stack, argtup);
5078 if (argtup == NULL)
5079 return -1;
5080 PDATA_POP(self->stack, callable);
5081 if (callable) {
Alexander Belopolskyd92f0402010-07-17 22:50:45 +00005082 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005083 Py_DECREF(callable);
5084 }
5085 Py_DECREF(argtup);
5086
5087 if (obj == NULL)
5088 return -1;
5089
5090 PDATA_PUSH(self->stack, obj, -1);
5091 return 0;
5092}
5093
5094/* Just raises an error if we don't know the protocol specified. PROTO
5095 * is the first opcode for protocols >= 2.
5096 */
5097static int
5098load_proto(UnpicklerObject *self)
5099{
5100 char *s;
5101 int i;
5102
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005103 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005104 return -1;
5105
5106 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005107 if (i <= HIGHEST_PROTOCOL) {
5108 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005109 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005110 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005111
5112 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
5113 return -1;
5114}
5115
5116static PyObject *
5117load(UnpicklerObject *self)
5118{
5119 PyObject *err;
5120 PyObject *value = NULL;
5121 char *s;
5122
5123 self->num_marks = 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005124 if (Py_SIZE(self->stack))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005125 Pdata_clear(self->stack, 0);
5126
5127 /* Convenient macros for the dispatch while-switch loop just below. */
5128#define OP(opcode, load_func) \
5129 case opcode: if (load_func(self) < 0) break; continue;
5130
5131#define OP_ARG(opcode, load_func, arg) \
5132 case opcode: if (load_func(self, (arg)) < 0) break; continue;
5133
5134 while (1) {
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005135 if (_Unpickler_Read(self, &s, 1) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005136 break;
5137
5138 switch ((enum opcode)s[0]) {
5139 OP(NONE, load_none)
5140 OP(BININT, load_binint)
5141 OP(BININT1, load_binint1)
5142 OP(BININT2, load_binint2)
5143 OP(INT, load_int)
5144 OP(LONG, load_long)
5145 OP_ARG(LONG1, load_counted_long, 1)
5146 OP_ARG(LONG4, load_counted_long, 4)
5147 OP(FLOAT, load_float)
5148 OP(BINFLOAT, load_binfloat)
5149 OP(BINBYTES, load_binbytes)
5150 OP(SHORT_BINBYTES, load_short_binbytes)
5151 OP(BINSTRING, load_binstring)
5152 OP(SHORT_BINSTRING, load_short_binstring)
5153 OP(STRING, load_string)
5154 OP(UNICODE, load_unicode)
5155 OP(BINUNICODE, load_binunicode)
5156 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
5157 OP_ARG(TUPLE1, load_counted_tuple, 1)
5158 OP_ARG(TUPLE2, load_counted_tuple, 2)
5159 OP_ARG(TUPLE3, load_counted_tuple, 3)
5160 OP(TUPLE, load_tuple)
5161 OP(EMPTY_LIST, load_empty_list)
5162 OP(LIST, load_list)
5163 OP(EMPTY_DICT, load_empty_dict)
5164 OP(DICT, load_dict)
5165 OP(OBJ, load_obj)
5166 OP(INST, load_inst)
5167 OP(NEWOBJ, load_newobj)
5168 OP(GLOBAL, load_global)
5169 OP(APPEND, load_append)
5170 OP(APPENDS, load_appends)
5171 OP(BUILD, load_build)
5172 OP(DUP, load_dup)
5173 OP(BINGET, load_binget)
5174 OP(LONG_BINGET, load_long_binget)
5175 OP(GET, load_get)
5176 OP(MARK, load_mark)
5177 OP(BINPUT, load_binput)
5178 OP(LONG_BINPUT, load_long_binput)
5179 OP(PUT, load_put)
5180 OP(POP, load_pop)
5181 OP(POP_MARK, load_pop_mark)
5182 OP(SETITEM, load_setitem)
5183 OP(SETITEMS, load_setitems)
5184 OP(PERSID, load_persid)
5185 OP(BINPERSID, load_binpersid)
5186 OP(REDUCE, load_reduce)
5187 OP(PROTO, load_proto)
5188 OP_ARG(EXT1, load_extension, 1)
5189 OP_ARG(EXT2, load_extension, 2)
5190 OP_ARG(EXT4, load_extension, 4)
5191 OP_ARG(NEWTRUE, load_bool, Py_True)
5192 OP_ARG(NEWFALSE, load_bool, Py_False)
5193
5194 case STOP:
5195 break;
5196
5197 case '\0':
5198 PyErr_SetNone(PyExc_EOFError);
5199 return NULL;
5200
5201 default:
5202 PyErr_Format(UnpicklingError,
5203 "invalid load key, '%c'.", s[0]);
5204 return NULL;
5205 }
5206
5207 break; /* and we are done! */
5208 }
5209
5210 /* XXX: It is not clear what this is actually for. */
5211 if ((err = PyErr_Occurred())) {
5212 if (err == PyExc_EOFError) {
5213 PyErr_SetNone(PyExc_EOFError);
5214 }
5215 return NULL;
5216 }
5217
5218 PDATA_POP(self->stack, value);
5219 return value;
5220}
5221
5222PyDoc_STRVAR(Unpickler_load_doc,
5223"load() -> object. Load a pickle."
5224"\n"
5225"Read a pickled object representation from the open file object given in\n"
5226"the constructor, and return the reconstituted object hierarchy specified\n"
5227"therein.\n");
5228
5229static PyObject *
5230Unpickler_load(UnpicklerObject *self)
5231{
5232 /* Check whether the Unpickler was initialized correctly. This prevents
5233 segfaulting if a subclass overridden __init__ with a function that does
5234 not call Unpickler.__init__(). Here, we simply ensure that self->read
5235 is not NULL. */
5236 if (self->read == NULL) {
5237 PyErr_Format(UnpicklingError,
5238 "Unpickler.__init__() was not called by %s.__init__()",
5239 Py_TYPE(self)->tp_name);
5240 return NULL;
5241 }
5242
5243 return load(self);
5244}
5245
5246/* The name of find_class() is misleading. In newer pickle protocols, this
5247 function is used for loading any global (i.e., functions), not just
5248 classes. The name is kept only for backward compatibility. */
5249
5250PyDoc_STRVAR(Unpickler_find_class_doc,
5251"find_class(module_name, global_name) -> object.\n"
5252"\n"
5253"Return an object from a specified module, importing the module if\n"
5254"necessary. Subclasses may override this method (e.g. to restrict\n"
5255"unpickling of arbitrary classes and functions).\n"
5256"\n"
5257"This method is called whenever a class or a function object is\n"
5258"needed. Both arguments passed are str objects.\n");
5259
5260static PyObject *
5261Unpickler_find_class(UnpicklerObject *self, PyObject *args)
5262{
5263 PyObject *global;
5264 PyObject *modules_dict;
5265 PyObject *module;
5266 PyObject *module_name, *global_name;
5267
5268 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
5269 &module_name, &global_name))
5270 return NULL;
5271
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005272 /* Try to map the old names used in Python 2.x to the new ones used in
5273 Python 3.x. We do this only with old pickle protocols and when the
5274 user has not disabled the feature. */
5275 if (self->proto < 3 && self->fix_imports) {
5276 PyObject *key;
5277 PyObject *item;
5278
5279 /* Check if the global (i.e., a function or a class) was renamed
5280 or moved to another module. */
5281 key = PyTuple_Pack(2, module_name, global_name);
5282 if (key == NULL)
5283 return NULL;
5284 item = PyDict_GetItemWithError(name_mapping_2to3, key);
5285 Py_DECREF(key);
5286 if (item) {
5287 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
5288 PyErr_Format(PyExc_RuntimeError,
5289 "_compat_pickle.NAME_MAPPING values should be "
5290 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
5291 return NULL;
5292 }
5293 module_name = PyTuple_GET_ITEM(item, 0);
5294 global_name = PyTuple_GET_ITEM(item, 1);
5295 if (!PyUnicode_Check(module_name) ||
5296 !PyUnicode_Check(global_name)) {
5297 PyErr_Format(PyExc_RuntimeError,
5298 "_compat_pickle.NAME_MAPPING values should be "
5299 "pairs of str, not (%.200s, %.200s)",
5300 Py_TYPE(module_name)->tp_name,
5301 Py_TYPE(global_name)->tp_name);
5302 return NULL;
5303 }
5304 }
5305 else if (PyErr_Occurred()) {
5306 return NULL;
5307 }
5308
5309 /* Check if the module was renamed. */
5310 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
5311 if (item) {
5312 if (!PyUnicode_Check(item)) {
5313 PyErr_Format(PyExc_RuntimeError,
5314 "_compat_pickle.IMPORT_MAPPING values should be "
5315 "strings, not %.200s", Py_TYPE(item)->tp_name);
5316 return NULL;
5317 }
5318 module_name = item;
5319 }
5320 else if (PyErr_Occurred()) {
5321 return NULL;
5322 }
5323 }
5324
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005325 modules_dict = PySys_GetObject("modules");
5326 if (modules_dict == NULL)
5327 return NULL;
5328
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005329 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005330 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005331 if (PyErr_Occurred())
5332 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005333 module = PyImport_Import(module_name);
5334 if (module == NULL)
5335 return NULL;
5336 global = PyObject_GetAttr(module, global_name);
5337 Py_DECREF(module);
5338 }
5339 else {
5340 global = PyObject_GetAttr(module, global_name);
5341 }
5342 return global;
5343}
5344
5345static struct PyMethodDef Unpickler_methods[] = {
5346 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
5347 Unpickler_load_doc},
5348 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
5349 Unpickler_find_class_doc},
5350 {NULL, NULL} /* sentinel */
5351};
5352
5353static void
5354Unpickler_dealloc(UnpicklerObject *self)
5355{
5356 PyObject_GC_UnTrack((PyObject *)self);
5357 Py_XDECREF(self->readline);
5358 Py_XDECREF(self->read);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005359 Py_XDECREF(self->stack);
5360 Py_XDECREF(self->pers_func);
5361 Py_XDECREF(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005362 if (self->buffer.buf != NULL) {
5363 PyBuffer_Release(&self->buffer);
5364 self->buffer.buf = NULL;
5365 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005366
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005367 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005368 PyMem_Free(self->marks);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005369 PyMem_Free(self->input_line);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005370 free(self->encoding);
5371 free(self->errors);
5372
5373 Py_TYPE(self)->tp_free((PyObject *)self);
5374}
5375
5376static int
5377Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
5378{
5379 Py_VISIT(self->readline);
5380 Py_VISIT(self->read);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005381 Py_VISIT(self->stack);
5382 Py_VISIT(self->pers_func);
5383 Py_VISIT(self->arg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005384 return 0;
5385}
5386
5387static int
5388Unpickler_clear(UnpicklerObject *self)
5389{
5390 Py_CLEAR(self->readline);
5391 Py_CLEAR(self->read);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005392 Py_CLEAR(self->stack);
5393 Py_CLEAR(self->pers_func);
5394 Py_CLEAR(self->arg);
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005395 if (self->buffer.buf != NULL) {
5396 PyBuffer_Release(&self->buffer);
5397 self->buffer.buf = NULL;
5398 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005399
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005400 _Unpickler_MemoCleanup(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005401 PyMem_Free(self->marks);
5402 self->marks = NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005403 PyMem_Free(self->input_line);
5404 self->input_line = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005405 free(self->encoding);
5406 self->encoding = NULL;
5407 free(self->errors);
5408 self->errors = NULL;
5409
5410 return 0;
5411}
5412
5413PyDoc_STRVAR(Unpickler_doc,
5414"Unpickler(file, *, encoding='ASCII', errors='strict')"
5415"\n"
5416"This takes a binary file for reading a pickle data stream.\n"
5417"\n"
5418"The protocol version of the pickle is detected automatically, so no\n"
5419"proto argument is needed.\n"
5420"\n"
5421"The file-like object must have two methods, a read() method\n"
5422"that takes an integer argument, and a readline() method that\n"
5423"requires no arguments. Both methods should return bytes.\n"
5424"Thus file-like object can be a binary file object opened for\n"
5425"reading, a BytesIO object, or any other custom object that\n"
5426"meets this interface.\n"
5427"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005428"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
5429"which are used to control compatiblity support for pickle stream\n"
5430"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
5431"map the old Python 2.x names to the new names used in Python 3.x. The\n"
5432"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
5433"instances pickled by Python 2.x; these default to 'ASCII' and\n"
5434"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005435
5436static int
5437Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
5438{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005439 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005440 PyObject *file;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005441 PyObject *fix_imports = Py_True;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005442 char *encoding = NULL;
5443 char *errors = NULL;
5444
5445 /* XXX: That is an horrible error message. But, I don't know how to do
5446 better... */
5447 if (Py_SIZE(args) != 1) {
5448 PyErr_Format(PyExc_TypeError,
5449 "%s takes exactly one positional argument (%zd given)",
5450 Py_TYPE(self)->tp_name, Py_SIZE(args));
5451 return -1;
5452 }
5453
5454 /* Arguments parsing needs to be done in the __init__() method to allow
5455 subclasses to define their own __init__() method, which may (or may
5456 not) support Unpickler arguments. However, this means we need to be
5457 extra careful in the other Unpickler methods, since a subclass could
5458 forget to call Unpickler.__init__() thus breaking our internal
5459 invariants. */
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005460 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:Unpickler", kwlist,
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005461 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005462 return -1;
5463
5464 /* In case of multiple __init__() calls, clear previous content. */
5465 if (self->read != NULL)
5466 (void)Unpickler_clear(self);
5467
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005468 if (_Unpickler_SetInputStream(self, file) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005469 return -1;
5470
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005471 if (_Unpickler_SetInputEncoding(self, encoding, errors) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005472 return -1;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005473
5474 self->fix_imports = PyObject_IsTrue(fix_imports);
5475 if (self->fix_imports == -1)
5476 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005477
5478 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
5479 self->pers_func = PyObject_GetAttrString((PyObject *)self,
5480 "persistent_load");
5481 if (self->pers_func == NULL)
5482 return -1;
5483 }
5484 else {
5485 self->pers_func = NULL;
5486 }
5487
5488 self->stack = (Pdata *)Pdata_New();
5489 if (self->stack == NULL)
5490 return -1;
5491
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005492 self->memo_size = 32;
5493 self->memo = _Unpickler_NewMemo(self->memo_size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005494 if (self->memo == NULL)
5495 return -1;
5496
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005497 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00005498 self->proto = 0;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00005499
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005500 return 0;
5501}
5502
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005503/* Define a proxy object for the Unpickler's internal memo object. This is to
5504 * avoid breaking code like:
5505 * unpickler.memo.clear()
5506 * and
5507 * unpickler.memo = saved_memo
5508 * Is this a good idea? Not really, but we don't want to break code that uses
5509 * it. Note that we don't implement the entire mapping API here. This is
5510 * intentional, as these should be treated as black-box implementation details.
5511 *
5512 * We do, however, have to implement pickling/unpickling support because of
5513 * real-world code like cvs2svn.
5514 */
5515
5516typedef struct {
5517 PyObject_HEAD
5518 UnpicklerObject *unpickler;
5519} UnpicklerMemoProxyObject;
5520
5521PyDoc_STRVAR(ump_clear_doc,
5522"memo.clear() -> None. Remove all items from memo.");
5523
5524static PyObject *
5525ump_clear(UnpicklerMemoProxyObject *self)
5526{
5527 _Unpickler_MemoCleanup(self->unpickler);
5528 self->unpickler->memo = _Unpickler_NewMemo(self->unpickler->memo_size);
5529 if (self->unpickler->memo == NULL)
5530 return NULL;
5531 Py_RETURN_NONE;
5532}
5533
5534PyDoc_STRVAR(ump_copy_doc,
5535"memo.copy() -> new_memo. Copy the memo to a new object.");
5536
5537static PyObject *
5538ump_copy(UnpicklerMemoProxyObject *self)
5539{
5540 Py_ssize_t i;
5541 PyObject *new_memo = PyDict_New();
5542 if (new_memo == NULL)
5543 return NULL;
5544
5545 for (i = 0; i < self->unpickler->memo_size; i++) {
5546 int status;
5547 PyObject *key, *value;
5548
5549 value = self->unpickler->memo[i];
5550 if (value == NULL)
5551 continue;
5552
5553 key = PyLong_FromSsize_t(i);
5554 if (key == NULL)
5555 goto error;
5556 status = PyDict_SetItem(new_memo, key, value);
5557 Py_DECREF(key);
5558 if (status < 0)
5559 goto error;
5560 }
5561 return new_memo;
5562
5563error:
5564 Py_DECREF(new_memo);
5565 return NULL;
5566}
5567
5568PyDoc_STRVAR(ump_reduce_doc,
5569"memo.__reduce__(). Pickling support.");
5570
5571static PyObject *
5572ump_reduce(UnpicklerMemoProxyObject *self, PyObject *args)
5573{
5574 PyObject *reduce_value;
5575 PyObject *constructor_args;
5576 PyObject *contents = ump_copy(self);
5577 if (contents == NULL)
5578 return NULL;
5579
5580 reduce_value = PyTuple_New(2);
5581 if (reduce_value == NULL) {
5582 Py_DECREF(contents);
5583 return NULL;
5584 }
5585 constructor_args = PyTuple_New(1);
5586 if (constructor_args == NULL) {
5587 Py_DECREF(contents);
5588 Py_DECREF(reduce_value);
5589 return NULL;
5590 }
5591 PyTuple_SET_ITEM(constructor_args, 0, contents);
5592 Py_INCREF((PyObject *)&PyDict_Type);
5593 PyTuple_SET_ITEM(reduce_value, 0, (PyObject *)&PyDict_Type);
5594 PyTuple_SET_ITEM(reduce_value, 1, constructor_args);
5595 return reduce_value;
5596}
5597
5598static PyMethodDef unpicklerproxy_methods[] = {
5599 {"clear", (PyCFunction)ump_clear, METH_NOARGS, ump_clear_doc},
5600 {"copy", (PyCFunction)ump_copy, METH_NOARGS, ump_copy_doc},
5601 {"__reduce__", (PyCFunction)ump_reduce, METH_VARARGS, ump_reduce_doc},
5602 {NULL, NULL} /* sentinel */
5603};
5604
5605static void
5606UnpicklerMemoProxy_dealloc(UnpicklerMemoProxyObject *self)
5607{
5608 PyObject_GC_UnTrack(self);
5609 Py_XDECREF(self->unpickler);
5610 PyObject_GC_Del((PyObject *)self);
5611}
5612
5613static int
5614UnpicklerMemoProxy_traverse(UnpicklerMemoProxyObject *self,
5615 visitproc visit, void *arg)
5616{
5617 Py_VISIT(self->unpickler);
5618 return 0;
5619}
5620
5621static int
5622UnpicklerMemoProxy_clear(UnpicklerMemoProxyObject *self)
5623{
5624 Py_CLEAR(self->unpickler);
5625 return 0;
5626}
5627
5628static PyTypeObject UnpicklerMemoProxyType = {
5629 PyVarObject_HEAD_INIT(NULL, 0)
5630 "_pickle.UnpicklerMemoProxy", /*tp_name*/
5631 sizeof(UnpicklerMemoProxyObject), /*tp_basicsize*/
5632 0,
5633 (destructor)UnpicklerMemoProxy_dealloc, /* tp_dealloc */
5634 0, /* tp_print */
5635 0, /* tp_getattr */
5636 0, /* tp_setattr */
5637 0, /* tp_compare */
5638 0, /* tp_repr */
5639 0, /* tp_as_number */
5640 0, /* tp_as_sequence */
5641 0, /* tp_as_mapping */
5642 (hashfunc)PyObject_HashNotImplemented, /* tp_hash */
5643 0, /* tp_call */
5644 0, /* tp_str */
5645 PyObject_GenericGetAttr, /* tp_getattro */
5646 PyObject_GenericSetAttr, /* tp_setattro */
5647 0, /* tp_as_buffer */
5648 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
5649 0, /* tp_doc */
5650 (traverseproc)UnpicklerMemoProxy_traverse, /* tp_traverse */
5651 (inquiry)UnpicklerMemoProxy_clear, /* tp_clear */
5652 0, /* tp_richcompare */
5653 0, /* tp_weaklistoffset */
5654 0, /* tp_iter */
5655 0, /* tp_iternext */
5656 unpicklerproxy_methods, /* tp_methods */
5657};
5658
5659static PyObject *
5660UnpicklerMemoProxy_New(UnpicklerObject *unpickler)
5661{
5662 UnpicklerMemoProxyObject *self;
5663
5664 self = PyObject_GC_New(UnpicklerMemoProxyObject,
5665 &UnpicklerMemoProxyType);
5666 if (self == NULL)
5667 return NULL;
5668 Py_INCREF(unpickler);
5669 self->unpickler = unpickler;
5670 PyObject_GC_Track(self);
5671 return (PyObject *)self;
5672}
5673
5674/*****************************************************************************/
5675
5676
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005677static PyObject *
5678Unpickler_get_memo(UnpicklerObject *self)
5679{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005680 return UnpicklerMemoProxy_New(self);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005681}
5682
5683static int
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005684Unpickler_set_memo(UnpicklerObject *self, PyObject *obj)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005685{
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005686 PyObject **new_memo;
5687 Py_ssize_t new_memo_size = 0;
5688 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005689
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005690 if (obj == NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005691 PyErr_SetString(PyExc_TypeError,
5692 "attribute deletion is not supported");
5693 return -1;
5694 }
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005695
5696 if (Py_TYPE(obj) == &UnpicklerMemoProxyType) {
5697 UnpicklerObject *unpickler =
5698 ((UnpicklerMemoProxyObject *)obj)->unpickler;
5699
5700 new_memo_size = unpickler->memo_size;
5701 new_memo = _Unpickler_NewMemo(new_memo_size);
5702 if (new_memo == NULL)
5703 return -1;
5704
5705 for (i = 0; i < new_memo_size; i++) {
5706 Py_XINCREF(unpickler->memo[i]);
5707 new_memo[i] = unpickler->memo[i];
5708 }
5709 }
5710 else if (PyDict_Check(obj)) {
5711 Py_ssize_t i = 0;
5712 PyObject *key, *value;
5713
5714 new_memo_size = PyDict_Size(obj);
5715 new_memo = _Unpickler_NewMemo(new_memo_size);
5716 if (new_memo == NULL)
5717 return -1;
5718
5719 while (PyDict_Next(obj, &i, &key, &value)) {
5720 Py_ssize_t idx;
5721 if (!PyLong_Check(key)) {
5722 PyErr_SetString(PyExc_TypeError,
5723 "memo key must be integers");
5724 goto error;
5725 }
5726 idx = PyLong_AsSsize_t(key);
5727 if (idx == -1 && PyErr_Occurred())
5728 goto error;
5729 if (_Unpickler_MemoPut(self, idx, value) < 0)
5730 goto error;
5731 }
5732 }
5733 else {
5734 PyErr_Format(PyExc_TypeError,
5735 "'memo' attribute must be an UnpicklerMemoProxy object"
5736 "or dict, not %.200s", Py_TYPE(obj)->tp_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005737 return -1;
5738 }
5739
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005740 _Unpickler_MemoCleanup(self);
5741 self->memo_size = new_memo_size;
5742 self->memo = new_memo;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005743
5744 return 0;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005745
5746 error:
5747 if (new_memo_size) {
5748 i = new_memo_size;
5749 while (--i >= 0) {
5750 Py_XDECREF(new_memo[i]);
5751 }
5752 PyMem_FREE(new_memo);
5753 }
5754 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00005755}
5756
5757static PyObject *
5758Unpickler_get_persload(UnpicklerObject *self)
5759{
5760 if (self->pers_func == NULL)
5761 PyErr_SetString(PyExc_AttributeError, "persistent_load");
5762 else
5763 Py_INCREF(self->pers_func);
5764 return self->pers_func;
5765}
5766
5767static int
5768Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
5769{
5770 PyObject *tmp;
5771
5772 if (value == NULL) {
5773 PyErr_SetString(PyExc_TypeError,
5774 "attribute deletion is not supported");
5775 return -1;
5776 }
5777 if (!PyCallable_Check(value)) {
5778 PyErr_SetString(PyExc_TypeError,
5779 "persistent_load must be a callable taking "
5780 "one argument");
5781 return -1;
5782 }
5783
5784 tmp = self->pers_func;
5785 Py_INCREF(value);
5786 self->pers_func = value;
5787 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
5788
5789 return 0;
5790}
5791
/* Computed attributes of Unpickler objects.  Each entry pairs an attribute
   name with its getter and setter functions; the table is terminated by a
   NULL entry. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}
};
5798
/* Type object for _pickle.Unpickler.

   The slots are filled positionally, so their order must not change; the
   trailing comment on each line names the slot being set.  The flags
   request Py_TPFLAGS_BASETYPE and Py_TPFLAGS_HAVE_GC, and the table
   supplies tp_traverse, tp_clear and PyObject_GC_Del to go with the
   latter. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
5841
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00005842PyDoc_STRVAR(pickle_dump_doc,
5843"dump(obj, file, protocol=None, *, fix_imports=True) -> None\n"
5844"\n"
5845"Write a pickled representation of obj to the open file object file. This\n"
5846"is equivalent to ``Pickler(file, protocol).dump(obj)``, but may be more\n"
5847"efficient.\n"
5848"\n"
5849"The optional protocol argument tells the pickler to use the given protocol;\n"
5850"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
5851"backward-incompatible protocol designed for Python 3.0.\n"
5852"\n"
5853"Specifying a negative protocol version selects the highest protocol version\n"
5854"supported. The higher the protocol used, the more recent the version of\n"
5855"Python needed to read the pickle produced.\n"
5856"\n"
5857"The file argument must have a write() method that accepts a single bytes\n"
5858"argument. It can thus be a file object opened for binary writing, a\n"
5859"io.BytesIO instance, or any other custom object that meets this interface.\n"
5860"\n"
5861"If fix_imports is True and protocol is less than 3, pickle will try to\n"
5862"map the new Python 3.x names to the old module names used in Python 2.x,\n"
5863"so that the pickle data stream is readable with Python 2.x.\n");
5864
5865static PyObject *
5866pickle_dump(PyObject *self, PyObject *args, PyObject *kwds)
5867{
5868 static char *kwlist[] = {"obj", "file", "protocol", "fix_imports", 0};
5869 PyObject *obj;
5870 PyObject *file;
5871 PyObject *proto = NULL;
5872 PyObject *fix_imports = Py_True;
5873 PicklerObject *pickler;
5874
5875 /* fix_imports is a keyword-only argument. */
5876 if (Py_SIZE(args) > 3) {
5877 PyErr_Format(PyExc_TypeError,
5878 "pickle.dump() takes at most 3 positional "
5879 "argument (%zd given)", Py_SIZE(args));
5880 return NULL;
5881 }
5882
5883 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|OO:dump", kwlist,
5884 &obj, &file, &proto, &fix_imports))
5885 return NULL;
5886
5887 pickler = _Pickler_New();
5888 if (pickler == NULL)
5889 return NULL;
5890
5891 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5892 goto error;
5893
5894 if (_Pickler_SetOutputStream(pickler, file) < 0)
5895 goto error;
5896
5897 if (dump(pickler, obj) < 0)
5898 goto error;
5899
5900 if (_Pickler_FlushToFile(pickler) < 0)
5901 goto error;
5902
5903 Py_DECREF(pickler);
5904 Py_RETURN_NONE;
5905
5906 error:
5907 Py_XDECREF(pickler);
5908 return NULL;
5909}
5910
/* Docstring for the module-level dumps() function. */
PyDoc_STRVAR(pickle_dumps_doc,
"dumps(obj, protocol=None, *, fix_imports=True) -> bytes\n"
"\n"
"Return the pickled representation of the object as a bytes\n"
"object, instead of writing it to a file.\n"
"\n"
"The optional protocol argument tells the pickler to use the given protocol;\n"
"supported protocols are 0, 1, 2, 3. The default protocol is 3; a\n"
"backward-incompatible protocol designed for Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest protocol version\n"
"supported. The higher the protocol used, the more recent the version of\n"
"Python needed to read the pickle produced.\n"
"\n"
"If fix_imports is True and *protocol* is less than 3, pickle will try to\n"
"map the new Python 3.x names to the old module names used in Python 2.x,\n"
"so that the pickle data stream is readable with Python 2.x.\n");
5928
5929static PyObject *
5930pickle_dumps(PyObject *self, PyObject *args, PyObject *kwds)
5931{
5932 static char *kwlist[] = {"obj", "protocol", "fix_imports", 0};
5933 PyObject *obj;
5934 PyObject *proto = NULL;
5935 PyObject *result;
5936 PyObject *fix_imports = Py_True;
5937 PicklerObject *pickler;
5938
5939 /* fix_imports is a keyword-only argument. */
5940 if (Py_SIZE(args) > 2) {
5941 PyErr_Format(PyExc_TypeError,
5942 "pickle.dumps() takes at most 2 positional "
5943 "argument (%zd given)", Py_SIZE(args));
5944 return NULL;
5945 }
5946
5947 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|OO:dumps", kwlist,
5948 &obj, &proto, &fix_imports))
5949 return NULL;
5950
5951 pickler = _Pickler_New();
5952 if (pickler == NULL)
5953 return NULL;
5954
5955 if (_Pickler_SetProtocol(pickler, proto, fix_imports) < 0)
5956 goto error;
5957
5958 if (dump(pickler, obj) < 0)
5959 goto error;
5960
5961 result = _Pickler_GetString(pickler);
5962 Py_DECREF(pickler);
5963 return result;
5964
5965 error:
5966 Py_XDECREF(pickler);
5967 return NULL;
5968}
5969
5970PyDoc_STRVAR(pickle_load_doc,
5971"load(file, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
5972"\n"
5973"Read a pickled object representation from the open file object file and\n"
5974"return the reconstituted object hierarchy specified therein. This is\n"
5975"equivalent to ``Unpickler(file).load()``, but may be more efficient.\n"
5976"\n"
5977"The protocol version of the pickle is detected automatically, so no protocol\n"
5978"argument is needed. Bytes past the pickled object's representation are\n"
5979"ignored.\n"
5980"\n"
5981"The argument file must have two methods, a read() method that takes an\n"
5982"integer argument, and a readline() method that requires no arguments. Both\n"
5983"methods should return bytes. Thus *file* can be a binary file object opened\n"
5984"for reading, a BytesIO object, or any other custom object that meets this\n"
5985"interface.\n"
5986"\n"
5987"Optional keyword arguments are fix_imports, encoding and errors,\n"
5988"which are used to control compatiblity support for pickle stream generated\n"
5989"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
5990"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
5991"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
5992"2.x; these default to 'ASCII' and 'strict', respectively.\n");
5993
5994static PyObject *
5995pickle_load(PyObject *self, PyObject *args, PyObject *kwds)
5996{
5997 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
5998 PyObject *file;
5999 PyObject *fix_imports = Py_True;
6000 PyObject *result;
6001 char *encoding = NULL;
6002 char *errors = NULL;
6003 UnpicklerObject *unpickler;
6004
6005 /* fix_imports, encoding and errors are a keyword-only argument. */
6006 if (Py_SIZE(args) != 1) {
6007 PyErr_Format(PyExc_TypeError,
6008 "pickle.load() takes exactly one positional "
6009 "argument (%zd given)", Py_SIZE(args));
6010 return NULL;
6011 }
6012
6013 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:load", kwlist,
6014 &file, &fix_imports, &encoding, &errors))
6015 return NULL;
6016
6017 unpickler = _Unpickler_New();
6018 if (unpickler == NULL)
6019 return NULL;
6020
6021 if (_Unpickler_SetInputStream(unpickler, file) < 0)
6022 goto error;
6023
6024 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6025 goto error;
6026
6027 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6028 if (unpickler->fix_imports == -1)
6029 goto error;
6030
6031 result = load(unpickler);
6032 Py_DECREF(unpickler);
6033 return result;
6034
6035 error:
6036 Py_XDECREF(unpickler);
6037 return NULL;
6038}
6039
6040PyDoc_STRVAR(pickle_loads_doc,
6041"loads(input, *, fix_imports=True, encoding='ASCII', errors='strict') -> object\n"
6042"\n"
6043"Read a pickled object hierarchy from a bytes object and return the\n"
6044"reconstituted object hierarchy specified therein\n"
6045"\n"
6046"The protocol version of the pickle is detected automatically, so no protocol\n"
6047"argument is needed. Bytes past the pickled object's representation are\n"
6048"ignored.\n"
6049"\n"
6050"Optional keyword arguments are fix_imports, encoding and errors, which\n"
6051"are used to control compatiblity support for pickle stream generated\n"
6052"by Python 2.x. If fix_imports is True, pickle will try to map the old\n"
6053"Python 2.x names to the new names used in Python 3.x. The encoding and\n"
6054"errors tell pickle how to decode 8-bit string instances pickled by Python\n"
6055"2.x; these default to 'ASCII' and 'strict', respectively.\n");
6056
6057static PyObject *
6058pickle_loads(PyObject *self, PyObject *args, PyObject *kwds)
6059{
6060 static char *kwlist[] = {"input", "fix_imports", "encoding", "errors", 0};
6061 PyObject *input;
6062 PyObject *fix_imports = Py_True;
6063 PyObject *result;
6064 char *encoding = NULL;
6065 char *errors = NULL;
6066 UnpicklerObject *unpickler;
6067
6068 /* fix_imports, encoding and errors are a keyword-only argument. */
6069 if (Py_SIZE(args) != 1) {
6070 PyErr_Format(PyExc_TypeError,
6071 "pickle.loads() takes exactly one positional "
6072 "argument (%zd given)", Py_SIZE(args));
6073 return NULL;
6074 }
6075
6076 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oss:loads", kwlist,
6077 &input, &fix_imports, &encoding, &errors))
6078 return NULL;
6079
6080 unpickler = _Unpickler_New();
6081 if (unpickler == NULL)
6082 return NULL;
6083
6084 if (_Unpickler_SetStringInput(unpickler, input) < 0)
6085 goto error;
6086
6087 if (_Unpickler_SetInputEncoding(unpickler, encoding, errors) < 0)
6088 goto error;
6089
6090 unpickler->fix_imports = PyObject_IsTrue(fix_imports);
6091 if (unpickler->fix_imports == -1)
6092 goto error;
6093
6094 result = load(unpickler);
6095 Py_DECREF(unpickler);
6096 return result;
6097
6098 error:
6099 Py_XDECREF(unpickler);
6100 return NULL;
6101}
6102
6103
/* Module-level functions exported by _pickle.  Every entry is registered
   with METH_VARARGS|METH_KEYWORDS, matching the (self, args, kwds)
   signature of the implementations, and carries its own docstring. */
static struct PyMethodDef pickle_methods[] = {
    {"dump",  (PyCFunction)pickle_dump,  METH_VARARGS|METH_KEYWORDS,
     pickle_dump_doc},
    {"dumps", (PyCFunction)pickle_dumps, METH_VARARGS|METH_KEYWORDS,
     pickle_dumps_doc},
    {"load",  (PyCFunction)pickle_load,  METH_VARARGS|METH_KEYWORDS,
     pickle_load_doc},
    {"loads", (PyCFunction)pickle_loads, METH_VARARGS|METH_KEYWORDS,
     pickle_loads_doc},
    {NULL, NULL} /* sentinel */
};
6115
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006116static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006117initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006118{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006119 PyObject *copyreg = NULL;
6120 PyObject *compat_pickle = NULL;
6121
6122 /* XXX: We should ensure that the types of the dictionaries imported are
6123 exactly PyDict objects. Otherwise, it is possible to crash the pickle
6124 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006125
6126 copyreg = PyImport_ImportModule("copyreg");
6127 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006128 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006129 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
6130 if (!dispatch_table)
6131 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006132 extension_registry = \
6133 PyObject_GetAttrString(copyreg, "_extension_registry");
6134 if (!extension_registry)
6135 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006136 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
6137 if (!inverted_registry)
6138 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006139 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
6140 if (!extension_cache)
6141 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006142 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006143
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006144 /* Load the 2.x -> 3.x stdlib module mapping tables */
6145 compat_pickle = PyImport_ImportModule("_compat_pickle");
6146 if (!compat_pickle)
6147 goto error;
6148 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
6149 if (!name_mapping_2to3)
6150 goto error;
6151 if (!PyDict_CheckExact(name_mapping_2to3)) {
6152 PyErr_Format(PyExc_RuntimeError,
6153 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
6154 Py_TYPE(name_mapping_2to3)->tp_name);
6155 goto error;
6156 }
6157 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
6158 "IMPORT_MAPPING");
6159 if (!import_mapping_2to3)
6160 goto error;
6161 if (!PyDict_CheckExact(import_mapping_2to3)) {
6162 PyErr_Format(PyExc_RuntimeError,
6163 "_compat_pickle.IMPORT_MAPPING should be a dict, "
6164 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
6165 goto error;
6166 }
6167 /* ... and the 3.x -> 2.x mapping tables */
6168 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6169 "REVERSE_NAME_MAPPING");
6170 if (!name_mapping_3to2)
6171 goto error;
6172 if (!PyDict_CheckExact(name_mapping_3to2)) {
6173 PyErr_Format(PyExc_RuntimeError,
6174 "_compat_pickle.REVERSE_NAME_MAPPING shouldbe a dict, "
6175 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
6176 goto error;
6177 }
6178 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
6179 "REVERSE_IMPORT_MAPPING");
6180 if (!import_mapping_3to2)
6181 goto error;
6182 if (!PyDict_CheckExact(import_mapping_3to2)) {
6183 PyErr_Format(PyExc_RuntimeError,
6184 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
6185 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
6186 goto error;
6187 }
6188 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006189
6190 empty_tuple = PyTuple_New(0);
6191 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006192 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006193 two_tuple = PyTuple_New(2);
6194 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006195 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006196 /* We use this temp container with no regard to refcounts, or to
6197 * keeping containees alive. Exempt from GC, because we don't
6198 * want anything looking at two_tuple() by magic.
6199 */
6200 PyObject_GC_UnTrack(two_tuple);
6201
6202 return 0;
6203
6204 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006205 Py_CLEAR(copyreg);
6206 Py_CLEAR(dispatch_table);
6207 Py_CLEAR(extension_registry);
6208 Py_CLEAR(inverted_registry);
6209 Py_CLEAR(extension_cache);
6210 Py_CLEAR(compat_pickle);
6211 Py_CLEAR(name_mapping_2to3);
6212 Py_CLEAR(import_mapping_2to3);
6213 Py_CLEAR(name_mapping_3to2);
6214 Py_CLEAR(import_mapping_3to2);
6215 Py_CLEAR(empty_tuple);
6216 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006217 return -1;
6218}
6219
/* Module definition for _pickle, passed to PyModule_Create() by the module
   init function.  The initializer is positional.
   NOTE(review): the slot labels below follow the documented PyModuleDef
   layout; the first NULL slot is called m_reload in early 3.x releases and
   m_slots in later ones -- confirm against the targeted CPython version. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",            /* m_name */
    pickle_module_doc,    /* m_doc */
    -1,                   /* m_size */
    pickle_methods,       /* m_methods */
    NULL,                 /* m_reload */
    NULL,                 /* m_traverse */
    NULL,                 /* m_clear */
    NULL                  /* m_free */
};
6231
6232PyMODINIT_FUNC
6233PyInit__pickle(void)
6234{
6235 PyObject *m;
6236
6237 if (PyType_Ready(&Unpickler_Type) < 0)
6238 return NULL;
6239 if (PyType_Ready(&Pickler_Type) < 0)
6240 return NULL;
6241 if (PyType_Ready(&Pdata_Type) < 0)
6242 return NULL;
Antoine Pitrouea99c5c2010-09-09 18:33:21 +00006243 if (PyType_Ready(&PicklerMemoProxyType) < 0)
6244 return NULL;
6245 if (PyType_Ready(&UnpicklerMemoProxyType) < 0)
6246 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006247
6248 /* Create the module and add the functions. */
6249 m = PyModule_Create(&_picklemodule);
6250 if (m == NULL)
6251 return NULL;
6252
6253 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
6254 return NULL;
6255 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
6256 return NULL;
6257
6258 /* Initialize the exceptions. */
6259 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
6260 if (PickleError == NULL)
6261 return NULL;
6262 PicklingError = \
6263 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
6264 if (PicklingError == NULL)
6265 return NULL;
6266 UnpicklingError = \
6267 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
6268 if (UnpicklingError == NULL)
6269 return NULL;
6270
6271 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
6272 return NULL;
6273 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
6274 return NULL;
6275 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
6276 return NULL;
6277
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00006278 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00006279 return NULL;
6280
6281 return m;
6282}