blob: 5102e2491d2dcbbfc8802b1d268598a73b298d03 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this module.  Bump these when new
   opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 3
};
12
13
/* Pickle opcodes, one byte each.  This table must be kept in step with
   pickle.py; pickletools.py carries the extensive documentation. */
enum opcode {
    /* Opcodes listed ahead of the protocol 2 additions. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* TRUE and FALSE are not opcodes.  They are the text used to pickle bools
 * before protocol 2: an INT opcode followed by "01" or "00".  Unpicklers
 * written before bools existed read them back as ints, while newer ones
 * can recognize that a bool was intended.  Protocol 2 added direct ways
 * to pickle bools.
 */
#undef TRUE
#undef FALSE
#define TRUE "I01\n"
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before emitting
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE.
       Nothing will break if the two values drift apart, but it is
       unclear that it would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the Pickler write buffer.  Larger values reduce the number
       of calls made to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000106static PyObject *PickleError = NULL;
107static PyObject *PicklingError = NULL;
108static PyObject *UnpicklingError = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000111static PyObject *dispatch_table = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000114static PyObject *extension_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000116static PyObject *inverted_registry = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000117/* copyreg._extension_cache, {code: object} */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000118static PyObject *extension_cache = NULL;
119
120/* _compat_pickle.NAME_MAPPING, {(oldmodule, oldname): (newmodule, newname)} */
121static PyObject *name_mapping_2to3 = NULL;
122/* _compat_pickle.IMPORT_MAPPING, {oldmodule: newmodule} */
123static PyObject *import_mapping_2to3 = NULL;
124/* Same, but with REVERSE_NAME_MAPPING / REVERSE_IMPORT_MAPPING */
125static PyObject *name_mapping_3to2 = NULL;
126static PyObject *import_mapping_3to2 = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000127
128/* XXX: Are these really nescessary? */
129/* As the name says, an empty tuple. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000130static PyObject *empty_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000131/* For looking up name pairs in copyreg._extension_registry. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000132static PyObject *two_tuple = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000133
134static int
135stack_underflow(void)
136{
137 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
138 return -1;
139}
140
141/* Internal data type used as the unpickling stack. */
142typedef struct {
143 PyObject_HEAD
144 int length; /* number of initial slots in data currently used */
145 int size; /* number of slots in data allocated */
146 PyObject **data;
147} Pdata;
148
149static void
150Pdata_dealloc(Pdata *self)
151{
152 int i;
153 PyObject **p;
154
155 for (i = self->length, p = self->data; --i >= 0; p++) {
156 Py_DECREF(*p);
157 }
158 if (self->data)
159 PyMem_Free(self->data);
160 PyObject_Del(self);
161}
162
163static PyTypeObject Pdata_Type = {
164 PyVarObject_HEAD_INIT(NULL, 0)
165 "_pickle.Pdata", /*tp_name*/
166 sizeof(Pdata), /*tp_basicsize*/
167 0, /*tp_itemsize*/
168 (destructor)Pdata_dealloc, /*tp_dealloc*/
169};
170
171static PyObject *
172Pdata_New(void)
173{
174 Pdata *self;
175
176 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
177 return NULL;
178 self->size = 8;
179 self->length = 0;
180 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
181 if (self->data)
182 return (PyObject *)self;
183 Py_DECREF(self);
184 return PyErr_NoMemory();
185}
186
187
188/* Retain only the initial clearto items. If clearto >= the current
189 * number of items, this is a (non-erroneous) NOP.
190 */
191static int
192Pdata_clear(Pdata *self, int clearto)
193{
194 int i;
195 PyObject **p;
196
197 if (clearto < 0)
198 return stack_underflow();
199 if (clearto >= self->length)
200 return 0;
201
202 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
203 Py_CLEAR(*p);
204 }
205 self->length = clearto;
206
207 return 0;
208}
209
210static int
211Pdata_grow(Pdata *self)
212{
213 int bigger;
214 size_t nbytes;
215 PyObject **tmp;
216
217 bigger = (self->size << 1) + 1;
218 if (bigger <= 0) /* was 0, or new value overflows */
219 goto nomemory;
220 if ((int)(size_t)bigger != bigger)
221 goto nomemory;
222 nbytes = (size_t)bigger * sizeof(PyObject *);
223 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
224 goto nomemory;
225 tmp = PyMem_Realloc(self->data, nbytes);
226 if (tmp == NULL)
227 goto nomemory;
228 self->data = tmp;
229 self->size = bigger;
230 return 0;
231
232 nomemory:
233 PyErr_NoMemory();
234 return -1;
235}
236
237/* D is a Pdata*. Pop the topmost element and store it into V, which
238 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
239 * is raised and V is set to NULL.
240 */
241static PyObject *
242Pdata_pop(Pdata *self)
243{
244 if (self->length == 0) {
245 PyErr_SetString(UnpicklingError, "bad pickle data");
246 return NULL;
247 }
248 return self->data[--(self->length)];
249}
250#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
251
252static int
253Pdata_push(Pdata *self, PyObject *obj)
254{
255 if (self->length == self->size && Pdata_grow(self) < 0) {
256 return -1;
257 }
258 self->data[self->length++] = obj;
259 return 0;
260}
261
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference taken here is released again before the
   enclosing function returns ER, so the caller's own reference count is
   left exactly as it was.  Note that O is evaluated more than once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
270
271static PyObject *
272Pdata_poptuple(Pdata *self, Py_ssize_t start)
273{
274 PyObject *tuple;
275 Py_ssize_t len, i, j;
276
277 len = self->length - start;
278 tuple = PyTuple_New(len);
279 if (tuple == NULL)
280 return NULL;
281 for (i = start, j = 0; j < len; i++, j++)
282 PyTuple_SET_ITEM(tuple, j, self->data[i]);
283
284 self->length = start;
285 return tuple;
286}
287
288static PyObject *
289Pdata_poplist(Pdata *self, Py_ssize_t start)
290{
291 PyObject *list;
292 Py_ssize_t len, i, j;
293
294 len = self->length - start;
295 list = PyList_New(len);
296 if (list == NULL)
297 return NULL;
298 for (i = start, j = 0; j < len; i++, j++)
299 PyList_SET_ITEM(list, j, self->data[i]);
300
301 self->length = start;
302 return list;
303}
304
305typedef struct PicklerObject {
306 PyObject_HEAD
307 PyObject *write; /* write() method of the output stream */
308 PyObject *memo; /* Memo dictionary, keep track of the seen
309 objects to support self-referential objects
310 pickling. */
311 PyObject *pers_func; /* persistent_id() method, can be NULL */
312 PyObject *arg;
313 int proto; /* Pickle protocol number, >= 0 */
314 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000315 int buf_size; /* Size of the current buffered pickle data */
316 char *write_buf; /* Write buffer, this is to avoid calling the
317 write() method of the output stream too
318 often. */
319 int fast; /* Enable fast mode if set to a true value.
320 The fast mode disable the usage of memo,
321 therefore speeding the pickling process by
322 not generating superfluous PUT opcodes. It
323 should not be used if with self-referential
324 objects. */
325 int fast_nesting;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000326 int fix_imports; /* Indicate whether Pickler should fix
327 the name of globals for Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000328 PyObject *fast_memo;
329} PicklerObject;
330
331typedef struct UnpicklerObject {
332 PyObject_HEAD
333 Pdata *stack; /* Pickle data stack, store unpickled objects. */
334 PyObject *readline; /* readline() method of the output stream */
335 PyObject *read; /* read() method of the output stream */
336 PyObject *memo; /* Memo dictionary, provide the objects stored
337 using the PUT opcodes. */
338 PyObject *arg;
339 PyObject *pers_func; /* persistent_load() method, can be NULL. */
340 PyObject *last_string; /* Reference to the last string read by the
341 readline() method. */
342 char *buffer; /* Reading buffer. */
343 char *encoding; /* Name of the encoding to be used for
344 decoding strings pickled using Python
345 2.x. The default value is "ASCII" */
346 char *errors; /* Name of errors handling scheme to used when
347 decoding strings. The default value is
348 "strict". */
349 int *marks; /* Mark stack, used for unpickling container
350 objects. */
351 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
352 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +0000353 int proto; /* Protocol of the pickle loaded. */
354 int fix_imports; /* Indicate whether Unpickler should fix
355 the name of globals pickled by Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000356} UnpicklerObject;
357
358/* Forward declarations */
359static int save(PicklerObject *, PyObject *, int);
360static int save_reduce(PicklerObject *, PyObject *, PyObject *);
361static PyTypeObject Pickler_Type;
362static PyTypeObject Unpickler_Type;
363
364
/* Helpers managing the 1-tuple used to pass a single argument to a
   callable.  The tuple is cached in (self)->arg so that PyTuple_New() is
   normally called only once. */

/* Install `obj` as item 0 of the cached tuple, creating the tuple on first
   use and releasing whatever item 0 previously held.  The reference to
   `obj` is consumed either way: if the tuple cannot be created, `obj` is
   released and (self)->arg stays NULL. */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple if anything besides `self` still references it,
   so that the next ARG_TUP() starts from a fresh tuple.  (self)->arg must
   not be NULL here. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1) {                       \
            Py_CLEAR((self)->arg);                              \
        }                                                       \
    } while (0)
382
383/* A temporary cleaner API for fast single argument function call.
384
385 XXX: Does caching the argument tuple provides any real performance benefits?
386
387 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
388 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
389 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
390 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
391 (i.e, call PyTuple_New() and store the returned value in an array), to save
392 one second (wall clock time). Either ways, the loading time a pickle stream
393 large enough to generate this number of calls would be massively
394 overwhelmed by other factors, like I/O throughput, the GC traversal and
395 object allocation overhead. So, I really doubt these functions provide any
396 real benefits.
397
398 On the other hand, oprofile reports that pickle spends a lot of time in
399 these functions. But, that is probably more related to the function call
400 overhead, than the argument tuple allocation.
401
402 XXX: And, what is the reference behavior of these? Steal, borrow? At first
403 glance, it seems to steal the reference of 'arg' and borrow the reference
404 of 'func'.
405 */
406static PyObject *
407pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static PyObject *
420unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
421{
422 PyObject *result = NULL;
423
424 ARG_TUP(self, arg);
425 if (self->arg) {
426 result = PyObject_Call(func, self->arg, NULL);
427 FREE_ARG_TUP(self);
428 }
429 return result;
430}
431
432static Py_ssize_t
433pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
434{
435 PyObject *data, *result;
436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000437 if (self->write_buf == NULL) {
438 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
439 return -1;
440 }
441
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000442 if (s == NULL) {
443 if (!(self->buf_size))
444 return 0;
445 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
446 if (data == NULL)
447 return -1;
448 }
449 else {
450 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
451 if (pickler_write(self, NULL, 0) < 0)
452 return -1;
453 }
454
455 if (n > WRITE_BUF_SIZE) {
456 if (!(data = PyBytes_FromStringAndSize(s, n)))
457 return -1;
458 }
459 else {
460 memcpy(self->write_buf + self->buf_size, s, n);
461 self->buf_size += n;
462 return n;
463 }
464 }
465
466 /* object with write method */
467 result = pickler_call(self, self->write, data);
468 if (result == NULL)
469 return -1;
470
471 Py_DECREF(result);
472 self->buf_size = 0;
473 return n;
474}
475
/* XXX: These read/readline functions ought to be optimized.  Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
480
481/* Read at least n characters from the input stream and set s to the current
482 reading position. */
483static Py_ssize_t
484unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
485{
486 PyObject *len;
487 PyObject *data;
488
489 len = PyLong_FromSsize_t(n);
490 if (len == NULL)
491 return -1;
492
493 data = unpickler_call(self, self->read, len);
494 if (data == NULL)
495 return -1;
496
497 /* XXX: Should bytearray be supported too? */
498 if (!PyBytes_Check(data)) {
499 PyErr_SetString(PyExc_ValueError,
Antoine Pitrouec804782010-09-04 18:46:56 +0000500 "read() from the underlying stream did not "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000501 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000502 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000503 return -1;
504 }
505
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000506 if (PyBytes_GET_SIZE(data) != n) {
507 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000508 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000509 return -1;
510 }
511
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000512 Py_XDECREF(self->last_string);
513 self->last_string = data;
514
515 if (!(*s = PyBytes_AS_STRING(data)))
516 return -1;
517
518 return n;
519}
520
521static Py_ssize_t
522unpickler_readline(UnpicklerObject *self, char **s)
523{
524 PyObject *data;
525
526 data = PyObject_CallObject(self->readline, empty_tuple);
527 if (data == NULL)
528 return -1;
529
530 /* XXX: Should bytearray be supported too? */
531 if (!PyBytes_Check(data)) {
532 PyErr_SetString(PyExc_ValueError,
Antoine Pitrouec804782010-09-04 18:46:56 +0000533 "readline() from the underlying stream did not "
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000534 "return bytes");
535 return -1;
536 }
537
538 Py_XDECREF(self->last_string);
539 self->last_string = data;
540
541 if (!(*s = PyBytes_AS_STRING(data)))
542 return -1;
543
544 return PyBytes_GET_SIZE(data);
545}
546
547/* Generate a GET opcode for an object stored in the memo. The 'key' argument
548 should be the address of the object as returned by PyLong_FromVoidPtr(). */
549static int
550memo_get(PicklerObject *self, PyObject *key)
551{
552 PyObject *value;
553 PyObject *memo_id;
554 long x;
555 char pdata[30];
556 int len;
557
558 value = PyDict_GetItemWithError(self->memo, key);
559 if (value == NULL) {
560 if (!PyErr_Occurred())
561 PyErr_SetObject(PyExc_KeyError, key);
562 return -1;
563 }
564
565 memo_id = PyTuple_GetItem(value, 0);
566 if (memo_id == NULL)
567 return -1;
568
569 if (!PyLong_Check(memo_id)) {
570 PyErr_SetString(PicklingError, "memo id must be an integer");
571 return -1;
572 }
573 x = PyLong_AsLong(memo_id);
574 if (x == -1 && PyErr_Occurred())
575 return -1;
576
577 if (!self->bin) {
578 pdata[0] = GET;
579 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
580 len = (int)strlen(pdata);
581 }
582 else {
583 if (x < 256) {
584 pdata[0] = BINGET;
585 pdata[1] = (unsigned char)(x & 0xff);
586 len = 2;
587 }
588 else if (x <= 0xffffffffL) {
589 pdata[0] = LONG_BINGET;
590 pdata[1] = (unsigned char)(x & 0xff);
591 pdata[2] = (unsigned char)((x >> 8) & 0xff);
592 pdata[3] = (unsigned char)((x >> 16) & 0xff);
593 pdata[4] = (unsigned char)((x >> 24) & 0xff);
594 len = 5;
595 }
596 else { /* unlikely */
597 PyErr_SetString(PicklingError,
598 "memo id too large for LONG_BINGET");
599 return -1;
600 }
601 }
602
603 if (pickler_write(self, pdata, len) < 0)
604 return -1;
605
606 return 0;
607}
608
609/* Store an object in the memo, assign it a new unique ID based on the number
610 of objects currently stored in the memo and generate a PUT opcode. */
611static int
612memo_put(PicklerObject *self, PyObject *obj)
613{
614 PyObject *key = NULL;
615 PyObject *memo_id = NULL;
616 PyObject *tuple = NULL;
617 long x;
618 char pdata[30];
619 int len;
620 int status = 0;
621
622 if (self->fast)
623 return 0;
624
625 key = PyLong_FromVoidPtr(obj);
626 if (key == NULL)
627 goto error;
628 if ((x = PyDict_Size(self->memo)) < 0)
629 goto error;
630 memo_id = PyLong_FromLong(x);
631 if (memo_id == NULL)
632 goto error;
633 tuple = PyTuple_New(2);
634 if (tuple == NULL)
635 goto error;
636
637 Py_INCREF(memo_id);
638 PyTuple_SET_ITEM(tuple, 0, memo_id);
639 Py_INCREF(obj);
640 PyTuple_SET_ITEM(tuple, 1, obj);
641 if (PyDict_SetItem(self->memo, key, tuple) < 0)
642 goto error;
643
644 if (!self->bin) {
645 pdata[0] = PUT;
646 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
647 len = strlen(pdata);
648 }
649 else {
650 if (x < 256) {
651 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000652 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000653 len = 2;
654 }
655 else if (x <= 0xffffffffL) {
656 pdata[0] = LONG_BINPUT;
657 pdata[1] = (unsigned char)(x & 0xff);
658 pdata[2] = (unsigned char)((x >> 8) & 0xff);
659 pdata[3] = (unsigned char)((x >> 16) & 0xff);
660 pdata[4] = (unsigned char)((x >> 24) & 0xff);
661 len = 5;
662 }
663 else { /* unlikely */
664 PyErr_SetString(PicklingError,
665 "memo id too large for LONG_BINPUT");
666 return -1;
667 }
668 }
669
670 if (pickler_write(self, pdata, len) < 0)
671 goto error;
672
673 if (0) {
674 error:
675 status = -1;
676 }
677
678 Py_XDECREF(key);
679 Py_XDECREF(memo_id);
680 Py_XDECREF(tuple);
681
682 return status;
683}
684
685static PyObject *
686whichmodule(PyObject *global, PyObject *global_name)
687{
688 Py_ssize_t i, j;
689 static PyObject *module_str = NULL;
690 static PyObject *main_str = NULL;
691 PyObject *module_name;
692 PyObject *modules_dict;
693 PyObject *module;
694 PyObject *obj;
695
696 if (module_str == NULL) {
697 module_str = PyUnicode_InternFromString("__module__");
698 if (module_str == NULL)
699 return NULL;
700 main_str = PyUnicode_InternFromString("__main__");
701 if (main_str == NULL)
702 return NULL;
703 }
704
705 module_name = PyObject_GetAttr(global, module_str);
706
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +0000707 /* In some rare cases (e.g., bound methods of extension types),
708 __module__ can be None. If it is so, then search sys.modules
709 for the module of global. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000710 if (module_name == Py_None) {
711 Py_DECREF(module_name);
712 goto search;
713 }
714
715 if (module_name) {
716 return module_name;
717 }
718 if (PyErr_ExceptionMatches(PyExc_AttributeError))
719 PyErr_Clear();
720 else
721 return NULL;
722
723 search:
724 modules_dict = PySys_GetObject("modules");
725 if (modules_dict == NULL)
726 return NULL;
727
728 i = 0;
729 module_name = NULL;
730 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
Mark Dickinson211c6252009-02-01 10:28:51 +0000731 if (PyObject_RichCompareBool(module_name, main_str, Py_EQ) == 1)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000732 continue;
733
734 obj = PyObject_GetAttr(module, global_name);
735 if (obj == NULL) {
736 if (PyErr_ExceptionMatches(PyExc_AttributeError))
737 PyErr_Clear();
738 else
739 return NULL;
740 continue;
741 }
742
743 if (obj != global) {
744 Py_DECREF(obj);
745 continue;
746 }
747
748 Py_DECREF(obj);
749 break;
750 }
751
752 /* If no module is found, use __main__. */
753 if (!j) {
754 module_name = main_str;
755 }
756
757 Py_INCREF(module_name);
758 return module_name;
759}
760
761/* fast_save_enter() and fast_save_leave() are guards against recursive
762 objects when Pickler is used with the "fast mode" (i.e., with object
763 memoization disabled). If the nesting of a list or dict object exceed
764 FAST_NESTING_LIMIT, these guards will start keeping an internal
765 reference to the seen list or dict objects and check whether these objects
766 are recursive. These are not strictly necessary, since save() has a
767 hard-coded recursion limit, but they give a nicer error message than the
768 typical RuntimeError. */
769static int
770fast_save_enter(PicklerObject *self, PyObject *obj)
771{
772 /* if fast_nesting < 0, we're doing an error exit. */
773 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
774 PyObject *key = NULL;
775 if (self->fast_memo == NULL) {
776 self->fast_memo = PyDict_New();
777 if (self->fast_memo == NULL) {
778 self->fast_nesting = -1;
779 return 0;
780 }
781 }
782 key = PyLong_FromVoidPtr(obj);
783 if (key == NULL)
784 return 0;
785 if (PyDict_GetItem(self->fast_memo, key)) {
786 Py_DECREF(key);
787 PyErr_Format(PyExc_ValueError,
788 "fast mode: can't pickle cyclic objects "
789 "including object type %.200s at %p",
790 obj->ob_type->tp_name, obj);
791 self->fast_nesting = -1;
792 return 0;
793 }
794 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
795 Py_DECREF(key);
796 self->fast_nesting = -1;
797 return 0;
798 }
799 Py_DECREF(key);
800 }
801 return 1;
802}
803
804static int
805fast_save_leave(PicklerObject *self, PyObject *obj)
806{
807 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
808 PyObject *key = PyLong_FromVoidPtr(obj);
809 if (key == NULL)
810 return 0;
811 if (PyDict_DelItem(self->fast_memo, key) < 0) {
812 Py_DECREF(key);
813 return 0;
814 }
815 Py_DECREF(key);
816 }
817 return 1;
818}
819
820static int
821save_none(PicklerObject *self, PyObject *obj)
822{
823 const char none_op = NONE;
824 if (pickler_write(self, &none_op, 1) < 0)
825 return -1;
826
827 return 0;
828}
829
830static int
831save_bool(PicklerObject *self, PyObject *obj)
832{
833 static const char *buf[2] = { FALSE, TRUE };
834 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
835 int p = (obj == Py_True);
836
837 if (self->proto >= 2) {
838 const char bool_op = p ? NEWTRUE : NEWFALSE;
839 if (pickler_write(self, &bool_op, 1) < 0)
840 return -1;
841 }
842 else if (pickler_write(self, buf[p], len[p]) < 0)
843 return -1;
844
845 return 0;
846}
847
848static int
849save_int(PicklerObject *self, long x)
850{
851 char pdata[32];
852 int len = 0;
853
854 if (!self->bin
855#if SIZEOF_LONG > 4
856 || x > 0x7fffffffL || x < -0x80000000L
857#endif
858 ) {
859 /* Text-mode pickle, or long too big to fit in the 4-byte
860 * signed BININT format: store as a string.
861 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000862 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
863 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000864 if (pickler_write(self, pdata, strlen(pdata)) < 0)
865 return -1;
866 }
867 else {
868 /* Binary pickle and x fits in a signed 4-byte int. */
869 pdata[1] = (unsigned char)(x & 0xff);
870 pdata[2] = (unsigned char)((x >> 8) & 0xff);
871 pdata[3] = (unsigned char)((x >> 16) & 0xff);
872 pdata[4] = (unsigned char)((x >> 24) & 0xff);
873
874 if ((pdata[4] == 0) && (pdata[3] == 0)) {
875 if (pdata[2] == 0) {
876 pdata[0] = BININT1;
877 len = 2;
878 }
879 else {
880 pdata[0] = BININT2;
881 len = 3;
882 }
883 }
884 else {
885 pdata[0] = BININT;
886 len = 5;
887 }
888
889 if (pickler_write(self, pdata, len) < 0)
890 return -1;
891 }
892
893 return 0;
894}
895
896static int
897save_long(PicklerObject *self, PyObject *obj)
898{
899 PyObject *repr = NULL;
900 Py_ssize_t size;
901 long val = PyLong_AsLong(obj);
902 int status = 0;
903
904 const char long_op = LONG;
905
906 if (val == -1 && PyErr_Occurred()) {
907 /* out of range for int pickling */
908 PyErr_Clear();
909 }
910 else
911 return save_int(self, val);
912
913 if (self->proto >= 2) {
914 /* Linear-time pickling. */
915 size_t nbits;
916 size_t nbytes;
917 unsigned char *pdata;
918 char header[5];
919 int i;
920 int sign = _PyLong_Sign(obj);
921
922 if (sign == 0) {
923 header[0] = LONG1;
924 header[1] = 0; /* It's 0 -- an empty bytestring. */
925 if (pickler_write(self, header, 2) < 0)
926 goto error;
927 return 0;
928 }
929 nbits = _PyLong_NumBits(obj);
930 if (nbits == (size_t)-1 && PyErr_Occurred())
931 goto error;
932 /* How many bytes do we need? There are nbits >> 3 full
933 * bytes of data, and nbits & 7 leftover bits. If there
934 * are any leftover bits, then we clearly need another
935 * byte. Wnat's not so obvious is that we *probably*
936 * need another byte even if there aren't any leftovers:
937 * the most-significant bit of the most-significant byte
938 * acts like a sign bit, and it's usually got a sense
939 * opposite of the one we need. The exception is longs
940 * of the form -(2**(8*j-1)) for j > 0. Such a long is
941 * its own 256's-complement, so has the right sign bit
942 * even without the extra byte. That's a pain to check
943 * for in advance, though, so we always grab an extra
944 * byte at the start, and cut it back later if possible.
945 */
946 nbytes = (nbits >> 3) + 1;
947 if (nbytes > INT_MAX) {
948 PyErr_SetString(PyExc_OverflowError,
949 "long too large to pickle");
950 goto error;
951 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000952 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000953 if (repr == NULL)
954 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000955 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000956 i = _PyLong_AsByteArray((PyLongObject *)obj,
957 pdata, nbytes,
958 1 /* little endian */ , 1 /* signed */ );
959 if (i < 0)
960 goto error;
961 /* If the long is negative, this may be a byte more than
962 * needed. This is so iff the MSB is all redundant sign
963 * bits.
964 */
965 if (sign < 0 &&
966 nbytes > 1 &&
967 pdata[nbytes - 1] == 0xff &&
968 (pdata[nbytes - 2] & 0x80) != 0) {
969 nbytes--;
970 }
971
972 if (nbytes < 256) {
973 header[0] = LONG1;
974 header[1] = (unsigned char)nbytes;
975 size = 2;
976 }
977 else {
978 header[0] = LONG4;
979 size = (int)nbytes;
980 for (i = 1; i < 5; i++) {
981 header[i] = (unsigned char)(size & 0xff);
982 size >>= 8;
983 }
984 size = 5;
985 }
986 if (pickler_write(self, header, size) < 0 ||
987 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
988 goto error;
989 }
990 else {
991 char *string;
992
Mark Dickinson8dd05142009-01-20 20:43:58 +0000993 /* proto < 2: write the repr and newline. This is quadratic-time (in
994 the number of digits), in both directions. We add a trailing 'L'
995 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000996
997 repr = PyObject_Repr(obj);
998 if (repr == NULL)
999 goto error;
1000
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001001 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001002 if (string == NULL)
1003 goto error;
1004
1005 if (pickler_write(self, &long_op, 1) < 0 ||
1006 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +00001007 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001008 goto error;
1009 }
1010
1011 if (0) {
1012 error:
1013 status = -1;
1014 }
1015 Py_XDECREF(repr);
1016
1017 return status;
1018}
1019
1020static int
1021save_float(PicklerObject *self, PyObject *obj)
1022{
1023 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1024
1025 if (self->bin) {
1026 char pdata[9];
1027 pdata[0] = BINFLOAT;
1028 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1029 return -1;
1030 if (pickler_write(self, pdata, 9) < 0)
1031 return -1;
Eric Smith0923d1d2009-04-16 20:16:10 +00001032 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001033 else {
Eric Smith0923d1d2009-04-16 20:16:10 +00001034 int result = -1;
1035 char *buf = NULL;
1036 char op = FLOAT;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001037
Eric Smith0923d1d2009-04-16 20:16:10 +00001038 if (pickler_write(self, &op, 1) < 0)
1039 goto done;
1040
Mark Dickinson3e09f432009-04-17 08:41:23 +00001041 buf = PyOS_double_to_string(x, 'g', 17, 0, NULL);
Eric Smith0923d1d2009-04-16 20:16:10 +00001042 if (!buf) {
1043 PyErr_NoMemory();
1044 goto done;
1045 }
1046
1047 if (pickler_write(self, buf, strlen(buf)) < 0)
1048 goto done;
1049
1050 if (pickler_write(self, "\n", 1) < 0)
1051 goto done;
1052
1053 result = 0;
1054done:
1055 PyMem_Free(buf);
1056 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001057 }
1058
1059 return 0;
1060}
1061
1062static int
1063save_bytes(PicklerObject *self, PyObject *obj)
1064{
1065 if (self->proto < 3) {
1066 /* Older pickle protocols do not have an opcode for pickling bytes
1067 objects. Therefore, we need to fake the copy protocol (i.e.,
1068 the __reduce__ method) to permit bytes object unpickling. */
1069 PyObject *reduce_value = NULL;
1070 PyObject *bytelist = NULL;
1071 int status;
1072
1073 bytelist = PySequence_List(obj);
1074 if (bytelist == NULL)
1075 return -1;
1076
1077 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1078 bytelist);
1079 if (reduce_value == NULL) {
1080 Py_DECREF(bytelist);
1081 return -1;
1082 }
1083
1084 /* save_reduce() will memoize the object automatically. */
1085 status = save_reduce(self, reduce_value, obj);
1086 Py_DECREF(reduce_value);
1087 Py_DECREF(bytelist);
1088 return status;
1089 }
1090 else {
1091 Py_ssize_t size;
1092 char header[5];
1093 int len;
1094
1095 size = PyBytes_Size(obj);
1096 if (size < 0)
1097 return -1;
1098
1099 if (size < 256) {
1100 header[0] = SHORT_BINBYTES;
1101 header[1] = (unsigned char)size;
1102 len = 2;
1103 }
1104 else if (size <= 0xffffffffL) {
1105 header[0] = BINBYTES;
1106 header[1] = (unsigned char)(size & 0xff);
1107 header[2] = (unsigned char)((size >> 8) & 0xff);
1108 header[3] = (unsigned char)((size >> 16) & 0xff);
1109 header[4] = (unsigned char)((size >> 24) & 0xff);
1110 len = 5;
1111 }
1112 else {
1113 return -1; /* string too large */
1114 }
1115
1116 if (pickler_write(self, header, len) < 0)
1117 return -1;
1118
1119 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1120 return -1;
1121
1122 if (memo_put(self, obj) < 0)
1123 return -1;
1124
1125 return 0;
1126 }
1127}
1128
1129/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1130 backslash and newline characters to \uXXXX escapes. */
1131static PyObject *
1132raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1133{
1134 PyObject *repr, *result;
1135 char *p;
1136 char *q;
1137
1138 static const char *hexdigits = "0123456789abcdef";
1139
1140#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001141 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001142#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001143 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001144#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001145
1146 if (size > PY_SSIZE_T_MAX / expandsize)
1147 return PyErr_NoMemory();
1148
1149 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001150 if (repr == NULL)
1151 return NULL;
1152 if (size == 0)
1153 goto done;
1154
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001155 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001156 while (size-- > 0) {
1157 Py_UNICODE ch = *s++;
1158#ifdef Py_UNICODE_WIDE
1159 /* Map 32-bit characters to '\Uxxxxxxxx' */
1160 if (ch >= 0x10000) {
1161 *p++ = '\\';
1162 *p++ = 'U';
1163 *p++ = hexdigits[(ch >> 28) & 0xf];
1164 *p++ = hexdigits[(ch >> 24) & 0xf];
1165 *p++ = hexdigits[(ch >> 20) & 0xf];
1166 *p++ = hexdigits[(ch >> 16) & 0xf];
1167 *p++ = hexdigits[(ch >> 12) & 0xf];
1168 *p++ = hexdigits[(ch >> 8) & 0xf];
1169 *p++ = hexdigits[(ch >> 4) & 0xf];
1170 *p++ = hexdigits[ch & 15];
1171 }
1172 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001173#else
1174 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1175 if (ch >= 0xD800 && ch < 0xDC00) {
1176 Py_UNICODE ch2;
1177 Py_UCS4 ucs;
1178
1179 ch2 = *s++;
1180 size--;
1181 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1182 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1183 *p++ = '\\';
1184 *p++ = 'U';
1185 *p++ = hexdigits[(ucs >> 28) & 0xf];
1186 *p++ = hexdigits[(ucs >> 24) & 0xf];
1187 *p++ = hexdigits[(ucs >> 20) & 0xf];
1188 *p++ = hexdigits[(ucs >> 16) & 0xf];
1189 *p++ = hexdigits[(ucs >> 12) & 0xf];
1190 *p++ = hexdigits[(ucs >> 8) & 0xf];
1191 *p++ = hexdigits[(ucs >> 4) & 0xf];
1192 *p++ = hexdigits[ucs & 0xf];
1193 continue;
1194 }
1195 /* Fall through: isolated surrogates are copied as-is */
1196 s--;
1197 size++;
1198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001199#endif
1200 /* Map 16-bit characters to '\uxxxx' */
1201 if (ch >= 256 || ch == '\\' || ch == '\n') {
1202 *p++ = '\\';
1203 *p++ = 'u';
1204 *p++ = hexdigits[(ch >> 12) & 0xf];
1205 *p++ = hexdigits[(ch >> 8) & 0xf];
1206 *p++ = hexdigits[(ch >> 4) & 0xf];
1207 *p++ = hexdigits[ch & 15];
1208 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001209 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001210 else
1211 *p++ = (char) ch;
1212 }
1213 size = p - q;
1214
1215 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001216 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001217 Py_DECREF(repr);
1218 return result;
1219}
1220
1221static int
1222save_unicode(PicklerObject *self, PyObject *obj)
1223{
1224 Py_ssize_t size;
1225 PyObject *encoded = NULL;
1226
1227 if (self->bin) {
1228 char pdata[5];
1229
Victor Stinnerf7351b42010-04-13 11:09:22 +00001230 encoded = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(obj),
1231 PyUnicode_GET_SIZE(obj),
1232 "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001233 if (encoded == NULL)
1234 goto error;
1235
1236 size = PyBytes_GET_SIZE(encoded);
1237 if (size < 0 || size > 0xffffffffL)
1238 goto error; /* string too large */
1239
1240 pdata[0] = BINUNICODE;
1241 pdata[1] = (unsigned char)(size & 0xff);
1242 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1243 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1244 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1245
1246 if (pickler_write(self, pdata, 5) < 0)
1247 goto error;
1248
1249 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1250 goto error;
1251 }
1252 else {
1253 const char unicode_op = UNICODE;
1254
1255 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1256 PyUnicode_GET_SIZE(obj));
1257 if (encoded == NULL)
1258 goto error;
1259
1260 if (pickler_write(self, &unicode_op, 1) < 0)
1261 goto error;
1262
1263 size = PyBytes_GET_SIZE(encoded);
1264 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1265 goto error;
1266
1267 if (pickler_write(self, "\n", 1) < 0)
1268 goto error;
1269 }
1270 if (memo_put(self, obj) < 0)
1271 goto error;
1272
1273 Py_DECREF(encoded);
1274 return 0;
1275
1276 error:
1277 Py_XDECREF(encoded);
1278 return -1;
1279}
1280
1281/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1282static int
1283store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1284{
1285 int i;
1286
1287 assert(PyTuple_Size(t) == len);
1288
1289 for (i = 0; i < len; i++) {
1290 PyObject *element = PyTuple_GET_ITEM(t, i);
1291
1292 if (element == NULL)
1293 return -1;
1294 if (save(self, element, 0) < 0)
1295 return -1;
1296 }
1297
1298 return 0;
1299}
1300
1301/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1302 * used across protocols to minimize the space needed to pickle them.
1303 * Tuples are also the only builtin immutable type that can be recursive
1304 * (a tuple can be reached from itself), and that requires some subtle
1305 * magic so that it works in all cases. IOW, this is a long routine.
1306 */
1307static int
1308save_tuple(PicklerObject *self, PyObject *obj)
1309{
1310 PyObject *memo_key = NULL;
1311 int len, i;
1312 int status = 0;
1313
1314 const char mark_op = MARK;
1315 const char tuple_op = TUPLE;
1316 const char pop_op = POP;
1317 const char pop_mark_op = POP_MARK;
1318 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1319
1320 if ((len = PyTuple_Size(obj)) < 0)
1321 return -1;
1322
1323 if (len == 0) {
1324 char pdata[2];
1325
1326 if (self->proto) {
1327 pdata[0] = EMPTY_TUPLE;
1328 len = 1;
1329 }
1330 else {
1331 pdata[0] = MARK;
1332 pdata[1] = TUPLE;
1333 len = 2;
1334 }
1335 if (pickler_write(self, pdata, len) < 0)
1336 return -1;
1337 return 0;
1338 }
1339
1340 /* id(tuple) isn't in the memo now. If it shows up there after
1341 * saving the tuple elements, the tuple must be recursive, in
1342 * which case we'll pop everything we put on the stack, and fetch
1343 * its value from the memo.
1344 */
1345 memo_key = PyLong_FromVoidPtr(obj);
1346 if (memo_key == NULL)
1347 return -1;
1348
1349 if (len <= 3 && self->proto >= 2) {
1350 /* Use TUPLE{1,2,3} opcodes. */
1351 if (store_tuple_elements(self, obj, len) < 0)
1352 goto error;
1353
1354 if (PyDict_GetItem(self->memo, memo_key)) {
1355 /* pop the len elements */
1356 for (i = 0; i < len; i++)
1357 if (pickler_write(self, &pop_op, 1) < 0)
1358 goto error;
1359 /* fetch from memo */
1360 if (memo_get(self, memo_key) < 0)
1361 goto error;
1362
1363 Py_DECREF(memo_key);
1364 return 0;
1365 }
1366 else { /* Not recursive. */
1367 if (pickler_write(self, len2opcode + len, 1) < 0)
1368 goto error;
1369 }
1370 goto memoize;
1371 }
1372
1373 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1374 * Generate MARK e1 e2 ... TUPLE
1375 */
1376 if (pickler_write(self, &mark_op, 1) < 0)
1377 goto error;
1378
1379 if (store_tuple_elements(self, obj, len) < 0)
1380 goto error;
1381
1382 if (PyDict_GetItem(self->memo, memo_key)) {
1383 /* pop the stack stuff we pushed */
1384 if (self->bin) {
1385 if (pickler_write(self, &pop_mark_op, 1) < 0)
1386 goto error;
1387 }
1388 else {
1389 /* Note that we pop one more than len, to remove
1390 * the MARK too.
1391 */
1392 for (i = 0; i <= len; i++)
1393 if (pickler_write(self, &pop_op, 1) < 0)
1394 goto error;
1395 }
1396 /* fetch from memo */
1397 if (memo_get(self, memo_key) < 0)
1398 goto error;
1399
1400 Py_DECREF(memo_key);
1401 return 0;
1402 }
1403 else { /* Not recursive. */
1404 if (pickler_write(self, &tuple_op, 1) < 0)
1405 goto error;
1406 }
1407
1408 memoize:
1409 if (memo_put(self, obj) < 0)
1410 goto error;
1411
1412 if (0) {
1413 error:
1414 status = -1;
1415 }
1416
1417 Py_DECREF(memo_key);
1418 return status;
1419}
1420
1421/* iter is an iterator giving items, and we batch up chunks of
1422 * MARK item item ... item APPENDS
1423 * opcode sequences. Calling code should have arranged to first create an
1424 * empty list, or list-like object, for the APPENDS to operate on.
1425 * Returns 0 on success, <0 on error.
1426 */
1427static int
1428batch_list(PicklerObject *self, PyObject *iter)
1429{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001430 PyObject *obj = NULL;
1431 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001432 int i, n;
1433
1434 const char mark_op = MARK;
1435 const char append_op = APPEND;
1436 const char appends_op = APPENDS;
1437
1438 assert(iter != NULL);
1439
1440 /* XXX: I think this function could be made faster by avoiding the
1441 iterator interface and fetching objects directly from list using
1442 PyList_GET_ITEM.
1443 */
1444
1445 if (self->proto == 0) {
1446 /* APPENDS isn't available; do one at a time. */
1447 for (;;) {
1448 obj = PyIter_Next(iter);
1449 if (obj == NULL) {
1450 if (PyErr_Occurred())
1451 return -1;
1452 break;
1453 }
1454 i = save(self, obj, 0);
1455 Py_DECREF(obj);
1456 if (i < 0)
1457 return -1;
1458 if (pickler_write(self, &append_op, 1) < 0)
1459 return -1;
1460 }
1461 return 0;
1462 }
1463
1464 /* proto > 0: write in batches of BATCHSIZE. */
1465 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001466 /* Get first item */
1467 firstitem = PyIter_Next(iter);
1468 if (firstitem == NULL) {
1469 if (PyErr_Occurred())
1470 goto error;
1471
1472 /* nothing more to add */
1473 break;
1474 }
1475
1476 /* Try to get a second item */
1477 obj = PyIter_Next(iter);
1478 if (obj == NULL) {
1479 if (PyErr_Occurred())
1480 goto error;
1481
1482 /* Only one item to write */
1483 if (save(self, firstitem, 0) < 0)
1484 goto error;
1485 if (pickler_write(self, &append_op, 1) < 0)
1486 goto error;
1487 Py_CLEAR(firstitem);
1488 break;
1489 }
1490
1491 /* More than one item to write */
1492
1493 /* Pump out MARK, items, APPENDS. */
1494 if (pickler_write(self, &mark_op, 1) < 0)
1495 goto error;
1496
1497 if (save(self, firstitem, 0) < 0)
1498 goto error;
1499 Py_CLEAR(firstitem);
1500 n = 1;
1501
1502 /* Fetch and save up to BATCHSIZE items */
1503 while (obj) {
1504 if (save(self, obj, 0) < 0)
1505 goto error;
1506 Py_CLEAR(obj);
1507 n += 1;
1508
1509 if (n == BATCHSIZE)
1510 break;
1511
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001512 obj = PyIter_Next(iter);
1513 if (obj == NULL) {
1514 if (PyErr_Occurred())
1515 goto error;
1516 break;
1517 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001518 }
1519
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001520 if (pickler_write(self, &appends_op, 1) < 0)
1521 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001522
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001523 } while (n == BATCHSIZE);
1524 return 0;
1525
1526 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001527 Py_XDECREF(firstitem);
1528 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001529 return -1;
1530}
1531
1532static int
1533save_list(PicklerObject *self, PyObject *obj)
1534{
1535 PyObject *iter;
1536 char header[3];
1537 int len;
1538 int status = 0;
1539
1540 if (self->fast && !fast_save_enter(self, obj))
1541 goto error;
1542
1543 /* Create an empty list. */
1544 if (self->bin) {
1545 header[0] = EMPTY_LIST;
1546 len = 1;
1547 }
1548 else {
1549 header[0] = MARK;
1550 header[1] = LIST;
1551 len = 2;
1552 }
1553
1554 if (pickler_write(self, header, len) < 0)
1555 goto error;
1556
1557 /* Get list length, and bow out early if empty. */
1558 if ((len = PyList_Size(obj)) < 0)
1559 goto error;
1560
1561 if (memo_put(self, obj) < 0)
1562 goto error;
1563
1564 if (len != 0) {
1565 /* Save the list elements. */
1566 iter = PyObject_GetIter(obj);
1567 if (iter == NULL)
1568 goto error;
Antoine Pitrou0f2a61a2011-01-23 17:21:28 +00001569 if (Py_EnterRecursiveCall(" while pickling an object")) {
1570 Py_DECREF(iter);
1571 goto error;
1572 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001573 status = batch_list(self, iter);
Antoine Pitrou0f2a61a2011-01-23 17:21:28 +00001574 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001575 Py_DECREF(iter);
1576 }
1577
1578 if (0) {
1579 error:
1580 status = -1;
1581 }
1582
1583 if (self->fast && !fast_save_leave(self, obj))
1584 status = -1;
1585
1586 return status;
1587}
1588
1589/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1590 * MARK key value ... key value SETITEMS
1591 * opcode sequences. Calling code should have arranged to first create an
1592 * empty dict, or dict-like object, for the SETITEMS to operate on.
1593 * Returns 0 on success, <0 on error.
1594 *
1595 * This is very much like batch_list(). The difference between saving
1596 * elements directly, and picking apart two-tuples, is so long-winded at
1597 * the C level, though, that attempts to combine these routines were too
1598 * ugly to bear.
1599 */
1600static int
1601batch_dict(PicklerObject *self, PyObject *iter)
1602{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001603 PyObject *obj = NULL;
1604 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001605 int i, n;
1606
1607 const char mark_op = MARK;
1608 const char setitem_op = SETITEM;
1609 const char setitems_op = SETITEMS;
1610
1611 assert(iter != NULL);
1612
1613 if (self->proto == 0) {
1614 /* SETITEMS isn't available; do one at a time. */
1615 for (;;) {
1616 obj = PyIter_Next(iter);
1617 if (obj == NULL) {
1618 if (PyErr_Occurred())
1619 return -1;
1620 break;
1621 }
1622 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1623 PyErr_SetString(PyExc_TypeError, "dict items "
1624 "iterator must return 2-tuples");
1625 return -1;
1626 }
1627 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1628 if (i >= 0)
1629 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1630 Py_DECREF(obj);
1631 if (i < 0)
1632 return -1;
1633 if (pickler_write(self, &setitem_op, 1) < 0)
1634 return -1;
1635 }
1636 return 0;
1637 }
1638
1639 /* proto > 0: write in batches of BATCHSIZE. */
1640 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001641 /* Get first item */
1642 firstitem = PyIter_Next(iter);
1643 if (firstitem == NULL) {
1644 if (PyErr_Occurred())
1645 goto error;
1646
1647 /* nothing more to add */
1648 break;
1649 }
1650 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1651 PyErr_SetString(PyExc_TypeError, "dict items "
1652 "iterator must return 2-tuples");
1653 goto error;
1654 }
1655
1656 /* Try to get a second item */
1657 obj = PyIter_Next(iter);
1658 if (obj == NULL) {
1659 if (PyErr_Occurred())
1660 goto error;
1661
1662 /* Only one item to write */
1663 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1664 goto error;
1665 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1666 goto error;
1667 if (pickler_write(self, &setitem_op, 1) < 0)
1668 goto error;
1669 Py_CLEAR(firstitem);
1670 break;
1671 }
1672
1673 /* More than one item to write */
1674
1675 /* Pump out MARK, items, SETITEMS. */
1676 if (pickler_write(self, &mark_op, 1) < 0)
1677 goto error;
1678
1679 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1680 goto error;
1681 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1682 goto error;
1683 Py_CLEAR(firstitem);
1684 n = 1;
1685
1686 /* Fetch and save up to BATCHSIZE items */
1687 while (obj) {
1688 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1689 PyErr_SetString(PyExc_TypeError, "dict items "
1690 "iterator must return 2-tuples");
1691 goto error;
1692 }
1693 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1694 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1695 goto error;
1696 Py_CLEAR(obj);
1697 n += 1;
1698
1699 if (n == BATCHSIZE)
1700 break;
1701
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001702 obj = PyIter_Next(iter);
1703 if (obj == NULL) {
1704 if (PyErr_Occurred())
1705 goto error;
1706 break;
1707 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001708 }
1709
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001710 if (pickler_write(self, &setitems_op, 1) < 0)
1711 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001712
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001713 } while (n == BATCHSIZE);
1714 return 0;
1715
1716 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001717 Py_XDECREF(firstitem);
1718 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001719 return -1;
1720}
1721
Collin Winter5c9b02d2009-05-25 05:43:30 +00001722/* This is a variant of batch_dict() above that specializes for dicts, with no
1723 * support for dict subclasses. Like batch_dict(), we batch up chunks of
1724 * MARK key value ... key value SETITEMS
1725 * opcode sequences. Calling code should have arranged to first create an
1726 * empty dict, or dict-like object, for the SETITEMS to operate on.
1727 * Returns 0 on success, -1 on error.
1728 *
1729 * Note that this currently doesn't work for protocol 0.
1730 */
1731static int
1732batch_dict_exact(PicklerObject *self, PyObject *obj)
1733{
1734 PyObject *key = NULL, *value = NULL;
1735 int i;
1736 Py_ssize_t dict_size, ppos = 0;
1737
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001738 const char mark_op = MARK;
1739 const char setitem_op = SETITEM;
1740 const char setitems_op = SETITEMS;
Collin Winter5c9b02d2009-05-25 05:43:30 +00001741
1742 assert(obj != NULL);
1743 assert(self->proto > 0);
1744
1745 dict_size = PyDict_Size(obj);
1746
1747 /* Special-case len(d) == 1 to save space. */
1748 if (dict_size == 1) {
1749 PyDict_Next(obj, &ppos, &key, &value);
1750 if (save(self, key, 0) < 0)
1751 return -1;
1752 if (save(self, value, 0) < 0)
1753 return -1;
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001754 if (pickler_write(self, &setitem_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001755 return -1;
1756 return 0;
1757 }
1758
1759 /* Write in batches of BATCHSIZE. */
1760 do {
1761 i = 0;
1762 if (pickler_write(self, &mark_op, 1) < 0)
1763 return -1;
1764 while (PyDict_Next(obj, &ppos, &key, &value)) {
1765 if (save(self, key, 0) < 0)
1766 return -1;
1767 if (save(self, value, 0) < 0)
1768 return -1;
1769 if (++i == BATCHSIZE)
1770 break;
1771 }
Alexandre Vassalottif70b1292009-05-25 18:00:52 +00001772 if (pickler_write(self, &setitems_op, 1) < 0)
Collin Winter5c9b02d2009-05-25 05:43:30 +00001773 return -1;
1774 if (PyDict_Size(obj) != dict_size) {
1775 PyErr_Format(
1776 PyExc_RuntimeError,
1777 "dictionary changed size during iteration");
1778 return -1;
1779 }
1780
1781 } while (i == BATCHSIZE);
1782 return 0;
1783}
1784
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001785static int
1786save_dict(PicklerObject *self, PyObject *obj)
1787{
1788 PyObject *items, *iter;
1789 char header[3];
1790 int len;
1791 int status = 0;
1792
1793 if (self->fast && !fast_save_enter(self, obj))
1794 goto error;
1795
1796 /* Create an empty dict. */
1797 if (self->bin) {
1798 header[0] = EMPTY_DICT;
1799 len = 1;
1800 }
1801 else {
1802 header[0] = MARK;
1803 header[1] = DICT;
1804 len = 2;
1805 }
1806
1807 if (pickler_write(self, header, len) < 0)
1808 goto error;
1809
1810 /* Get dict size, and bow out early if empty. */
1811 if ((len = PyDict_Size(obj)) < 0)
1812 goto error;
1813
1814 if (memo_put(self, obj) < 0)
1815 goto error;
1816
1817 if (len != 0) {
1818 /* Save the dict items. */
Collin Winter5c9b02d2009-05-25 05:43:30 +00001819 if (PyDict_CheckExact(obj) && self->proto > 0) {
1820 /* We can take certain shortcuts if we know this is a dict and
1821 not a dict subclass. */
Antoine Pitrou0f2a61a2011-01-23 17:21:28 +00001822 if (Py_EnterRecursiveCall(" while pickling an object"))
1823 goto error;
1824 status = batch_dict_exact(self, obj);
1825 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00001826 } else {
1827 items = PyObject_CallMethod(obj, "items", "()");
1828 if (items == NULL)
1829 goto error;
1830 iter = PyObject_GetIter(items);
1831 Py_DECREF(items);
1832 if (iter == NULL)
1833 goto error;
Antoine Pitrou0f2a61a2011-01-23 17:21:28 +00001834 if (Py_EnterRecursiveCall(" while pickling an object")) {
1835 Py_DECREF(iter);
1836 goto error;
1837 }
Collin Winter5c9b02d2009-05-25 05:43:30 +00001838 status = batch_dict(self, iter);
Antoine Pitrou0f2a61a2011-01-23 17:21:28 +00001839 Py_LeaveRecursiveCall();
Collin Winter5c9b02d2009-05-25 05:43:30 +00001840 Py_DECREF(iter);
1841 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001842 }
1843
1844 if (0) {
1845 error:
1846 status = -1;
1847 }
1848
1849 if (self->fast && !fast_save_leave(self, obj))
1850 status = -1;
1851
1852 return status;
1853}
1854
1855static int
1856save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1857{
1858 static PyObject *name_str = NULL;
1859 PyObject *global_name = NULL;
1860 PyObject *module_name = NULL;
1861 PyObject *module = NULL;
1862 PyObject *cls;
1863 int status = 0;
1864
1865 const char global_op = GLOBAL;
1866
1867 if (name_str == NULL) {
1868 name_str = PyUnicode_InternFromString("__name__");
1869 if (name_str == NULL)
1870 goto error;
1871 }
1872
1873 if (name) {
1874 global_name = name;
1875 Py_INCREF(global_name);
1876 }
1877 else {
1878 global_name = PyObject_GetAttr(obj, name_str);
1879 if (global_name == NULL)
1880 goto error;
1881 }
1882
1883 module_name = whichmodule(obj, global_name);
1884 if (module_name == NULL)
1885 goto error;
1886
1887 /* XXX: Change to use the import C API directly with level=0 to disallow
1888 relative imports.
1889
1890 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1891 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1892 custom import functions (IMHO, this would be a nice security
1893 feature). The import C API would need to be extended to support the
1894 extra parameters of __import__ to fix that. */
1895 module = PyImport_Import(module_name);
1896 if (module == NULL) {
1897 PyErr_Format(PicklingError,
1898 "Can't pickle %R: import of module %R failed",
1899 obj, module_name);
1900 goto error;
1901 }
1902 cls = PyObject_GetAttr(module, global_name);
1903 if (cls == NULL) {
1904 PyErr_Format(PicklingError,
1905 "Can't pickle %R: attribute lookup %S.%S failed",
1906 obj, module_name, global_name);
1907 goto error;
1908 }
1909 if (cls != obj) {
1910 Py_DECREF(cls);
1911 PyErr_Format(PicklingError,
1912 "Can't pickle %R: it's not the same object as %S.%S",
1913 obj, module_name, global_name);
1914 goto error;
1915 }
1916 Py_DECREF(cls);
1917
1918 if (self->proto >= 2) {
1919 /* See whether this is in the extension registry, and if
1920 * so generate an EXT opcode.
1921 */
1922 PyObject *code_obj; /* extension code as Python object */
1923 long code; /* extension code as C value */
1924 char pdata[5];
1925 int n;
1926
1927 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1928 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1929 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1930 /* The object is not registered in the extension registry.
1931 This is the most likely code path. */
1932 if (code_obj == NULL)
1933 goto gen_global;
1934
1935 /* XXX: pickle.py doesn't check neither the type, nor the range
1936 of the value returned by the extension_registry. It should for
1937 consistency. */
1938
1939 /* Verify code_obj has the right type and value. */
1940 if (!PyLong_Check(code_obj)) {
1941 PyErr_Format(PicklingError,
1942 "Can't pickle %R: extension code %R isn't an integer",
1943 obj, code_obj);
1944 goto error;
1945 }
1946 code = PyLong_AS_LONG(code_obj);
1947 if (code <= 0 || code > 0x7fffffffL) {
1948 PyErr_Format(PicklingError,
1949 "Can't pickle %R: extension code %ld is out of range",
1950 obj, code);
1951 goto error;
1952 }
1953
1954 /* Generate an EXT opcode. */
1955 if (code <= 0xff) {
1956 pdata[0] = EXT1;
1957 pdata[1] = (unsigned char)code;
1958 n = 2;
1959 }
1960 else if (code <= 0xffff) {
1961 pdata[0] = EXT2;
1962 pdata[1] = (unsigned char)(code & 0xff);
1963 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1964 n = 3;
1965 }
1966 else {
1967 pdata[0] = EXT4;
1968 pdata[1] = (unsigned char)(code & 0xff);
1969 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1970 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1971 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1972 n = 5;
1973 }
1974
1975 if (pickler_write(self, pdata, n) < 0)
1976 goto error;
1977 }
1978 else {
1979 /* Generate a normal global opcode if we are using a pickle
1980 protocol <= 2, or if the object is not registered in the
1981 extension registry. */
1982 PyObject *encoded;
1983 PyObject *(*unicode_encoder)(PyObject *);
1984
1985 gen_global:
1986 if (pickler_write(self, &global_op, 1) < 0)
1987 goto error;
1988
1989 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1990 the module name and the global name using UTF-8. We do so only when
1991 we are using the pickle protocol newer than version 3. This is to
1992 ensure compatibility with older Unpickler running on Python 2.x. */
1993 if (self->proto >= 3) {
1994 unicode_encoder = PyUnicode_AsUTF8String;
1995 }
1996 else {
1997 unicode_encoder = PyUnicode_AsASCIIString;
1998 }
1999
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002000 /* For protocol < 3 and if the user didn't request against doing so,
2001 we convert module names to the old 2.x module names. */
2002 if (self->fix_imports) {
2003 PyObject *key;
2004 PyObject *item;
2005
2006 key = PyTuple_Pack(2, module_name, global_name);
2007 if (key == NULL)
2008 goto error;
2009 item = PyDict_GetItemWithError(name_mapping_3to2, key);
2010 Py_DECREF(key);
2011 if (item) {
2012 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
2013 PyErr_Format(PyExc_RuntimeError,
2014 "_compat_pickle.REVERSE_NAME_MAPPING values "
2015 "should be 2-tuples, not %.200s",
2016 Py_TYPE(item)->tp_name);
2017 goto error;
2018 }
2019 Py_CLEAR(module_name);
2020 Py_CLEAR(global_name);
2021 module_name = PyTuple_GET_ITEM(item, 0);
2022 global_name = PyTuple_GET_ITEM(item, 1);
2023 if (!PyUnicode_Check(module_name) ||
2024 !PyUnicode_Check(global_name)) {
2025 PyErr_Format(PyExc_RuntimeError,
2026 "_compat_pickle.REVERSE_NAME_MAPPING values "
2027 "should be pairs of str, not (%.200s, %.200s)",
2028 Py_TYPE(module_name)->tp_name,
2029 Py_TYPE(global_name)->tp_name);
2030 goto error;
2031 }
2032 Py_INCREF(module_name);
2033 Py_INCREF(global_name);
2034 }
2035 else if (PyErr_Occurred()) {
2036 goto error;
2037 }
2038
2039 item = PyDict_GetItemWithError(import_mapping_3to2, module_name);
2040 if (item) {
2041 if (!PyUnicode_Check(item)) {
2042 PyErr_Format(PyExc_RuntimeError,
2043 "_compat_pickle.REVERSE_IMPORT_MAPPING values "
2044 "should be strings, not %.200s",
2045 Py_TYPE(item)->tp_name);
2046 goto error;
2047 }
2048 Py_CLEAR(module_name);
2049 module_name = item;
2050 Py_INCREF(module_name);
2051 }
2052 else if (PyErr_Occurred()) {
2053 goto error;
2054 }
2055 }
2056
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002057 /* Save the name of the module. */
2058 encoded = unicode_encoder(module_name);
2059 if (encoded == NULL) {
2060 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2061 PyErr_Format(PicklingError,
2062 "can't pickle module identifier '%S' using "
2063 "pickle protocol %i", module_name, self->proto);
2064 goto error;
2065 }
2066 if (pickler_write(self, PyBytes_AS_STRING(encoded),
2067 PyBytes_GET_SIZE(encoded)) < 0) {
2068 Py_DECREF(encoded);
2069 goto error;
2070 }
2071 Py_DECREF(encoded);
2072 if(pickler_write(self, "\n", 1) < 0)
2073 goto error;
2074
2075 /* Save the name of the module. */
2076 encoded = unicode_encoder(global_name);
2077 if (encoded == NULL) {
2078 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
2079 PyErr_Format(PicklingError,
2080 "can't pickle global identifier '%S' using "
2081 "pickle protocol %i", global_name, self->proto);
2082 goto error;
2083 }
2084 if (pickler_write(self, PyBytes_AS_STRING(encoded),
2085 PyBytes_GET_SIZE(encoded)) < 0) {
2086 Py_DECREF(encoded);
2087 goto error;
2088 }
2089 Py_DECREF(encoded);
2090 if(pickler_write(self, "\n", 1) < 0)
2091 goto error;
2092
2093 /* Memoize the object. */
2094 if (memo_put(self, obj) < 0)
2095 goto error;
2096 }
2097
2098 if (0) {
2099 error:
2100 status = -1;
2101 }
2102 Py_XDECREF(module_name);
2103 Py_XDECREF(global_name);
2104 Py_XDECREF(module);
2105
2106 return status;
2107}
2108
2109static int
2110save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
2111{
2112 PyObject *pid = NULL;
2113 int status = 0;
2114
2115 const char persid_op = PERSID;
2116 const char binpersid_op = BINPERSID;
2117
2118 Py_INCREF(obj);
2119 pid = pickler_call(self, func, obj);
2120 if (pid == NULL)
2121 return -1;
2122
2123 if (pid != Py_None) {
2124 if (self->bin) {
2125 if (save(self, pid, 1) < 0 ||
2126 pickler_write(self, &binpersid_op, 1) < 0)
2127 goto error;
2128 }
2129 else {
2130 PyObject *pid_str = NULL;
2131 char *pid_ascii_bytes;
2132 Py_ssize_t size;
2133
2134 pid_str = PyObject_Str(pid);
2135 if (pid_str == NULL)
2136 goto error;
2137
2138 /* XXX: Should it check whether the persistent id only contains
2139 ASCII characters? And what if the pid contains embedded
2140 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00002141 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002142 Py_DECREF(pid_str);
2143 if (pid_ascii_bytes == NULL)
2144 goto error;
2145
2146 if (pickler_write(self, &persid_op, 1) < 0 ||
2147 pickler_write(self, pid_ascii_bytes, size) < 0 ||
2148 pickler_write(self, "\n", 1) < 0)
2149 goto error;
2150 }
2151 status = 1;
2152 }
2153
2154 if (0) {
2155 error:
2156 status = -1;
2157 }
2158 Py_XDECREF(pid);
2159
2160 return status;
2161}
2162
2163/* We're saving obj, and args is the 2-thru-5 tuple returned by the
2164 * appropriate __reduce__ method for obj.
2165 */
2166static int
2167save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
2168{
2169 PyObject *callable;
2170 PyObject *argtup;
2171 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002172 PyObject *listitems = Py_None;
2173 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002174 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002175
2176 int use_newobj = self->proto >= 2;
2177
2178 const char reduce_op = REDUCE;
2179 const char build_op = BUILD;
2180 const char newobj_op = NEWOBJ;
2181
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002182 size = PyTuple_Size(args);
2183 if (size < 2 || size > 5) {
2184 PyErr_SetString(PicklingError, "tuple returned by "
2185 "__reduce__ must contain 2 through 5 elements");
2186 return -1;
2187 }
2188
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002189 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2190 &callable, &argtup, &state, &listitems, &dictitems))
2191 return -1;
2192
2193 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002194 PyErr_SetString(PicklingError, "first item of the tuple "
2195 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002196 return -1;
2197 }
2198 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002199 PyErr_SetString(PicklingError, "second item of the tuple "
2200 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002201 return -1;
2202 }
2203
2204 if (state == Py_None)
2205 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002206
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002207 if (listitems == Py_None)
2208 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002209 else if (!PyIter_Check(listitems)) {
2210 PyErr_Format(PicklingError, "Fourth element of tuple"
2211 "returned by __reduce__ must be an iterator, not %s",
2212 Py_TYPE(listitems)->tp_name);
2213 return -1;
2214 }
2215
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002216 if (dictitems == Py_None)
2217 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002218 else if (!PyIter_Check(dictitems)) {
2219 PyErr_Format(PicklingError, "Fifth element of tuple"
2220 "returned by __reduce__ must be an iterator, not %s",
2221 Py_TYPE(dictitems)->tp_name);
2222 return -1;
2223 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002224
2225 /* Protocol 2 special case: if callable's name is __newobj__, use
2226 NEWOBJ. */
2227 if (use_newobj) {
2228 static PyObject *newobj_str = NULL;
2229 PyObject *name_str;
2230
2231 if (newobj_str == NULL) {
2232 newobj_str = PyUnicode_InternFromString("__newobj__");
2233 }
2234
2235 name_str = PyObject_GetAttrString(callable, "__name__");
2236 if (name_str == NULL) {
2237 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2238 PyErr_Clear();
2239 else
2240 return -1;
2241 use_newobj = 0;
2242 }
2243 else {
2244 use_newobj = PyUnicode_Check(name_str) &&
2245 PyUnicode_Compare(name_str, newobj_str) == 0;
2246 Py_DECREF(name_str);
2247 }
2248 }
2249 if (use_newobj) {
2250 PyObject *cls;
2251 PyObject *newargtup;
2252 PyObject *obj_class;
2253 int p;
2254
2255 /* Sanity checks. */
2256 if (Py_SIZE(argtup) < 1) {
2257 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2258 return -1;
2259 }
2260
2261 cls = PyTuple_GET_ITEM(argtup, 0);
2262 if (!PyObject_HasAttrString(cls, "__new__")) {
2263 PyErr_SetString(PicklingError, "args[0] from "
2264 "__newobj__ args has no __new__");
2265 return -1;
2266 }
2267
2268 if (obj != NULL) {
2269 obj_class = PyObject_GetAttrString(obj, "__class__");
2270 if (obj_class == NULL) {
2271 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2272 PyErr_Clear();
2273 else
2274 return -1;
2275 }
2276 p = obj_class != cls; /* true iff a problem */
2277 Py_DECREF(obj_class);
2278 if (p) {
2279 PyErr_SetString(PicklingError, "args[0] from "
2280 "__newobj__ args has the wrong class");
2281 return -1;
2282 }
2283 }
2284 /* XXX: These calls save() are prone to infinite recursion. Imagine
2285 what happen if the value returned by the __reduce__() method of
2286 some extension type contains another object of the same type. Ouch!
2287
2288 Here is a quick example, that I ran into, to illustrate what I
2289 mean:
2290
2291 >>> import pickle, copyreg
2292 >>> copyreg.dispatch_table.pop(complex)
2293 >>> pickle.dumps(1+2j)
2294 Traceback (most recent call last):
2295 ...
2296 RuntimeError: maximum recursion depth exceeded
2297
2298 Removing the complex class from copyreg.dispatch_table made the
2299 __reduce_ex__() method emit another complex object:
2300
2301 >>> (1+1j).__reduce_ex__(2)
2302 (<function __newobj__ at 0xb7b71c3c>,
2303 (<class 'complex'>, (1+1j)), None, None, None)
2304
2305 Thus when save() was called on newargstup (the 2nd item) recursion
2306 ensued. Of course, the bug was in the complex class which had a
2307 broken __getnewargs__() that emitted another complex object. But,
2308 the point, here, is it is quite easy to end up with a broken reduce
2309 function. */
2310
2311 /* Save the class and its __new__ arguments. */
2312 if (save(self, cls, 0) < 0)
2313 return -1;
2314
2315 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2316 if (newargtup == NULL)
2317 return -1;
2318
2319 p = save(self, newargtup, 0);
2320 Py_DECREF(newargtup);
2321 if (p < 0)
2322 return -1;
2323
2324 /* Add NEWOBJ opcode. */
2325 if (pickler_write(self, &newobj_op, 1) < 0)
2326 return -1;
2327 }
2328 else { /* Not using NEWOBJ. */
2329 if (save(self, callable, 0) < 0 ||
2330 save(self, argtup, 0) < 0 ||
2331 pickler_write(self, &reduce_op, 1) < 0)
2332 return -1;
2333 }
2334
2335 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2336 the caller do not want to memoize the object. Not particularly useful,
2337 but that is to mimic the behavior save_reduce() in pickle.py when
2338 obj is None. */
2339 if (obj && memo_put(self, obj) < 0)
2340 return -1;
2341
2342 if (listitems && batch_list(self, listitems) < 0)
2343 return -1;
2344
2345 if (dictitems && batch_dict(self, dictitems) < 0)
2346 return -1;
2347
2348 if (state) {
2349 if (save(self, state, 0) < 0 ||
2350 pickler_write(self, &build_op, 1) < 0)
2351 return -1;
2352 }
2353
2354 return 0;
2355}
2356
2357static int
2358save(PicklerObject *self, PyObject *obj, int pers_save)
2359{
2360 PyTypeObject *type;
2361 PyObject *reduce_func = NULL;
2362 PyObject *reduce_value = NULL;
2363 PyObject *memo_key = NULL;
2364 int status = 0;
2365
Antoine Pitrou0f2a61a2011-01-23 17:21:28 +00002366 if (Py_EnterRecursiveCall(" while pickling an object"))
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002367 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002368
2369 /* The extra pers_save argument is necessary to avoid calling save_pers()
2370 on its returned object. */
2371 if (!pers_save && self->pers_func) {
2372 /* save_pers() returns:
2373 -1 to signal an error;
2374 0 if it did nothing successfully;
2375 1 if a persistent id was saved.
2376 */
2377 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2378 goto done;
2379 }
2380
2381 type = Py_TYPE(obj);
2382
2383 /* XXX: The old cPickle had an optimization that used switch-case
2384 statement dispatching on the first letter of the type name. It was
2385 probably not a bad idea after all. If benchmarks shows that particular
2386 optimization had some real benefits, it would be nice to add it
2387 back. */
2388
2389 /* Atom types; these aren't memoized, so don't check the memo. */
2390
2391 if (obj == Py_None) {
2392 status = save_none(self, obj);
2393 goto done;
2394 }
2395 else if (obj == Py_False || obj == Py_True) {
2396 status = save_bool(self, obj);
2397 goto done;
2398 }
2399 else if (type == &PyLong_Type) {
2400 status = save_long(self, obj);
2401 goto done;
2402 }
2403 else if (type == &PyFloat_Type) {
2404 status = save_float(self, obj);
2405 goto done;
2406 }
2407
2408 /* Check the memo to see if it has the object. If so, generate
2409 a GET (or BINGET) opcode, instead of pickling the object
2410 once again. */
2411 memo_key = PyLong_FromVoidPtr(obj);
2412 if (memo_key == NULL)
2413 goto error;
2414 if (PyDict_GetItem(self->memo, memo_key)) {
2415 if (memo_get(self, memo_key) < 0)
2416 goto error;
2417 goto done;
2418 }
2419
2420 if (type == &PyBytes_Type) {
2421 status = save_bytes(self, obj);
2422 goto done;
2423 }
2424 else if (type == &PyUnicode_Type) {
2425 status = save_unicode(self, obj);
2426 goto done;
2427 }
2428 else if (type == &PyDict_Type) {
2429 status = save_dict(self, obj);
2430 goto done;
2431 }
2432 else if (type == &PyList_Type) {
2433 status = save_list(self, obj);
2434 goto done;
2435 }
2436 else if (type == &PyTuple_Type) {
2437 status = save_tuple(self, obj);
2438 goto done;
2439 }
2440 else if (type == &PyType_Type) {
2441 status = save_global(self, obj, NULL);
2442 goto done;
2443 }
2444 else if (type == &PyFunction_Type) {
2445 status = save_global(self, obj, NULL);
2446 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2447 /* fall back to reduce */
2448 PyErr_Clear();
2449 }
2450 else {
2451 goto done;
2452 }
2453 }
2454 else if (type == &PyCFunction_Type) {
2455 status = save_global(self, obj, NULL);
2456 goto done;
2457 }
2458 else if (PyType_IsSubtype(type, &PyType_Type)) {
2459 status = save_global(self, obj, NULL);
2460 goto done;
2461 }
2462
2463 /* XXX: This part needs some unit tests. */
2464
2465 /* Get a reduction callable, and call it. This may come from
2466 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2467 * or the object's __reduce__ method.
2468 */
2469 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2470 if (reduce_func != NULL) {
2471 /* Here, the reference count of the reduce_func object returned by
2472 PyDict_GetItem needs to be increased to be consistent with the one
2473 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2474 reduce_func at the end of the save() routine.
2475 */
2476 Py_INCREF(reduce_func);
2477 Py_INCREF(obj);
2478 reduce_value = pickler_call(self, reduce_func, obj);
2479 }
2480 else {
2481 static PyObject *reduce_str = NULL;
2482 static PyObject *reduce_ex_str = NULL;
2483
2484 /* Cache the name of the reduce methods. */
2485 if (reduce_str == NULL) {
2486 reduce_str = PyUnicode_InternFromString("__reduce__");
2487 if (reduce_str == NULL)
2488 goto error;
2489 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2490 if (reduce_ex_str == NULL)
2491 goto error;
2492 }
2493
2494 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2495 automatically defined as __reduce__. While this is convenient, this
2496 make it impossible to know which method was actually called. Of
2497 course, this is not a big deal. But still, it would be nice to let
2498 the user know which method was called when something go
2499 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2500 don't actually have to check for a __reduce__ method. */
2501
2502 /* Check for a __reduce_ex__ method. */
2503 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2504 if (reduce_func != NULL) {
2505 PyObject *proto;
2506 proto = PyLong_FromLong(self->proto);
2507 if (proto != NULL) {
2508 reduce_value = pickler_call(self, reduce_func, proto);
2509 }
2510 }
2511 else {
2512 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2513 PyErr_Clear();
2514 else
2515 goto error;
2516 /* Check for a __reduce__ method. */
2517 reduce_func = PyObject_GetAttr(obj, reduce_str);
2518 if (reduce_func != NULL) {
2519 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2520 }
2521 else {
2522 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2523 type->tp_name, obj);
2524 goto error;
2525 }
2526 }
2527 }
2528
2529 if (reduce_value == NULL)
2530 goto error;
2531
2532 if (PyUnicode_Check(reduce_value)) {
2533 status = save_global(self, obj, reduce_value);
2534 goto done;
2535 }
2536
2537 if (!PyTuple_Check(reduce_value)) {
2538 PyErr_SetString(PicklingError,
2539 "__reduce__ must return a string or tuple");
2540 goto error;
2541 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002542
2543 status = save_reduce(self, reduce_value, obj);
2544
2545 if (0) {
2546 error:
2547 status = -1;
2548 }
2549 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002550 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002551 Py_XDECREF(memo_key);
2552 Py_XDECREF(reduce_func);
2553 Py_XDECREF(reduce_value);
2554
2555 return status;
2556}
2557
2558static int
2559dump(PicklerObject *self, PyObject *obj)
2560{
2561 const char stop_op = STOP;
2562
2563 if (self->proto >= 2) {
2564 char header[2];
2565
2566 header[0] = PROTO;
2567 assert(self->proto >= 0 && self->proto < 256);
2568 header[1] = (unsigned char)self->proto;
2569 if (pickler_write(self, header, 2) < 0)
2570 return -1;
2571 }
2572
2573 if (save(self, obj, 0) < 0 ||
2574 pickler_write(self, &stop_op, 1) < 0 ||
2575 pickler_write(self, NULL, 0) < 0)
2576 return -1;
2577
2578 return 0;
2579}
2580
2581PyDoc_STRVAR(Pickler_clear_memo_doc,
2582"clear_memo() -> None. Clears the pickler's \"memo\"."
2583"\n"
2584"The memo is the data structure that remembers which objects the\n"
2585"pickler has already seen, so that shared or recursive objects are\n"
2586"pickled by reference and not by value. This method is useful when\n"
2587"re-using picklers.");
2588
2589static PyObject *
2590Pickler_clear_memo(PicklerObject *self)
2591{
2592 if (self->memo)
2593 PyDict_Clear(self->memo);
2594
2595 Py_RETURN_NONE;
2596}
2597
2598PyDoc_STRVAR(Pickler_dump_doc,
2599"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2600
2601static PyObject *
2602Pickler_dump(PicklerObject *self, PyObject *args)
2603{
2604 PyObject *obj;
2605
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002606 /* Check whether the Pickler was initialized correctly (issue3664).
2607 Developers often forget to call __init__() in their subclasses, which
2608 would trigger a segfault without this check. */
2609 if (self->write == NULL) {
2610 PyErr_Format(PicklingError,
2611 "Pickler.__init__() was not called by %s.__init__()",
2612 Py_TYPE(self)->tp_name);
2613 return NULL;
2614 }
2615
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002616 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2617 return NULL;
2618
2619 if (dump(self, obj) < 0)
2620 return NULL;
2621
2622 Py_RETURN_NONE;
2623}
2624
2625static struct PyMethodDef Pickler_methods[] = {
2626 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2627 Pickler_dump_doc},
2628 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2629 Pickler_clear_memo_doc},
2630 {NULL, NULL} /* sentinel */
2631};
2632
2633static void
2634Pickler_dealloc(PicklerObject *self)
2635{
2636 PyObject_GC_UnTrack(self);
2637
2638 Py_XDECREF(self->write);
2639 Py_XDECREF(self->memo);
2640 Py_XDECREF(self->pers_func);
2641 Py_XDECREF(self->arg);
2642 Py_XDECREF(self->fast_memo);
2643
2644 PyMem_Free(self->write_buf);
2645
2646 Py_TYPE(self)->tp_free((PyObject *)self);
2647}
2648
2649static int
2650Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2651{
2652 Py_VISIT(self->write);
2653 Py_VISIT(self->memo);
2654 Py_VISIT(self->pers_func);
2655 Py_VISIT(self->arg);
2656 Py_VISIT(self->fast_memo);
2657 return 0;
2658}
2659
2660static int
2661Pickler_clear(PicklerObject *self)
2662{
2663 Py_CLEAR(self->write);
2664 Py_CLEAR(self->memo);
2665 Py_CLEAR(self->pers_func);
2666 Py_CLEAR(self->arg);
2667 Py_CLEAR(self->fast_memo);
2668
2669 PyMem_Free(self->write_buf);
2670 self->write_buf = NULL;
2671
2672 return 0;
2673}
2674
2675PyDoc_STRVAR(Pickler_doc,
2676"Pickler(file, protocol=None)"
2677"\n"
2678"This takes a binary file for writing a pickle data stream.\n"
2679"\n"
2680"The optional protocol argument tells the pickler to use the\n"
2681"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2682"protocol is 3; a backward-incompatible protocol designed for\n"
2683"Python 3.0.\n"
2684"\n"
2685"Specifying a negative protocol version selects the highest\n"
2686"protocol version supported. The higher the protocol used, the\n"
2687"more recent the version of Python needed to read the pickle\n"
2688"produced.\n"
2689"\n"
2690"The file argument must have a write() method that accepts a single\n"
2691"bytes argument. It can thus be a file object opened for binary\n"
2692"writing, a io.BytesIO instance, or any other custom object that\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002693"meets this interface.\n"
2694"\n"
2695"If fix_imports is True and protocol is less than 3, pickle will try to\n"
2696"map the new Python 3.x names to the old module names used in Python\n"
2697"2.x, so that the pickle data stream is readable with Python 2.x.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002698
2699static int
2700Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2701{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002702 static char *kwlist[] = {"file", "protocol", "fix_imports", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002703 PyObject *file;
2704 PyObject *proto_obj = NULL;
2705 long proto = 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002706 int fix_imports = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002707
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002708 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|Oi:Pickler",
2709 kwlist, &file, &proto_obj, &fix_imports))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002710 return -1;
2711
2712 /* In case of multiple __init__() calls, clear previous content. */
2713 if (self->write != NULL)
2714 (void)Pickler_clear(self);
2715
2716 if (proto_obj == NULL || proto_obj == Py_None)
2717 proto = DEFAULT_PROTOCOL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002718 else {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002719 proto = PyLong_AsLong(proto_obj);
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002720 if (proto == -1 && PyErr_Occurred())
2721 return -1;
2722 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002723
2724 if (proto < 0)
2725 proto = HIGHEST_PROTOCOL;
2726 if (proto > HIGHEST_PROTOCOL) {
2727 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2728 HIGHEST_PROTOCOL);
2729 return -1;
2730 }
2731
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00002732 self->proto = proto;
2733 self->bin = proto > 0;
2734 self->arg = NULL;
2735 self->fast = 0;
2736 self->fast_nesting = 0;
2737 self->fast_memo = NULL;
2738 self->fix_imports = fix_imports && proto < 3;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002739
2740 if (!PyObject_HasAttrString(file, "write")) {
2741 PyErr_SetString(PyExc_TypeError,
2742 "file must have a 'write' attribute");
2743 return -1;
2744 }
2745 self->write = PyObject_GetAttrString(file, "write");
2746 if (self->write == NULL)
2747 return -1;
2748 self->buf_size = 0;
2749 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2750 if (self->write_buf == NULL) {
2751 PyErr_NoMemory();
2752 return -1;
2753 }
2754 self->pers_func = NULL;
2755 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2756 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2757 "persistent_id");
2758 if (self->pers_func == NULL)
2759 return -1;
2760 }
2761 self->memo = PyDict_New();
2762 if (self->memo == NULL)
2763 return -1;
2764
2765 return 0;
2766}
2767
2768static PyObject *
2769Pickler_get_memo(PicklerObject *self)
2770{
2771 if (self->memo == NULL)
2772 PyErr_SetString(PyExc_AttributeError, "memo");
2773 else
2774 Py_INCREF(self->memo);
2775 return self->memo;
2776}
2777
2778static int
2779Pickler_set_memo(PicklerObject *self, PyObject *value)
2780{
2781 PyObject *tmp;
2782
2783 if (value == NULL) {
2784 PyErr_SetString(PyExc_TypeError,
2785 "attribute deletion is not supported");
2786 return -1;
2787 }
2788 if (!PyDict_Check(value)) {
2789 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2790 return -1;
2791 }
2792
2793 tmp = self->memo;
2794 Py_INCREF(value);
2795 self->memo = value;
2796 Py_XDECREF(tmp);
2797
2798 return 0;
2799}
2800
2801static PyObject *
2802Pickler_get_persid(PicklerObject *self)
2803{
2804 if (self->pers_func == NULL)
2805 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2806 else
2807 Py_INCREF(self->pers_func);
2808 return self->pers_func;
2809}
2810
2811static int
2812Pickler_set_persid(PicklerObject *self, PyObject *value)
2813{
2814 PyObject *tmp;
2815
2816 if (value == NULL) {
2817 PyErr_SetString(PyExc_TypeError,
2818 "attribute deletion is not supported");
2819 return -1;
2820 }
2821 if (!PyCallable_Check(value)) {
2822 PyErr_SetString(PyExc_TypeError,
2823 "persistent_id must be a callable taking one argument");
2824 return -1;
2825 }
2826
2827 tmp = self->pers_func;
2828 Py_INCREF(value);
2829 self->pers_func = value;
2830 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2831
2832 return 0;
2833}
2834
2835static PyMemberDef Pickler_members[] = {
2836 {"bin", T_INT, offsetof(PicklerObject, bin)},
2837 {"fast", T_INT, offsetof(PicklerObject, fast)},
2838 {NULL}
2839};
2840
2841static PyGetSetDef Pickler_getsets[] = {
2842 {"memo", (getter)Pickler_get_memo,
2843 (setter)Pickler_set_memo},
2844 {"persistent_id", (getter)Pickler_get_persid,
2845 (setter)Pickler_set_persid},
2846 {NULL}
2847};
2848
/* Type object for _pickle.Pickler.  The type is subclassable and takes
   part in cyclic garbage collection (see the tp_flags slot); its slots
   wire up the dealloc/traverse/clear/init functions together with the
   method, member and getset tables defined in this file.  The initializer
   is positional, so the order of the entries must not be changed. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2891
2892/* Temporary helper for calling self.find_class().
2893
2894 XXX: It would be nice to able to avoid Python function call overhead, by
2895 using directly the C version of find_class(), when find_class() is not
2896 overridden by a subclass. Although, this could become rather hackish. A
2897 simpler optimization would be to call the C function when self is not a
2898 subclass instance. */
2899static PyObject *
2900find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2901{
2902 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2903 module_name, global_name);
2904}
2905
2906static int
2907marker(UnpicklerObject *self)
2908{
2909 if (self->num_marks < 1) {
2910 PyErr_SetString(UnpicklingError, "could not find MARK");
2911 return -1;
2912 }
2913
2914 return self->marks[--self->num_marks];
2915}
2916
2917static int
2918load_none(UnpicklerObject *self)
2919{
2920 PDATA_APPEND(self->stack, Py_None, -1);
2921 return 0;
2922}
2923
2924static int
2925bad_readline(void)
2926{
2927 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2928 return -1;
2929}
2930
2931static int
2932load_int(UnpicklerObject *self)
2933{
2934 PyObject *value;
2935 char *endptr, *s;
2936 Py_ssize_t len;
2937 long x;
2938
2939 if ((len = unpickler_readline(self, &s)) < 0)
2940 return -1;
2941 if (len < 2)
2942 return bad_readline();
2943
2944 errno = 0;
2945 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2946 x = strtol(s, &endptr, 0);
2947
2948 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2949 /* Hm, maybe we've got something long. Let's try reading
2950 * it as a Python long object. */
2951 errno = 0;
2952 /* XXX: Same thing about the base here. */
2953 value = PyLong_FromString(s, NULL, 0);
2954 if (value == NULL) {
2955 PyErr_SetString(PyExc_ValueError,
2956 "could not convert string to int");
2957 return -1;
2958 }
2959 }
2960 else {
2961 if (len == 3 && (x == 0 || x == 1)) {
2962 if ((value = PyBool_FromLong(x)) == NULL)
2963 return -1;
2964 }
2965 else {
2966 if ((value = PyLong_FromLong(x)) == NULL)
2967 return -1;
2968 }
2969 }
2970
2971 PDATA_PUSH(self->stack, value, -1);
2972 return 0;
2973}
2974
2975static int
2976load_bool(UnpicklerObject *self, PyObject *boolean)
2977{
2978 assert(boolean == Py_True || boolean == Py_False);
2979 PDATA_APPEND(self->stack, boolean, -1);
2980 return 0;
2981}
2982
/* bytes points at size bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The callers in this file pass a size of 1, 2 or 4.  The value is
 * assembled in an unsigned accumulator so that no bit is ever shifted into
 * the sign bit of a signed type, which would be undefined behavior on
 * platforms where long is 32 bits wide.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long acc = 0;
    int i;

    for (i = 0; i < size; i++) {
        acc |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  When bit 31 is set the value is acc - 2**32, computed
     * here as -(0xFFFFFFFF - acc) - 1 so that every intermediate result
     * fits in a long whatever its width.
     */
    if (size == 4 && (acc & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - acc) - 1;
    }

    return (long)acc;
}
3009
3010static int
3011load_binintx(UnpicklerObject *self, char *s, int size)
3012{
3013 PyObject *value;
3014 long x;
3015
3016 x = calc_binint(s, size);
3017
3018 if ((value = PyLong_FromLong(x)) == NULL)
3019 return -1;
3020
3021 PDATA_PUSH(self->stack, value, -1);
3022 return 0;
3023}
3024
3025static int
3026load_binint(UnpicklerObject *self)
3027{
3028 char *s;
3029
3030 if (unpickler_read(self, &s, 4) < 0)
3031 return -1;
3032
3033 return load_binintx(self, s, 4);
3034}
3035
3036static int
3037load_binint1(UnpicklerObject *self)
3038{
3039 char *s;
3040
3041 if (unpickler_read(self, &s, 1) < 0)
3042 return -1;
3043
3044 return load_binintx(self, s, 1);
3045}
3046
3047static int
3048load_binint2(UnpicklerObject *self)
3049{
3050 char *s;
3051
3052 if (unpickler_read(self, &s, 2) < 0)
3053 return -1;
3054
3055 return load_binintx(self, s, 2);
3056}
3057
3058static int
3059load_long(UnpicklerObject *self)
3060{
3061 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003062 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003063 Py_ssize_t len;
3064
3065 if ((len = unpickler_readline(self, &s)) < 0)
3066 return -1;
3067 if (len < 2)
3068 return bad_readline();
3069
Mark Dickinson8dd05142009-01-20 20:43:58 +00003070 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
3071 the 'L' before calling PyLong_FromString. In order to maintain
3072 compatibility with Python 3.0.0, we don't actually *require*
3073 the 'L' to be present. */
3074 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00003075 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00003076 }
Alexandre Vassalottie4bccb72009-01-24 01:47:57 +00003077 /* XXX: Should the base argument explicitly set to 10? */
3078 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00003079 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003080 return -1;
3081
3082 PDATA_PUSH(self->stack, value, -1);
3083 return 0;
3084}
3085
3086/* 'size' bytes contain the # of bytes of little-endian 256's-complement
3087 * data following.
3088 */
3089static int
3090load_counted_long(UnpicklerObject *self, int size)
3091{
3092 PyObject *value;
3093 char *nbytes;
3094 char *pdata;
3095
3096 assert(size == 1 || size == 4);
3097 if (unpickler_read(self, &nbytes, size) < 0)
3098 return -1;
3099
3100 size = calc_binint(nbytes, size);
3101 if (size < 0) {
3102 /* Corrupt or hostile pickle -- we never write one like this */
3103 PyErr_SetString(UnpicklingError,
3104 "LONG pickle has negative byte count");
3105 return -1;
3106 }
3107
3108 if (size == 0)
3109 value = PyLong_FromLong(0L);
3110 else {
3111 /* Read the raw little-endian bytes and convert. */
3112 if (unpickler_read(self, &pdata, size) < 0)
3113 return -1;
3114 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
3115 1 /* little endian */ , 1 /* signed */ );
3116 }
3117 if (value == NULL)
3118 return -1;
3119 PDATA_PUSH(self->stack, value, -1);
3120 return 0;
3121}
3122
3123static int
3124load_float(UnpicklerObject *self)
3125{
3126 PyObject *value;
3127 char *endptr, *s;
3128 Py_ssize_t len;
3129 double d;
3130
3131 if ((len = unpickler_readline(self, &s)) < 0)
3132 return -1;
3133 if (len < 2)
3134 return bad_readline();
3135
3136 errno = 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003137 d = PyOS_string_to_double(s, &endptr, PyExc_OverflowError);
3138 if (d == -1.0 && PyErr_Occurred())
3139 return -1;
3140 if ((endptr[0] != '\n') || (endptr[1] != '\0')) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003141 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
3142 return -1;
3143 }
Mark Dickinson725bfd82009-05-03 20:33:40 +00003144 value = PyFloat_FromDouble(d);
3145 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003146 return -1;
3147
3148 PDATA_PUSH(self->stack, value, -1);
3149 return 0;
Mark Dickinson725bfd82009-05-03 20:33:40 +00003150 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003151
3152static int
3153load_binfloat(UnpicklerObject *self)
3154{
3155 PyObject *value;
3156 double x;
3157 char *s;
3158
3159 if (unpickler_read(self, &s, 8) < 0)
3160 return -1;
3161
3162 x = _PyFloat_Unpack8((unsigned char *)s, 0);
3163 if (x == -1.0 && PyErr_Occurred())
3164 return -1;
3165
3166 if ((value = PyFloat_FromDouble(x)) == NULL)
3167 return -1;
3168
3169 PDATA_PUSH(self->stack, value, -1);
3170 return 0;
3171}
3172
3173static int
3174load_string(UnpicklerObject *self)
3175{
3176 PyObject *bytes;
3177 PyObject *str = NULL;
3178 Py_ssize_t len;
3179 char *s, *p;
3180
3181 if ((len = unpickler_readline(self, &s)) < 0)
3182 return -1;
3183 if (len < 3)
3184 return bad_readline();
3185 if ((s = strdup(s)) == NULL) {
3186 PyErr_NoMemory();
3187 return -1;
3188 }
3189
3190 /* Strip outermost quotes */
3191 while (s[len - 1] <= ' ')
3192 len--;
3193 if (s[0] == '"' && s[len - 1] == '"') {
3194 s[len - 1] = '\0';
3195 p = s + 1;
3196 len -= 2;
3197 }
3198 else if (s[0] == '\'' && s[len - 1] == '\'') {
3199 s[len - 1] = '\0';
3200 p = s + 1;
3201 len -= 2;
3202 }
3203 else {
3204 free(s);
3205 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3206 return -1;
3207 }
3208
3209 /* Use the PyBytes API to decode the string, since that is what is used
3210 to encode, and then coerce the result to Unicode. */
3211 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3212 free(s);
3213 if (bytes == NULL)
3214 return -1;
3215 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3216 Py_DECREF(bytes);
3217 if (str == NULL)
3218 return -1;
3219
3220 PDATA_PUSH(self->stack, str, -1);
3221 return 0;
3222}
3223
3224static int
3225load_binbytes(UnpicklerObject *self)
3226{
3227 PyObject *bytes;
3228 long x;
3229 char *s;
3230
3231 if (unpickler_read(self, &s, 4) < 0)
3232 return -1;
3233
3234 x = calc_binint(s, 4);
3235 if (x < 0) {
3236 PyErr_SetString(UnpicklingError,
3237 "BINBYTES pickle has negative byte count");
3238 return -1;
3239 }
3240
3241 if (unpickler_read(self, &s, x) < 0)
3242 return -1;
3243 bytes = PyBytes_FromStringAndSize(s, x);
3244 if (bytes == NULL)
3245 return -1;
3246
3247 PDATA_PUSH(self->stack, bytes, -1);
3248 return 0;
3249}
3250
3251static int
3252load_short_binbytes(UnpicklerObject *self)
3253{
3254 PyObject *bytes;
3255 unsigned char x;
3256 char *s;
3257
3258 if (unpickler_read(self, &s, 1) < 0)
3259 return -1;
3260
3261 x = (unsigned char)s[0];
3262
3263 if (unpickler_read(self, &s, x) < 0)
3264 return -1;
3265
3266 bytes = PyBytes_FromStringAndSize(s, x);
3267 if (bytes == NULL)
3268 return -1;
3269
3270 PDATA_PUSH(self->stack, bytes, -1);
3271 return 0;
3272}
3273
3274static int
3275load_binstring(UnpicklerObject *self)
3276{
3277 PyObject *str;
3278 long x;
3279 char *s;
3280
3281 if (unpickler_read(self, &s, 4) < 0)
3282 return -1;
3283
3284 x = calc_binint(s, 4);
3285 if (x < 0) {
3286 PyErr_SetString(UnpicklingError,
3287 "BINSTRING pickle has negative byte count");
3288 return -1;
3289 }
3290
3291 if (unpickler_read(self, &s, x) < 0)
3292 return -1;
3293
3294 /* Convert Python 2.x strings to unicode. */
3295 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3296 if (str == NULL)
3297 return -1;
3298
3299 PDATA_PUSH(self->stack, str, -1);
3300 return 0;
3301}
3302
3303static int
3304load_short_binstring(UnpicklerObject *self)
3305{
3306 PyObject *str;
3307 unsigned char x;
3308 char *s;
3309
3310 if (unpickler_read(self, &s, 1) < 0)
3311 return -1;
3312
3313 x = (unsigned char)s[0];
3314
3315 if (unpickler_read(self, &s, x) < 0)
3316 return -1;
3317
3318 /* Convert Python 2.x strings to unicode. */
3319 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3320 if (str == NULL)
3321 return -1;
3322
3323 PDATA_PUSH(self->stack, str, -1);
3324 return 0;
3325}
3326
3327static int
3328load_unicode(UnpicklerObject *self)
3329{
3330 PyObject *str;
3331 Py_ssize_t len;
3332 char *s;
3333
3334 if ((len = unpickler_readline(self, &s)) < 0)
3335 return -1;
3336 if (len < 1)
3337 return bad_readline();
3338
3339 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3340 if (str == NULL)
3341 return -1;
3342
3343 PDATA_PUSH(self->stack, str, -1);
3344 return 0;
3345}
3346
3347static int
3348load_binunicode(UnpicklerObject *self)
3349{
3350 PyObject *str;
3351 long size;
3352 char *s;
3353
3354 if (unpickler_read(self, &s, 4) < 0)
3355 return -1;
3356
3357 size = calc_binint(s, 4);
3358 if (size < 0) {
3359 PyErr_SetString(UnpicklingError,
3360 "BINUNICODE pickle has negative byte count");
3361 return -1;
3362 }
3363
3364 if (unpickler_read(self, &s, size) < 0)
3365 return -1;
3366
Victor Stinnerf7351b42010-04-13 11:09:22 +00003367 str = PyUnicode_DecodeUTF8(s, size, "surrogatepass");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003368 if (str == NULL)
3369 return -1;
3370
3371 PDATA_PUSH(self->stack, str, -1);
3372 return 0;
3373}
3374
3375static int
3376load_tuple(UnpicklerObject *self)
3377{
3378 PyObject *tuple;
3379 int i;
3380
3381 if ((i = marker(self)) < 0)
3382 return -1;
3383
3384 tuple = Pdata_poptuple(self->stack, i);
3385 if (tuple == NULL)
3386 return -1;
3387 PDATA_PUSH(self->stack, tuple, -1);
3388 return 0;
3389}
3390
3391static int
3392load_counted_tuple(UnpicklerObject *self, int len)
3393{
3394 PyObject *tuple;
3395
3396 tuple = PyTuple_New(len);
3397 if (tuple == NULL)
3398 return -1;
3399
3400 while (--len >= 0) {
3401 PyObject *item;
3402
3403 PDATA_POP(self->stack, item);
3404 if (item == NULL)
3405 return -1;
3406 PyTuple_SET_ITEM(tuple, len, item);
3407 }
3408 PDATA_PUSH(self->stack, tuple, -1);
3409 return 0;
3410}
3411
3412static int
3413load_empty_list(UnpicklerObject *self)
3414{
3415 PyObject *list;
3416
3417 if ((list = PyList_New(0)) == NULL)
3418 return -1;
3419 PDATA_PUSH(self->stack, list, -1);
3420 return 0;
3421}
3422
3423static int
3424load_empty_dict(UnpicklerObject *self)
3425{
3426 PyObject *dict;
3427
3428 if ((dict = PyDict_New()) == NULL)
3429 return -1;
3430 PDATA_PUSH(self->stack, dict, -1);
3431 return 0;
3432}
3433
3434static int
3435load_list(UnpicklerObject *self)
3436{
3437 PyObject *list;
3438 int i;
3439
3440 if ((i = marker(self)) < 0)
3441 return -1;
3442
3443 list = Pdata_poplist(self->stack, i);
3444 if (list == NULL)
3445 return -1;
3446 PDATA_PUSH(self->stack, list, -1);
3447 return 0;
3448}
3449
3450static int
3451load_dict(UnpicklerObject *self)
3452{
3453 PyObject *dict, *key, *value;
3454 int i, j, k;
3455
3456 if ((i = marker(self)) < 0)
3457 return -1;
3458 j = self->stack->length;
3459
3460 if ((dict = PyDict_New()) == NULL)
3461 return -1;
3462
3463 for (k = i + 1; k < j; k += 2) {
3464 key = self->stack->data[k - 1];
3465 value = self->stack->data[k];
3466 if (PyDict_SetItem(dict, key, value) < 0) {
3467 Py_DECREF(dict);
3468 return -1;
3469 }
3470 }
3471 Pdata_clear(self->stack, i);
3472 PDATA_PUSH(self->stack, dict, -1);
3473 return 0;
3474}
3475
3476static PyObject *
3477instantiate(PyObject *cls, PyObject *args)
3478{
Alexander Belopolsky82a6bf02010-07-17 23:01:39 +00003479 PyObject *result = NULL;
3480 /* Caller must assure args are a tuple. Normally, args come from
3481 Pdata_poptuple which packs objects from the top of the stack
3482 into a newly created tuple. */
3483 assert(PyTuple_Check(args));
3484 if (Py_SIZE(args) > 0 || !PyType_Check(cls) ||
3485 PyObject_HasAttrString(cls, "__getinitargs__")) {
3486 result = PyObject_CallObject(cls, args);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003487 }
Alexander Belopolsky82a6bf02010-07-17 23:01:39 +00003488 else {
3489 result = PyObject_CallMethod(cls, "__new__", "O", cls);
3490 }
3491 return result;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003492}
3493
3494static int
3495load_obj(UnpicklerObject *self)
3496{
3497 PyObject *cls, *args, *obj = NULL;
3498 int i;
3499
3500 if ((i = marker(self)) < 0)
3501 return -1;
3502
3503 args = Pdata_poptuple(self->stack, i + 1);
3504 if (args == NULL)
3505 return -1;
3506
3507 PDATA_POP(self->stack, cls);
3508 if (cls) {
3509 obj = instantiate(cls, args);
3510 Py_DECREF(cls);
3511 }
3512 Py_DECREF(args);
3513 if (obj == NULL)
3514 return -1;
3515
3516 PDATA_PUSH(self->stack, obj, -1);
3517 return 0;
3518}
3519
3520static int
3521load_inst(UnpicklerObject *self)
3522{
3523 PyObject *cls = NULL;
3524 PyObject *args = NULL;
3525 PyObject *obj = NULL;
3526 PyObject *module_name;
3527 PyObject *class_name;
3528 Py_ssize_t len;
3529 int i;
3530 char *s;
3531
3532 if ((i = marker(self)) < 0)
3533 return -1;
3534 if ((len = unpickler_readline(self, &s)) < 0)
3535 return -1;
3536 if (len < 2)
3537 return bad_readline();
3538
3539 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3540 identifiers are permitted in Python 3.0, since the INST opcode is only
3541 supported by older protocols on Python 2.x. */
3542 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3543 if (module_name == NULL)
3544 return -1;
3545
3546 if ((len = unpickler_readline(self, &s)) >= 0) {
3547 if (len < 2)
3548 return bad_readline();
3549 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
Alexander Belopolsky82a6bf02010-07-17 23:01:39 +00003550 if (class_name != NULL) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003551 cls = find_class(self, module_name, class_name);
3552 Py_DECREF(class_name);
3553 }
3554 }
3555 Py_DECREF(module_name);
3556
3557 if (cls == NULL)
3558 return -1;
3559
3560 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3561 obj = instantiate(cls, args);
3562 Py_DECREF(args);
3563 }
3564 Py_DECREF(cls);
3565
3566 if (obj == NULL)
3567 return -1;
3568
3569 PDATA_PUSH(self->stack, obj, -1);
3570 return 0;
3571}
3572
3573static int
3574load_newobj(UnpicklerObject *self)
3575{
3576 PyObject *args = NULL;
3577 PyObject *clsraw = NULL;
3578 PyTypeObject *cls; /* clsraw cast to its true type */
3579 PyObject *obj;
3580
3581 /* Stack is ... cls argtuple, and we want to call
3582 * cls.__new__(cls, *argtuple).
3583 */
3584 PDATA_POP(self->stack, args);
3585 if (args == NULL)
3586 goto error;
3587 if (!PyTuple_Check(args)) {
3588 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3589 goto error;
3590 }
3591
3592 PDATA_POP(self->stack, clsraw);
3593 cls = (PyTypeObject *)clsraw;
3594 if (cls == NULL)
3595 goto error;
3596 if (!PyType_Check(cls)) {
3597 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3598 "isn't a type object");
3599 goto error;
3600 }
3601 if (cls->tp_new == NULL) {
3602 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3603 "has NULL tp_new");
3604 goto error;
3605 }
3606
3607 /* Call __new__. */
3608 obj = cls->tp_new(cls, args, NULL);
3609 if (obj == NULL)
3610 goto error;
3611
3612 Py_DECREF(args);
3613 Py_DECREF(clsraw);
3614 PDATA_PUSH(self->stack, obj, -1);
3615 return 0;
3616
3617 error:
3618 Py_XDECREF(args);
3619 Py_XDECREF(clsraw);
3620 return -1;
3621}
3622
3623static int
3624load_global(UnpicklerObject *self)
3625{
3626 PyObject *global = NULL;
3627 PyObject *module_name;
3628 PyObject *global_name;
3629 Py_ssize_t len;
3630 char *s;
3631
3632 if ((len = unpickler_readline(self, &s)) < 0)
3633 return -1;
3634 if (len < 2)
3635 return bad_readline();
3636 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3637 if (!module_name)
3638 return -1;
3639
3640 if ((len = unpickler_readline(self, &s)) >= 0) {
3641 if (len < 2) {
3642 Py_DECREF(module_name);
3643 return bad_readline();
3644 }
3645 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3646 if (global_name) {
3647 global = find_class(self, module_name, global_name);
3648 Py_DECREF(global_name);
3649 }
3650 }
3651 Py_DECREF(module_name);
3652
3653 if (global == NULL)
3654 return -1;
3655 PDATA_PUSH(self->stack, global, -1);
3656 return 0;
3657}
3658
3659static int
3660load_persid(UnpicklerObject *self)
3661{
3662 PyObject *pid;
3663 Py_ssize_t len;
3664 char *s;
3665
3666 if (self->pers_func) {
3667 if ((len = unpickler_readline(self, &s)) < 0)
3668 return -1;
3669 if (len < 2)
3670 return bad_readline();
3671
3672 pid = PyBytes_FromStringAndSize(s, len - 1);
3673 if (pid == NULL)
3674 return -1;
3675
3676 /* Ugh... this does not leak since unpickler_call() steals the
3677 reference to pid first. */
3678 pid = unpickler_call(self, self->pers_func, pid);
3679 if (pid == NULL)
3680 return -1;
3681
3682 PDATA_PUSH(self->stack, pid, -1);
3683 return 0;
3684 }
3685 else {
3686 PyErr_SetString(UnpicklingError,
3687 "A load persistent id instruction was encountered,\n"
3688 "but no persistent_load function was specified.");
3689 return -1;
3690 }
3691}
3692
3693static int
3694load_binpersid(UnpicklerObject *self)
3695{
3696 PyObject *pid;
3697
3698 if (self->pers_func) {
3699 PDATA_POP(self->stack, pid);
3700 if (pid == NULL)
3701 return -1;
3702
3703 /* Ugh... this does not leak since unpickler_call() steals the
3704 reference to pid first. */
3705 pid = unpickler_call(self, self->pers_func, pid);
3706 if (pid == NULL)
3707 return -1;
3708
3709 PDATA_PUSH(self->stack, pid, -1);
3710 return 0;
3711 }
3712 else {
3713 PyErr_SetString(UnpicklingError,
3714 "A load persistent id instruction was encountered,\n"
3715 "but no persistent_load function was specified.");
3716 return -1;
3717 }
3718}
3719
3720static int
3721load_pop(UnpicklerObject *self)
3722{
Collin Winter8ca69de2009-05-26 16:53:41 +00003723 int len = self->stack->length;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003724
3725 /* Note that we split the (pickle.py) stack into two stacks,
3726 * an object stack and a mark stack. We have to be clever and
3727 * pop the right one. We do this by looking at the top of the
Collin Winter8ca69de2009-05-26 16:53:41 +00003728 * mark stack first, and only signalling a stack underflow if
3729 * the object stack is empty and the mark stack doesn't match
3730 * our expectations.
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003731 */
Collin Winter8ca69de2009-05-26 16:53:41 +00003732 if (self->num_marks > 0 && self->marks[self->num_marks - 1] == len) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003733 self->num_marks--;
Antoine Pitrou901d81e2010-01-07 18:02:53 +00003734 } else if (len > 0) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003735 len--;
3736 Py_DECREF(self->stack->data[len]);
3737 self->stack->length = len;
Collin Winter8ca69de2009-05-26 16:53:41 +00003738 } else {
3739 return stack_underflow();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003740 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003741 return 0;
3742}
3743
3744static int
3745load_pop_mark(UnpicklerObject *self)
3746{
3747 int i;
3748
3749 if ((i = marker(self)) < 0)
3750 return -1;
3751
3752 Pdata_clear(self->stack, i);
3753
3754 return 0;
3755}
3756
3757static int
3758load_dup(UnpicklerObject *self)
3759{
3760 PyObject *last;
3761 int len;
3762
3763 if ((len = self->stack->length) <= 0)
3764 return stack_underflow();
3765 last = self->stack->data[len - 1];
3766 PDATA_APPEND(self->stack, last, -1);
3767 return 0;
3768}
3769
3770static int
3771load_get(UnpicklerObject *self)
3772{
3773 PyObject *key, *value;
3774 Py_ssize_t len;
3775 char *s;
3776
3777 if ((len = unpickler_readline(self, &s)) < 0)
3778 return -1;
3779 if (len < 2)
3780 return bad_readline();
3781
3782 key = PyLong_FromString(s, NULL, 10);
3783 if (key == NULL)
3784 return -1;
3785
3786 value = PyDict_GetItemWithError(self->memo, key);
3787 if (value == NULL) {
3788 if (!PyErr_Occurred())
3789 PyErr_SetObject(PyExc_KeyError, key);
3790 Py_DECREF(key);
3791 return -1;
3792 }
3793 Py_DECREF(key);
3794
3795 PDATA_APPEND(self->stack, value, -1);
3796 return 0;
3797}
3798
3799static int
3800load_binget(UnpicklerObject *self)
3801{
3802 PyObject *key, *value;
3803 char *s;
3804
3805 if (unpickler_read(self, &s, 1) < 0)
3806 return -1;
3807
3808 /* Here, the unsigned cast is necessary to avoid negative values. */
3809 key = PyLong_FromLong((long)(unsigned char)s[0]);
3810 if (key == NULL)
3811 return -1;
3812
3813 value = PyDict_GetItemWithError(self->memo, key);
3814 if (value == NULL) {
3815 if (!PyErr_Occurred())
3816 PyErr_SetObject(PyExc_KeyError, key);
3817 Py_DECREF(key);
3818 return -1;
3819 }
3820 Py_DECREF(key);
3821
3822 PDATA_APPEND(self->stack, value, -1);
3823 return 0;
3824}
3825
3826static int
3827load_long_binget(UnpicklerObject *self)
3828{
3829 PyObject *key, *value;
3830 char *s;
3831 long k;
3832
3833 if (unpickler_read(self, &s, 4) < 0)
3834 return -1;
3835
3836 k = (long)(unsigned char)s[0];
3837 k |= (long)(unsigned char)s[1] << 8;
3838 k |= (long)(unsigned char)s[2] << 16;
3839 k |= (long)(unsigned char)s[3] << 24;
3840
3841 key = PyLong_FromLong(k);
3842 if (key == NULL)
3843 return -1;
3844
3845 value = PyDict_GetItemWithError(self->memo, key);
3846 if (value == NULL) {
3847 if (!PyErr_Occurred())
3848 PyErr_SetObject(PyExc_KeyError, key);
3849 Py_DECREF(key);
3850 return -1;
3851 }
3852 Py_DECREF(key);
3853
3854 PDATA_APPEND(self->stack, value, -1);
3855 return 0;
3856}
3857
3858/* Push an object from the extension registry (EXT[124]). nbytes is
3859 * the number of bytes following the opcode, holding the index (code) value.
3860 */
3861static int
3862load_extension(UnpicklerObject *self, int nbytes)
3863{
3864 char *codebytes; /* the nbytes bytes after the opcode */
3865 long code; /* calc_binint returns long */
3866 PyObject *py_code; /* code as a Python int */
3867 PyObject *obj; /* the object to push */
3868 PyObject *pair; /* (module_name, class_name) */
3869 PyObject *module_name, *class_name;
3870
3871 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3872 if (unpickler_read(self, &codebytes, nbytes) < 0)
3873 return -1;
3874 code = calc_binint(codebytes, nbytes);
3875 if (code <= 0) { /* note that 0 is forbidden */
3876 /* Corrupt or hostile pickle. */
3877 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3878 return -1;
3879 }
3880
3881 /* Look for the code in the cache. */
3882 py_code = PyLong_FromLong(code);
3883 if (py_code == NULL)
3884 return -1;
3885 obj = PyDict_GetItem(extension_cache, py_code);
3886 if (obj != NULL) {
3887 /* Bingo. */
3888 Py_DECREF(py_code);
3889 PDATA_APPEND(self->stack, obj, -1);
3890 return 0;
3891 }
3892
3893 /* Look up the (module_name, class_name) pair. */
3894 pair = PyDict_GetItem(inverted_registry, py_code);
3895 if (pair == NULL) {
3896 Py_DECREF(py_code);
3897 PyErr_Format(PyExc_ValueError, "unregistered extension "
3898 "code %ld", code);
3899 return -1;
3900 }
3901 /* Since the extension registry is manipulable via Python code,
3902 * confirm that pair is really a 2-tuple of strings.
3903 */
3904 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3905 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3906 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3907 Py_DECREF(py_code);
3908 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3909 "isn't a 2-tuple of strings", code);
3910 return -1;
3911 }
3912 /* Load the object. */
3913 obj = find_class(self, module_name, class_name);
3914 if (obj == NULL) {
3915 Py_DECREF(py_code);
3916 return -1;
3917 }
3918 /* Cache code -> obj. */
3919 code = PyDict_SetItem(extension_cache, py_code, obj);
3920 Py_DECREF(py_code);
3921 if (code < 0) {
3922 Py_DECREF(obj);
3923 return -1;
3924 }
3925 PDATA_PUSH(self->stack, obj, -1);
3926 return 0;
3927}
3928
3929static int
3930load_put(UnpicklerObject *self)
3931{
3932 PyObject *key, *value;
3933 Py_ssize_t len;
3934 char *s;
3935 int x;
3936
3937 if ((len = unpickler_readline(self, &s)) < 0)
3938 return -1;
3939 if (len < 2)
3940 return bad_readline();
3941 if ((x = self->stack->length) <= 0)
3942 return stack_underflow();
3943
3944 key = PyLong_FromString(s, NULL, 10);
3945 if (key == NULL)
3946 return -1;
3947 value = self->stack->data[x - 1];
3948
3949 x = PyDict_SetItem(self->memo, key, value);
3950 Py_DECREF(key);
3951 return x;
3952}
3953
3954static int
3955load_binput(UnpicklerObject *self)
3956{
3957 PyObject *key, *value;
3958 char *s;
3959 int x;
3960
3961 if (unpickler_read(self, &s, 1) < 0)
3962 return -1;
3963 if ((x = self->stack->length) <= 0)
3964 return stack_underflow();
3965
3966 key = PyLong_FromLong((long)(unsigned char)s[0]);
3967 if (key == NULL)
3968 return -1;
3969 value = self->stack->data[x - 1];
3970
3971 x = PyDict_SetItem(self->memo, key, value);
3972 Py_DECREF(key);
3973 return x;
3974}
3975
3976static int
3977load_long_binput(UnpicklerObject *self)
3978{
3979 PyObject *key, *value;
3980 long k;
3981 char *s;
3982 int x;
3983
3984 if (unpickler_read(self, &s, 4) < 0)
3985 return -1;
3986 if ((x = self->stack->length) <= 0)
3987 return stack_underflow();
3988
3989 k = (long)(unsigned char)s[0];
3990 k |= (long)(unsigned char)s[1] << 8;
3991 k |= (long)(unsigned char)s[2] << 16;
3992 k |= (long)(unsigned char)s[3] << 24;
3993
3994 key = PyLong_FromLong(k);
3995 if (key == NULL)
3996 return -1;
3997 value = self->stack->data[x - 1];
3998
3999 x = PyDict_SetItem(self->memo, key, value);
4000 Py_DECREF(key);
4001 return x;
4002}
4003
4004static int
4005do_append(UnpicklerObject *self, int x)
4006{
4007 PyObject *value;
4008 PyObject *list;
4009 int len, i;
4010
4011 len = self->stack->length;
4012 if (x > len || x <= 0)
4013 return stack_underflow();
4014 if (len == x) /* nothing to do */
4015 return 0;
4016
4017 list = self->stack->data[x - 1];
4018
4019 if (PyList_Check(list)) {
4020 PyObject *slice;
4021 Py_ssize_t list_len;
4022
4023 slice = Pdata_poplist(self->stack, x);
4024 if (!slice)
4025 return -1;
4026 list_len = PyList_GET_SIZE(list);
4027 i = PyList_SetSlice(list, list_len, list_len, slice);
4028 Py_DECREF(slice);
4029 return i;
4030 }
4031 else {
4032 PyObject *append_func;
4033
4034 append_func = PyObject_GetAttrString(list, "append");
4035 if (append_func == NULL)
4036 return -1;
4037 for (i = x; i < len; i++) {
4038 PyObject *result;
4039
4040 value = self->stack->data[i];
4041 result = unpickler_call(self, append_func, value);
4042 if (result == NULL) {
4043 Pdata_clear(self->stack, i + 1);
4044 self->stack->length = x;
4045 return -1;
4046 }
4047 Py_DECREF(result);
4048 }
4049 self->stack->length = x;
4050 }
4051
4052 return 0;
4053}
4054
4055static int
4056load_append(UnpicklerObject *self)
4057{
4058 return do_append(self, self->stack->length - 1);
4059}
4060
4061static int
4062load_appends(UnpicklerObject *self)
4063{
4064 return do_append(self, marker(self));
4065}
4066
4067static int
4068do_setitems(UnpicklerObject *self, int x)
4069{
4070 PyObject *value, *key;
4071 PyObject *dict;
4072 int len, i;
4073 int status = 0;
4074
4075 len = self->stack->length;
4076 if (x > len || x <= 0)
4077 return stack_underflow();
4078 if (len == x) /* nothing to do */
4079 return 0;
4080 if ((len - x) % 2 != 0) {
4081 /* Currupt or hostile pickle -- we never write one like this. */
4082 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
4083 return -1;
4084 }
4085
4086 /* Here, dict does not actually need to be a PyDict; it could be anything
4087 that supports the __setitem__ attribute. */
4088 dict = self->stack->data[x - 1];
4089
4090 for (i = x + 1; i < len; i += 2) {
4091 key = self->stack->data[i - 1];
4092 value = self->stack->data[i];
4093 if (PyObject_SetItem(dict, key, value) < 0) {
4094 status = -1;
4095 break;
4096 }
4097 }
4098
4099 Pdata_clear(self->stack, x);
4100 return status;
4101}
4102
4103static int
4104load_setitem(UnpicklerObject *self)
4105{
4106 return do_setitems(self, self->stack->length - 2);
4107}
4108
4109static int
4110load_setitems(UnpicklerObject *self)
4111{
4112 return do_setitems(self, marker(self));
4113}
4114
4115static int
4116load_build(UnpicklerObject *self)
4117{
4118 PyObject *state, *inst, *slotstate;
4119 PyObject *setstate;
4120 int status = 0;
4121
4122 /* Stack is ... instance, state. We want to leave instance at
4123 * the stack top, possibly mutated via instance.__setstate__(state).
4124 */
4125 if (self->stack->length < 2)
4126 return stack_underflow();
4127
4128 PDATA_POP(self->stack, state);
4129 if (state == NULL)
4130 return -1;
4131
4132 inst = self->stack->data[self->stack->length - 1];
4133
4134 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004135 if (setstate == NULL) {
4136 if (PyErr_ExceptionMatches(PyExc_AttributeError))
4137 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00004138 else {
4139 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00004140 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00004141 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004142 }
4143 else {
4144 PyObject *result;
4145
4146 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00004147 /* Ugh... this does not leak since unpickler_call() steals the
4148 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004149 result = unpickler_call(self, setstate, state);
4150 Py_DECREF(setstate);
4151 if (result == NULL)
4152 return -1;
4153 Py_DECREF(result);
4154 return 0;
4155 }
4156
4157 /* A default __setstate__. First see whether state embeds a
4158 * slot state dict too (a proto 2 addition).
4159 */
4160 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4161 PyObject *tmp = state;
4162
4163 state = PyTuple_GET_ITEM(tmp, 0);
4164 slotstate = PyTuple_GET_ITEM(tmp, 1);
4165 Py_INCREF(state);
4166 Py_INCREF(slotstate);
4167 Py_DECREF(tmp);
4168 }
4169 else
4170 slotstate = NULL;
4171
4172 /* Set inst.__dict__ from the state dict (if any). */
4173 if (state != Py_None) {
4174 PyObject *dict;
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004175 PyObject *d_key, *d_value;
4176 Py_ssize_t i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004177
4178 if (!PyDict_Check(state)) {
4179 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4180 goto error;
4181 }
4182 dict = PyObject_GetAttrString(inst, "__dict__");
4183 if (dict == NULL)
4184 goto error;
4185
Antoine Pitroua9f48a02009-05-02 21:41:14 +00004186 i = 0;
4187 while (PyDict_Next(state, &i, &d_key, &d_value)) {
4188 /* normally the keys for instance attributes are
4189 interned. we should try to do that here. */
4190 Py_INCREF(d_key);
4191 if (PyUnicode_CheckExact(d_key))
4192 PyUnicode_InternInPlace(&d_key);
4193 if (PyObject_SetItem(dict, d_key, d_value) < 0) {
4194 Py_DECREF(d_key);
4195 goto error;
4196 }
4197 Py_DECREF(d_key);
4198 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004199 Py_DECREF(dict);
4200 }
4201
4202 /* Also set instance attributes from the slotstate dict (if any). */
4203 if (slotstate != NULL) {
4204 PyObject *d_key, *d_value;
4205 Py_ssize_t i;
4206
4207 if (!PyDict_Check(slotstate)) {
4208 PyErr_SetString(UnpicklingError,
4209 "slot state is not a dictionary");
4210 goto error;
4211 }
4212 i = 0;
4213 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4214 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4215 goto error;
4216 }
4217 }
4218
4219 if (0) {
4220 error:
4221 status = -1;
4222 }
4223
4224 Py_DECREF(state);
4225 Py_XDECREF(slotstate);
4226 return status;
4227}
4228
4229static int
4230load_mark(UnpicklerObject *self)
4231{
4232
4233 /* Note that we split the (pickle.py) stack into two stacks, an
4234 * object stack and a mark stack. Here we push a mark onto the
4235 * mark stack.
4236 */
4237
4238 if ((self->num_marks + 1) >= self->marks_size) {
4239 size_t alloc;
4240 int *marks;
4241
4242 /* Use the size_t type to check for overflow. */
4243 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004244 if (alloc > PY_SSIZE_T_MAX ||
4245 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004246 PyErr_NoMemory();
4247 return -1;
4248 }
4249
4250 if (self->marks == NULL)
4251 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4252 else
4253 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4254 if (marks == NULL) {
4255 PyErr_NoMemory();
4256 return -1;
4257 }
4258 self->marks = marks;
4259 self->marks_size = (Py_ssize_t)alloc;
4260 }
4261
4262 self->marks[self->num_marks++] = self->stack->length;
4263
4264 return 0;
4265}
4266
4267static int
4268load_reduce(UnpicklerObject *self)
4269{
4270 PyObject *callable = NULL;
4271 PyObject *argtup = NULL;
4272 PyObject *obj = NULL;
4273
4274 PDATA_POP(self->stack, argtup);
4275 if (argtup == NULL)
4276 return -1;
4277 PDATA_POP(self->stack, callable);
4278 if (callable) {
Alexander Belopolsky82a6bf02010-07-17 23:01:39 +00004279 obj = PyObject_CallObject(callable, argtup);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004280 Py_DECREF(callable);
4281 }
4282 Py_DECREF(argtup);
4283
4284 if (obj == NULL)
4285 return -1;
4286
4287 PDATA_PUSH(self->stack, obj, -1);
4288 return 0;
4289}
4290
4291/* Just raises an error if we don't know the protocol specified. PROTO
4292 * is the first opcode for protocols >= 2.
4293 */
4294static int
4295load_proto(UnpicklerObject *self)
4296{
4297 char *s;
4298 int i;
4299
4300 if (unpickler_read(self, &s, 1) < 0)
4301 return -1;
4302
4303 i = (unsigned char)s[0];
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004304 if (i <= HIGHEST_PROTOCOL) {
4305 self->proto = i;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004306 return 0;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004307 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004308
4309 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4310 return -1;
4311}
4312
4313static PyObject *
4314load(UnpicklerObject *self)
4315{
4316 PyObject *err;
4317 PyObject *value = NULL;
4318 char *s;
4319
4320 self->num_marks = 0;
4321 if (self->stack->length)
4322 Pdata_clear(self->stack, 0);
4323
4324 /* Convenient macros for the dispatch while-switch loop just below. */
4325#define OP(opcode, load_func) \
4326 case opcode: if (load_func(self) < 0) break; continue;
4327
4328#define OP_ARG(opcode, load_func, arg) \
4329 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4330
4331 while (1) {
4332 if (unpickler_read(self, &s, 1) < 0)
4333 break;
4334
4335 switch ((enum opcode)s[0]) {
4336 OP(NONE, load_none)
4337 OP(BININT, load_binint)
4338 OP(BININT1, load_binint1)
4339 OP(BININT2, load_binint2)
4340 OP(INT, load_int)
4341 OP(LONG, load_long)
4342 OP_ARG(LONG1, load_counted_long, 1)
4343 OP_ARG(LONG4, load_counted_long, 4)
4344 OP(FLOAT, load_float)
4345 OP(BINFLOAT, load_binfloat)
4346 OP(BINBYTES, load_binbytes)
4347 OP(SHORT_BINBYTES, load_short_binbytes)
4348 OP(BINSTRING, load_binstring)
4349 OP(SHORT_BINSTRING, load_short_binstring)
4350 OP(STRING, load_string)
4351 OP(UNICODE, load_unicode)
4352 OP(BINUNICODE, load_binunicode)
4353 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4354 OP_ARG(TUPLE1, load_counted_tuple, 1)
4355 OP_ARG(TUPLE2, load_counted_tuple, 2)
4356 OP_ARG(TUPLE3, load_counted_tuple, 3)
4357 OP(TUPLE, load_tuple)
4358 OP(EMPTY_LIST, load_empty_list)
4359 OP(LIST, load_list)
4360 OP(EMPTY_DICT, load_empty_dict)
4361 OP(DICT, load_dict)
4362 OP(OBJ, load_obj)
4363 OP(INST, load_inst)
4364 OP(NEWOBJ, load_newobj)
4365 OP(GLOBAL, load_global)
4366 OP(APPEND, load_append)
4367 OP(APPENDS, load_appends)
4368 OP(BUILD, load_build)
4369 OP(DUP, load_dup)
4370 OP(BINGET, load_binget)
4371 OP(LONG_BINGET, load_long_binget)
4372 OP(GET, load_get)
4373 OP(MARK, load_mark)
4374 OP(BINPUT, load_binput)
4375 OP(LONG_BINPUT, load_long_binput)
4376 OP(PUT, load_put)
4377 OP(POP, load_pop)
4378 OP(POP_MARK, load_pop_mark)
4379 OP(SETITEM, load_setitem)
4380 OP(SETITEMS, load_setitems)
4381 OP(PERSID, load_persid)
4382 OP(BINPERSID, load_binpersid)
4383 OP(REDUCE, load_reduce)
4384 OP(PROTO, load_proto)
4385 OP_ARG(EXT1, load_extension, 1)
4386 OP_ARG(EXT2, load_extension, 2)
4387 OP_ARG(EXT4, load_extension, 4)
4388 OP_ARG(NEWTRUE, load_bool, Py_True)
4389 OP_ARG(NEWFALSE, load_bool, Py_False)
4390
4391 case STOP:
4392 break;
4393
4394 case '\0':
4395 PyErr_SetNone(PyExc_EOFError);
4396 return NULL;
4397
4398 default:
4399 PyErr_Format(UnpicklingError,
4400 "invalid load key, '%c'.", s[0]);
4401 return NULL;
4402 }
4403
4404 break; /* and we are done! */
4405 }
4406
4407 /* XXX: It is not clear what this is actually for. */
4408 if ((err = PyErr_Occurred())) {
4409 if (err == PyExc_EOFError) {
4410 PyErr_SetNone(PyExc_EOFError);
4411 }
4412 return NULL;
4413 }
4414
4415 PDATA_POP(self->stack, value);
4416 return value;
4417}
4418
/* Docstring text describing Unpickler.load(); the C implementation,
   Unpickler_load(), follows directly below. */
PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");
4425
4426static PyObject *
4427Unpickler_load(UnpicklerObject *self)
4428{
4429 /* Check whether the Unpickler was initialized correctly. This prevents
4430 segfaulting if a subclass overridden __init__ with a function that does
4431 not call Unpickler.__init__(). Here, we simply ensure that self->read
4432 is not NULL. */
4433 if (self->read == NULL) {
4434 PyErr_Format(UnpicklingError,
4435 "Unpickler.__init__() was not called by %s.__init__()",
4436 Py_TYPE(self)->tp_name);
4437 return NULL;
4438 }
4439
4440 return load(self);
4441}
4442
/* The name of find_class() is misleading. In newer pickle protocols, this
   function is used for loading any global (i.e., functions), not just
   classes. The name is kept only for backward compatibility. */

/* Docstring text describing Unpickler.find_class(); the C implementation,
   Unpickler_find_class(), follows directly below. */
PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary. Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed. Both arguments passed are str objects.\n");
4456
4457static PyObject *
4458Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4459{
4460 PyObject *global;
4461 PyObject *modules_dict;
4462 PyObject *module;
4463 PyObject *module_name, *global_name;
4464
4465 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4466 &module_name, &global_name))
4467 return NULL;
4468
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004469 /* Try to map the old names used in Python 2.x to the new ones used in
4470 Python 3.x. We do this only with old pickle protocols and when the
4471 user has not disabled the feature. */
4472 if (self->proto < 3 && self->fix_imports) {
4473 PyObject *key;
4474 PyObject *item;
4475
4476 /* Check if the global (i.e., a function or a class) was renamed
4477 or moved to another module. */
4478 key = PyTuple_Pack(2, module_name, global_name);
4479 if (key == NULL)
4480 return NULL;
4481 item = PyDict_GetItemWithError(name_mapping_2to3, key);
4482 Py_DECREF(key);
4483 if (item) {
4484 if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
4485 PyErr_Format(PyExc_RuntimeError,
4486 "_compat_pickle.NAME_MAPPING values should be "
4487 "2-tuples, not %.200s", Py_TYPE(item)->tp_name);
4488 return NULL;
4489 }
4490 module_name = PyTuple_GET_ITEM(item, 0);
4491 global_name = PyTuple_GET_ITEM(item, 1);
4492 if (!PyUnicode_Check(module_name) ||
4493 !PyUnicode_Check(global_name)) {
4494 PyErr_Format(PyExc_RuntimeError,
4495 "_compat_pickle.NAME_MAPPING values should be "
4496 "pairs of str, not (%.200s, %.200s)",
4497 Py_TYPE(module_name)->tp_name,
4498 Py_TYPE(global_name)->tp_name);
4499 return NULL;
4500 }
4501 }
4502 else if (PyErr_Occurred()) {
4503 return NULL;
4504 }
4505
4506 /* Check if the module was renamed. */
4507 item = PyDict_GetItemWithError(import_mapping_2to3, module_name);
4508 if (item) {
4509 if (!PyUnicode_Check(item)) {
4510 PyErr_Format(PyExc_RuntimeError,
4511 "_compat_pickle.IMPORT_MAPPING values should be "
4512 "strings, not %.200s", Py_TYPE(item)->tp_name);
4513 return NULL;
4514 }
4515 module_name = item;
4516 }
4517 else if (PyErr_Occurred()) {
4518 return NULL;
4519 }
4520 }
4521
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004522 modules_dict = PySys_GetObject("modules");
4523 if (modules_dict == NULL)
4524 return NULL;
4525
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004526 module = PyDict_GetItemWithError(modules_dict, module_name);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004527 if (module == NULL) {
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004528 if (PyErr_Occurred())
4529 return NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004530 module = PyImport_Import(module_name);
4531 if (module == NULL)
4532 return NULL;
4533 global = PyObject_GetAttr(module, global_name);
4534 Py_DECREF(module);
4535 }
4536 else {
4537 global = PyObject_GetAttr(module, global_name);
4538 }
4539 return global;
4540}
4541
4542static struct PyMethodDef Unpickler_methods[] = {
4543 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4544 Unpickler_load_doc},
4545 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4546 Unpickler_find_class_doc},
4547 {NULL, NULL} /* sentinel */
4548};
4549
/* tp_dealloc slot for Unpickler: release everything the instance owns and
   then free the instance itself. */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop the cyclic GC from tracking the object before its fields are
       torn down. */
    PyObject_GC_UnTrack((PyObject *)self);
    /* Any of these may be NULL (e.g. __init__() never ran or failed part
       way through), hence Py_XDECREF. */
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->last_string);

    PyMem_Free(self->marks);
    /* encoding/errors are created with strdup() in Unpickler_init(), so
       they are released with plain free(). */
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4568
/* tp_traverse slot for Unpickler: hand each Python object held by the
   instance to the garbage collector's visit callback.  The parameter names
   `visit` and `arg` are required by the Py_VISIT macro, which also makes
   this function return early if the callback reports a non-zero result.
   Returns 0 when every member has been visited. */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->memo);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->last_string);
    return 0;
}
4581
/* tp_clear slot for Unpickler; also called by Unpickler_init() to discard
   the previous state when __init__() runs again on the same object.

   Drops every owned reference and frees the auxiliary buffers, resetting
   each field to NULL so the function can safely be called more than once.
   Afterwards self->read is NULL, which is the condition Unpickler_load()
   uses to detect an unusable instance.  Always returns 0. */
static int
Unpickler_clear(UnpicklerObject *self)
{
    Py_CLEAR(self->readline);
    Py_CLEAR(self->read);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->stack);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->last_string);

    /* NOTE(review): only the marks pointer is reset here; if the struct
       keeps a separate capacity/count for it, confirm that code reusing
       the object after a re-init copes with marks == NULL. */
    PyMem_Free(self->marks);
    self->marks = NULL;
    /* strdup()-allocated in Unpickler_init(), hence free(). */
    free(self->encoding);
    self->encoding = NULL;
    free(self->errors);
    self->errors = NULL;

    return 0;
}
4602
4603PyDoc_STRVAR(Unpickler_doc,
4604"Unpickler(file, *, encoding='ASCII', errors='strict')"
4605"\n"
4606"This takes a binary file for reading a pickle data stream.\n"
4607"\n"
4608"The protocol version of the pickle is detected automatically, so no\n"
4609"proto argument is needed.\n"
4610"\n"
4611"The file-like object must have two methods, a read() method\n"
4612"that takes an integer argument, and a readline() method that\n"
4613"requires no arguments. Both methods should return bytes.\n"
4614"Thus file-like object can be a binary file object opened for\n"
4615"reading, a BytesIO object, or any other custom object that\n"
4616"meets this interface.\n"
4617"\n"
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004618"Optional keyword arguments are *fix_imports*, *encoding* and *errors*,\n"
4619"which are used to control compatiblity support for pickle stream\n"
4620"generated by Python 2.x. If *fix_imports* is True, pickle will try to\n"
4621"map the old Python 2.x names to the new names used in Python 3.x. The\n"
4622"*encoding* and *errors* tell pickle how to decode 8-bit string\n"
4623"instances pickled by Python 2.x; these default to 'ASCII' and\n"
4624"'strict', respectively.\n");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004625
4626static int
4627Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4628{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004629 static char *kwlist[] = {"file", "fix_imports", "encoding", "errors", 0};
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004630 PyObject *file;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004631 int fix_imports = 1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004632 char *encoding = NULL;
4633 char *errors = NULL;
4634
4635 /* XXX: That is an horrible error message. But, I don't know how to do
4636 better... */
4637 if (Py_SIZE(args) != 1) {
4638 PyErr_Format(PyExc_TypeError,
4639 "%s takes exactly one positional argument (%zd given)",
4640 Py_TYPE(self)->tp_name, Py_SIZE(args));
4641 return -1;
4642 }
4643
4644 /* Arguments parsing needs to be done in the __init__() method to allow
4645 subclasses to define their own __init__() method, which may (or may
4646 not) support Unpickler arguments. However, this means we need to be
4647 extra careful in the other Unpickler methods, since a subclass could
4648 forget to call Unpickler.__init__() thus breaking our internal
4649 invariants. */
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004650 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|iss:Unpickler", kwlist,
4651 &file, &fix_imports, &encoding, &errors))
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004652 return -1;
4653
4654 /* In case of multiple __init__() calls, clear previous content. */
4655 if (self->read != NULL)
4656 (void)Unpickler_clear(self);
4657
4658 self->read = PyObject_GetAttrString(file, "read");
4659 self->readline = PyObject_GetAttrString(file, "readline");
4660 if (self->readline == NULL || self->read == NULL)
4661 return -1;
4662
4663 if (encoding == NULL)
4664 encoding = "ASCII";
4665 if (errors == NULL)
4666 errors = "strict";
4667
4668 self->encoding = strdup(encoding);
4669 self->errors = strdup(errors);
4670 if (self->encoding == NULL || self->errors == NULL) {
4671 PyErr_NoMemory();
4672 return -1;
4673 }
4674
4675 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4676 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4677 "persistent_load");
4678 if (self->pers_func == NULL)
4679 return -1;
4680 }
4681 else {
4682 self->pers_func = NULL;
4683 }
4684
4685 self->stack = (Pdata *)Pdata_New();
4686 if (self->stack == NULL)
4687 return -1;
4688
4689 self->memo = PyDict_New();
4690 if (self->memo == NULL)
4691 return -1;
4692
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004693 self->last_string = NULL;
4694 self->arg = NULL;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004695 self->proto = 0;
4696 self->fix_imports = fix_imports;
Alexandre Vassalotti0e7aa8c2009-04-03 04:17:41 +00004697
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004698 return 0;
4699}
4700
4701static PyObject *
4702Unpickler_get_memo(UnpicklerObject *self)
4703{
4704 if (self->memo == NULL)
4705 PyErr_SetString(PyExc_AttributeError, "memo");
4706 else
4707 Py_INCREF(self->memo);
4708 return self->memo;
4709}
4710
4711static int
4712Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4713{
4714 PyObject *tmp;
4715
4716 if (value == NULL) {
4717 PyErr_SetString(PyExc_TypeError,
4718 "attribute deletion is not supported");
4719 return -1;
4720 }
4721 if (!PyDict_Check(value)) {
4722 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4723 return -1;
4724 }
4725
4726 tmp = self->memo;
4727 Py_INCREF(value);
4728 self->memo = value;
4729 Py_XDECREF(tmp);
4730
4731 return 0;
4732}
4733
4734static PyObject *
4735Unpickler_get_persload(UnpicklerObject *self)
4736{
4737 if (self->pers_func == NULL)
4738 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4739 else
4740 Py_INCREF(self->pers_func);
4741 return self->pers_func;
4742}
4743
4744static int
4745Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4746{
4747 PyObject *tmp;
4748
4749 if (value == NULL) {
4750 PyErr_SetString(PyExc_TypeError,
4751 "attribute deletion is not supported");
4752 return -1;
4753 }
4754 if (!PyCallable_Check(value)) {
4755 PyErr_SetString(PyExc_TypeError,
4756 "persistent_load must be a callable taking "
4757 "one argument");
4758 return -1;
4759 }
4760
4761 tmp = self->pers_func;
4762 Py_INCREF(value);
4763 self->pers_func = value;
4764 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4765
4766 return 0;
4767}
4768
4769static PyGetSetDef Unpickler_getsets[] = {
4770 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4771 {"persistent_load", (getter)Unpickler_get_persload,
4772 (setter)Unpickler_set_persload},
4773 {NULL}
4774};
4775
/* Type object for _pickle.Unpickler.

   The initializer is positional: every entry must stay in the slot order
   of PyTypeObject.  The type can be subclassed (Py_TPFLAGS_BASETYPE) and
   takes part in cyclic garbage collection (Py_TPFLAGS_HAVE_GC), which is
   why it provides tp_traverse/tp_clear and frees instances with
   PyObject_GC_Del. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_reserved*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4818
4819static int
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004820initmodule(void)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004821{
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004822 PyObject *copyreg = NULL;
4823 PyObject *compat_pickle = NULL;
4824
4825 /* XXX: We should ensure that the types of the dictionaries imported are
4826 exactly PyDict objects. Otherwise, it is possible to crash the pickle
4827 since we use the PyDict API directly to access these dictionaries. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004828
4829 copyreg = PyImport_ImportModule("copyreg");
4830 if (!copyreg)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004831 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004832 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4833 if (!dispatch_table)
4834 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004835 extension_registry = \
4836 PyObject_GetAttrString(copyreg, "_extension_registry");
4837 if (!extension_registry)
4838 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004839 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4840 if (!inverted_registry)
4841 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004842 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4843 if (!extension_cache)
4844 goto error;
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004845 Py_CLEAR(copyreg);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004846
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004847 /* Load the 2.x -> 3.x stdlib module mapping tables */
4848 compat_pickle = PyImport_ImportModule("_compat_pickle");
4849 if (!compat_pickle)
4850 goto error;
4851 name_mapping_2to3 = PyObject_GetAttrString(compat_pickle, "NAME_MAPPING");
4852 if (!name_mapping_2to3)
4853 goto error;
4854 if (!PyDict_CheckExact(name_mapping_2to3)) {
4855 PyErr_Format(PyExc_RuntimeError,
4856 "_compat_pickle.NAME_MAPPING should be a dict, not %.200s",
4857 Py_TYPE(name_mapping_2to3)->tp_name);
4858 goto error;
4859 }
4860 import_mapping_2to3 = PyObject_GetAttrString(compat_pickle,
4861 "IMPORT_MAPPING");
4862 if (!import_mapping_2to3)
4863 goto error;
4864 if (!PyDict_CheckExact(import_mapping_2to3)) {
4865 PyErr_Format(PyExc_RuntimeError,
4866 "_compat_pickle.IMPORT_MAPPING should be a dict, "
4867 "not %.200s", Py_TYPE(import_mapping_2to3)->tp_name);
4868 goto error;
4869 }
4870 /* ... and the 3.x -> 2.x mapping tables */
4871 name_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
4872 "REVERSE_NAME_MAPPING");
4873 if (!name_mapping_3to2)
4874 goto error;
4875 if (!PyDict_CheckExact(name_mapping_3to2)) {
4876 PyErr_Format(PyExc_RuntimeError,
Ezio Melotti13925002011-03-16 11:05:33 +02004877 "_compat_pickle.REVERSE_NAME_MAPPING should be a dict, "
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004878 "not %.200s", Py_TYPE(name_mapping_3to2)->tp_name);
4879 goto error;
4880 }
4881 import_mapping_3to2 = PyObject_GetAttrString(compat_pickle,
4882 "REVERSE_IMPORT_MAPPING");
4883 if (!import_mapping_3to2)
4884 goto error;
4885 if (!PyDict_CheckExact(import_mapping_3to2)) {
4886 PyErr_Format(PyExc_RuntimeError,
4887 "_compat_pickle.REVERSE_IMPORT_MAPPING should be a dict, "
4888 "not %.200s", Py_TYPE(import_mapping_3to2)->tp_name);
4889 goto error;
4890 }
4891 Py_CLEAR(compat_pickle);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004892
4893 empty_tuple = PyTuple_New(0);
4894 if (empty_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004895 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004896 two_tuple = PyTuple_New(2);
4897 if (two_tuple == NULL)
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004898 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004899 /* We use this temp container with no regard to refcounts, or to
4900 * keeping containees alive. Exempt from GC, because we don't
4901 * want anything looking at two_tuple() by magic.
4902 */
4903 PyObject_GC_UnTrack(two_tuple);
4904
4905 return 0;
4906
4907 error:
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004908 Py_CLEAR(copyreg);
4909 Py_CLEAR(dispatch_table);
4910 Py_CLEAR(extension_registry);
4911 Py_CLEAR(inverted_registry);
4912 Py_CLEAR(extension_cache);
4913 Py_CLEAR(compat_pickle);
4914 Py_CLEAR(name_mapping_2to3);
4915 Py_CLEAR(import_mapping_2to3);
4916 Py_CLEAR(name_mapping_3to2);
4917 Py_CLEAR(import_mapping_3to2);
4918 Py_CLEAR(empty_tuple);
4919 Py_CLEAR(two_tuple);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004920 return -1;
4921}
4922
/* Module definition passed to PyModule_Create() in PyInit__pickle().
   The initializer is positional; the slot names in the comments follow the
   PyModuleDef layout documented for this generation of CPython (confirm
   against the headers in use). */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",          /* m_name */
    pickle_module_doc,  /* m_doc */
    -1,                 /* m_size: no per-module state struct; the module
                           keeps its state in file-level globals */
    NULL,               /* m_methods: no module-level functions */
    NULL,               /* m_reload */
    NULL,               /* m_traverse */
    NULL,               /* m_clear */
    NULL                /* m_free */
};
4934
4935PyMODINIT_FUNC
4936PyInit__pickle(void)
4937{
4938 PyObject *m;
4939
4940 if (PyType_Ready(&Unpickler_Type) < 0)
4941 return NULL;
4942 if (PyType_Ready(&Pickler_Type) < 0)
4943 return NULL;
4944 if (PyType_Ready(&Pdata_Type) < 0)
4945 return NULL;
4946
4947 /* Create the module and add the functions. */
4948 m = PyModule_Create(&_picklemodule);
4949 if (m == NULL)
4950 return NULL;
4951
4952 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4953 return NULL;
4954 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4955 return NULL;
4956
4957 /* Initialize the exceptions. */
4958 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4959 if (PickleError == NULL)
4960 return NULL;
4961 PicklingError = \
4962 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4963 if (PicklingError == NULL)
4964 return NULL;
4965 UnpicklingError = \
4966 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4967 if (UnpicklingError == NULL)
4968 return NULL;
4969
4970 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4971 return NULL;
4972 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4973 return NULL;
4974 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4975 return NULL;
4976
Antoine Pitroud9dfaa92009-06-04 20:32:06 +00004977 if (initmodule() < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004978 return NULL;
4979
4980 return m;
4981}