blob: 91ebe2e88a033167825d6bdba9d5bad6f89f3176 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
/* Docstring text for this extension module.  (Where it is attached to the
   module object is outside this part of the file.) */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version numbers known to this implementation.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol number defined here */
    DEFAULT_PROTOCOL = 3    /* presumably the protocol used when the caller
                               does not choose one -- confirm at use site */
};
12
13
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each enumerator is the single character that identifies the opcode in a
   pickle stream.  The code in this file stores these values into char
   buffers before writing them out.  The protocol 2 opcodes are written as
   '\x80'..'\x8b' character constants, so their value as an int depends on
   the signedness of plain char. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each string is an INT opcode ('I') followed by "01" or "00" and a
 * newline.  Any earlier definition of TRUE/FALSE (for example from a system
 * header) is discarded first so the names can be reused here.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size, in bytes, of the write buffer of Pickler.  Higher values will
       reduce the number of calls to the write() method of the output
       stream. */
    WRITE_BUF_SIZE = 256,
};
103
/* Exception classes for pickle.  These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError;
static PyObject *PicklingError;
static PyObject *UnpicklingError;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
/* Internal data type used as the unpickling stack: a growable array of
   object pointers.  Slots [0, length) are in use; the code in this file
   releases one reference per used slot when it discards them. */
typedef struct {
    PyObject_HEAD
    int length;                 /* number of initial slots in data currently used */
    int size;                   /* number of slots in data allocated */
    PyObject **data;            /* the slot array, allocated with PyMem_Malloc() */
} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
/* Type object for Pdata.  Only the name, the instance size and the
   destructor are filled in here; all remaining slots are left
   zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   D is a Pdata*, O the object and ER the value the *enclosing function*
   returns when the push fails.  Because the macro returns on failure, the
   caller never gets a chance to release O; the macro therefore drops the
   reference itself so that a failed push does not leak it.  O is evaluated
   exactly once. */
#define PDATA_PUSH(D, O, ER) do {                                \
        PyObject *pdata_push_obj_ = (O);                         \
        if (Pdata_push((D), pdata_push_obj_) < 0) {              \
            Py_XDECREF(pdata_push_obj_);                         \
            return (ER);                                         \
        }                                                        \
    } while (0)

/* Push an object on stack, adding a new reference to the object.
   Same parameters as PDATA_PUSH.  If the push fails, the reference added
   here is released again before the enclosing function returns ER, so the
   caller's own reference is the only one left.  O is evaluated exactly
   once. */
#define PDATA_APPEND(D, O, ER) do {                              \
        PyObject *pdata_append_obj_ = (O);                       \
        Py_INCREF(pdata_append_obj_);                            \
        if (Pdata_push((D), pdata_append_obj_) < 0) {            \
            Py_DECREF(pdata_append_obj_);                        \
            return (ER);                                         \
        }                                                        \
    } while (0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
/* State of one Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keep track of the seen
                                   objects to support self-referential objects
                                   pickling.  memo_put() stores entries as
                                   {id(obj) as int: (memo_id, obj)}. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached 1-tuple reused by pickler_call() to
                                   pass a single argument; created lazily. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often.  pickler_write() keeps at most
                                   WRITE_BUF_SIZE bytes in it. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding the pickling process by
                                   not generating superfluous PUT opcodes.  It
                                   should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* Current container nesting depth in fast
                                   mode; set to -1 by fast_save_enter() on an
                                   error exit. */
    PyObject *fast_memo;        /* Dict of containers currently being saved
                                   once the nesting exceeds
                                   FAST_NESTING_LIMIT; created lazily. */
} PicklerObject;
320
/* State of one Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the input stream */
    PyObject *read;             /* read() method of the input stream */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached 1-tuple reused by unpickler_call() to
                                   pass a single argument; created lazily. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object returned
                                   by read() or readline(); keeps the buffer
                                   handed out to callers alive. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value is
                                   "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
} UnpicklerObject;
344
/* Forward declarations: save() and save_reduce() are called by the
   type-specific save_*() helpers before their definitions appear, and the
   two type objects are defined later in the file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
350
351
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj): make sure self->arg is a 1-tuple (creating it on first
   use), release whatever object was left in slot 0, and store obj there.
   The reference to obj is consumed: it is either stored in the tuple or, if
   the tuple could not be created, released.  After the macro, self->arg is
   NULL exactly when the tuple could not be created.

   FREE_ARG_TUP(self): called after the function call.  If something else
   still holds a reference to the tuple (reference count above 1), forget
   the cached tuple so that a fresh one is created next time; otherwise keep
   it for reuse.  The argument stays in slot 0 until the next ARG_TUP. */

#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
 XXX: Does caching the argument tuple provide any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
 one second (wall clock time). Either way, the loading time of a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
424 if (s == NULL) {
425 if (!(self->buf_size))
426 return 0;
427 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
428 if (data == NULL)
429 return -1;
430 }
431 else {
432 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
433 if (pickler_write(self, NULL, 0) < 0)
434 return -1;
435 }
436
437 if (n > WRITE_BUF_SIZE) {
438 if (!(data = PyBytes_FromStringAndSize(s, n)))
439 return -1;
440 }
441 else {
442 memcpy(self->write_buf + self->buf_size, s, n);
443 self->buf_size += n;
444 return n;
445 }
446 }
447
448 /* object with write method */
449 result = pickler_call(self, self->write, data);
450 if (result == NULL)
451 return -1;
452
453 Py_DECREF(result);
454 self->buf_size = 0;
455 return n;
456}
457
458/* XXX: These read/readline functions ought to be optimized. Buffered I/O
459 might help a lot, especially with the new (but much slower) io library.
460 On the other hand, the added complexity might not worth it.
461 */
462
463/* Read at least n characters from the input stream and set s to the current
464 reading position. */
465static Py_ssize_t
466unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
467{
468 PyObject *len;
469 PyObject *data;
470
471 len = PyLong_FromSsize_t(n);
472 if (len == NULL)
473 return -1;
474
475 data = unpickler_call(self, self->read, len);
476 if (data == NULL)
477 return -1;
478
479 /* XXX: Should bytearray be supported too? */
480 if (!PyBytes_Check(data)) {
481 PyErr_SetString(PyExc_ValueError,
482 "read() from the underlying stream did not"
483 "return bytes");
484 return -1;
485 }
486
487 Py_XDECREF(self->last_string);
488 self->last_string = data;
489
490 if (!(*s = PyBytes_AS_STRING(data)))
491 return -1;
492
493 return n;
494}
495
496static Py_ssize_t
497unpickler_readline(UnpicklerObject *self, char **s)
498{
499 PyObject *data;
500
501 data = PyObject_CallObject(self->readline, empty_tuple);
502 if (data == NULL)
503 return -1;
504
505 /* XXX: Should bytearray be supported too? */
506 if (!PyBytes_Check(data)) {
507 PyErr_SetString(PyExc_ValueError,
508 "readline() from the underlying stream did not"
509 "return bytes");
510 return -1;
511 }
512
513 Py_XDECREF(self->last_string);
514 self->last_string = data;
515
516 if (!(*s = PyBytes_AS_STRING(data)))
517 return -1;
518
519 return PyBytes_GET_SIZE(data);
520}
521
522/* Generate a GET opcode for an object stored in the memo. The 'key' argument
523 should be the address of the object as returned by PyLong_FromVoidPtr(). */
524static int
525memo_get(PicklerObject *self, PyObject *key)
526{
527 PyObject *value;
528 PyObject *memo_id;
529 long x;
530 char pdata[30];
531 int len;
532
533 value = PyDict_GetItemWithError(self->memo, key);
534 if (value == NULL) {
535 if (!PyErr_Occurred())
536 PyErr_SetObject(PyExc_KeyError, key);
537 return -1;
538 }
539
540 memo_id = PyTuple_GetItem(value, 0);
541 if (memo_id == NULL)
542 return -1;
543
544 if (!PyLong_Check(memo_id)) {
545 PyErr_SetString(PicklingError, "memo id must be an integer");
546 return -1;
547 }
548 x = PyLong_AsLong(memo_id);
549 if (x == -1 && PyErr_Occurred())
550 return -1;
551
552 if (!self->bin) {
553 pdata[0] = GET;
554 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
555 len = (int)strlen(pdata);
556 }
557 else {
558 if (x < 256) {
559 pdata[0] = BINGET;
560 pdata[1] = (unsigned char)(x & 0xff);
561 len = 2;
562 }
563 else if (x <= 0xffffffffL) {
564 pdata[0] = LONG_BINGET;
565 pdata[1] = (unsigned char)(x & 0xff);
566 pdata[2] = (unsigned char)((x >> 8) & 0xff);
567 pdata[3] = (unsigned char)((x >> 16) & 0xff);
568 pdata[4] = (unsigned char)((x >> 24) & 0xff);
569 len = 5;
570 }
571 else { /* unlikely */
572 PyErr_SetString(PicklingError,
573 "memo id too large for LONG_BINGET");
574 return -1;
575 }
576 }
577
578 if (pickler_write(self, pdata, len) < 0)
579 return -1;
580
581 return 0;
582}
583
584/* Store an object in the memo, assign it a new unique ID based on the number
585 of objects currently stored in the memo and generate a PUT opcode. */
586static int
587memo_put(PicklerObject *self, PyObject *obj)
588{
589 PyObject *key = NULL;
590 PyObject *memo_id = NULL;
591 PyObject *tuple = NULL;
592 long x;
593 char pdata[30];
594 int len;
595 int status = 0;
596
597 if (self->fast)
598 return 0;
599
600 key = PyLong_FromVoidPtr(obj);
601 if (key == NULL)
602 goto error;
603 if ((x = PyDict_Size(self->memo)) < 0)
604 goto error;
605 memo_id = PyLong_FromLong(x);
606 if (memo_id == NULL)
607 goto error;
608 tuple = PyTuple_New(2);
609 if (tuple == NULL)
610 goto error;
611
612 Py_INCREF(memo_id);
613 PyTuple_SET_ITEM(tuple, 0, memo_id);
614 Py_INCREF(obj);
615 PyTuple_SET_ITEM(tuple, 1, obj);
616 if (PyDict_SetItem(self->memo, key, tuple) < 0)
617 goto error;
618
619 if (!self->bin) {
620 pdata[0] = PUT;
621 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
622 len = strlen(pdata);
623 }
624 else {
625 if (x < 256) {
626 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000627 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000628 len = 2;
629 }
630 else if (x <= 0xffffffffL) {
631 pdata[0] = LONG_BINPUT;
632 pdata[1] = (unsigned char)(x & 0xff);
633 pdata[2] = (unsigned char)((x >> 8) & 0xff);
634 pdata[3] = (unsigned char)((x >> 16) & 0xff);
635 pdata[4] = (unsigned char)((x >> 24) & 0xff);
636 len = 5;
637 }
638 else { /* unlikely */
639 PyErr_SetString(PicklingError,
640 "memo id too large for LONG_BINPUT");
641 return -1;
642 }
643 }
644
645 if (pickler_write(self, pdata, len) < 0)
646 goto error;
647
648 if (0) {
649 error:
650 status = -1;
651 }
652
653 Py_XDECREF(key);
654 Py_XDECREF(memo_id);
655 Py_XDECREF(tuple);
656
657 return status;
658}
659
660static PyObject *
661whichmodule(PyObject *global, PyObject *global_name)
662{
663 Py_ssize_t i, j;
664 static PyObject *module_str = NULL;
665 static PyObject *main_str = NULL;
666 PyObject *module_name;
667 PyObject *modules_dict;
668 PyObject *module;
669 PyObject *obj;
670
671 if (module_str == NULL) {
672 module_str = PyUnicode_InternFromString("__module__");
673 if (module_str == NULL)
674 return NULL;
675 main_str = PyUnicode_InternFromString("__main__");
676 if (main_str == NULL)
677 return NULL;
678 }
679
680 module_name = PyObject_GetAttr(global, module_str);
681
682 /* In some rare cases (e.g., random.getrandbits), __module__ can be
683 None. If it is so, then search sys.modules for the module of
684 global. */
685 if (module_name == Py_None) {
686 Py_DECREF(module_name);
687 goto search;
688 }
689
690 if (module_name) {
691 return module_name;
692 }
693 if (PyErr_ExceptionMatches(PyExc_AttributeError))
694 PyErr_Clear();
695 else
696 return NULL;
697
698 search:
699 modules_dict = PySys_GetObject("modules");
700 if (modules_dict == NULL)
701 return NULL;
702
703 i = 0;
704 module_name = NULL;
705 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
706 if (PyObject_Compare(module_name, main_str) == 0)
707 continue;
708
709 obj = PyObject_GetAttr(module, global_name);
710 if (obj == NULL) {
711 if (PyErr_ExceptionMatches(PyExc_AttributeError))
712 PyErr_Clear();
713 else
714 return NULL;
715 continue;
716 }
717
718 if (obj != global) {
719 Py_DECREF(obj);
720 continue;
721 }
722
723 Py_DECREF(obj);
724 break;
725 }
726
727 /* If no module is found, use __main__. */
728 if (!j) {
729 module_name = main_str;
730 }
731
732 Py_INCREF(module_name);
733 return module_name;
734}
735
736/* fast_save_enter() and fast_save_leave() are guards against recursive
737 objects when Pickler is used with the "fast mode" (i.e., with object
738 memoization disabled). If the nesting of a list or dict object exceed
739 FAST_NESTING_LIMIT, these guards will start keeping an internal
740 reference to the seen list or dict objects and check whether these objects
741 are recursive. These are not strictly necessary, since save() has a
742 hard-coded recursion limit, but they give a nicer error message than the
743 typical RuntimeError. */
744static int
745fast_save_enter(PicklerObject *self, PyObject *obj)
746{
747 /* if fast_nesting < 0, we're doing an error exit. */
748 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
749 PyObject *key = NULL;
750 if (self->fast_memo == NULL) {
751 self->fast_memo = PyDict_New();
752 if (self->fast_memo == NULL) {
753 self->fast_nesting = -1;
754 return 0;
755 }
756 }
757 key = PyLong_FromVoidPtr(obj);
758 if (key == NULL)
759 return 0;
760 if (PyDict_GetItem(self->fast_memo, key)) {
761 Py_DECREF(key);
762 PyErr_Format(PyExc_ValueError,
763 "fast mode: can't pickle cyclic objects "
764 "including object type %.200s at %p",
765 obj->ob_type->tp_name, obj);
766 self->fast_nesting = -1;
767 return 0;
768 }
769 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
770 Py_DECREF(key);
771 self->fast_nesting = -1;
772 return 0;
773 }
774 Py_DECREF(key);
775 }
776 return 1;
777}
778
779static int
780fast_save_leave(PicklerObject *self, PyObject *obj)
781{
782 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
783 PyObject *key = PyLong_FromVoidPtr(obj);
784 if (key == NULL)
785 return 0;
786 if (PyDict_DelItem(self->fast_memo, key) < 0) {
787 Py_DECREF(key);
788 return 0;
789 }
790 Py_DECREF(key);
791 }
792 return 1;
793}
794
795static int
796save_none(PicklerObject *self, PyObject *obj)
797{
798 const char none_op = NONE;
799 if (pickler_write(self, &none_op, 1) < 0)
800 return -1;
801
802 return 0;
803}
804
805static int
806save_bool(PicklerObject *self, PyObject *obj)
807{
808 static const char *buf[2] = { FALSE, TRUE };
809 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
810 int p = (obj == Py_True);
811
812 if (self->proto >= 2) {
813 const char bool_op = p ? NEWTRUE : NEWFALSE;
814 if (pickler_write(self, &bool_op, 1) < 0)
815 return -1;
816 }
817 else if (pickler_write(self, buf[p], len[p]) < 0)
818 return -1;
819
820 return 0;
821}
822
823static int
824save_int(PicklerObject *self, long x)
825{
826 char pdata[32];
827 int len = 0;
828
829 if (!self->bin
830#if SIZEOF_LONG > 4
831 || x > 0x7fffffffL || x < -0x80000000L
832#endif
833 ) {
834 /* Text-mode pickle, or long too big to fit in the 4-byte
835 * signed BININT format: store as a string.
836 */
837 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
838 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
839 if (pickler_write(self, pdata, strlen(pdata)) < 0)
840 return -1;
841 }
842 else {
843 /* Binary pickle and x fits in a signed 4-byte int. */
844 pdata[1] = (unsigned char)(x & 0xff);
845 pdata[2] = (unsigned char)((x >> 8) & 0xff);
846 pdata[3] = (unsigned char)((x >> 16) & 0xff);
847 pdata[4] = (unsigned char)((x >> 24) & 0xff);
848
849 if ((pdata[4] == 0) && (pdata[3] == 0)) {
850 if (pdata[2] == 0) {
851 pdata[0] = BININT1;
852 len = 2;
853 }
854 else {
855 pdata[0] = BININT2;
856 len = 3;
857 }
858 }
859 else {
860 pdata[0] = BININT;
861 len = 5;
862 }
863
864 if (pickler_write(self, pdata, len) < 0)
865 return -1;
866 }
867
868 return 0;
869}
870
/* Pickle a Python int of arbitrary size.

   Values that fit in a C long are delegated to save_int().  Larger values
   are written, for protocol 2 and later, as LONG1/LONG4 followed by the
   little-endian two's-complement bytes of the number; for older protocols
   as the LONG opcode followed by repr(obj) and a newline.

   Returns 0 on success and -1 with an exception set on failure. */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (pickler_write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        /* Temporary bytes object used as the output buffer. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* LONG1: one-byte length. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* LONG4: four-byte length, least significant byte first.
               `size` is used as scratch while emitting the length and then
               set to the header size. */
            header[0] = LONG4;
            size = (int)nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (pickler_write(self, header, size) < 0 ||
            pickler_write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time
           (in the number of digits), in both directions. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (pickler_write(self, &long_op, 1) < 0 ||
            pickler_write(self, string, size) < 0 ||
            pickler_write(self, "\n", 1) < 0)
            goto error;
    }

    /* The error label sits inside a never-taken branch, so the success path
       falls through to the shared cleanup with status still 0. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
993
994static int
995save_float(PicklerObject *self, PyObject *obj)
996{
997 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
998
999 if (self->bin) {
1000 char pdata[9];
1001 pdata[0] = BINFLOAT;
1002 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1003 return -1;
1004 if (pickler_write(self, pdata, 9) < 0)
1005 return -1;
1006 }
1007 else {
1008 char pdata[250];
1009 pdata[0] = FLOAT;
1010 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1011 /* Extend the formatted string with a newline character */
1012 strcat(pdata, "\n");
1013
1014 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1015 return -1;
1016 }
1017
1018 return 0;
1019}
1020
1021static int
1022save_bytes(PicklerObject *self, PyObject *obj)
1023{
1024 if (self->proto < 3) {
1025 /* Older pickle protocols do not have an opcode for pickling bytes
1026 objects. Therefore, we need to fake the copy protocol (i.e.,
1027 the __reduce__ method) to permit bytes object unpickling. */
1028 PyObject *reduce_value = NULL;
1029 PyObject *bytelist = NULL;
1030 int status;
1031
1032 bytelist = PySequence_List(obj);
1033 if (bytelist == NULL)
1034 return -1;
1035
1036 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1037 bytelist);
1038 if (reduce_value == NULL) {
1039 Py_DECREF(bytelist);
1040 return -1;
1041 }
1042
1043 /* save_reduce() will memoize the object automatically. */
1044 status = save_reduce(self, reduce_value, obj);
1045 Py_DECREF(reduce_value);
1046 Py_DECREF(bytelist);
1047 return status;
1048 }
1049 else {
1050 Py_ssize_t size;
1051 char header[5];
1052 int len;
1053
1054 size = PyBytes_Size(obj);
1055 if (size < 0)
1056 return -1;
1057
1058 if (size < 256) {
1059 header[0] = SHORT_BINBYTES;
1060 header[1] = (unsigned char)size;
1061 len = 2;
1062 }
1063 else if (size <= 0xffffffffL) {
1064 header[0] = BINBYTES;
1065 header[1] = (unsigned char)(size & 0xff);
1066 header[2] = (unsigned char)((size >> 8) & 0xff);
1067 header[3] = (unsigned char)((size >> 16) & 0xff);
1068 header[4] = (unsigned char)((size >> 24) & 0xff);
1069 len = 5;
1070 }
1071 else {
1072 return -1; /* string too large */
1073 }
1074
1075 if (pickler_write(self, header, len) < 0)
1076 return -1;
1077
1078 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1079 return -1;
1080
1081 if (memo_put(self, obj) < 0)
1082 return -1;
1083
1084 return 0;
1085 }
1086}
1087
1088/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1089 backslash and newline characters to \uXXXX escapes. */
1090static PyObject *
1091raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1092{
1093 PyObject *repr, *result;
1094 char *p;
1095 char *q;
1096
1097 static const char *hexdigits = "0123456789abcdef";
1098
1099#ifdef Py_UNICODE_WIDE
1100 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1101#else
1102 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1103#endif
1104 if (repr == NULL)
1105 return NULL;
1106 if (size == 0)
1107 goto done;
1108
1109 p = q = PyBytes_AS_STRING(repr);
1110 while (size-- > 0) {
1111 Py_UNICODE ch = *s++;
1112#ifdef Py_UNICODE_WIDE
1113 /* Map 32-bit characters to '\Uxxxxxxxx' */
1114 if (ch >= 0x10000) {
1115 *p++ = '\\';
1116 *p++ = 'U';
1117 *p++ = hexdigits[(ch >> 28) & 0xf];
1118 *p++ = hexdigits[(ch >> 24) & 0xf];
1119 *p++ = hexdigits[(ch >> 20) & 0xf];
1120 *p++ = hexdigits[(ch >> 16) & 0xf];
1121 *p++ = hexdigits[(ch >> 12) & 0xf];
1122 *p++ = hexdigits[(ch >> 8) & 0xf];
1123 *p++ = hexdigits[(ch >> 4) & 0xf];
1124 *p++ = hexdigits[ch & 15];
1125 }
1126 else
1127#endif
1128 /* Map 16-bit characters to '\uxxxx' */
1129 if (ch >= 256 || ch == '\\' || ch == '\n') {
1130 *p++ = '\\';
1131 *p++ = 'u';
1132 *p++ = hexdigits[(ch >> 12) & 0xf];
1133 *p++ = hexdigits[(ch >> 8) & 0xf];
1134 *p++ = hexdigits[(ch >> 4) & 0xf];
1135 *p++ = hexdigits[ch & 15];
1136 }
1137 /* Copy everything else as-is */
1138 else
1139 *p++ = (char) ch;
1140 }
1141 size = p - q;
1142
1143 done:
1144 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1145 Py_DECREF(repr);
1146 return result;
1147}
1148
1149static int
1150save_unicode(PicklerObject *self, PyObject *obj)
1151{
1152 Py_ssize_t size;
1153 PyObject *encoded = NULL;
1154
1155 if (self->bin) {
1156 char pdata[5];
1157
1158 encoded = PyUnicode_AsUTF8String(obj);
1159 if (encoded == NULL)
1160 goto error;
1161
1162 size = PyBytes_GET_SIZE(encoded);
1163 if (size < 0 || size > 0xffffffffL)
1164 goto error; /* string too large */
1165
1166 pdata[0] = BINUNICODE;
1167 pdata[1] = (unsigned char)(size & 0xff);
1168 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1169 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1170 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1171
1172 if (pickler_write(self, pdata, 5) < 0)
1173 goto error;
1174
1175 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1176 goto error;
1177 }
1178 else {
1179 const char unicode_op = UNICODE;
1180
1181 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1182 PyUnicode_GET_SIZE(obj));
1183 if (encoded == NULL)
1184 goto error;
1185
1186 if (pickler_write(self, &unicode_op, 1) < 0)
1187 goto error;
1188
1189 size = PyBytes_GET_SIZE(encoded);
1190 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1191 goto error;
1192
1193 if (pickler_write(self, "\n", 1) < 0)
1194 goto error;
1195 }
1196 if (memo_put(self, obj) < 0)
1197 goto error;
1198
1199 Py_DECREF(encoded);
1200 return 0;
1201
1202 error:
1203 Py_XDECREF(encoded);
1204 return -1;
1205}
1206
1207/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1208static int
1209store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1210{
1211 int i;
1212
1213 assert(PyTuple_Size(t) == len);
1214
1215 for (i = 0; i < len; i++) {
1216 PyObject *element = PyTuple_GET_ITEM(t, i);
1217
1218 if (element == NULL)
1219 return -1;
1220 if (save(self, element, 0) < 0)
1221 return -1;
1222 }
1223
1224 return 0;
1225}
1226
1227/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1228 * used across protocols to minimize the space needed to pickle them.
1229 * Tuples are also the only builtin immutable type that can be recursive
1230 * (a tuple can be reached from itself), and that requires some subtle
1231 * magic so that it works in all cases. IOW, this is a long routine.
1232 */
1233static int
1234save_tuple(PicklerObject *self, PyObject *obj)
1235{
1236 PyObject *memo_key = NULL;
1237 int len, i;
1238 int status = 0;
1239
1240 const char mark_op = MARK;
1241 const char tuple_op = TUPLE;
1242 const char pop_op = POP;
1243 const char pop_mark_op = POP_MARK;
1244 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1245
1246 if ((len = PyTuple_Size(obj)) < 0)
1247 return -1;
1248
1249 if (len == 0) {
1250 char pdata[2];
1251
1252 if (self->proto) {
1253 pdata[0] = EMPTY_TUPLE;
1254 len = 1;
1255 }
1256 else {
1257 pdata[0] = MARK;
1258 pdata[1] = TUPLE;
1259 len = 2;
1260 }
1261 if (pickler_write(self, pdata, len) < 0)
1262 return -1;
1263 return 0;
1264 }
1265
1266 /* id(tuple) isn't in the memo now. If it shows up there after
1267 * saving the tuple elements, the tuple must be recursive, in
1268 * which case we'll pop everything we put on the stack, and fetch
1269 * its value from the memo.
1270 */
1271 memo_key = PyLong_FromVoidPtr(obj);
1272 if (memo_key == NULL)
1273 return -1;
1274
1275 if (len <= 3 && self->proto >= 2) {
1276 /* Use TUPLE{1,2,3} opcodes. */
1277 if (store_tuple_elements(self, obj, len) < 0)
1278 goto error;
1279
1280 if (PyDict_GetItem(self->memo, memo_key)) {
1281 /* pop the len elements */
1282 for (i = 0; i < len; i++)
1283 if (pickler_write(self, &pop_op, 1) < 0)
1284 goto error;
1285 /* fetch from memo */
1286 if (memo_get(self, memo_key) < 0)
1287 goto error;
1288
1289 Py_DECREF(memo_key);
1290 return 0;
1291 }
1292 else { /* Not recursive. */
1293 if (pickler_write(self, len2opcode + len, 1) < 0)
1294 goto error;
1295 }
1296 goto memoize;
1297 }
1298
1299 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1300 * Generate MARK e1 e2 ... TUPLE
1301 */
1302 if (pickler_write(self, &mark_op, 1) < 0)
1303 goto error;
1304
1305 if (store_tuple_elements(self, obj, len) < 0)
1306 goto error;
1307
1308 if (PyDict_GetItem(self->memo, memo_key)) {
1309 /* pop the stack stuff we pushed */
1310 if (self->bin) {
1311 if (pickler_write(self, &pop_mark_op, 1) < 0)
1312 goto error;
1313 }
1314 else {
1315 /* Note that we pop one more than len, to remove
1316 * the MARK too.
1317 */
1318 for (i = 0; i <= len; i++)
1319 if (pickler_write(self, &pop_op, 1) < 0)
1320 goto error;
1321 }
1322 /* fetch from memo */
1323 if (memo_get(self, memo_key) < 0)
1324 goto error;
1325
1326 Py_DECREF(memo_key);
1327 return 0;
1328 }
1329 else { /* Not recursive. */
1330 if (pickler_write(self, &tuple_op, 1) < 0)
1331 goto error;
1332 }
1333
1334 memoize:
1335 if (memo_put(self, obj) < 0)
1336 goto error;
1337
1338 if (0) {
1339 error:
1340 status = -1;
1341 }
1342
1343 Py_DECREF(memo_key);
1344 return status;
1345}
1346
1347/* iter is an iterator giving items, and we batch up chunks of
1348 * MARK item item ... item APPENDS
1349 * opcode sequences. Calling code should have arranged to first create an
1350 * empty list, or list-like object, for the APPENDS to operate on.
1351 * Returns 0 on success, <0 on error.
1352 */
1353static int
1354batch_list(PicklerObject *self, PyObject *iter)
1355{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001356 PyObject *obj = NULL;
1357 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001358 int i, n;
1359
1360 const char mark_op = MARK;
1361 const char append_op = APPEND;
1362 const char appends_op = APPENDS;
1363
1364 assert(iter != NULL);
1365
1366 /* XXX: I think this function could be made faster by avoiding the
1367 iterator interface and fetching objects directly from list using
1368 PyList_GET_ITEM.
1369 */
1370
1371 if (self->proto == 0) {
1372 /* APPENDS isn't available; do one at a time. */
1373 for (;;) {
1374 obj = PyIter_Next(iter);
1375 if (obj == NULL) {
1376 if (PyErr_Occurred())
1377 return -1;
1378 break;
1379 }
1380 i = save(self, obj, 0);
1381 Py_DECREF(obj);
1382 if (i < 0)
1383 return -1;
1384 if (pickler_write(self, &append_op, 1) < 0)
1385 return -1;
1386 }
1387 return 0;
1388 }
1389
1390 /* proto > 0: write in batches of BATCHSIZE. */
1391 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001392 /* Get first item */
1393 firstitem = PyIter_Next(iter);
1394 if (firstitem == NULL) {
1395 if (PyErr_Occurred())
1396 goto error;
1397
1398 /* nothing more to add */
1399 break;
1400 }
1401
1402 /* Try to get a second item */
1403 obj = PyIter_Next(iter);
1404 if (obj == NULL) {
1405 if (PyErr_Occurred())
1406 goto error;
1407
1408 /* Only one item to write */
1409 if (save(self, firstitem, 0) < 0)
1410 goto error;
1411 if (pickler_write(self, &append_op, 1) < 0)
1412 goto error;
1413 Py_CLEAR(firstitem);
1414 break;
1415 }
1416
1417 /* More than one item to write */
1418
1419 /* Pump out MARK, items, APPENDS. */
1420 if (pickler_write(self, &mark_op, 1) < 0)
1421 goto error;
1422
1423 if (save(self, firstitem, 0) < 0)
1424 goto error;
1425 Py_CLEAR(firstitem);
1426 n = 1;
1427
1428 /* Fetch and save up to BATCHSIZE items */
1429 while (obj) {
1430 if (save(self, obj, 0) < 0)
1431 goto error;
1432 Py_CLEAR(obj);
1433 n += 1;
1434
1435 if (n == BATCHSIZE)
1436 break;
1437
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001438 obj = PyIter_Next(iter);
1439 if (obj == NULL) {
1440 if (PyErr_Occurred())
1441 goto error;
1442 break;
1443 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001444 }
1445
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001446 if (pickler_write(self, &appends_op, 1) < 0)
1447 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001448
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001449 } while (n == BATCHSIZE);
1450 return 0;
1451
1452 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001453 Py_XDECREF(firstitem);
1454 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001455 return -1;
1456}
1457
1458static int
1459save_list(PicklerObject *self, PyObject *obj)
1460{
1461 PyObject *iter;
1462 char header[3];
1463 int len;
1464 int status = 0;
1465
1466 if (self->fast && !fast_save_enter(self, obj))
1467 goto error;
1468
1469 /* Create an empty list. */
1470 if (self->bin) {
1471 header[0] = EMPTY_LIST;
1472 len = 1;
1473 }
1474 else {
1475 header[0] = MARK;
1476 header[1] = LIST;
1477 len = 2;
1478 }
1479
1480 if (pickler_write(self, header, len) < 0)
1481 goto error;
1482
1483 /* Get list length, and bow out early if empty. */
1484 if ((len = PyList_Size(obj)) < 0)
1485 goto error;
1486
1487 if (memo_put(self, obj) < 0)
1488 goto error;
1489
1490 if (len != 0) {
1491 /* Save the list elements. */
1492 iter = PyObject_GetIter(obj);
1493 if (iter == NULL)
1494 goto error;
1495 status = batch_list(self, iter);
1496 Py_DECREF(iter);
1497 }
1498
1499 if (0) {
1500 error:
1501 status = -1;
1502 }
1503
1504 if (self->fast && !fast_save_leave(self, obj))
1505 status = -1;
1506
1507 return status;
1508}
1509
1510/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1511 * MARK key value ... key value SETITEMS
1512 * opcode sequences. Calling code should have arranged to first create an
1513 * empty dict, or dict-like object, for the SETITEMS to operate on.
1514 * Returns 0 on success, <0 on error.
1515 *
1516 * This is very much like batch_list(). The difference between saving
1517 * elements directly, and picking apart two-tuples, is so long-winded at
1518 * the C level, though, that attempts to combine these routines were too
1519 * ugly to bear.
1520 */
1521static int
1522batch_dict(PicklerObject *self, PyObject *iter)
1523{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001524 PyObject *obj = NULL;
1525 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001526 int i, n;
1527
1528 const char mark_op = MARK;
1529 const char setitem_op = SETITEM;
1530 const char setitems_op = SETITEMS;
1531
1532 assert(iter != NULL);
1533
1534 if (self->proto == 0) {
1535 /* SETITEMS isn't available; do one at a time. */
1536 for (;;) {
1537 obj = PyIter_Next(iter);
1538 if (obj == NULL) {
1539 if (PyErr_Occurred())
1540 return -1;
1541 break;
1542 }
1543 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1544 PyErr_SetString(PyExc_TypeError, "dict items "
1545 "iterator must return 2-tuples");
1546 return -1;
1547 }
1548 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1549 if (i >= 0)
1550 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1551 Py_DECREF(obj);
1552 if (i < 0)
1553 return -1;
1554 if (pickler_write(self, &setitem_op, 1) < 0)
1555 return -1;
1556 }
1557 return 0;
1558 }
1559
1560 /* proto > 0: write in batches of BATCHSIZE. */
1561 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001562 /* Get first item */
1563 firstitem = PyIter_Next(iter);
1564 if (firstitem == NULL) {
1565 if (PyErr_Occurred())
1566 goto error;
1567
1568 /* nothing more to add */
1569 break;
1570 }
1571 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1572 PyErr_SetString(PyExc_TypeError, "dict items "
1573 "iterator must return 2-tuples");
1574 goto error;
1575 }
1576
1577 /* Try to get a second item */
1578 obj = PyIter_Next(iter);
1579 if (obj == NULL) {
1580 if (PyErr_Occurred())
1581 goto error;
1582
1583 /* Only one item to write */
1584 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1585 goto error;
1586 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1587 goto error;
1588 if (pickler_write(self, &setitem_op, 1) < 0)
1589 goto error;
1590 Py_CLEAR(firstitem);
1591 break;
1592 }
1593
1594 /* More than one item to write */
1595
1596 /* Pump out MARK, items, SETITEMS. */
1597 if (pickler_write(self, &mark_op, 1) < 0)
1598 goto error;
1599
1600 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1601 goto error;
1602 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1603 goto error;
1604 Py_CLEAR(firstitem);
1605 n = 1;
1606
1607 /* Fetch and save up to BATCHSIZE items */
1608 while (obj) {
1609 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1610 PyErr_SetString(PyExc_TypeError, "dict items "
1611 "iterator must return 2-tuples");
1612 goto error;
1613 }
1614 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1615 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1616 goto error;
1617 Py_CLEAR(obj);
1618 n += 1;
1619
1620 if (n == BATCHSIZE)
1621 break;
1622
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001623 obj = PyIter_Next(iter);
1624 if (obj == NULL) {
1625 if (PyErr_Occurred())
1626 goto error;
1627 break;
1628 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001629 }
1630
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001631 if (pickler_write(self, &setitems_op, 1) < 0)
1632 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001633
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001634 } while (n == BATCHSIZE);
1635 return 0;
1636
1637 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001638 Py_XDECREF(firstitem);
1639 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001640 return -1;
1641}
1642
1643static int
1644save_dict(PicklerObject *self, PyObject *obj)
1645{
1646 PyObject *items, *iter;
1647 char header[3];
1648 int len;
1649 int status = 0;
1650
1651 if (self->fast && !fast_save_enter(self, obj))
1652 goto error;
1653
1654 /* Create an empty dict. */
1655 if (self->bin) {
1656 header[0] = EMPTY_DICT;
1657 len = 1;
1658 }
1659 else {
1660 header[0] = MARK;
1661 header[1] = DICT;
1662 len = 2;
1663 }
1664
1665 if (pickler_write(self, header, len) < 0)
1666 goto error;
1667
1668 /* Get dict size, and bow out early if empty. */
1669 if ((len = PyDict_Size(obj)) < 0)
1670 goto error;
1671
1672 if (memo_put(self, obj) < 0)
1673 goto error;
1674
1675 if (len != 0) {
1676 /* Save the dict items. */
1677 items = PyObject_CallMethod(obj, "items", "()");
1678 if (items == NULL)
1679 goto error;
1680 iter = PyObject_GetIter(items);
1681 Py_DECREF(items);
1682 if (iter == NULL)
1683 goto error;
1684 status = batch_dict(self, iter);
1685 Py_DECREF(iter);
1686 }
1687
1688 if (0) {
1689 error:
1690 status = -1;
1691 }
1692
1693 if (self->fast && !fast_save_leave(self, obj))
1694 status = -1;
1695
1696 return status;
1697}
1698
1699static int
1700save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1701{
1702 static PyObject *name_str = NULL;
1703 PyObject *global_name = NULL;
1704 PyObject *module_name = NULL;
1705 PyObject *module = NULL;
1706 PyObject *cls;
1707 int status = 0;
1708
1709 const char global_op = GLOBAL;
1710
1711 if (name_str == NULL) {
1712 name_str = PyUnicode_InternFromString("__name__");
1713 if (name_str == NULL)
1714 goto error;
1715 }
1716
1717 if (name) {
1718 global_name = name;
1719 Py_INCREF(global_name);
1720 }
1721 else {
1722 global_name = PyObject_GetAttr(obj, name_str);
1723 if (global_name == NULL)
1724 goto error;
1725 }
1726
1727 module_name = whichmodule(obj, global_name);
1728 if (module_name == NULL)
1729 goto error;
1730
1731 /* XXX: Change to use the import C API directly with level=0 to disallow
1732 relative imports.
1733
1734 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1735 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1736 custom import functions (IMHO, this would be a nice security
1737 feature). The import C API would need to be extended to support the
1738 extra parameters of __import__ to fix that. */
1739 module = PyImport_Import(module_name);
1740 if (module == NULL) {
1741 PyErr_Format(PicklingError,
1742 "Can't pickle %R: import of module %R failed",
1743 obj, module_name);
1744 goto error;
1745 }
1746 cls = PyObject_GetAttr(module, global_name);
1747 if (cls == NULL) {
1748 PyErr_Format(PicklingError,
1749 "Can't pickle %R: attribute lookup %S.%S failed",
1750 obj, module_name, global_name);
1751 goto error;
1752 }
1753 if (cls != obj) {
1754 Py_DECREF(cls);
1755 PyErr_Format(PicklingError,
1756 "Can't pickle %R: it's not the same object as %S.%S",
1757 obj, module_name, global_name);
1758 goto error;
1759 }
1760 Py_DECREF(cls);
1761
1762 if (self->proto >= 2) {
1763 /* See whether this is in the extension registry, and if
1764 * so generate an EXT opcode.
1765 */
1766 PyObject *code_obj; /* extension code as Python object */
1767 long code; /* extension code as C value */
1768 char pdata[5];
1769 int n;
1770
1771 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1772 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1773 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1774 /* The object is not registered in the extension registry.
1775 This is the most likely code path. */
1776 if (code_obj == NULL)
1777 goto gen_global;
1778
1779 /* XXX: pickle.py doesn't check neither the type, nor the range
1780 of the value returned by the extension_registry. It should for
1781 consistency. */
1782
1783 /* Verify code_obj has the right type and value. */
1784 if (!PyLong_Check(code_obj)) {
1785 PyErr_Format(PicklingError,
1786 "Can't pickle %R: extension code %R isn't an integer",
1787 obj, code_obj);
1788 goto error;
1789 }
1790 code = PyLong_AS_LONG(code_obj);
1791 if (code <= 0 || code > 0x7fffffffL) {
1792 PyErr_Format(PicklingError,
1793 "Can't pickle %R: extension code %ld is out of range",
1794 obj, code);
1795 goto error;
1796 }
1797
1798 /* Generate an EXT opcode. */
1799 if (code <= 0xff) {
1800 pdata[0] = EXT1;
1801 pdata[1] = (unsigned char)code;
1802 n = 2;
1803 }
1804 else if (code <= 0xffff) {
1805 pdata[0] = EXT2;
1806 pdata[1] = (unsigned char)(code & 0xff);
1807 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1808 n = 3;
1809 }
1810 else {
1811 pdata[0] = EXT4;
1812 pdata[1] = (unsigned char)(code & 0xff);
1813 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1814 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1815 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1816 n = 5;
1817 }
1818
1819 if (pickler_write(self, pdata, n) < 0)
1820 goto error;
1821 }
1822 else {
1823 /* Generate a normal global opcode if we are using a pickle
1824 protocol <= 2, or if the object is not registered in the
1825 extension registry. */
1826 PyObject *encoded;
1827 PyObject *(*unicode_encoder)(PyObject *);
1828
1829 gen_global:
1830 if (pickler_write(self, &global_op, 1) < 0)
1831 goto error;
1832
1833 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1834 the module name and the global name using UTF-8. We do so only when
1835 we are using the pickle protocol newer than version 3. This is to
1836 ensure compatibility with older Unpickler running on Python 2.x. */
1837 if (self->proto >= 3) {
1838 unicode_encoder = PyUnicode_AsUTF8String;
1839 }
1840 else {
1841 unicode_encoder = PyUnicode_AsASCIIString;
1842 }
1843
1844 /* Save the name of the module. */
1845 encoded = unicode_encoder(module_name);
1846 if (encoded == NULL) {
1847 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1848 PyErr_Format(PicklingError,
1849 "can't pickle module identifier '%S' using "
1850 "pickle protocol %i", module_name, self->proto);
1851 goto error;
1852 }
1853 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1854 PyBytes_GET_SIZE(encoded)) < 0) {
1855 Py_DECREF(encoded);
1856 goto error;
1857 }
1858 Py_DECREF(encoded);
1859 if(pickler_write(self, "\n", 1) < 0)
1860 goto error;
1861
1862 /* Save the name of the module. */
1863 encoded = unicode_encoder(global_name);
1864 if (encoded == NULL) {
1865 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1866 PyErr_Format(PicklingError,
1867 "can't pickle global identifier '%S' using "
1868 "pickle protocol %i", global_name, self->proto);
1869 goto error;
1870 }
1871 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1872 PyBytes_GET_SIZE(encoded)) < 0) {
1873 Py_DECREF(encoded);
1874 goto error;
1875 }
1876 Py_DECREF(encoded);
1877 if(pickler_write(self, "\n", 1) < 0)
1878 goto error;
1879
1880 /* Memoize the object. */
1881 if (memo_put(self, obj) < 0)
1882 goto error;
1883 }
1884
1885 if (0) {
1886 error:
1887 status = -1;
1888 }
1889 Py_XDECREF(module_name);
1890 Py_XDECREF(global_name);
1891 Py_XDECREF(module);
1892
1893 return status;
1894}
1895
1896static int
1897save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1898{
1899 PyObject *pid = NULL;
1900 int status = 0;
1901
1902 const char persid_op = PERSID;
1903 const char binpersid_op = BINPERSID;
1904
1905 Py_INCREF(obj);
1906 pid = pickler_call(self, func, obj);
1907 if (pid == NULL)
1908 return -1;
1909
1910 if (pid != Py_None) {
1911 if (self->bin) {
1912 if (save(self, pid, 1) < 0 ||
1913 pickler_write(self, &binpersid_op, 1) < 0)
1914 goto error;
1915 }
1916 else {
1917 PyObject *pid_str = NULL;
1918 char *pid_ascii_bytes;
1919 Py_ssize_t size;
1920
1921 pid_str = PyObject_Str(pid);
1922 if (pid_str == NULL)
1923 goto error;
1924
1925 /* XXX: Should it check whether the persistent id only contains
1926 ASCII characters? And what if the pid contains embedded
1927 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001928 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001929 Py_DECREF(pid_str);
1930 if (pid_ascii_bytes == NULL)
1931 goto error;
1932
1933 if (pickler_write(self, &persid_op, 1) < 0 ||
1934 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1935 pickler_write(self, "\n", 1) < 0)
1936 goto error;
1937 }
1938 status = 1;
1939 }
1940
1941 if (0) {
1942 error:
1943 status = -1;
1944 }
1945 Py_XDECREF(pid);
1946
1947 return status;
1948}
1949
1950/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1951 * appropriate __reduce__ method for obj.
1952 */
1953static int
1954save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1955{
1956 PyObject *callable;
1957 PyObject *argtup;
1958 PyObject *state = NULL;
1959 PyObject *listitems = NULL;
1960 PyObject *dictitems = NULL;
1961
1962 int use_newobj = self->proto >= 2;
1963
1964 const char reduce_op = REDUCE;
1965 const char build_op = BUILD;
1966 const char newobj_op = NEWOBJ;
1967
1968 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1969 &callable, &argtup, &state, &listitems, &dictitems))
1970 return -1;
1971
1972 if (!PyCallable_Check(callable)) {
1973 PyErr_SetString(PicklingError,
1974 "first argument of save_reduce() must be callable");
1975 return -1;
1976 }
1977 if (!PyTuple_Check(argtup)) {
1978 PyErr_SetString(PicklingError,
1979 "second argument of save_reduce() must be a tuple");
1980 return -1;
1981 }
1982
1983 if (state == Py_None)
1984 state = NULL;
1985 if (listitems == Py_None)
1986 listitems = NULL;
1987 if (dictitems == Py_None)
1988 dictitems = NULL;
1989
1990 /* Protocol 2 special case: if callable's name is __newobj__, use
1991 NEWOBJ. */
1992 if (use_newobj) {
1993 static PyObject *newobj_str = NULL;
1994 PyObject *name_str;
1995
1996 if (newobj_str == NULL) {
1997 newobj_str = PyUnicode_InternFromString("__newobj__");
1998 }
1999
2000 name_str = PyObject_GetAttrString(callable, "__name__");
2001 if (name_str == NULL) {
2002 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2003 PyErr_Clear();
2004 else
2005 return -1;
2006 use_newobj = 0;
2007 }
2008 else {
2009 use_newobj = PyUnicode_Check(name_str) &&
2010 PyUnicode_Compare(name_str, newobj_str) == 0;
2011 Py_DECREF(name_str);
2012 }
2013 }
2014 if (use_newobj) {
2015 PyObject *cls;
2016 PyObject *newargtup;
2017 PyObject *obj_class;
2018 int p;
2019
2020 /* Sanity checks. */
2021 if (Py_SIZE(argtup) < 1) {
2022 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2023 return -1;
2024 }
2025
2026 cls = PyTuple_GET_ITEM(argtup, 0);
2027 if (!PyObject_HasAttrString(cls, "__new__")) {
2028 PyErr_SetString(PicklingError, "args[0] from "
2029 "__newobj__ args has no __new__");
2030 return -1;
2031 }
2032
2033 if (obj != NULL) {
2034 obj_class = PyObject_GetAttrString(obj, "__class__");
2035 if (obj_class == NULL) {
2036 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2037 PyErr_Clear();
2038 else
2039 return -1;
2040 }
2041 p = obj_class != cls; /* true iff a problem */
2042 Py_DECREF(obj_class);
2043 if (p) {
2044 PyErr_SetString(PicklingError, "args[0] from "
2045 "__newobj__ args has the wrong class");
2046 return -1;
2047 }
2048 }
2049 /* XXX: These calls save() are prone to infinite recursion. Imagine
2050 what happen if the value returned by the __reduce__() method of
2051 some extension type contains another object of the same type. Ouch!
2052
2053 Here is a quick example, that I ran into, to illustrate what I
2054 mean:
2055
2056 >>> import pickle, copyreg
2057 >>> copyreg.dispatch_table.pop(complex)
2058 >>> pickle.dumps(1+2j)
2059 Traceback (most recent call last):
2060 ...
2061 RuntimeError: maximum recursion depth exceeded
2062
2063 Removing the complex class from copyreg.dispatch_table made the
2064 __reduce_ex__() method emit another complex object:
2065
2066 >>> (1+1j).__reduce_ex__(2)
2067 (<function __newobj__ at 0xb7b71c3c>,
2068 (<class 'complex'>, (1+1j)), None, None, None)
2069
2070 Thus when save() was called on newargstup (the 2nd item) recursion
2071 ensued. Of course, the bug was in the complex class which had a
2072 broken __getnewargs__() that emitted another complex object. But,
2073 the point, here, is it is quite easy to end up with a broken reduce
2074 function. */
2075
2076 /* Save the class and its __new__ arguments. */
2077 if (save(self, cls, 0) < 0)
2078 return -1;
2079
2080 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2081 if (newargtup == NULL)
2082 return -1;
2083
2084 p = save(self, newargtup, 0);
2085 Py_DECREF(newargtup);
2086 if (p < 0)
2087 return -1;
2088
2089 /* Add NEWOBJ opcode. */
2090 if (pickler_write(self, &newobj_op, 1) < 0)
2091 return -1;
2092 }
2093 else { /* Not using NEWOBJ. */
2094 if (save(self, callable, 0) < 0 ||
2095 save(self, argtup, 0) < 0 ||
2096 pickler_write(self, &reduce_op, 1) < 0)
2097 return -1;
2098 }
2099
2100 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2101 the caller do not want to memoize the object. Not particularly useful,
2102 but that is to mimic the behavior save_reduce() in pickle.py when
2103 obj is None. */
2104 if (obj && memo_put(self, obj) < 0)
2105 return -1;
2106
2107 if (listitems && batch_list(self, listitems) < 0)
2108 return -1;
2109
2110 if (dictitems && batch_dict(self, dictitems) < 0)
2111 return -1;
2112
2113 if (state) {
2114 if (save(self, state, 0) < 0 ||
2115 pickler_write(self, &build_op, 1) < 0)
2116 return -1;
2117 }
2118
2119 return 0;
2120}
2121
2122static int
2123save(PicklerObject *self, PyObject *obj, int pers_save)
2124{
2125 PyTypeObject *type;
2126 PyObject *reduce_func = NULL;
2127 PyObject *reduce_value = NULL;
2128 PyObject *memo_key = NULL;
2129 int status = 0;
2130
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002131 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2132 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002133
2134 /* The extra pers_save argument is necessary to avoid calling save_pers()
2135 on its returned object. */
2136 if (!pers_save && self->pers_func) {
2137 /* save_pers() returns:
2138 -1 to signal an error;
2139 0 if it did nothing successfully;
2140 1 if a persistent id was saved.
2141 */
2142 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2143 goto done;
2144 }
2145
2146 type = Py_TYPE(obj);
2147
2148 /* XXX: The old cPickle had an optimization that used switch-case
2149 statement dispatching on the first letter of the type name. It was
2150 probably not a bad idea after all. If benchmarks shows that particular
2151 optimization had some real benefits, it would be nice to add it
2152 back. */
2153
2154 /* Atom types; these aren't memoized, so don't check the memo. */
2155
2156 if (obj == Py_None) {
2157 status = save_none(self, obj);
2158 goto done;
2159 }
2160 else if (obj == Py_False || obj == Py_True) {
2161 status = save_bool(self, obj);
2162 goto done;
2163 }
2164 else if (type == &PyLong_Type) {
2165 status = save_long(self, obj);
2166 goto done;
2167 }
2168 else if (type == &PyFloat_Type) {
2169 status = save_float(self, obj);
2170 goto done;
2171 }
2172
2173 /* Check the memo to see if it has the object. If so, generate
2174 a GET (or BINGET) opcode, instead of pickling the object
2175 once again. */
2176 memo_key = PyLong_FromVoidPtr(obj);
2177 if (memo_key == NULL)
2178 goto error;
2179 if (PyDict_GetItem(self->memo, memo_key)) {
2180 if (memo_get(self, memo_key) < 0)
2181 goto error;
2182 goto done;
2183 }
2184
2185 if (type == &PyBytes_Type) {
2186 status = save_bytes(self, obj);
2187 goto done;
2188 }
2189 else if (type == &PyUnicode_Type) {
2190 status = save_unicode(self, obj);
2191 goto done;
2192 }
2193 else if (type == &PyDict_Type) {
2194 status = save_dict(self, obj);
2195 goto done;
2196 }
2197 else if (type == &PyList_Type) {
2198 status = save_list(self, obj);
2199 goto done;
2200 }
2201 else if (type == &PyTuple_Type) {
2202 status = save_tuple(self, obj);
2203 goto done;
2204 }
2205 else if (type == &PyType_Type) {
2206 status = save_global(self, obj, NULL);
2207 goto done;
2208 }
2209 else if (type == &PyFunction_Type) {
2210 status = save_global(self, obj, NULL);
2211 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2212 /* fall back to reduce */
2213 PyErr_Clear();
2214 }
2215 else {
2216 goto done;
2217 }
2218 }
2219 else if (type == &PyCFunction_Type) {
2220 status = save_global(self, obj, NULL);
2221 goto done;
2222 }
2223 else if (PyType_IsSubtype(type, &PyType_Type)) {
2224 status = save_global(self, obj, NULL);
2225 goto done;
2226 }
2227
2228 /* XXX: This part needs some unit tests. */
2229
2230 /* Get a reduction callable, and call it. This may come from
2231 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2232 * or the object's __reduce__ method.
2233 */
2234 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2235 if (reduce_func != NULL) {
2236 /* Here, the reference count of the reduce_func object returned by
2237 PyDict_GetItem needs to be increased to be consistent with the one
2238 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2239 reduce_func at the end of the save() routine.
2240 */
2241 Py_INCREF(reduce_func);
2242 Py_INCREF(obj);
2243 reduce_value = pickler_call(self, reduce_func, obj);
2244 }
2245 else {
2246 static PyObject *reduce_str = NULL;
2247 static PyObject *reduce_ex_str = NULL;
2248
2249 /* Cache the name of the reduce methods. */
2250 if (reduce_str == NULL) {
2251 reduce_str = PyUnicode_InternFromString("__reduce__");
2252 if (reduce_str == NULL)
2253 goto error;
2254 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2255 if (reduce_ex_str == NULL)
2256 goto error;
2257 }
2258
2259 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2260 automatically defined as __reduce__. While this is convenient, this
2261 make it impossible to know which method was actually called. Of
2262 course, this is not a big deal. But still, it would be nice to let
2263 the user know which method was called when something go
2264 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2265 don't actually have to check for a __reduce__ method. */
2266
2267 /* Check for a __reduce_ex__ method. */
2268 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2269 if (reduce_func != NULL) {
2270 PyObject *proto;
2271 proto = PyLong_FromLong(self->proto);
2272 if (proto != NULL) {
2273 reduce_value = pickler_call(self, reduce_func, proto);
2274 }
2275 }
2276 else {
2277 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2278 PyErr_Clear();
2279 else
2280 goto error;
2281 /* Check for a __reduce__ method. */
2282 reduce_func = PyObject_GetAttr(obj, reduce_str);
2283 if (reduce_func != NULL) {
2284 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2285 }
2286 else {
2287 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2288 type->tp_name, obj);
2289 goto error;
2290 }
2291 }
2292 }
2293
2294 if (reduce_value == NULL)
2295 goto error;
2296
2297 if (PyUnicode_Check(reduce_value)) {
2298 status = save_global(self, obj, reduce_value);
2299 goto done;
2300 }
2301
2302 if (!PyTuple_Check(reduce_value)) {
2303 PyErr_SetString(PicklingError,
2304 "__reduce__ must return a string or tuple");
2305 goto error;
2306 }
2307 if (Py_SIZE(reduce_value) < 2 || Py_SIZE(reduce_value) > 5) {
2308 PyErr_SetString(PicklingError, "tuple returned by __reduce__ "
2309 "must contain 2 through 5 elements");
2310 goto error;
2311 }
2312 if (!PyTuple_Check(PyTuple_GET_ITEM(reduce_value, 1))) {
2313 PyErr_SetString(PicklingError, "second item of the tuple "
2314 "returned by __reduce__ must be a tuple");
2315 goto error;
2316 }
2317
2318 status = save_reduce(self, reduce_value, obj);
2319
2320 if (0) {
2321 error:
2322 status = -1;
2323 }
2324 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002325 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002326 Py_XDECREF(memo_key);
2327 Py_XDECREF(reduce_func);
2328 Py_XDECREF(reduce_value);
2329
2330 return status;
2331}
2332
2333static int
2334dump(PicklerObject *self, PyObject *obj)
2335{
2336 const char stop_op = STOP;
2337
2338 if (self->proto >= 2) {
2339 char header[2];
2340
2341 header[0] = PROTO;
2342 assert(self->proto >= 0 && self->proto < 256);
2343 header[1] = (unsigned char)self->proto;
2344 if (pickler_write(self, header, 2) < 0)
2345 return -1;
2346 }
2347
2348 if (save(self, obj, 0) < 0 ||
2349 pickler_write(self, &stop_op, 1) < 0 ||
2350 pickler_write(self, NULL, 0) < 0)
2351 return -1;
2352
2353 return 0;
2354}
2355
2356PyDoc_STRVAR(Pickler_clear_memo_doc,
2357"clear_memo() -> None. Clears the pickler's \"memo\"."
2358"\n"
2359"The memo is the data structure that remembers which objects the\n"
2360"pickler has already seen, so that shared or recursive objects are\n"
2361"pickled by reference and not by value. This method is useful when\n"
2362"re-using picklers.");
2363
2364static PyObject *
2365Pickler_clear_memo(PicklerObject *self)
2366{
2367 if (self->memo)
2368 PyDict_Clear(self->memo);
2369
2370 Py_RETURN_NONE;
2371}
2372
2373PyDoc_STRVAR(Pickler_dump_doc,
2374"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2375
2376static PyObject *
2377Pickler_dump(PicklerObject *self, PyObject *args)
2378{
2379 PyObject *obj;
2380
2381 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2382 return NULL;
2383
2384 if (dump(self, obj) < 0)
2385 return NULL;
2386
2387 Py_RETURN_NONE;
2388}
2389
2390static struct PyMethodDef Pickler_methods[] = {
2391 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2392 Pickler_dump_doc},
2393 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2394 Pickler_clear_memo_doc},
2395 {NULL, NULL} /* sentinel */
2396};
2397
2398static void
2399Pickler_dealloc(PicklerObject *self)
2400{
2401 PyObject_GC_UnTrack(self);
2402
2403 Py_XDECREF(self->write);
2404 Py_XDECREF(self->memo);
2405 Py_XDECREF(self->pers_func);
2406 Py_XDECREF(self->arg);
2407 Py_XDECREF(self->fast_memo);
2408
2409 PyMem_Free(self->write_buf);
2410
2411 Py_TYPE(self)->tp_free((PyObject *)self);
2412}
2413
2414static int
2415Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2416{
2417 Py_VISIT(self->write);
2418 Py_VISIT(self->memo);
2419 Py_VISIT(self->pers_func);
2420 Py_VISIT(self->arg);
2421 Py_VISIT(self->fast_memo);
2422 return 0;
2423}
2424
2425static int
2426Pickler_clear(PicklerObject *self)
2427{
2428 Py_CLEAR(self->write);
2429 Py_CLEAR(self->memo);
2430 Py_CLEAR(self->pers_func);
2431 Py_CLEAR(self->arg);
2432 Py_CLEAR(self->fast_memo);
2433
2434 PyMem_Free(self->write_buf);
2435 self->write_buf = NULL;
2436
2437 return 0;
2438}
2439
2440PyDoc_STRVAR(Pickler_doc,
2441"Pickler(file, protocol=None)"
2442"\n"
2443"This takes a binary file for writing a pickle data stream.\n"
2444"\n"
2445"The optional protocol argument tells the pickler to use the\n"
2446"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2447"protocol is 3; a backward-incompatible protocol designed for\n"
2448"Python 3.0.\n"
2449"\n"
2450"Specifying a negative protocol version selects the highest\n"
2451"protocol version supported. The higher the protocol used, the\n"
2452"more recent the version of Python needed to read the pickle\n"
2453"produced.\n"
2454"\n"
2455"The file argument must have a write() method that accepts a single\n"
2456"bytes argument. It can thus be a file object opened for binary\n"
2457"writing, a io.BytesIO instance, or any other custom object that\n"
2458"meets this interface.\n");
2459
2460static int
2461Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2462{
2463 static char *kwlist[] = {"file", "protocol", 0};
2464 PyObject *file;
2465 PyObject *proto_obj = NULL;
2466 long proto = 0;
2467
2468 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2469 kwlist, &file, &proto_obj))
2470 return -1;
2471
2472 /* In case of multiple __init__() calls, clear previous content. */
2473 if (self->write != NULL)
2474 (void)Pickler_clear(self);
2475
2476 if (proto_obj == NULL || proto_obj == Py_None)
2477 proto = DEFAULT_PROTOCOL;
2478 else
2479 proto = PyLong_AsLong(proto_obj);
2480
2481 if (proto < 0)
2482 proto = HIGHEST_PROTOCOL;
2483 if (proto > HIGHEST_PROTOCOL) {
2484 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2485 HIGHEST_PROTOCOL);
2486 return -1;
2487 }
2488
2489 self->proto = proto;
2490 self->bin = proto > 0;
2491 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002492 self->fast = 0;
2493 self->fast_nesting = 0;
2494 self->fast_memo = NULL;
2495
2496 if (!PyObject_HasAttrString(file, "write")) {
2497 PyErr_SetString(PyExc_TypeError,
2498 "file must have a 'write' attribute");
2499 return -1;
2500 }
2501 self->write = PyObject_GetAttrString(file, "write");
2502 if (self->write == NULL)
2503 return -1;
2504 self->buf_size = 0;
2505 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2506 if (self->write_buf == NULL) {
2507 PyErr_NoMemory();
2508 return -1;
2509 }
2510 self->pers_func = NULL;
2511 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2512 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2513 "persistent_id");
2514 if (self->pers_func == NULL)
2515 return -1;
2516 }
2517 self->memo = PyDict_New();
2518 if (self->memo == NULL)
2519 return -1;
2520
2521 return 0;
2522}
2523
2524static PyObject *
2525Pickler_get_memo(PicklerObject *self)
2526{
2527 if (self->memo == NULL)
2528 PyErr_SetString(PyExc_AttributeError, "memo");
2529 else
2530 Py_INCREF(self->memo);
2531 return self->memo;
2532}
2533
2534static int
2535Pickler_set_memo(PicklerObject *self, PyObject *value)
2536{
2537 PyObject *tmp;
2538
2539 if (value == NULL) {
2540 PyErr_SetString(PyExc_TypeError,
2541 "attribute deletion is not supported");
2542 return -1;
2543 }
2544 if (!PyDict_Check(value)) {
2545 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2546 return -1;
2547 }
2548
2549 tmp = self->memo;
2550 Py_INCREF(value);
2551 self->memo = value;
2552 Py_XDECREF(tmp);
2553
2554 return 0;
2555}
2556
2557static PyObject *
2558Pickler_get_persid(PicklerObject *self)
2559{
2560 if (self->pers_func == NULL)
2561 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2562 else
2563 Py_INCREF(self->pers_func);
2564 return self->pers_func;
2565}
2566
2567static int
2568Pickler_set_persid(PicklerObject *self, PyObject *value)
2569{
2570 PyObject *tmp;
2571
2572 if (value == NULL) {
2573 PyErr_SetString(PyExc_TypeError,
2574 "attribute deletion is not supported");
2575 return -1;
2576 }
2577 if (!PyCallable_Check(value)) {
2578 PyErr_SetString(PyExc_TypeError,
2579 "persistent_id must be a callable taking one argument");
2580 return -1;
2581 }
2582
2583 tmp = self->pers_func;
2584 Py_INCREF(value);
2585 self->pers_func = value;
2586 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2587
2588 return 0;
2589}
2590
2591static PyMemberDef Pickler_members[] = {
2592 {"bin", T_INT, offsetof(PicklerObject, bin)},
2593 {"fast", T_INT, offsetof(PicklerObject, fast)},
2594 {NULL}
2595};
2596
2597static PyGetSetDef Pickler_getsets[] = {
2598 {"memo", (getter)Pickler_get_memo,
2599 (setter)Pickler_set_memo},
2600 {"persistent_id", (getter)Pickler_get_persid,
2601 (setter)Pickler_set_persid},
2602 {NULL}
2603};
2604
2605static PyTypeObject Pickler_Type = {
2606 PyVarObject_HEAD_INIT(NULL, 0)
2607 "_pickle.Pickler" , /*tp_name*/
2608 sizeof(PicklerObject), /*tp_basicsize*/
2609 0, /*tp_itemsize*/
2610 (destructor)Pickler_dealloc, /*tp_dealloc*/
2611 0, /*tp_print*/
2612 0, /*tp_getattr*/
2613 0, /*tp_setattr*/
2614 0, /*tp_compare*/
2615 0, /*tp_repr*/
2616 0, /*tp_as_number*/
2617 0, /*tp_as_sequence*/
2618 0, /*tp_as_mapping*/
2619 0, /*tp_hash*/
2620 0, /*tp_call*/
2621 0, /*tp_str*/
2622 0, /*tp_getattro*/
2623 0, /*tp_setattro*/
2624 0, /*tp_as_buffer*/
2625 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2626 Pickler_doc, /*tp_doc*/
2627 (traverseproc)Pickler_traverse, /*tp_traverse*/
2628 (inquiry)Pickler_clear, /*tp_clear*/
2629 0, /*tp_richcompare*/
2630 0, /*tp_weaklistoffset*/
2631 0, /*tp_iter*/
2632 0, /*tp_iternext*/
2633 Pickler_methods, /*tp_methods*/
2634 Pickler_members, /*tp_members*/
2635 Pickler_getsets, /*tp_getset*/
2636 0, /*tp_base*/
2637 0, /*tp_dict*/
2638 0, /*tp_descr_get*/
2639 0, /*tp_descr_set*/
2640 0, /*tp_dictoffset*/
2641 (initproc)Pickler_init, /*tp_init*/
2642 PyType_GenericAlloc, /*tp_alloc*/
2643 PyType_GenericNew, /*tp_new*/
2644 PyObject_GC_Del, /*tp_free*/
2645 0, /*tp_is_gc*/
2646};
2647
2648/* Temporary helper for calling self.find_class().
2649
2650 XXX: It would be nice to able to avoid Python function call overhead, by
2651 using directly the C version of find_class(), when find_class() is not
2652 overridden by a subclass. Although, this could become rather hackish. A
2653 simpler optimization would be to call the C function when self is not a
2654 subclass instance. */
2655static PyObject *
2656find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2657{
2658 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2659 module_name, global_name);
2660}
2661
2662static int
2663marker(UnpicklerObject *self)
2664{
2665 if (self->num_marks < 1) {
2666 PyErr_SetString(UnpicklingError, "could not find MARK");
2667 return -1;
2668 }
2669
2670 return self->marks[--self->num_marks];
2671}
2672
2673static int
2674load_none(UnpicklerObject *self)
2675{
2676 PDATA_APPEND(self->stack, Py_None, -1);
2677 return 0;
2678}
2679
2680static int
2681bad_readline(void)
2682{
2683 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2684 return -1;
2685}
2686
2687static int
2688load_int(UnpicklerObject *self)
2689{
2690 PyObject *value;
2691 char *endptr, *s;
2692 Py_ssize_t len;
2693 long x;
2694
2695 if ((len = unpickler_readline(self, &s)) < 0)
2696 return -1;
2697 if (len < 2)
2698 return bad_readline();
2699
2700 errno = 0;
2701 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2702 x = strtol(s, &endptr, 0);
2703
2704 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2705 /* Hm, maybe we've got something long. Let's try reading
2706 * it as a Python long object. */
2707 errno = 0;
2708 /* XXX: Same thing about the base here. */
2709 value = PyLong_FromString(s, NULL, 0);
2710 if (value == NULL) {
2711 PyErr_SetString(PyExc_ValueError,
2712 "could not convert string to int");
2713 return -1;
2714 }
2715 }
2716 else {
2717 if (len == 3 && (x == 0 || x == 1)) {
2718 if ((value = PyBool_FromLong(x)) == NULL)
2719 return -1;
2720 }
2721 else {
2722 if ((value = PyLong_FromLong(x)) == NULL)
2723 return -1;
2724 }
2725 }
2726
2727 PDATA_PUSH(self->stack, value, -1);
2728 return 0;
2729}
2730
2731static int
2732load_bool(UnpicklerObject *self, PyObject *boolean)
2733{
2734 assert(boolean == Py_True || boolean == Py_False);
2735 PDATA_APPEND(self->stack, boolean, -1);
2736 return 0;
2737}
2738
/* bytes contains size bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is accumulated in an unsigned long so that shifting a byte into
 * bit 31 is well defined even where long is only 32 bits wide (a signed
 * left shift into the sign bit is undefined behaviour).
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  Where long is
     * exactly 32 bits the mask below is zero and this is a no-op.
     */
    if (size == 4 && (x & 0x80000000UL)) {
        x |= ~0xffffffffUL;
    }

    return (long)x;
}
2765
2766static int
2767load_binintx(UnpicklerObject *self, char *s, int size)
2768{
2769 PyObject *value;
2770 long x;
2771
2772 x = calc_binint(s, size);
2773
2774 if ((value = PyLong_FromLong(x)) == NULL)
2775 return -1;
2776
2777 PDATA_PUSH(self->stack, value, -1);
2778 return 0;
2779}
2780
2781static int
2782load_binint(UnpicklerObject *self)
2783{
2784 char *s;
2785
2786 if (unpickler_read(self, &s, 4) < 0)
2787 return -1;
2788
2789 return load_binintx(self, s, 4);
2790}
2791
2792static int
2793load_binint1(UnpicklerObject *self)
2794{
2795 char *s;
2796
2797 if (unpickler_read(self, &s, 1) < 0)
2798 return -1;
2799
2800 return load_binintx(self, s, 1);
2801}
2802
2803static int
2804load_binint2(UnpicklerObject *self)
2805{
2806 char *s;
2807
2808 if (unpickler_read(self, &s, 2) < 0)
2809 return -1;
2810
2811 return load_binintx(self, s, 2);
2812}
2813
2814static int
2815load_long(UnpicklerObject *self)
2816{
2817 PyObject *value;
2818 char *s;
2819 Py_ssize_t len;
2820
2821 if ((len = unpickler_readline(self, &s)) < 0)
2822 return -1;
2823 if (len < 2)
2824 return bad_readline();
2825
2826 /* XXX: Should the base argument explicitly set to 10? */
2827 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2828 return -1;
2829
2830 PDATA_PUSH(self->stack, value, -1);
2831 return 0;
2832}
2833
2834/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2835 * data following.
2836 */
2837static int
2838load_counted_long(UnpicklerObject *self, int size)
2839{
2840 PyObject *value;
2841 char *nbytes;
2842 char *pdata;
2843
2844 assert(size == 1 || size == 4);
2845 if (unpickler_read(self, &nbytes, size) < 0)
2846 return -1;
2847
2848 size = calc_binint(nbytes, size);
2849 if (size < 0) {
2850 /* Corrupt or hostile pickle -- we never write one like this */
2851 PyErr_SetString(UnpicklingError,
2852 "LONG pickle has negative byte count");
2853 return -1;
2854 }
2855
2856 if (size == 0)
2857 value = PyLong_FromLong(0L);
2858 else {
2859 /* Read the raw little-endian bytes and convert. */
2860 if (unpickler_read(self, &pdata, size) < 0)
2861 return -1;
2862 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2863 1 /* little endian */ , 1 /* signed */ );
2864 }
2865 if (value == NULL)
2866 return -1;
2867 PDATA_PUSH(self->stack, value, -1);
2868 return 0;
2869}
2870
2871static int
2872load_float(UnpicklerObject *self)
2873{
2874 PyObject *value;
2875 char *endptr, *s;
2876 Py_ssize_t len;
2877 double d;
2878
2879 if ((len = unpickler_readline(self, &s)) < 0)
2880 return -1;
2881 if (len < 2)
2882 return bad_readline();
2883
2884 errno = 0;
2885 d = PyOS_ascii_strtod(s, &endptr);
2886
2887 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2888 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2889 return -1;
2890 }
2891
2892 if ((value = PyFloat_FromDouble(d)) == NULL)
2893 return -1;
2894
2895 PDATA_PUSH(self->stack, value, -1);
2896 return 0;
2897}
2898
2899static int
2900load_binfloat(UnpicklerObject *self)
2901{
2902 PyObject *value;
2903 double x;
2904 char *s;
2905
2906 if (unpickler_read(self, &s, 8) < 0)
2907 return -1;
2908
2909 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2910 if (x == -1.0 && PyErr_Occurred())
2911 return -1;
2912
2913 if ((value = PyFloat_FromDouble(x)) == NULL)
2914 return -1;
2915
2916 PDATA_PUSH(self->stack, value, -1);
2917 return 0;
2918}
2919
2920static int
2921load_string(UnpicklerObject *self)
2922{
2923 PyObject *bytes;
2924 PyObject *str = NULL;
2925 Py_ssize_t len;
2926 char *s, *p;
2927
2928 if ((len = unpickler_readline(self, &s)) < 0)
2929 return -1;
2930 if (len < 3)
2931 return bad_readline();
2932 if ((s = strdup(s)) == NULL) {
2933 PyErr_NoMemory();
2934 return -1;
2935 }
2936
2937 /* Strip outermost quotes */
2938 while (s[len - 1] <= ' ')
2939 len--;
2940 if (s[0] == '"' && s[len - 1] == '"') {
2941 s[len - 1] = '\0';
2942 p = s + 1;
2943 len -= 2;
2944 }
2945 else if (s[0] == '\'' && s[len - 1] == '\'') {
2946 s[len - 1] = '\0';
2947 p = s + 1;
2948 len -= 2;
2949 }
2950 else {
2951 free(s);
2952 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2953 return -1;
2954 }
2955
2956 /* Use the PyBytes API to decode the string, since that is what is used
2957 to encode, and then coerce the result to Unicode. */
2958 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2959 free(s);
2960 if (bytes == NULL)
2961 return -1;
2962 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2963 Py_DECREF(bytes);
2964 if (str == NULL)
2965 return -1;
2966
2967 PDATA_PUSH(self->stack, str, -1);
2968 return 0;
2969}
2970
2971static int
2972load_binbytes(UnpicklerObject *self)
2973{
2974 PyObject *bytes;
2975 long x;
2976 char *s;
2977
2978 if (unpickler_read(self, &s, 4) < 0)
2979 return -1;
2980
2981 x = calc_binint(s, 4);
2982 if (x < 0) {
2983 PyErr_SetString(UnpicklingError,
2984 "BINBYTES pickle has negative byte count");
2985 return -1;
2986 }
2987
2988 if (unpickler_read(self, &s, x) < 0)
2989 return -1;
2990 bytes = PyBytes_FromStringAndSize(s, x);
2991 if (bytes == NULL)
2992 return -1;
2993
2994 PDATA_PUSH(self->stack, bytes, -1);
2995 return 0;
2996}
2997
2998static int
2999load_short_binbytes(UnpicklerObject *self)
3000{
3001 PyObject *bytes;
3002 unsigned char x;
3003 char *s;
3004
3005 if (unpickler_read(self, &s, 1) < 0)
3006 return -1;
3007
3008 x = (unsigned char)s[0];
3009
3010 if (unpickler_read(self, &s, x) < 0)
3011 return -1;
3012
3013 bytes = PyBytes_FromStringAndSize(s, x);
3014 if (bytes == NULL)
3015 return -1;
3016
3017 PDATA_PUSH(self->stack, bytes, -1);
3018 return 0;
3019}
3020
3021static int
3022load_binstring(UnpicklerObject *self)
3023{
3024 PyObject *str;
3025 long x;
3026 char *s;
3027
3028 if (unpickler_read(self, &s, 4) < 0)
3029 return -1;
3030
3031 x = calc_binint(s, 4);
3032 if (x < 0) {
3033 PyErr_SetString(UnpicklingError,
3034 "BINSTRING pickle has negative byte count");
3035 return -1;
3036 }
3037
3038 if (unpickler_read(self, &s, x) < 0)
3039 return -1;
3040
3041 /* Convert Python 2.x strings to unicode. */
3042 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3043 if (str == NULL)
3044 return -1;
3045
3046 PDATA_PUSH(self->stack, str, -1);
3047 return 0;
3048}
3049
3050static int
3051load_short_binstring(UnpicklerObject *self)
3052{
3053 PyObject *str;
3054 unsigned char x;
3055 char *s;
3056
3057 if (unpickler_read(self, &s, 1) < 0)
3058 return -1;
3059
3060 x = (unsigned char)s[0];
3061
3062 if (unpickler_read(self, &s, x) < 0)
3063 return -1;
3064
3065 /* Convert Python 2.x strings to unicode. */
3066 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3067 if (str == NULL)
3068 return -1;
3069
3070 PDATA_PUSH(self->stack, str, -1);
3071 return 0;
3072}
3073
3074static int
3075load_unicode(UnpicklerObject *self)
3076{
3077 PyObject *str;
3078 Py_ssize_t len;
3079 char *s;
3080
3081 if ((len = unpickler_readline(self, &s)) < 0)
3082 return -1;
3083 if (len < 1)
3084 return bad_readline();
3085
3086 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3087 if (str == NULL)
3088 return -1;
3089
3090 PDATA_PUSH(self->stack, str, -1);
3091 return 0;
3092}
3093
3094static int
3095load_binunicode(UnpicklerObject *self)
3096{
3097 PyObject *str;
3098 long size;
3099 char *s;
3100
3101 if (unpickler_read(self, &s, 4) < 0)
3102 return -1;
3103
3104 size = calc_binint(s, 4);
3105 if (size < 0) {
3106 PyErr_SetString(UnpicklingError,
3107 "BINUNICODE pickle has negative byte count");
3108 return -1;
3109 }
3110
3111 if (unpickler_read(self, &s, size) < 0)
3112 return -1;
3113
3114 str = PyUnicode_DecodeUTF8(s, size, NULL);
3115 if (str == NULL)
3116 return -1;
3117
3118 PDATA_PUSH(self->stack, str, -1);
3119 return 0;
3120}
3121
3122static int
3123load_tuple(UnpicklerObject *self)
3124{
3125 PyObject *tuple;
3126 int i;
3127
3128 if ((i = marker(self)) < 0)
3129 return -1;
3130
3131 tuple = Pdata_poptuple(self->stack, i);
3132 if (tuple == NULL)
3133 return -1;
3134 PDATA_PUSH(self->stack, tuple, -1);
3135 return 0;
3136}
3137
3138static int
3139load_counted_tuple(UnpicklerObject *self, int len)
3140{
3141 PyObject *tuple;
3142
3143 tuple = PyTuple_New(len);
3144 if (tuple == NULL)
3145 return -1;
3146
3147 while (--len >= 0) {
3148 PyObject *item;
3149
3150 PDATA_POP(self->stack, item);
3151 if (item == NULL)
3152 return -1;
3153 PyTuple_SET_ITEM(tuple, len, item);
3154 }
3155 PDATA_PUSH(self->stack, tuple, -1);
3156 return 0;
3157}
3158
3159static int
3160load_empty_list(UnpicklerObject *self)
3161{
3162 PyObject *list;
3163
3164 if ((list = PyList_New(0)) == NULL)
3165 return -1;
3166 PDATA_PUSH(self->stack, list, -1);
3167 return 0;
3168}
3169
3170static int
3171load_empty_dict(UnpicklerObject *self)
3172{
3173 PyObject *dict;
3174
3175 if ((dict = PyDict_New()) == NULL)
3176 return -1;
3177 PDATA_PUSH(self->stack, dict, -1);
3178 return 0;
3179}
3180
3181static int
3182load_list(UnpicklerObject *self)
3183{
3184 PyObject *list;
3185 int i;
3186
3187 if ((i = marker(self)) < 0)
3188 return -1;
3189
3190 list = Pdata_poplist(self->stack, i);
3191 if (list == NULL)
3192 return -1;
3193 PDATA_PUSH(self->stack, list, -1);
3194 return 0;
3195}
3196
3197static int
3198load_dict(UnpicklerObject *self)
3199{
3200 PyObject *dict, *key, *value;
3201 int i, j, k;
3202
3203 if ((i = marker(self)) < 0)
3204 return -1;
3205 j = self->stack->length;
3206
3207 if ((dict = PyDict_New()) == NULL)
3208 return -1;
3209
3210 for (k = i + 1; k < j; k += 2) {
3211 key = self->stack->data[k - 1];
3212 value = self->stack->data[k];
3213 if (PyDict_SetItem(dict, key, value) < 0) {
3214 Py_DECREF(dict);
3215 return -1;
3216 }
3217 }
3218 Pdata_clear(self->stack, i);
3219 PDATA_PUSH(self->stack, dict, -1);
3220 return 0;
3221}
3222
3223static PyObject *
3224instantiate(PyObject *cls, PyObject *args)
3225{
3226 PyObject *r = NULL;
3227
3228 /* XXX: The pickle.py module does not create instances this way when the
3229 args tuple is empty. See Unpickler._instantiate(). */
3230 if ((r = PyObject_CallObject(cls, args)))
3231 return r;
3232
3233 /* XXX: Is this still nescessary? */
3234 {
3235 PyObject *tp, *v, *tb, *tmp_value;
3236
3237 PyErr_Fetch(&tp, &v, &tb);
3238 tmp_value = v;
3239 /* NULL occurs when there was a KeyboardInterrupt */
3240 if (tmp_value == NULL)
3241 tmp_value = Py_None;
3242 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3243 Py_XDECREF(v);
3244 v = r;
3245 }
3246 PyErr_Restore(tp, v, tb);
3247 }
3248 return NULL;
3249}
3250
3251static int
3252load_obj(UnpicklerObject *self)
3253{
3254 PyObject *cls, *args, *obj = NULL;
3255 int i;
3256
3257 if ((i = marker(self)) < 0)
3258 return -1;
3259
3260 args = Pdata_poptuple(self->stack, i + 1);
3261 if (args == NULL)
3262 return -1;
3263
3264 PDATA_POP(self->stack, cls);
3265 if (cls) {
3266 obj = instantiate(cls, args);
3267 Py_DECREF(cls);
3268 }
3269 Py_DECREF(args);
3270 if (obj == NULL)
3271 return -1;
3272
3273 PDATA_PUSH(self->stack, obj, -1);
3274 return 0;
3275}
3276
3277static int
3278load_inst(UnpicklerObject *self)
3279{
3280 PyObject *cls = NULL;
3281 PyObject *args = NULL;
3282 PyObject *obj = NULL;
3283 PyObject *module_name;
3284 PyObject *class_name;
3285 Py_ssize_t len;
3286 int i;
3287 char *s;
3288
3289 if ((i = marker(self)) < 0)
3290 return -1;
3291 if ((len = unpickler_readline(self, &s)) < 0)
3292 return -1;
3293 if (len < 2)
3294 return bad_readline();
3295
3296 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3297 identifiers are permitted in Python 3.0, since the INST opcode is only
3298 supported by older protocols on Python 2.x. */
3299 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3300 if (module_name == NULL)
3301 return -1;
3302
3303 if ((len = unpickler_readline(self, &s)) >= 0) {
3304 if (len < 2)
3305 return bad_readline();
3306 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3307 if (class_name == NULL) {
3308 cls = find_class(self, module_name, class_name);
3309 Py_DECREF(class_name);
3310 }
3311 }
3312 Py_DECREF(module_name);
3313
3314 if (cls == NULL)
3315 return -1;
3316
3317 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3318 obj = instantiate(cls, args);
3319 Py_DECREF(args);
3320 }
3321 Py_DECREF(cls);
3322
3323 if (obj == NULL)
3324 return -1;
3325
3326 PDATA_PUSH(self->stack, obj, -1);
3327 return 0;
3328}
3329
3330static int
3331load_newobj(UnpicklerObject *self)
3332{
3333 PyObject *args = NULL;
3334 PyObject *clsraw = NULL;
3335 PyTypeObject *cls; /* clsraw cast to its true type */
3336 PyObject *obj;
3337
3338 /* Stack is ... cls argtuple, and we want to call
3339 * cls.__new__(cls, *argtuple).
3340 */
3341 PDATA_POP(self->stack, args);
3342 if (args == NULL)
3343 goto error;
3344 if (!PyTuple_Check(args)) {
3345 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3346 goto error;
3347 }
3348
3349 PDATA_POP(self->stack, clsraw);
3350 cls = (PyTypeObject *)clsraw;
3351 if (cls == NULL)
3352 goto error;
3353 if (!PyType_Check(cls)) {
3354 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3355 "isn't a type object");
3356 goto error;
3357 }
3358 if (cls->tp_new == NULL) {
3359 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3360 "has NULL tp_new");
3361 goto error;
3362 }
3363
3364 /* Call __new__. */
3365 obj = cls->tp_new(cls, args, NULL);
3366 if (obj == NULL)
3367 goto error;
3368
3369 Py_DECREF(args);
3370 Py_DECREF(clsraw);
3371 PDATA_PUSH(self->stack, obj, -1);
3372 return 0;
3373
3374 error:
3375 Py_XDECREF(args);
3376 Py_XDECREF(clsraw);
3377 return -1;
3378}
3379
3380static int
3381load_global(UnpicklerObject *self)
3382{
3383 PyObject *global = NULL;
3384 PyObject *module_name;
3385 PyObject *global_name;
3386 Py_ssize_t len;
3387 char *s;
3388
3389 if ((len = unpickler_readline(self, &s)) < 0)
3390 return -1;
3391 if (len < 2)
3392 return bad_readline();
3393 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3394 if (!module_name)
3395 return -1;
3396
3397 if ((len = unpickler_readline(self, &s)) >= 0) {
3398 if (len < 2) {
3399 Py_DECREF(module_name);
3400 return bad_readline();
3401 }
3402 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3403 if (global_name) {
3404 global = find_class(self, module_name, global_name);
3405 Py_DECREF(global_name);
3406 }
3407 }
3408 Py_DECREF(module_name);
3409
3410 if (global == NULL)
3411 return -1;
3412 PDATA_PUSH(self->stack, global, -1);
3413 return 0;
3414}
3415
3416static int
3417load_persid(UnpicklerObject *self)
3418{
3419 PyObject *pid;
3420 Py_ssize_t len;
3421 char *s;
3422
3423 if (self->pers_func) {
3424 if ((len = unpickler_readline(self, &s)) < 0)
3425 return -1;
3426 if (len < 2)
3427 return bad_readline();
3428
3429 pid = PyBytes_FromStringAndSize(s, len - 1);
3430 if (pid == NULL)
3431 return -1;
3432
3433 /* Ugh... this does not leak since unpickler_call() steals the
3434 reference to pid first. */
3435 pid = unpickler_call(self, self->pers_func, pid);
3436 if (pid == NULL)
3437 return -1;
3438
3439 PDATA_PUSH(self->stack, pid, -1);
3440 return 0;
3441 }
3442 else {
3443 PyErr_SetString(UnpicklingError,
3444 "A load persistent id instruction was encountered,\n"
3445 "but no persistent_load function was specified.");
3446 return -1;
3447 }
3448}
3449
3450static int
3451load_binpersid(UnpicklerObject *self)
3452{
3453 PyObject *pid;
3454
3455 if (self->pers_func) {
3456 PDATA_POP(self->stack, pid);
3457 if (pid == NULL)
3458 return -1;
3459
3460 /* Ugh... this does not leak since unpickler_call() steals the
3461 reference to pid first. */
3462 pid = unpickler_call(self, self->pers_func, pid);
3463 if (pid == NULL)
3464 return -1;
3465
3466 PDATA_PUSH(self->stack, pid, -1);
3467 return 0;
3468 }
3469 else {
3470 PyErr_SetString(UnpicklingError,
3471 "A load persistent id instruction was encountered,\n"
3472 "but no persistent_load function was specified.");
3473 return -1;
3474 }
3475}
3476
3477static int
3478load_pop(UnpicklerObject *self)
3479{
3480 int len;
3481
3482 if ((len = self->stack->length) <= 0)
3483 return stack_underflow();
3484
3485 /* Note that we split the (pickle.py) stack into two stacks,
3486 * an object stack and a mark stack. We have to be clever and
3487 * pop the right one. We do this by looking at the top of the
3488 * mark stack.
3489 */
3490
3491 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3492 self->num_marks--;
3493 else {
3494 len--;
3495 Py_DECREF(self->stack->data[len]);
3496 self->stack->length = len;
3497 }
3498
3499 return 0;
3500}
3501
3502static int
3503load_pop_mark(UnpicklerObject *self)
3504{
3505 int i;
3506
3507 if ((i = marker(self)) < 0)
3508 return -1;
3509
3510 Pdata_clear(self->stack, i);
3511
3512 return 0;
3513}
3514
3515static int
3516load_dup(UnpicklerObject *self)
3517{
3518 PyObject *last;
3519 int len;
3520
3521 if ((len = self->stack->length) <= 0)
3522 return stack_underflow();
3523 last = self->stack->data[len - 1];
3524 PDATA_APPEND(self->stack, last, -1);
3525 return 0;
3526}
3527
3528static int
3529load_get(UnpicklerObject *self)
3530{
3531 PyObject *key, *value;
3532 Py_ssize_t len;
3533 char *s;
3534
3535 if ((len = unpickler_readline(self, &s)) < 0)
3536 return -1;
3537 if (len < 2)
3538 return bad_readline();
3539
3540 key = PyLong_FromString(s, NULL, 10);
3541 if (key == NULL)
3542 return -1;
3543
3544 value = PyDict_GetItemWithError(self->memo, key);
3545 if (value == NULL) {
3546 if (!PyErr_Occurred())
3547 PyErr_SetObject(PyExc_KeyError, key);
3548 Py_DECREF(key);
3549 return -1;
3550 }
3551 Py_DECREF(key);
3552
3553 PDATA_APPEND(self->stack, value, -1);
3554 return 0;
3555}
3556
3557static int
3558load_binget(UnpicklerObject *self)
3559{
3560 PyObject *key, *value;
3561 char *s;
3562
3563 if (unpickler_read(self, &s, 1) < 0)
3564 return -1;
3565
3566 /* Here, the unsigned cast is necessary to avoid negative values. */
3567 key = PyLong_FromLong((long)(unsigned char)s[0]);
3568 if (key == NULL)
3569 return -1;
3570
3571 value = PyDict_GetItemWithError(self->memo, key);
3572 if (value == NULL) {
3573 if (!PyErr_Occurred())
3574 PyErr_SetObject(PyExc_KeyError, key);
3575 Py_DECREF(key);
3576 return -1;
3577 }
3578 Py_DECREF(key);
3579
3580 PDATA_APPEND(self->stack, value, -1);
3581 return 0;
3582}
3583
3584static int
3585load_long_binget(UnpicklerObject *self)
3586{
3587 PyObject *key, *value;
3588 char *s;
3589 long k;
3590
3591 if (unpickler_read(self, &s, 4) < 0)
3592 return -1;
3593
3594 k = (long)(unsigned char)s[0];
3595 k |= (long)(unsigned char)s[1] << 8;
3596 k |= (long)(unsigned char)s[2] << 16;
3597 k |= (long)(unsigned char)s[3] << 24;
3598
3599 key = PyLong_FromLong(k);
3600 if (key == NULL)
3601 return -1;
3602
3603 value = PyDict_GetItemWithError(self->memo, key);
3604 if (value == NULL) {
3605 if (!PyErr_Occurred())
3606 PyErr_SetObject(PyExc_KeyError, key);
3607 Py_DECREF(key);
3608 return -1;
3609 }
3610 Py_DECREF(key);
3611
3612 PDATA_APPEND(self->stack, value, -1);
3613 return 0;
3614}
3615
3616/* Push an object from the extension registry (EXT[124]). nbytes is
3617 * the number of bytes following the opcode, holding the index (code) value.
3618 */
3619static int
3620load_extension(UnpicklerObject *self, int nbytes)
3621{
3622 char *codebytes; /* the nbytes bytes after the opcode */
3623 long code; /* calc_binint returns long */
3624 PyObject *py_code; /* code as a Python int */
3625 PyObject *obj; /* the object to push */
3626 PyObject *pair; /* (module_name, class_name) */
3627 PyObject *module_name, *class_name;
3628
3629 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3630 if (unpickler_read(self, &codebytes, nbytes) < 0)
3631 return -1;
3632 code = calc_binint(codebytes, nbytes);
3633 if (code <= 0) { /* note that 0 is forbidden */
3634 /* Corrupt or hostile pickle. */
3635 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3636 return -1;
3637 }
3638
3639 /* Look for the code in the cache. */
3640 py_code = PyLong_FromLong(code);
3641 if (py_code == NULL)
3642 return -1;
3643 obj = PyDict_GetItem(extension_cache, py_code);
3644 if (obj != NULL) {
3645 /* Bingo. */
3646 Py_DECREF(py_code);
3647 PDATA_APPEND(self->stack, obj, -1);
3648 return 0;
3649 }
3650
3651 /* Look up the (module_name, class_name) pair. */
3652 pair = PyDict_GetItem(inverted_registry, py_code);
3653 if (pair == NULL) {
3654 Py_DECREF(py_code);
3655 PyErr_Format(PyExc_ValueError, "unregistered extension "
3656 "code %ld", code);
3657 return -1;
3658 }
3659 /* Since the extension registry is manipulable via Python code,
3660 * confirm that pair is really a 2-tuple of strings.
3661 */
3662 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3663 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3664 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3665 Py_DECREF(py_code);
3666 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3667 "isn't a 2-tuple of strings", code);
3668 return -1;
3669 }
3670 /* Load the object. */
3671 obj = find_class(self, module_name, class_name);
3672 if (obj == NULL) {
3673 Py_DECREF(py_code);
3674 return -1;
3675 }
3676 /* Cache code -> obj. */
3677 code = PyDict_SetItem(extension_cache, py_code, obj);
3678 Py_DECREF(py_code);
3679 if (code < 0) {
3680 Py_DECREF(obj);
3681 return -1;
3682 }
3683 PDATA_PUSH(self->stack, obj, -1);
3684 return 0;
3685}
3686
3687static int
3688load_put(UnpicklerObject *self)
3689{
3690 PyObject *key, *value;
3691 Py_ssize_t len;
3692 char *s;
3693 int x;
3694
3695 if ((len = unpickler_readline(self, &s)) < 0)
3696 return -1;
3697 if (len < 2)
3698 return bad_readline();
3699 if ((x = self->stack->length) <= 0)
3700 return stack_underflow();
3701
3702 key = PyLong_FromString(s, NULL, 10);
3703 if (key == NULL)
3704 return -1;
3705 value = self->stack->data[x - 1];
3706
3707 x = PyDict_SetItem(self->memo, key, value);
3708 Py_DECREF(key);
3709 return x;
3710}
3711
3712static int
3713load_binput(UnpicklerObject *self)
3714{
3715 PyObject *key, *value;
3716 char *s;
3717 int x;
3718
3719 if (unpickler_read(self, &s, 1) < 0)
3720 return -1;
3721 if ((x = self->stack->length) <= 0)
3722 return stack_underflow();
3723
3724 key = PyLong_FromLong((long)(unsigned char)s[0]);
3725 if (key == NULL)
3726 return -1;
3727 value = self->stack->data[x - 1];
3728
3729 x = PyDict_SetItem(self->memo, key, value);
3730 Py_DECREF(key);
3731 return x;
3732}
3733
3734static int
3735load_long_binput(UnpicklerObject *self)
3736{
3737 PyObject *key, *value;
3738 long k;
3739 char *s;
3740 int x;
3741
3742 if (unpickler_read(self, &s, 4) < 0)
3743 return -1;
3744 if ((x = self->stack->length) <= 0)
3745 return stack_underflow();
3746
3747 k = (long)(unsigned char)s[0];
3748 k |= (long)(unsigned char)s[1] << 8;
3749 k |= (long)(unsigned char)s[2] << 16;
3750 k |= (long)(unsigned char)s[3] << 24;
3751
3752 key = PyLong_FromLong(k);
3753 if (key == NULL)
3754 return -1;
3755 value = self->stack->data[x - 1];
3756
3757 x = PyDict_SetItem(self->memo, key, value);
3758 Py_DECREF(key);
3759 return x;
3760}
3761
3762static int
3763do_append(UnpicklerObject *self, int x)
3764{
3765 PyObject *value;
3766 PyObject *list;
3767 int len, i;
3768
3769 len = self->stack->length;
3770 if (x > len || x <= 0)
3771 return stack_underflow();
3772 if (len == x) /* nothing to do */
3773 return 0;
3774
3775 list = self->stack->data[x - 1];
3776
3777 if (PyList_Check(list)) {
3778 PyObject *slice;
3779 Py_ssize_t list_len;
3780
3781 slice = Pdata_poplist(self->stack, x);
3782 if (!slice)
3783 return -1;
3784 list_len = PyList_GET_SIZE(list);
3785 i = PyList_SetSlice(list, list_len, list_len, slice);
3786 Py_DECREF(slice);
3787 return i;
3788 }
3789 else {
3790 PyObject *append_func;
3791
3792 append_func = PyObject_GetAttrString(list, "append");
3793 if (append_func == NULL)
3794 return -1;
3795 for (i = x; i < len; i++) {
3796 PyObject *result;
3797
3798 value = self->stack->data[i];
3799 result = unpickler_call(self, append_func, value);
3800 if (result == NULL) {
3801 Pdata_clear(self->stack, i + 1);
3802 self->stack->length = x;
3803 return -1;
3804 }
3805 Py_DECREF(result);
3806 }
3807 self->stack->length = x;
3808 }
3809
3810 return 0;
3811}
3812
3813static int
3814load_append(UnpicklerObject *self)
3815{
3816 return do_append(self, self->stack->length - 1);
3817}
3818
3819static int
3820load_appends(UnpicklerObject *self)
3821{
3822 return do_append(self, marker(self));
3823}
3824
3825static int
3826do_setitems(UnpicklerObject *self, int x)
3827{
3828 PyObject *value, *key;
3829 PyObject *dict;
3830 int len, i;
3831 int status = 0;
3832
3833 len = self->stack->length;
3834 if (x > len || x <= 0)
3835 return stack_underflow();
3836 if (len == x) /* nothing to do */
3837 return 0;
3838 if ((len - x) % 2 != 0) {
3839 /* Currupt or hostile pickle -- we never write one like this. */
3840 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3841 return -1;
3842 }
3843
3844 /* Here, dict does not actually need to be a PyDict; it could be anything
3845 that supports the __setitem__ attribute. */
3846 dict = self->stack->data[x - 1];
3847
3848 for (i = x + 1; i < len; i += 2) {
3849 key = self->stack->data[i - 1];
3850 value = self->stack->data[i];
3851 if (PyObject_SetItem(dict, key, value) < 0) {
3852 status = -1;
3853 break;
3854 }
3855 }
3856
3857 Pdata_clear(self->stack, x);
3858 return status;
3859}
3860
3861static int
3862load_setitem(UnpicklerObject *self)
3863{
3864 return do_setitems(self, self->stack->length - 2);
3865}
3866
3867static int
3868load_setitems(UnpicklerObject *self)
3869{
3870 return do_setitems(self, marker(self));
3871}
3872
3873static int
3874load_build(UnpicklerObject *self)
3875{
3876 PyObject *state, *inst, *slotstate;
3877 PyObject *setstate;
3878 int status = 0;
3879
3880 /* Stack is ... instance, state. We want to leave instance at
3881 * the stack top, possibly mutated via instance.__setstate__(state).
3882 */
3883 if (self->stack->length < 2)
3884 return stack_underflow();
3885
3886 PDATA_POP(self->stack, state);
3887 if (state == NULL)
3888 return -1;
3889
3890 inst = self->stack->data[self->stack->length - 1];
3891
3892 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003893 if (setstate == NULL) {
3894 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3895 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003896 else {
3897 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003898 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003899 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003900 }
3901 else {
3902 PyObject *result;
3903
3904 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003905 /* Ugh... this does not leak since unpickler_call() steals the
3906 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003907 result = unpickler_call(self, setstate, state);
3908 Py_DECREF(setstate);
3909 if (result == NULL)
3910 return -1;
3911 Py_DECREF(result);
3912 return 0;
3913 }
3914
3915 /* A default __setstate__. First see whether state embeds a
3916 * slot state dict too (a proto 2 addition).
3917 */
3918 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3919 PyObject *tmp = state;
3920
3921 state = PyTuple_GET_ITEM(tmp, 0);
3922 slotstate = PyTuple_GET_ITEM(tmp, 1);
3923 Py_INCREF(state);
3924 Py_INCREF(slotstate);
3925 Py_DECREF(tmp);
3926 }
3927 else
3928 slotstate = NULL;
3929
3930 /* Set inst.__dict__ from the state dict (if any). */
3931 if (state != Py_None) {
3932 PyObject *dict;
3933
3934 if (!PyDict_Check(state)) {
3935 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3936 goto error;
3937 }
3938 dict = PyObject_GetAttrString(inst, "__dict__");
3939 if (dict == NULL)
3940 goto error;
3941
3942 PyDict_Update(dict, state);
3943 Py_DECREF(dict);
3944 }
3945
3946 /* Also set instance attributes from the slotstate dict (if any). */
3947 if (slotstate != NULL) {
3948 PyObject *d_key, *d_value;
3949 Py_ssize_t i;
3950
3951 if (!PyDict_Check(slotstate)) {
3952 PyErr_SetString(UnpicklingError,
3953 "slot state is not a dictionary");
3954 goto error;
3955 }
3956 i = 0;
3957 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3958 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3959 goto error;
3960 }
3961 }
3962
3963 if (0) {
3964 error:
3965 status = -1;
3966 }
3967
3968 Py_DECREF(state);
3969 Py_XDECREF(slotstate);
3970 return status;
3971}
3972
3973static int
3974load_mark(UnpicklerObject *self)
3975{
3976
3977 /* Note that we split the (pickle.py) stack into two stacks, an
3978 * object stack and a mark stack. Here we push a mark onto the
3979 * mark stack.
3980 */
3981
3982 if ((self->num_marks + 1) >= self->marks_size) {
3983 size_t alloc;
3984 int *marks;
3985
3986 /* Use the size_t type to check for overflow. */
3987 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00003988 if (alloc > PY_SSIZE_T_MAX ||
3989 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003990 PyErr_NoMemory();
3991 return -1;
3992 }
3993
3994 if (self->marks == NULL)
3995 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
3996 else
3997 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
3998 if (marks == NULL) {
3999 PyErr_NoMemory();
4000 return -1;
4001 }
4002 self->marks = marks;
4003 self->marks_size = (Py_ssize_t)alloc;
4004 }
4005
4006 self->marks[self->num_marks++] = self->stack->length;
4007
4008 return 0;
4009}
4010
4011static int
4012load_reduce(UnpicklerObject *self)
4013{
4014 PyObject *callable = NULL;
4015 PyObject *argtup = NULL;
4016 PyObject *obj = NULL;
4017
4018 PDATA_POP(self->stack, argtup);
4019 if (argtup == NULL)
4020 return -1;
4021 PDATA_POP(self->stack, callable);
4022 if (callable) {
4023 obj = instantiate(callable, argtup);
4024 Py_DECREF(callable);
4025 }
4026 Py_DECREF(argtup);
4027
4028 if (obj == NULL)
4029 return -1;
4030
4031 PDATA_PUSH(self->stack, obj, -1);
4032 return 0;
4033}
4034
4035/* Just raises an error if we don't know the protocol specified. PROTO
4036 * is the first opcode for protocols >= 2.
4037 */
4038static int
4039load_proto(UnpicklerObject *self)
4040{
4041 char *s;
4042 int i;
4043
4044 if (unpickler_read(self, &s, 1) < 0)
4045 return -1;
4046
4047 i = (unsigned char)s[0];
4048 if (i <= HIGHEST_PROTOCOL)
4049 return 0;
4050
4051 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4052 return -1;
4053}
4054
4055static PyObject *
4056load(UnpicklerObject *self)
4057{
4058 PyObject *err;
4059 PyObject *value = NULL;
4060 char *s;
4061
4062 self->num_marks = 0;
4063 if (self->stack->length)
4064 Pdata_clear(self->stack, 0);
4065
4066 /* Convenient macros for the dispatch while-switch loop just below. */
4067#define OP(opcode, load_func) \
4068 case opcode: if (load_func(self) < 0) break; continue;
4069
4070#define OP_ARG(opcode, load_func, arg) \
4071 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4072
4073 while (1) {
4074 if (unpickler_read(self, &s, 1) < 0)
4075 break;
4076
4077 switch ((enum opcode)s[0]) {
4078 OP(NONE, load_none)
4079 OP(BININT, load_binint)
4080 OP(BININT1, load_binint1)
4081 OP(BININT2, load_binint2)
4082 OP(INT, load_int)
4083 OP(LONG, load_long)
4084 OP_ARG(LONG1, load_counted_long, 1)
4085 OP_ARG(LONG4, load_counted_long, 4)
4086 OP(FLOAT, load_float)
4087 OP(BINFLOAT, load_binfloat)
4088 OP(BINBYTES, load_binbytes)
4089 OP(SHORT_BINBYTES, load_short_binbytes)
4090 OP(BINSTRING, load_binstring)
4091 OP(SHORT_BINSTRING, load_short_binstring)
4092 OP(STRING, load_string)
4093 OP(UNICODE, load_unicode)
4094 OP(BINUNICODE, load_binunicode)
4095 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4096 OP_ARG(TUPLE1, load_counted_tuple, 1)
4097 OP_ARG(TUPLE2, load_counted_tuple, 2)
4098 OP_ARG(TUPLE3, load_counted_tuple, 3)
4099 OP(TUPLE, load_tuple)
4100 OP(EMPTY_LIST, load_empty_list)
4101 OP(LIST, load_list)
4102 OP(EMPTY_DICT, load_empty_dict)
4103 OP(DICT, load_dict)
4104 OP(OBJ, load_obj)
4105 OP(INST, load_inst)
4106 OP(NEWOBJ, load_newobj)
4107 OP(GLOBAL, load_global)
4108 OP(APPEND, load_append)
4109 OP(APPENDS, load_appends)
4110 OP(BUILD, load_build)
4111 OP(DUP, load_dup)
4112 OP(BINGET, load_binget)
4113 OP(LONG_BINGET, load_long_binget)
4114 OP(GET, load_get)
4115 OP(MARK, load_mark)
4116 OP(BINPUT, load_binput)
4117 OP(LONG_BINPUT, load_long_binput)
4118 OP(PUT, load_put)
4119 OP(POP, load_pop)
4120 OP(POP_MARK, load_pop_mark)
4121 OP(SETITEM, load_setitem)
4122 OP(SETITEMS, load_setitems)
4123 OP(PERSID, load_persid)
4124 OP(BINPERSID, load_binpersid)
4125 OP(REDUCE, load_reduce)
4126 OP(PROTO, load_proto)
4127 OP_ARG(EXT1, load_extension, 1)
4128 OP_ARG(EXT2, load_extension, 2)
4129 OP_ARG(EXT4, load_extension, 4)
4130 OP_ARG(NEWTRUE, load_bool, Py_True)
4131 OP_ARG(NEWFALSE, load_bool, Py_False)
4132
4133 case STOP:
4134 break;
4135
4136 case '\0':
4137 PyErr_SetNone(PyExc_EOFError);
4138 return NULL;
4139
4140 default:
4141 PyErr_Format(UnpicklingError,
4142 "invalid load key, '%c'.", s[0]);
4143 return NULL;
4144 }
4145
4146 break; /* and we are done! */
4147 }
4148
4149 /* XXX: It is not clear what this is actually for. */
4150 if ((err = PyErr_Occurred())) {
4151 if (err == PyExc_EOFError) {
4152 PyErr_SetNone(PyExc_EOFError);
4153 }
4154 return NULL;
4155 }
4156
4157 PDATA_POP(self->stack, value);
4158 return value;
4159}
4160
4161PyDoc_STRVAR(Unpickler_load_doc,
4162"load() -> object. Load a pickle."
4163"\n"
4164"Read a pickled object representation from the open file object given in\n"
4165"the constructor, and return the reconstituted object hierarchy specified\n"
4166"therein.\n");
4167
4168static PyObject *
4169Unpickler_load(UnpicklerObject *self)
4170{
4171 /* Check whether the Unpickler was initialized correctly. This prevents
4172 segfaulting if a subclass overridden __init__ with a function that does
4173 not call Unpickler.__init__(). Here, we simply ensure that self->read
4174 is not NULL. */
4175 if (self->read == NULL) {
4176 PyErr_Format(UnpicklingError,
4177 "Unpickler.__init__() was not called by %s.__init__()",
4178 Py_TYPE(self)->tp_name);
4179 return NULL;
4180 }
4181
4182 return load(self);
4183}
4184
4185/* The name of find_class() is misleading. In newer pickle protocols, this
4186 function is used for loading any global (i.e., functions), not just
4187 classes. The name is kept only for backward compatibility. */
4188
4189PyDoc_STRVAR(Unpickler_find_class_doc,
4190"find_class(module_name, global_name) -> object.\n"
4191"\n"
4192"Return an object from a specified module, importing the module if\n"
4193"necessary. Subclasses may override this method (e.g. to restrict\n"
4194"unpickling of arbitrary classes and functions).\n"
4195"\n"
4196"This method is called whenever a class or a function object is\n"
4197"needed. Both arguments passed are str objects.\n");
4198
4199static PyObject *
4200Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4201{
4202 PyObject *global;
4203 PyObject *modules_dict;
4204 PyObject *module;
4205 PyObject *module_name, *global_name;
4206
4207 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4208 &module_name, &global_name))
4209 return NULL;
4210
4211 modules_dict = PySys_GetObject("modules");
4212 if (modules_dict == NULL)
4213 return NULL;
4214
4215 module = PyDict_GetItem(modules_dict, module_name);
4216 if (module == NULL) {
4217 module = PyImport_Import(module_name);
4218 if (module == NULL)
4219 return NULL;
4220 global = PyObject_GetAttr(module, global_name);
4221 Py_DECREF(module);
4222 }
4223 else {
4224 global = PyObject_GetAttr(module, global_name);
4225 }
4226 return global;
4227}
4228
4229static struct PyMethodDef Unpickler_methods[] = {
4230 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4231 Unpickler_load_doc},
4232 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4233 Unpickler_find_class_doc},
4234 {NULL, NULL} /* sentinel */
4235};
4236
4237static void
4238Unpickler_dealloc(UnpicklerObject *self)
4239{
4240 PyObject_GC_UnTrack((PyObject *)self);
4241 Py_XDECREF(self->readline);
4242 Py_XDECREF(self->read);
4243 Py_XDECREF(self->memo);
4244 Py_XDECREF(self->stack);
4245 Py_XDECREF(self->pers_func);
4246 Py_XDECREF(self->arg);
4247 Py_XDECREF(self->last_string);
4248
4249 PyMem_Free(self->marks);
4250 free(self->encoding);
4251 free(self->errors);
4252
4253 Py_TYPE(self)->tp_free((PyObject *)self);
4254}
4255
4256static int
4257Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4258{
4259 Py_VISIT(self->readline);
4260 Py_VISIT(self->read);
4261 Py_VISIT(self->memo);
4262 Py_VISIT(self->stack);
4263 Py_VISIT(self->pers_func);
4264 Py_VISIT(self->arg);
4265 Py_VISIT(self->last_string);
4266 return 0;
4267}
4268
4269static int
4270Unpickler_clear(UnpicklerObject *self)
4271{
4272 Py_CLEAR(self->readline);
4273 Py_CLEAR(self->read);
4274 Py_CLEAR(self->memo);
4275 Py_CLEAR(self->stack);
4276 Py_CLEAR(self->pers_func);
4277 Py_CLEAR(self->arg);
4278 Py_CLEAR(self->last_string);
4279
4280 PyMem_Free(self->marks);
4281 self->marks = NULL;
4282 free(self->encoding);
4283 self->encoding = NULL;
4284 free(self->errors);
4285 self->errors = NULL;
4286
4287 return 0;
4288}
4289
4290PyDoc_STRVAR(Unpickler_doc,
4291"Unpickler(file, *, encoding='ASCII', errors='strict')"
4292"\n"
4293"This takes a binary file for reading a pickle data stream.\n"
4294"\n"
4295"The protocol version of the pickle is detected automatically, so no\n"
4296"proto argument is needed.\n"
4297"\n"
4298"The file-like object must have two methods, a read() method\n"
4299"that takes an integer argument, and a readline() method that\n"
4300"requires no arguments. Both methods should return bytes.\n"
4301"Thus file-like object can be a binary file object opened for\n"
4302"reading, a BytesIO object, or any other custom object that\n"
4303"meets this interface.\n"
4304"\n"
4305"Optional keyword arguments are encoding and errors, which are\n"
4306"used to decode 8-bit string instances pickled by Python 2.x.\n"
4307"These default to 'ASCII' and 'strict', respectively.\n");
4308
4309static int
4310Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4311{
4312 static char *kwlist[] = {"file", "encoding", "errors", 0};
4313 PyObject *file;
4314 char *encoding = NULL;
4315 char *errors = NULL;
4316
4317 /* XXX: That is an horrible error message. But, I don't know how to do
4318 better... */
4319 if (Py_SIZE(args) != 1) {
4320 PyErr_Format(PyExc_TypeError,
4321 "%s takes exactly one positional argument (%zd given)",
4322 Py_TYPE(self)->tp_name, Py_SIZE(args));
4323 return -1;
4324 }
4325
4326 /* Arguments parsing needs to be done in the __init__() method to allow
4327 subclasses to define their own __init__() method, which may (or may
4328 not) support Unpickler arguments. However, this means we need to be
4329 extra careful in the other Unpickler methods, since a subclass could
4330 forget to call Unpickler.__init__() thus breaking our internal
4331 invariants. */
4332 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4333 &file, &encoding, &errors))
4334 return -1;
4335
4336 /* In case of multiple __init__() calls, clear previous content. */
4337 if (self->read != NULL)
4338 (void)Unpickler_clear(self);
4339
4340 self->read = PyObject_GetAttrString(file, "read");
4341 self->readline = PyObject_GetAttrString(file, "readline");
4342 if (self->readline == NULL || self->read == NULL)
4343 return -1;
4344
4345 if (encoding == NULL)
4346 encoding = "ASCII";
4347 if (errors == NULL)
4348 errors = "strict";
4349
4350 self->encoding = strdup(encoding);
4351 self->errors = strdup(errors);
4352 if (self->encoding == NULL || self->errors == NULL) {
4353 PyErr_NoMemory();
4354 return -1;
4355 }
4356
4357 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4358 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4359 "persistent_load");
4360 if (self->pers_func == NULL)
4361 return -1;
4362 }
4363 else {
4364 self->pers_func = NULL;
4365 }
4366
4367 self->stack = (Pdata *)Pdata_New();
4368 if (self->stack == NULL)
4369 return -1;
4370
4371 self->memo = PyDict_New();
4372 if (self->memo == NULL)
4373 return -1;
4374
4375 return 0;
4376}
4377
4378static PyObject *
4379Unpickler_get_memo(UnpicklerObject *self)
4380{
4381 if (self->memo == NULL)
4382 PyErr_SetString(PyExc_AttributeError, "memo");
4383 else
4384 Py_INCREF(self->memo);
4385 return self->memo;
4386}
4387
4388static int
4389Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4390{
4391 PyObject *tmp;
4392
4393 if (value == NULL) {
4394 PyErr_SetString(PyExc_TypeError,
4395 "attribute deletion is not supported");
4396 return -1;
4397 }
4398 if (!PyDict_Check(value)) {
4399 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4400 return -1;
4401 }
4402
4403 tmp = self->memo;
4404 Py_INCREF(value);
4405 self->memo = value;
4406 Py_XDECREF(tmp);
4407
4408 return 0;
4409}
4410
4411static PyObject *
4412Unpickler_get_persload(UnpicklerObject *self)
4413{
4414 if (self->pers_func == NULL)
4415 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4416 else
4417 Py_INCREF(self->pers_func);
4418 return self->pers_func;
4419}
4420
4421static int
4422Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4423{
4424 PyObject *tmp;
4425
4426 if (value == NULL) {
4427 PyErr_SetString(PyExc_TypeError,
4428 "attribute deletion is not supported");
4429 return -1;
4430 }
4431 if (!PyCallable_Check(value)) {
4432 PyErr_SetString(PyExc_TypeError,
4433 "persistent_load must be a callable taking "
4434 "one argument");
4435 return -1;
4436 }
4437
4438 tmp = self->pers_func;
4439 Py_INCREF(value);
4440 self->pers_func = value;
4441 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4442
4443 return 0;
4444}
4445
4446static PyGetSetDef Unpickler_getsets[] = {
4447 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4448 {"persistent_load", (getter)Unpickler_get_persload,
4449 (setter)Unpickler_set_persload},
4450 {NULL}
4451};
4452
4453static PyTypeObject Unpickler_Type = {
4454 PyVarObject_HEAD_INIT(NULL, 0)
4455 "_pickle.Unpickler", /*tp_name*/
4456 sizeof(UnpicklerObject), /*tp_basicsize*/
4457 0, /*tp_itemsize*/
4458 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4459 0, /*tp_print*/
4460 0, /*tp_getattr*/
4461 0, /*tp_setattr*/
4462 0, /*tp_compare*/
4463 0, /*tp_repr*/
4464 0, /*tp_as_number*/
4465 0, /*tp_as_sequence*/
4466 0, /*tp_as_mapping*/
4467 0, /*tp_hash*/
4468 0, /*tp_call*/
4469 0, /*tp_str*/
4470 0, /*tp_getattro*/
4471 0, /*tp_setattro*/
4472 0, /*tp_as_buffer*/
4473 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4474 Unpickler_doc, /*tp_doc*/
4475 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4476 (inquiry)Unpickler_clear, /*tp_clear*/
4477 0, /*tp_richcompare*/
4478 0, /*tp_weaklistoffset*/
4479 0, /*tp_iter*/
4480 0, /*tp_iternext*/
4481 Unpickler_methods, /*tp_methods*/
4482 0, /*tp_members*/
4483 Unpickler_getsets, /*tp_getset*/
4484 0, /*tp_base*/
4485 0, /*tp_dict*/
4486 0, /*tp_descr_get*/
4487 0, /*tp_descr_set*/
4488 0, /*tp_dictoffset*/
4489 (initproc)Unpickler_init, /*tp_init*/
4490 PyType_GenericAlloc, /*tp_alloc*/
4491 PyType_GenericNew, /*tp_new*/
4492 PyObject_GC_Del, /*tp_free*/
4493 0, /*tp_is_gc*/
4494};
4495
4496static int
4497init_stuff(void)
4498{
4499 PyObject *copyreg;
4500
4501 copyreg = PyImport_ImportModule("copyreg");
4502 if (!copyreg)
4503 return -1;
4504
4505 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4506 if (!dispatch_table)
4507 goto error;
4508
4509 extension_registry = \
4510 PyObject_GetAttrString(copyreg, "_extension_registry");
4511 if (!extension_registry)
4512 goto error;
4513
4514 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4515 if (!inverted_registry)
4516 goto error;
4517
4518 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4519 if (!extension_cache)
4520 goto error;
4521
4522 Py_DECREF(copyreg);
4523
4524 empty_tuple = PyTuple_New(0);
4525 if (empty_tuple == NULL)
4526 return -1;
4527
4528 two_tuple = PyTuple_New(2);
4529 if (two_tuple == NULL)
4530 return -1;
4531 /* We use this temp container with no regard to refcounts, or to
4532 * keeping containees alive. Exempt from GC, because we don't
4533 * want anything looking at two_tuple() by magic.
4534 */
4535 PyObject_GC_UnTrack(two_tuple);
4536
4537 return 0;
4538
4539 error:
4540 Py_DECREF(copyreg);
4541 return -1;
4542}
4543
4544static struct PyModuleDef _picklemodule = {
4545 PyModuleDef_HEAD_INIT,
4546 "_pickle",
4547 pickle_module_doc,
4548 -1,
4549 NULL,
4550 NULL,
4551 NULL,
4552 NULL,
4553 NULL
4554};
4555
4556PyMODINIT_FUNC
4557PyInit__pickle(void)
4558{
4559 PyObject *m;
4560
4561 if (PyType_Ready(&Unpickler_Type) < 0)
4562 return NULL;
4563 if (PyType_Ready(&Pickler_Type) < 0)
4564 return NULL;
4565 if (PyType_Ready(&Pdata_Type) < 0)
4566 return NULL;
4567
4568 /* Create the module and add the functions. */
4569 m = PyModule_Create(&_picklemodule);
4570 if (m == NULL)
4571 return NULL;
4572
4573 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4574 return NULL;
4575 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4576 return NULL;
4577
4578 /* Initialize the exceptions. */
4579 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4580 if (PickleError == NULL)
4581 return NULL;
4582 PicklingError = \
4583 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4584 if (PicklingError == NULL)
4585 return NULL;
4586 UnpicklingError = \
4587 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4588 if (UnpicklingError == NULL)
4589 return NULL;
4590
4591 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4592 return NULL;
4593 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4594 return NULL;
4595 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4596 return NULL;
4597
4598 if (init_stuff() < 0)
4599 return NULL;
4600
4601 return m;
4602}