blob: 52fa15694cd6e31712fbc29d2a7eb535ff4715d7 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version numbers known to this module.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* Newest protocol version defined here. */
    DEFAULT_PROTOCOL = 3    /* NOTE(review): presumably the protocol chosen
                               when the caller does not request one --
                               confirm against the Pickler constructor. */
};
12
13
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is a character constant; the save_*() and memo_*() functions
   in this file store it in the first byte of the record they write.

   NOTE(review): the protocol 2 opcodes are spelled '\x80'..'\x8b'; whether
   those constants are negative depends on the signedness of plain char on
   the target platform -- confirm before comparing them with values that are
   not of type char. */
enum opcode {
    /* Protocols 0 and 1. */
    MARK = '(',
    STOP = '.',
    POP = '0',
    POP_MARK = '1',
    DUP = '2',
    FLOAT = 'F',
    INT = 'I',
    BININT = 'J',
    BININT1 = 'K',
    LONG = 'L',
    BININT2 = 'M',
    NONE = 'N',
    PERSID = 'P',
    BINPERSID = 'Q',
    REDUCE = 'R',
    STRING = 'S',
    BINSTRING = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE = 'V',
    BINUNICODE = 'X',
    APPEND = 'a',
    BUILD = 'b',
    GLOBAL = 'c',
    DICT = 'd',
    EMPTY_DICT = '}',
    APPENDS = 'e',
    GET = 'g',
    BINGET = 'h',
    INST = 'i',
    LONG_BINGET = 'j',
    LIST = 'l',
    EMPTY_LIST = ']',
    OBJ = 'o',
    PUT = 'p',
    BINPUT = 'q',
    LONG_BINPUT = 'r',
    SETITEM = 's',
    TUPLE = 't',
    EMPTY_TUPLE = ')',
    SETITEMS = 'u',
    BINFLOAT = 'G',

    /* Protocol 2. */
    PROTO = '\x80',
    NEWOBJ = '\x81',
    EXT1 = '\x82',
    EXT2 = '\x83',
    EXT4 = '\x84',
    TUPLE1 = '\x85',
    TUPLE2 = '\x86',
    TUPLE3 = '\x87',
    NEWTRUE = '\x88',
    NEWFALSE = '\x89',
    LONG1 = '\x8a',
    LONG4 = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES = 'B',
    SHORT_BINBYTES = 'C',
};
77
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each macro expands to a complete record: the INT opcode character 'I',
 * the digits "01" or "00", and the terminating newline.  save_bool() writes
 * them verbatim when the protocol is below 2.  Any previous definition of
 * TRUE/FALSE is discarded first.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of synch with pickle.py, but it's unclear that would
       help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures (see fast_save_enter()
       and fast_save_leave()). */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream.
       pickler_write() hands any single write larger than this straight to
       the stream instead of buffering it. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
/* XXX: Are these really necessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
/* Internal data type used as the unpickling stack.
   It is a growable array of object pointers: slots [0, length) are in use,
   and `size` slots are allocated in total (see Pdata_grow()). */
typedef struct {
    PyObject_HEAD
    int length;         /* number of initial slots in data currently used */
    int size;           /* number of slots in data allocated */
    PyObject **data;    /* the slot array; allocated with PyMem_Malloc() in
                           Pdata_New() and released in Pdata_dealloc() */
} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
/* Type object for Pdata.  Only the name, the instance size and the
   destructor are given here; all remaining slots are left to the
   zero-initialization of the static object. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   D is a Pdata*, O the object and ER the value to return on failure.
   When the push fails, the macro makes the *enclosing function* return ER;
   the reference to O is not released on that path. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   Same arguments and failure behavior as PDATA_PUSH.  The extra reference
   is taken before the push and is not dropped again when the push fails. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached one-element argument tuple used by
                                   pickler_call(); NULL until first needed. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with
                                   self-referential objects. */
    int fast_nesting;           /* Current nesting depth in fast mode; set to
                                   -1 by fast_save_enter() on an error exit. */
    PyObject *fast_memo;        /* Dictionary of the containers currently
                                   being saved, created lazily by
                                   fast_save_enter(); can be NULL. */
} PicklerObject;
320
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the input stream */
    PyObject *read;             /* read() method of the input stream */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached one-element argument tuple used by
                                   unpickler_call(); NULL until first needed. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object returned
                                   by the read() or readline() method; keeps
                                   the buffer handed out to callers alive. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj) puts obj into slot 0 of the one-element tuple cached in
   self->arg, creating the tuple on first use and releasing whatever object
   occupied the slot before.  If the tuple cannot be created, obj is released
   instead and self->arg stays NULL.  Either way the caller's reference to
   obj is consumed. */

#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* FREE_ARG_TUP(self) gives up the cached tuple when its reference count is
   above 1, i.e. when something besides self->arg still refers to it; a new
   tuple is then created by the next ARG_TUP().  self->arg must not be NULL
   when this macro is used. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
   XXX: Does caching the argument tuple provide any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
379 one second (wall clock time). Either ways, the loading time a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
424 if (s == NULL) {
425 if (!(self->buf_size))
426 return 0;
427 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
428 if (data == NULL)
429 return -1;
430 }
431 else {
432 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
433 if (pickler_write(self, NULL, 0) < 0)
434 return -1;
435 }
436
437 if (n > WRITE_BUF_SIZE) {
438 if (!(data = PyBytes_FromStringAndSize(s, n)))
439 return -1;
440 }
441 else {
442 memcpy(self->write_buf + self->buf_size, s, n);
443 self->buf_size += n;
444 return n;
445 }
446 }
447
448 /* object with write method */
449 result = pickler_call(self, self->write, data);
450 if (result == NULL)
451 return -1;
452
453 Py_DECREF(result);
454 self->buf_size = 0;
455 return n;
456}
457
458/* XXX: These read/readline functions ought to be optimized. Buffered I/O
459 might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
461 */
462
463/* Read at least n characters from the input stream and set s to the current
464 reading position. */
465static Py_ssize_t
466unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
467{
468 PyObject *len;
469 PyObject *data;
470
471 len = PyLong_FromSsize_t(n);
472 if (len == NULL)
473 return -1;
474
475 data = unpickler_call(self, self->read, len);
476 if (data == NULL)
477 return -1;
478
479 /* XXX: Should bytearray be supported too? */
480 if (!PyBytes_Check(data)) {
481 PyErr_SetString(PyExc_ValueError,
482 "read() from the underlying stream did not"
483 "return bytes");
484 return -1;
485 }
486
487 Py_XDECREF(self->last_string);
488 self->last_string = data;
489
490 if (!(*s = PyBytes_AS_STRING(data)))
491 return -1;
492
493 return n;
494}
495
496static Py_ssize_t
497unpickler_readline(UnpicklerObject *self, char **s)
498{
499 PyObject *data;
500
501 data = PyObject_CallObject(self->readline, empty_tuple);
502 if (data == NULL)
503 return -1;
504
505 /* XXX: Should bytearray be supported too? */
506 if (!PyBytes_Check(data)) {
507 PyErr_SetString(PyExc_ValueError,
508 "readline() from the underlying stream did not"
509 "return bytes");
510 return -1;
511 }
512
513 Py_XDECREF(self->last_string);
514 self->last_string = data;
515
516 if (!(*s = PyBytes_AS_STRING(data)))
517 return -1;
518
519 return PyBytes_GET_SIZE(data);
520}
521
522/* Generate a GET opcode for an object stored in the memo. The 'key' argument
523 should be the address of the object as returned by PyLong_FromVoidPtr(). */
524static int
525memo_get(PicklerObject *self, PyObject *key)
526{
527 PyObject *value;
528 PyObject *memo_id;
529 long x;
530 char pdata[30];
531 int len;
532
533 value = PyDict_GetItemWithError(self->memo, key);
534 if (value == NULL) {
535 if (!PyErr_Occurred())
536 PyErr_SetObject(PyExc_KeyError, key);
537 return -1;
538 }
539
540 memo_id = PyTuple_GetItem(value, 0);
541 if (memo_id == NULL)
542 return -1;
543
544 if (!PyLong_Check(memo_id)) {
545 PyErr_SetString(PicklingError, "memo id must be an integer");
546 return -1;
547 }
548 x = PyLong_AsLong(memo_id);
549 if (x == -1 && PyErr_Occurred())
550 return -1;
551
552 if (!self->bin) {
553 pdata[0] = GET;
554 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
555 len = (int)strlen(pdata);
556 }
557 else {
558 if (x < 256) {
559 pdata[0] = BINGET;
560 pdata[1] = (unsigned char)(x & 0xff);
561 len = 2;
562 }
563 else if (x <= 0xffffffffL) {
564 pdata[0] = LONG_BINGET;
565 pdata[1] = (unsigned char)(x & 0xff);
566 pdata[2] = (unsigned char)((x >> 8) & 0xff);
567 pdata[3] = (unsigned char)((x >> 16) & 0xff);
568 pdata[4] = (unsigned char)((x >> 24) & 0xff);
569 len = 5;
570 }
571 else { /* unlikely */
572 PyErr_SetString(PicklingError,
573 "memo id too large for LONG_BINGET");
574 return -1;
575 }
576 }
577
578 if (pickler_write(self, pdata, len) < 0)
579 return -1;
580
581 return 0;
582}
583
584/* Store an object in the memo, assign it a new unique ID based on the number
585 of objects currently stored in the memo and generate a PUT opcode. */
586static int
587memo_put(PicklerObject *self, PyObject *obj)
588{
589 PyObject *key = NULL;
590 PyObject *memo_id = NULL;
591 PyObject *tuple = NULL;
592 long x;
593 char pdata[30];
594 int len;
595 int status = 0;
596
597 if (self->fast)
598 return 0;
599
600 key = PyLong_FromVoidPtr(obj);
601 if (key == NULL)
602 goto error;
603 if ((x = PyDict_Size(self->memo)) < 0)
604 goto error;
605 memo_id = PyLong_FromLong(x);
606 if (memo_id == NULL)
607 goto error;
608 tuple = PyTuple_New(2);
609 if (tuple == NULL)
610 goto error;
611
612 Py_INCREF(memo_id);
613 PyTuple_SET_ITEM(tuple, 0, memo_id);
614 Py_INCREF(obj);
615 PyTuple_SET_ITEM(tuple, 1, obj);
616 if (PyDict_SetItem(self->memo, key, tuple) < 0)
617 goto error;
618
619 if (!self->bin) {
620 pdata[0] = PUT;
621 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
622 len = strlen(pdata);
623 }
624 else {
625 if (x < 256) {
626 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000627 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000628 len = 2;
629 }
630 else if (x <= 0xffffffffL) {
631 pdata[0] = LONG_BINPUT;
632 pdata[1] = (unsigned char)(x & 0xff);
633 pdata[2] = (unsigned char)((x >> 8) & 0xff);
634 pdata[3] = (unsigned char)((x >> 16) & 0xff);
635 pdata[4] = (unsigned char)((x >> 24) & 0xff);
636 len = 5;
637 }
638 else { /* unlikely */
639 PyErr_SetString(PicklingError,
640 "memo id too large for LONG_BINPUT");
641 return -1;
642 }
643 }
644
645 if (pickler_write(self, pdata, len) < 0)
646 goto error;
647
648 if (0) {
649 error:
650 status = -1;
651 }
652
653 Py_XDECREF(key);
654 Py_XDECREF(memo_id);
655 Py_XDECREF(tuple);
656
657 return status;
658}
659
660static PyObject *
661whichmodule(PyObject *global, PyObject *global_name)
662{
663 Py_ssize_t i, j;
664 static PyObject *module_str = NULL;
665 static PyObject *main_str = NULL;
666 PyObject *module_name;
667 PyObject *modules_dict;
668 PyObject *module;
669 PyObject *obj;
670
671 if (module_str == NULL) {
672 module_str = PyUnicode_InternFromString("__module__");
673 if (module_str == NULL)
674 return NULL;
675 main_str = PyUnicode_InternFromString("__main__");
676 if (main_str == NULL)
677 return NULL;
678 }
679
680 module_name = PyObject_GetAttr(global, module_str);
681
682 /* In some rare cases (e.g., random.getrandbits), __module__ can be
683 None. If it is so, then search sys.modules for the module of
684 global. */
685 if (module_name == Py_None) {
686 Py_DECREF(module_name);
687 goto search;
688 }
689
690 if (module_name) {
691 return module_name;
692 }
693 if (PyErr_ExceptionMatches(PyExc_AttributeError))
694 PyErr_Clear();
695 else
696 return NULL;
697
698 search:
699 modules_dict = PySys_GetObject("modules");
700 if (modules_dict == NULL)
701 return NULL;
702
703 i = 0;
704 module_name = NULL;
705 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
706 if (PyObject_Compare(module_name, main_str) == 0)
707 continue;
708
709 obj = PyObject_GetAttr(module, global_name);
710 if (obj == NULL) {
711 if (PyErr_ExceptionMatches(PyExc_AttributeError))
712 PyErr_Clear();
713 else
714 return NULL;
715 continue;
716 }
717
718 if (obj != global) {
719 Py_DECREF(obj);
720 continue;
721 }
722
723 Py_DECREF(obj);
724 break;
725 }
726
727 /* If no module is found, use __main__. */
728 if (!j) {
729 module_name = main_str;
730 }
731
732 Py_INCREF(module_name);
733 return module_name;
734}
735
736/* fast_save_enter() and fast_save_leave() are guards against recursive
737 objects when Pickler is used with the "fast mode" (i.e., with object
738 memoization disabled). If the nesting of a list or dict object exceed
739 FAST_NESTING_LIMIT, these guards will start keeping an internal
740 reference to the seen list or dict objects and check whether these objects
741 are recursive. These are not strictly necessary, since save() has a
742 hard-coded recursion limit, but they give a nicer error message than the
743 typical RuntimeError. */
744static int
745fast_save_enter(PicklerObject *self, PyObject *obj)
746{
747 /* if fast_nesting < 0, we're doing an error exit. */
748 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
749 PyObject *key = NULL;
750 if (self->fast_memo == NULL) {
751 self->fast_memo = PyDict_New();
752 if (self->fast_memo == NULL) {
753 self->fast_nesting = -1;
754 return 0;
755 }
756 }
757 key = PyLong_FromVoidPtr(obj);
758 if (key == NULL)
759 return 0;
760 if (PyDict_GetItem(self->fast_memo, key)) {
761 Py_DECREF(key);
762 PyErr_Format(PyExc_ValueError,
763 "fast mode: can't pickle cyclic objects "
764 "including object type %.200s at %p",
765 obj->ob_type->tp_name, obj);
766 self->fast_nesting = -1;
767 return 0;
768 }
769 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
770 Py_DECREF(key);
771 self->fast_nesting = -1;
772 return 0;
773 }
774 Py_DECREF(key);
775 }
776 return 1;
777}
778
779static int
780fast_save_leave(PicklerObject *self, PyObject *obj)
781{
782 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
783 PyObject *key = PyLong_FromVoidPtr(obj);
784 if (key == NULL)
785 return 0;
786 if (PyDict_DelItem(self->fast_memo, key) < 0) {
787 Py_DECREF(key);
788 return 0;
789 }
790 Py_DECREF(key);
791 }
792 return 1;
793}
794
795static int
796save_none(PicklerObject *self, PyObject *obj)
797{
798 const char none_op = NONE;
799 if (pickler_write(self, &none_op, 1) < 0)
800 return -1;
801
802 return 0;
803}
804
805static int
806save_bool(PicklerObject *self, PyObject *obj)
807{
808 static const char *buf[2] = { FALSE, TRUE };
809 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
810 int p = (obj == Py_True);
811
812 if (self->proto >= 2) {
813 const char bool_op = p ? NEWTRUE : NEWFALSE;
814 if (pickler_write(self, &bool_op, 1) < 0)
815 return -1;
816 }
817 else if (pickler_write(self, buf[p], len[p]) < 0)
818 return -1;
819
820 return 0;
821}
822
823static int
824save_int(PicklerObject *self, long x)
825{
826 char pdata[32];
827 int len = 0;
828
829 if (!self->bin
830#if SIZEOF_LONG > 4
831 || x > 0x7fffffffL || x < -0x80000000L
832#endif
833 ) {
834 /* Text-mode pickle, or long too big to fit in the 4-byte
835 * signed BININT format: store as a string.
836 */
837 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
838 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
839 if (pickler_write(self, pdata, strlen(pdata)) < 0)
840 return -1;
841 }
842 else {
843 /* Binary pickle and x fits in a signed 4-byte int. */
844 pdata[1] = (unsigned char)(x & 0xff);
845 pdata[2] = (unsigned char)((x >> 8) & 0xff);
846 pdata[3] = (unsigned char)((x >> 16) & 0xff);
847 pdata[4] = (unsigned char)((x >> 24) & 0xff);
848
849 if ((pdata[4] == 0) && (pdata[3] == 0)) {
850 if (pdata[2] == 0) {
851 pdata[0] = BININT1;
852 len = 2;
853 }
854 else {
855 pdata[0] = BININT2;
856 len = 3;
857 }
858 }
859 else {
860 pdata[0] = BININT;
861 len = 5;
862 }
863
864 if (pickler_write(self, pdata, len) < 0)
865 return -1;
866 }
867
868 return 0;
869}
870
871static int
872save_long(PicklerObject *self, PyObject *obj)
873{
874 PyObject *repr = NULL;
875 Py_ssize_t size;
876 long val = PyLong_AsLong(obj);
877 int status = 0;
878
879 const char long_op = LONG;
880
881 if (val == -1 && PyErr_Occurred()) {
882 /* out of range for int pickling */
883 PyErr_Clear();
884 }
885 else
886 return save_int(self, val);
887
888 if (self->proto >= 2) {
889 /* Linear-time pickling. */
890 size_t nbits;
891 size_t nbytes;
892 unsigned char *pdata;
893 char header[5];
894 int i;
895 int sign = _PyLong_Sign(obj);
896
897 if (sign == 0) {
898 header[0] = LONG1;
899 header[1] = 0; /* It's 0 -- an empty bytestring. */
900 if (pickler_write(self, header, 2) < 0)
901 goto error;
902 return 0;
903 }
904 nbits = _PyLong_NumBits(obj);
905 if (nbits == (size_t)-1 && PyErr_Occurred())
906 goto error;
907 /* How many bytes do we need? There are nbits >> 3 full
908 * bytes of data, and nbits & 7 leftover bits. If there
909 * are any leftover bits, then we clearly need another
910 * byte. Wnat's not so obvious is that we *probably*
911 * need another byte even if there aren't any leftovers:
912 * the most-significant bit of the most-significant byte
913 * acts like a sign bit, and it's usually got a sense
914 * opposite of the one we need. The exception is longs
915 * of the form -(2**(8*j-1)) for j > 0. Such a long is
916 * its own 256's-complement, so has the right sign bit
917 * even without the extra byte. That's a pain to check
918 * for in advance, though, so we always grab an extra
919 * byte at the start, and cut it back later if possible.
920 */
921 nbytes = (nbits >> 3) + 1;
922 if (nbytes > INT_MAX) {
923 PyErr_SetString(PyExc_OverflowError,
924 "long too large to pickle");
925 goto error;
926 }
927 repr = PyUnicode_FromStringAndSize(NULL, (int)nbytes);
928 if (repr == NULL)
929 goto error;
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000930 pdata = (unsigned char *)_PyUnicode_AsString(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000931 i = _PyLong_AsByteArray((PyLongObject *)obj,
932 pdata, nbytes,
933 1 /* little endian */ , 1 /* signed */ );
934 if (i < 0)
935 goto error;
936 /* If the long is negative, this may be a byte more than
937 * needed. This is so iff the MSB is all redundant sign
938 * bits.
939 */
940 if (sign < 0 &&
941 nbytes > 1 &&
942 pdata[nbytes - 1] == 0xff &&
943 (pdata[nbytes - 2] & 0x80) != 0) {
944 nbytes--;
945 }
946
947 if (nbytes < 256) {
948 header[0] = LONG1;
949 header[1] = (unsigned char)nbytes;
950 size = 2;
951 }
952 else {
953 header[0] = LONG4;
954 size = (int)nbytes;
955 for (i = 1; i < 5; i++) {
956 header[i] = (unsigned char)(size & 0xff);
957 size >>= 8;
958 }
959 size = 5;
960 }
961 if (pickler_write(self, header, size) < 0 ||
962 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
963 goto error;
964 }
965 else {
966 char *string;
967
968 /* proto < 2: write the repr and newline. This is quadratic-time
969 (in the number of digits), in both directions. */
970
971 repr = PyObject_Repr(obj);
972 if (repr == NULL)
973 goto error;
974
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000975 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000976 if (string == NULL)
977 goto error;
978
979 if (pickler_write(self, &long_op, 1) < 0 ||
980 pickler_write(self, string, size) < 0 ||
981 pickler_write(self, "\n", 1) < 0)
982 goto error;
983 }
984
985 if (0) {
986 error:
987 status = -1;
988 }
989 Py_XDECREF(repr);
990
991 return status;
992}
993
994static int
995save_float(PicklerObject *self, PyObject *obj)
996{
997 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
998
999 if (self->bin) {
1000 char pdata[9];
1001 pdata[0] = BINFLOAT;
1002 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1003 return -1;
1004 if (pickler_write(self, pdata, 9) < 0)
1005 return -1;
1006 }
1007 else {
1008 char pdata[250];
1009 pdata[0] = FLOAT;
1010 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1011 /* Extend the formatted string with a newline character */
1012 strcat(pdata, "\n");
1013
1014 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1015 return -1;
1016 }
1017
1018 return 0;
1019}
1020
1021static int
1022save_bytes(PicklerObject *self, PyObject *obj)
1023{
1024 if (self->proto < 3) {
1025 /* Older pickle protocols do not have an opcode for pickling bytes
1026 objects. Therefore, we need to fake the copy protocol (i.e.,
1027 the __reduce__ method) to permit bytes object unpickling. */
1028 PyObject *reduce_value = NULL;
1029 PyObject *bytelist = NULL;
1030 int status;
1031
1032 bytelist = PySequence_List(obj);
1033 if (bytelist == NULL)
1034 return -1;
1035
1036 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1037 bytelist);
1038 if (reduce_value == NULL) {
1039 Py_DECREF(bytelist);
1040 return -1;
1041 }
1042
1043 /* save_reduce() will memoize the object automatically. */
1044 status = save_reduce(self, reduce_value, obj);
1045 Py_DECREF(reduce_value);
1046 Py_DECREF(bytelist);
1047 return status;
1048 }
1049 else {
1050 Py_ssize_t size;
1051 char header[5];
1052 int len;
1053
1054 size = PyBytes_Size(obj);
1055 if (size < 0)
1056 return -1;
1057
1058 if (size < 256) {
1059 header[0] = SHORT_BINBYTES;
1060 header[1] = (unsigned char)size;
1061 len = 2;
1062 }
1063 else if (size <= 0xffffffffL) {
1064 header[0] = BINBYTES;
1065 header[1] = (unsigned char)(size & 0xff);
1066 header[2] = (unsigned char)((size >> 8) & 0xff);
1067 header[3] = (unsigned char)((size >> 16) & 0xff);
1068 header[4] = (unsigned char)((size >> 24) & 0xff);
1069 len = 5;
1070 }
1071 else {
1072 return -1; /* string too large */
1073 }
1074
1075 if (pickler_write(self, header, len) < 0)
1076 return -1;
1077
1078 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1079 return -1;
1080
1081 if (memo_put(self, obj) < 0)
1082 return -1;
1083
1084 return 0;
1085 }
1086}
1087
1088/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1089 backslash and newline characters to \uXXXX escapes. */
1090static PyObject *
1091raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1092{
1093 PyObject *repr, *result;
1094 char *p;
1095 char *q;
1096
1097 static const char *hexdigits = "0123456789abcdef";
1098
1099#ifdef Py_UNICODE_WIDE
1100 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1101#else
1102 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1103#endif
1104 if (repr == NULL)
1105 return NULL;
1106 if (size == 0)
1107 goto done;
1108
1109 p = q = PyBytes_AS_STRING(repr);
1110 while (size-- > 0) {
1111 Py_UNICODE ch = *s++;
1112#ifdef Py_UNICODE_WIDE
1113 /* Map 32-bit characters to '\Uxxxxxxxx' */
1114 if (ch >= 0x10000) {
1115 *p++ = '\\';
1116 *p++ = 'U';
1117 *p++ = hexdigits[(ch >> 28) & 0xf];
1118 *p++ = hexdigits[(ch >> 24) & 0xf];
1119 *p++ = hexdigits[(ch >> 20) & 0xf];
1120 *p++ = hexdigits[(ch >> 16) & 0xf];
1121 *p++ = hexdigits[(ch >> 12) & 0xf];
1122 *p++ = hexdigits[(ch >> 8) & 0xf];
1123 *p++ = hexdigits[(ch >> 4) & 0xf];
1124 *p++ = hexdigits[ch & 15];
1125 }
1126 else
1127#endif
1128 /* Map 16-bit characters to '\uxxxx' */
1129 if (ch >= 256 || ch == '\\' || ch == '\n') {
1130 *p++ = '\\';
1131 *p++ = 'u';
1132 *p++ = hexdigits[(ch >> 12) & 0xf];
1133 *p++ = hexdigits[(ch >> 8) & 0xf];
1134 *p++ = hexdigits[(ch >> 4) & 0xf];
1135 *p++ = hexdigits[ch & 15];
1136 }
1137 /* Copy everything else as-is */
1138 else
1139 *p++ = (char) ch;
1140 }
1141 size = p - q;
1142
1143 done:
1144 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1145 Py_DECREF(repr);
1146 return result;
1147}
1148
1149static int
1150save_unicode(PicklerObject *self, PyObject *obj)
1151{
1152 Py_ssize_t size;
1153 PyObject *encoded = NULL;
1154
1155 if (self->bin) {
1156 char pdata[5];
1157
1158 encoded = PyUnicode_AsUTF8String(obj);
1159 if (encoded == NULL)
1160 goto error;
1161
1162 size = PyBytes_GET_SIZE(encoded);
1163 if (size < 0 || size > 0xffffffffL)
1164 goto error; /* string too large */
1165
1166 pdata[0] = BINUNICODE;
1167 pdata[1] = (unsigned char)(size & 0xff);
1168 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1169 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1170 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1171
1172 if (pickler_write(self, pdata, 5) < 0)
1173 goto error;
1174
1175 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1176 goto error;
1177 }
1178 else {
1179 const char unicode_op = UNICODE;
1180
1181 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1182 PyUnicode_GET_SIZE(obj));
1183 if (encoded == NULL)
1184 goto error;
1185
1186 if (pickler_write(self, &unicode_op, 1) < 0)
1187 goto error;
1188
1189 size = PyBytes_GET_SIZE(encoded);
1190 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1191 goto error;
1192
1193 if (pickler_write(self, "\n", 1) < 0)
1194 goto error;
1195 }
1196 if (memo_put(self, obj) < 0)
1197 goto error;
1198
1199 Py_DECREF(encoded);
1200 return 0;
1201
1202 error:
1203 Py_XDECREF(encoded);
1204 return -1;
1205}
1206
1207/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1208static int
1209store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1210{
1211 int i;
1212
1213 assert(PyTuple_Size(t) == len);
1214
1215 for (i = 0; i < len; i++) {
1216 PyObject *element = PyTuple_GET_ITEM(t, i);
1217
1218 if (element == NULL)
1219 return -1;
1220 if (save(self, element, 0) < 0)
1221 return -1;
1222 }
1223
1224 return 0;
1225}
1226
1227/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1228 * used across protocols to minimize the space needed to pickle them.
1229 * Tuples are also the only builtin immutable type that can be recursive
1230 * (a tuple can be reached from itself), and that requires some subtle
1231 * magic so that it works in all cases. IOW, this is a long routine.
1232 */
1233static int
1234save_tuple(PicklerObject *self, PyObject *obj)
1235{
1236 PyObject *memo_key = NULL;
1237 int len, i;
1238 int status = 0;
1239
1240 const char mark_op = MARK;
1241 const char tuple_op = TUPLE;
1242 const char pop_op = POP;
1243 const char pop_mark_op = POP_MARK;
1244 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1245
1246 if ((len = PyTuple_Size(obj)) < 0)
1247 return -1;
1248
1249 if (len == 0) {
1250 char pdata[2];
1251
1252 if (self->proto) {
1253 pdata[0] = EMPTY_TUPLE;
1254 len = 1;
1255 }
1256 else {
1257 pdata[0] = MARK;
1258 pdata[1] = TUPLE;
1259 len = 2;
1260 }
1261 if (pickler_write(self, pdata, len) < 0)
1262 return -1;
1263 return 0;
1264 }
1265
1266 /* id(tuple) isn't in the memo now. If it shows up there after
1267 * saving the tuple elements, the tuple must be recursive, in
1268 * which case we'll pop everything we put on the stack, and fetch
1269 * its value from the memo.
1270 */
1271 memo_key = PyLong_FromVoidPtr(obj);
1272 if (memo_key == NULL)
1273 return -1;
1274
1275 if (len <= 3 && self->proto >= 2) {
1276 /* Use TUPLE{1,2,3} opcodes. */
1277 if (store_tuple_elements(self, obj, len) < 0)
1278 goto error;
1279
1280 if (PyDict_GetItem(self->memo, memo_key)) {
1281 /* pop the len elements */
1282 for (i = 0; i < len; i++)
1283 if (pickler_write(self, &pop_op, 1) < 0)
1284 goto error;
1285 /* fetch from memo */
1286 if (memo_get(self, memo_key) < 0)
1287 goto error;
1288
1289 Py_DECREF(memo_key);
1290 return 0;
1291 }
1292 else { /* Not recursive. */
1293 if (pickler_write(self, len2opcode + len, 1) < 0)
1294 goto error;
1295 }
1296 goto memoize;
1297 }
1298
1299 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1300 * Generate MARK e1 e2 ... TUPLE
1301 */
1302 if (pickler_write(self, &mark_op, 1) < 0)
1303 goto error;
1304
1305 if (store_tuple_elements(self, obj, len) < 0)
1306 goto error;
1307
1308 if (PyDict_GetItem(self->memo, memo_key)) {
1309 /* pop the stack stuff we pushed */
1310 if (self->bin) {
1311 if (pickler_write(self, &pop_mark_op, 1) < 0)
1312 goto error;
1313 }
1314 else {
1315 /* Note that we pop one more than len, to remove
1316 * the MARK too.
1317 */
1318 for (i = 0; i <= len; i++)
1319 if (pickler_write(self, &pop_op, 1) < 0)
1320 goto error;
1321 }
1322 /* fetch from memo */
1323 if (memo_get(self, memo_key) < 0)
1324 goto error;
1325
1326 Py_DECREF(memo_key);
1327 return 0;
1328 }
1329 else { /* Not recursive. */
1330 if (pickler_write(self, &tuple_op, 1) < 0)
1331 goto error;
1332 }
1333
1334 memoize:
1335 if (memo_put(self, obj) < 0)
1336 goto error;
1337
1338 if (0) {
1339 error:
1340 status = -1;
1341 }
1342
1343 Py_DECREF(memo_key);
1344 return status;
1345}
1346
1347/* iter is an iterator giving items, and we batch up chunks of
1348 * MARK item item ... item APPENDS
1349 * opcode sequences. Calling code should have arranged to first create an
1350 * empty list, or list-like object, for the APPENDS to operate on.
1351 * Returns 0 on success, <0 on error.
1352 */
1353static int
1354batch_list(PicklerObject *self, PyObject *iter)
1355{
1356 PyObject *obj;
1357 PyObject *slice[BATCHSIZE];
1358 int i, n;
1359
1360 const char mark_op = MARK;
1361 const char append_op = APPEND;
1362 const char appends_op = APPENDS;
1363
1364 assert(iter != NULL);
1365
1366 /* XXX: I think this function could be made faster by avoiding the
1367 iterator interface and fetching objects directly from list using
1368 PyList_GET_ITEM.
1369 */
1370
1371 if (self->proto == 0) {
1372 /* APPENDS isn't available; do one at a time. */
1373 for (;;) {
1374 obj = PyIter_Next(iter);
1375 if (obj == NULL) {
1376 if (PyErr_Occurred())
1377 return -1;
1378 break;
1379 }
1380 i = save(self, obj, 0);
1381 Py_DECREF(obj);
1382 if (i < 0)
1383 return -1;
1384 if (pickler_write(self, &append_op, 1) < 0)
1385 return -1;
1386 }
1387 return 0;
1388 }
1389
1390 /* proto > 0: write in batches of BATCHSIZE. */
1391 do {
1392 /* Get next group of (no more than) BATCHSIZE elements. */
1393 for (n = 0; n < BATCHSIZE; n++) {
1394 obj = PyIter_Next(iter);
1395 if (obj == NULL) {
1396 if (PyErr_Occurred())
1397 goto error;
1398 break;
1399 }
1400 slice[n] = obj;
1401 }
1402
1403 if (n > 1) {
1404 /* Pump out MARK, slice[0:n], APPENDS. */
1405 if (pickler_write(self, &mark_op, 1) < 0)
1406 goto error;
1407 for (i = 0; i < n; i++) {
1408 if (save(self, slice[i], 0) < 0)
1409 goto error;
1410 }
1411 if (pickler_write(self, &appends_op, 1) < 0)
1412 goto error;
1413 }
1414 else if (n == 1) {
1415 if (save(self, slice[0], 0) < 0 ||
1416 pickler_write(self, &append_op, 1) < 0)
1417 goto error;
1418 }
1419
1420 for (i = 0; i < n; i++) {
1421 Py_DECREF(slice[i]);
1422 }
1423 } while (n == BATCHSIZE);
1424 return 0;
1425
1426 error:
1427 while (--n >= 0) {
1428 Py_DECREF(slice[n]);
1429 }
1430 return -1;
1431}
1432
1433static int
1434save_list(PicklerObject *self, PyObject *obj)
1435{
1436 PyObject *iter;
1437 char header[3];
1438 int len;
1439 int status = 0;
1440
1441 if (self->fast && !fast_save_enter(self, obj))
1442 goto error;
1443
1444 /* Create an empty list. */
1445 if (self->bin) {
1446 header[0] = EMPTY_LIST;
1447 len = 1;
1448 }
1449 else {
1450 header[0] = MARK;
1451 header[1] = LIST;
1452 len = 2;
1453 }
1454
1455 if (pickler_write(self, header, len) < 0)
1456 goto error;
1457
1458 /* Get list length, and bow out early if empty. */
1459 if ((len = PyList_Size(obj)) < 0)
1460 goto error;
1461
1462 if (memo_put(self, obj) < 0)
1463 goto error;
1464
1465 if (len != 0) {
1466 /* Save the list elements. */
1467 iter = PyObject_GetIter(obj);
1468 if (iter == NULL)
1469 goto error;
1470 status = batch_list(self, iter);
1471 Py_DECREF(iter);
1472 }
1473
1474 if (0) {
1475 error:
1476 status = -1;
1477 }
1478
1479 if (self->fast && !fast_save_leave(self, obj))
1480 status = -1;
1481
1482 return status;
1483}
1484
1485/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1486 * MARK key value ... key value SETITEMS
1487 * opcode sequences. Calling code should have arranged to first create an
1488 * empty dict, or dict-like object, for the SETITEMS to operate on.
1489 * Returns 0 on success, <0 on error.
1490 *
1491 * This is very much like batch_list(). The difference between saving
1492 * elements directly, and picking apart two-tuples, is so long-winded at
1493 * the C level, though, that attempts to combine these routines were too
1494 * ugly to bear.
1495 */
1496static int
1497batch_dict(PicklerObject *self, PyObject *iter)
1498{
1499 PyObject *obj;
1500 PyObject *slice[BATCHSIZE];
1501 int i, n;
1502
1503 const char mark_op = MARK;
1504 const char setitem_op = SETITEM;
1505 const char setitems_op = SETITEMS;
1506
1507 assert(iter != NULL);
1508
1509 if (self->proto == 0) {
1510 /* SETITEMS isn't available; do one at a time. */
1511 for (;;) {
1512 obj = PyIter_Next(iter);
1513 if (obj == NULL) {
1514 if (PyErr_Occurred())
1515 return -1;
1516 break;
1517 }
1518 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1519 PyErr_SetString(PyExc_TypeError, "dict items "
1520 "iterator must return 2-tuples");
1521 return -1;
1522 }
1523 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1524 if (i >= 0)
1525 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1526 Py_DECREF(obj);
1527 if (i < 0)
1528 return -1;
1529 if (pickler_write(self, &setitem_op, 1) < 0)
1530 return -1;
1531 }
1532 return 0;
1533 }
1534
1535 /* proto > 0: write in batches of BATCHSIZE. */
1536 do {
1537 /* Get next group of (no more than) BATCHSIZE elements. */
1538 for (n = 0; n < BATCHSIZE; n++) {
1539 obj = PyIter_Next(iter);
1540 if (obj == NULL) {
1541 if (PyErr_Occurred())
1542 goto error;
1543 break;
1544 }
1545 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1546 PyErr_SetString(PyExc_TypeError, "dict items "
1547 "iterator must return 2-tuples");
1548 goto error;
1549 }
1550 slice[n] = obj;
1551 }
1552
1553 if (n > 1) {
1554 /* Pump out MARK, slice[0:n], SETITEMS. */
1555 if (pickler_write(self, &mark_op, 1) < 0)
1556 goto error;
1557 for (i = 0; i < n; i++) {
1558 obj = slice[i];
1559 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1560 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1561 goto error;
1562 }
1563 if (pickler_write(self, &setitems_op, 1) < 0)
1564 goto error;
1565 }
1566 else if (n == 1) {
1567 obj = slice[0];
1568 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1569 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0 ||
1570 pickler_write(self, &setitem_op, 1) < 0)
1571 goto error;
1572 }
1573
1574 for (i = 0; i < n; i++) {
1575 Py_DECREF(slice[i]);
1576 }
1577 } while (n == BATCHSIZE);
1578 return 0;
1579
1580 error:
1581 while (--n >= 0) {
1582 Py_DECREF(slice[n]);
1583 }
1584 return -1;
1585}
1586
1587static int
1588save_dict(PicklerObject *self, PyObject *obj)
1589{
1590 PyObject *items, *iter;
1591 char header[3];
1592 int len;
1593 int status = 0;
1594
1595 if (self->fast && !fast_save_enter(self, obj))
1596 goto error;
1597
1598 /* Create an empty dict. */
1599 if (self->bin) {
1600 header[0] = EMPTY_DICT;
1601 len = 1;
1602 }
1603 else {
1604 header[0] = MARK;
1605 header[1] = DICT;
1606 len = 2;
1607 }
1608
1609 if (pickler_write(self, header, len) < 0)
1610 goto error;
1611
1612 /* Get dict size, and bow out early if empty. */
1613 if ((len = PyDict_Size(obj)) < 0)
1614 goto error;
1615
1616 if (memo_put(self, obj) < 0)
1617 goto error;
1618
1619 if (len != 0) {
1620 /* Save the dict items. */
1621 items = PyObject_CallMethod(obj, "items", "()");
1622 if (items == NULL)
1623 goto error;
1624 iter = PyObject_GetIter(items);
1625 Py_DECREF(items);
1626 if (iter == NULL)
1627 goto error;
1628 status = batch_dict(self, iter);
1629 Py_DECREF(iter);
1630 }
1631
1632 if (0) {
1633 error:
1634 status = -1;
1635 }
1636
1637 if (self->fast && !fast_save_leave(self, obj))
1638 status = -1;
1639
1640 return status;
1641}
1642
1643static int
1644save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1645{
1646 static PyObject *name_str = NULL;
1647 PyObject *global_name = NULL;
1648 PyObject *module_name = NULL;
1649 PyObject *module = NULL;
1650 PyObject *cls;
1651 int status = 0;
1652
1653 const char global_op = GLOBAL;
1654
1655 if (name_str == NULL) {
1656 name_str = PyUnicode_InternFromString("__name__");
1657 if (name_str == NULL)
1658 goto error;
1659 }
1660
1661 if (name) {
1662 global_name = name;
1663 Py_INCREF(global_name);
1664 }
1665 else {
1666 global_name = PyObject_GetAttr(obj, name_str);
1667 if (global_name == NULL)
1668 goto error;
1669 }
1670
1671 module_name = whichmodule(obj, global_name);
1672 if (module_name == NULL)
1673 goto error;
1674
1675 /* XXX: Change to use the import C API directly with level=0 to disallow
1676 relative imports.
1677
1678 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1679 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1680 custom import functions (IMHO, this would be a nice security
1681 feature). The import C API would need to be extended to support the
1682 extra parameters of __import__ to fix that. */
1683 module = PyImport_Import(module_name);
1684 if (module == NULL) {
1685 PyErr_Format(PicklingError,
1686 "Can't pickle %R: import of module %R failed",
1687 obj, module_name);
1688 goto error;
1689 }
1690 cls = PyObject_GetAttr(module, global_name);
1691 if (cls == NULL) {
1692 PyErr_Format(PicklingError,
1693 "Can't pickle %R: attribute lookup %S.%S failed",
1694 obj, module_name, global_name);
1695 goto error;
1696 }
1697 if (cls != obj) {
1698 Py_DECREF(cls);
1699 PyErr_Format(PicklingError,
1700 "Can't pickle %R: it's not the same object as %S.%S",
1701 obj, module_name, global_name);
1702 goto error;
1703 }
1704 Py_DECREF(cls);
1705
1706 if (self->proto >= 2) {
1707 /* See whether this is in the extension registry, and if
1708 * so generate an EXT opcode.
1709 */
1710 PyObject *code_obj; /* extension code as Python object */
1711 long code; /* extension code as C value */
1712 char pdata[5];
1713 int n;
1714
1715 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1716 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1717 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1718 /* The object is not registered in the extension registry.
1719 This is the most likely code path. */
1720 if (code_obj == NULL)
1721 goto gen_global;
1722
1723 /* XXX: pickle.py doesn't check neither the type, nor the range
1724 of the value returned by the extension_registry. It should for
1725 consistency. */
1726
1727 /* Verify code_obj has the right type and value. */
1728 if (!PyLong_Check(code_obj)) {
1729 PyErr_Format(PicklingError,
1730 "Can't pickle %R: extension code %R isn't an integer",
1731 obj, code_obj);
1732 goto error;
1733 }
1734 code = PyLong_AS_LONG(code_obj);
1735 if (code <= 0 || code > 0x7fffffffL) {
1736 PyErr_Format(PicklingError,
1737 "Can't pickle %R: extension code %ld is out of range",
1738 obj, code);
1739 goto error;
1740 }
1741
1742 /* Generate an EXT opcode. */
1743 if (code <= 0xff) {
1744 pdata[0] = EXT1;
1745 pdata[1] = (unsigned char)code;
1746 n = 2;
1747 }
1748 else if (code <= 0xffff) {
1749 pdata[0] = EXT2;
1750 pdata[1] = (unsigned char)(code & 0xff);
1751 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1752 n = 3;
1753 }
1754 else {
1755 pdata[0] = EXT4;
1756 pdata[1] = (unsigned char)(code & 0xff);
1757 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1758 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1759 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1760 n = 5;
1761 }
1762
1763 if (pickler_write(self, pdata, n) < 0)
1764 goto error;
1765 }
1766 else {
1767 /* Generate a normal global opcode if we are using a pickle
1768 protocol <= 2, or if the object is not registered in the
1769 extension registry. */
1770 PyObject *encoded;
1771 PyObject *(*unicode_encoder)(PyObject *);
1772
1773 gen_global:
1774 if (pickler_write(self, &global_op, 1) < 0)
1775 goto error;
1776
1777 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1778 the module name and the global name using UTF-8. We do so only when
1779 we are using the pickle protocol newer than version 3. This is to
1780 ensure compatibility with older Unpickler running on Python 2.x. */
1781 if (self->proto >= 3) {
1782 unicode_encoder = PyUnicode_AsUTF8String;
1783 }
1784 else {
1785 unicode_encoder = PyUnicode_AsASCIIString;
1786 }
1787
1788 /* Save the name of the module. */
1789 encoded = unicode_encoder(module_name);
1790 if (encoded == NULL) {
1791 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1792 PyErr_Format(PicklingError,
1793 "can't pickle module identifier '%S' using "
1794 "pickle protocol %i", module_name, self->proto);
1795 goto error;
1796 }
1797 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1798 PyBytes_GET_SIZE(encoded)) < 0) {
1799 Py_DECREF(encoded);
1800 goto error;
1801 }
1802 Py_DECREF(encoded);
1803 if(pickler_write(self, "\n", 1) < 0)
1804 goto error;
1805
1806 /* Save the name of the module. */
1807 encoded = unicode_encoder(global_name);
1808 if (encoded == NULL) {
1809 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1810 PyErr_Format(PicklingError,
1811 "can't pickle global identifier '%S' using "
1812 "pickle protocol %i", global_name, self->proto);
1813 goto error;
1814 }
1815 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1816 PyBytes_GET_SIZE(encoded)) < 0) {
1817 Py_DECREF(encoded);
1818 goto error;
1819 }
1820 Py_DECREF(encoded);
1821 if(pickler_write(self, "\n", 1) < 0)
1822 goto error;
1823
1824 /* Memoize the object. */
1825 if (memo_put(self, obj) < 0)
1826 goto error;
1827 }
1828
1829 if (0) {
1830 error:
1831 status = -1;
1832 }
1833 Py_XDECREF(module_name);
1834 Py_XDECREF(global_name);
1835 Py_XDECREF(module);
1836
1837 return status;
1838}
1839
1840static int
1841save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1842{
1843 PyObject *pid = NULL;
1844 int status = 0;
1845
1846 const char persid_op = PERSID;
1847 const char binpersid_op = BINPERSID;
1848
1849 Py_INCREF(obj);
1850 pid = pickler_call(self, func, obj);
1851 if (pid == NULL)
1852 return -1;
1853
1854 if (pid != Py_None) {
1855 if (self->bin) {
1856 if (save(self, pid, 1) < 0 ||
1857 pickler_write(self, &binpersid_op, 1) < 0)
1858 goto error;
1859 }
1860 else {
1861 PyObject *pid_str = NULL;
1862 char *pid_ascii_bytes;
1863 Py_ssize_t size;
1864
1865 pid_str = PyObject_Str(pid);
1866 if (pid_str == NULL)
1867 goto error;
1868
1869 /* XXX: Should it check whether the persistent id only contains
1870 ASCII characters? And what if the pid contains embedded
1871 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001872 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001873 Py_DECREF(pid_str);
1874 if (pid_ascii_bytes == NULL)
1875 goto error;
1876
1877 if (pickler_write(self, &persid_op, 1) < 0 ||
1878 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1879 pickler_write(self, "\n", 1) < 0)
1880 goto error;
1881 }
1882 status = 1;
1883 }
1884
1885 if (0) {
1886 error:
1887 status = -1;
1888 }
1889 Py_XDECREF(pid);
1890
1891 return status;
1892}
1893
1894/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1895 * appropriate __reduce__ method for obj.
1896 */
1897static int
1898save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1899{
1900 PyObject *callable;
1901 PyObject *argtup;
1902 PyObject *state = NULL;
1903 PyObject *listitems = NULL;
1904 PyObject *dictitems = NULL;
1905
1906 int use_newobj = self->proto >= 2;
1907
1908 const char reduce_op = REDUCE;
1909 const char build_op = BUILD;
1910 const char newobj_op = NEWOBJ;
1911
1912 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1913 &callable, &argtup, &state, &listitems, &dictitems))
1914 return -1;
1915
1916 if (!PyCallable_Check(callable)) {
1917 PyErr_SetString(PicklingError,
1918 "first argument of save_reduce() must be callable");
1919 return -1;
1920 }
1921 if (!PyTuple_Check(argtup)) {
1922 PyErr_SetString(PicklingError,
1923 "second argument of save_reduce() must be a tuple");
1924 return -1;
1925 }
1926
1927 if (state == Py_None)
1928 state = NULL;
1929 if (listitems == Py_None)
1930 listitems = NULL;
1931 if (dictitems == Py_None)
1932 dictitems = NULL;
1933
1934 /* Protocol 2 special case: if callable's name is __newobj__, use
1935 NEWOBJ. */
1936 if (use_newobj) {
1937 static PyObject *newobj_str = NULL;
1938 PyObject *name_str;
1939
1940 if (newobj_str == NULL) {
1941 newobj_str = PyUnicode_InternFromString("__newobj__");
1942 }
1943
1944 name_str = PyObject_GetAttrString(callable, "__name__");
1945 if (name_str == NULL) {
1946 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1947 PyErr_Clear();
1948 else
1949 return -1;
1950 use_newobj = 0;
1951 }
1952 else {
1953 use_newobj = PyUnicode_Check(name_str) &&
1954 PyUnicode_Compare(name_str, newobj_str) == 0;
1955 Py_DECREF(name_str);
1956 }
1957 }
1958 if (use_newobj) {
1959 PyObject *cls;
1960 PyObject *newargtup;
1961 PyObject *obj_class;
1962 int p;
1963
1964 /* Sanity checks. */
1965 if (Py_SIZE(argtup) < 1) {
1966 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
1967 return -1;
1968 }
1969
1970 cls = PyTuple_GET_ITEM(argtup, 0);
1971 if (!PyObject_HasAttrString(cls, "__new__")) {
1972 PyErr_SetString(PicklingError, "args[0] from "
1973 "__newobj__ args has no __new__");
1974 return -1;
1975 }
1976
1977 if (obj != NULL) {
1978 obj_class = PyObject_GetAttrString(obj, "__class__");
1979 if (obj_class == NULL) {
1980 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1981 PyErr_Clear();
1982 else
1983 return -1;
1984 }
1985 p = obj_class != cls; /* true iff a problem */
1986 Py_DECREF(obj_class);
1987 if (p) {
1988 PyErr_SetString(PicklingError, "args[0] from "
1989 "__newobj__ args has the wrong class");
1990 return -1;
1991 }
1992 }
1993 /* XXX: These calls save() are prone to infinite recursion. Imagine
1994 what happen if the value returned by the __reduce__() method of
1995 some extension type contains another object of the same type. Ouch!
1996
1997 Here is a quick example, that I ran into, to illustrate what I
1998 mean:
1999
2000 >>> import pickle, copyreg
2001 >>> copyreg.dispatch_table.pop(complex)
2002 >>> pickle.dumps(1+2j)
2003 Traceback (most recent call last):
2004 ...
2005 RuntimeError: maximum recursion depth exceeded
2006
2007 Removing the complex class from copyreg.dispatch_table made the
2008 __reduce_ex__() method emit another complex object:
2009
2010 >>> (1+1j).__reduce_ex__(2)
2011 (<function __newobj__ at 0xb7b71c3c>,
2012 (<class 'complex'>, (1+1j)), None, None, None)
2013
2014 Thus when save() was called on newargstup (the 2nd item) recursion
2015 ensued. Of course, the bug was in the complex class which had a
2016 broken __getnewargs__() that emitted another complex object. But,
2017 the point, here, is it is quite easy to end up with a broken reduce
2018 function. */
2019
2020 /* Save the class and its __new__ arguments. */
2021 if (save(self, cls, 0) < 0)
2022 return -1;
2023
2024 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2025 if (newargtup == NULL)
2026 return -1;
2027
2028 p = save(self, newargtup, 0);
2029 Py_DECREF(newargtup);
2030 if (p < 0)
2031 return -1;
2032
2033 /* Add NEWOBJ opcode. */
2034 if (pickler_write(self, &newobj_op, 1) < 0)
2035 return -1;
2036 }
2037 else { /* Not using NEWOBJ. */
2038 if (save(self, callable, 0) < 0 ||
2039 save(self, argtup, 0) < 0 ||
2040 pickler_write(self, &reduce_op, 1) < 0)
2041 return -1;
2042 }
2043
2044 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2045 the caller do not want to memoize the object. Not particularly useful,
2046 but that is to mimic the behavior save_reduce() in pickle.py when
2047 obj is None. */
2048 if (obj && memo_put(self, obj) < 0)
2049 return -1;
2050
2051 if (listitems && batch_list(self, listitems) < 0)
2052 return -1;
2053
2054 if (dictitems && batch_dict(self, dictitems) < 0)
2055 return -1;
2056
2057 if (state) {
2058 if (save(self, state, 0) < 0 ||
2059 pickler_write(self, &build_op, 1) < 0)
2060 return -1;
2061 }
2062
2063 return 0;
2064}
2065
2066static int
2067save(PicklerObject *self, PyObject *obj, int pers_save)
2068{
2069 PyTypeObject *type;
2070 PyObject *reduce_func = NULL;
2071 PyObject *reduce_value = NULL;
2072 PyObject *memo_key = NULL;
2073 int status = 0;
2074
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002075 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2076 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002077
2078 /* The extra pers_save argument is necessary to avoid calling save_pers()
2079 on its returned object. */
2080 if (!pers_save && self->pers_func) {
2081 /* save_pers() returns:
2082 -1 to signal an error;
2083 0 if it did nothing successfully;
2084 1 if a persistent id was saved.
2085 */
2086 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2087 goto done;
2088 }
2089
2090 type = Py_TYPE(obj);
2091
2092 /* XXX: The old cPickle had an optimization that used switch-case
2093 statement dispatching on the first letter of the type name. It was
2094 probably not a bad idea after all. If benchmarks shows that particular
2095 optimization had some real benefits, it would be nice to add it
2096 back. */
2097
2098 /* Atom types; these aren't memoized, so don't check the memo. */
2099
2100 if (obj == Py_None) {
2101 status = save_none(self, obj);
2102 goto done;
2103 }
2104 else if (obj == Py_False || obj == Py_True) {
2105 status = save_bool(self, obj);
2106 goto done;
2107 }
2108 else if (type == &PyLong_Type) {
2109 status = save_long(self, obj);
2110 goto done;
2111 }
2112 else if (type == &PyFloat_Type) {
2113 status = save_float(self, obj);
2114 goto done;
2115 }
2116
2117 /* Check the memo to see if it has the object. If so, generate
2118 a GET (or BINGET) opcode, instead of pickling the object
2119 once again. */
2120 memo_key = PyLong_FromVoidPtr(obj);
2121 if (memo_key == NULL)
2122 goto error;
2123 if (PyDict_GetItem(self->memo, memo_key)) {
2124 if (memo_get(self, memo_key) < 0)
2125 goto error;
2126 goto done;
2127 }
2128
2129 if (type == &PyBytes_Type) {
2130 status = save_bytes(self, obj);
2131 goto done;
2132 }
2133 else if (type == &PyUnicode_Type) {
2134 status = save_unicode(self, obj);
2135 goto done;
2136 }
2137 else if (type == &PyDict_Type) {
2138 status = save_dict(self, obj);
2139 goto done;
2140 }
2141 else if (type == &PyList_Type) {
2142 status = save_list(self, obj);
2143 goto done;
2144 }
2145 else if (type == &PyTuple_Type) {
2146 status = save_tuple(self, obj);
2147 goto done;
2148 }
2149 else if (type == &PyType_Type) {
2150 status = save_global(self, obj, NULL);
2151 goto done;
2152 }
2153 else if (type == &PyFunction_Type) {
2154 status = save_global(self, obj, NULL);
2155 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2156 /* fall back to reduce */
2157 PyErr_Clear();
2158 }
2159 else {
2160 goto done;
2161 }
2162 }
2163 else if (type == &PyCFunction_Type) {
2164 status = save_global(self, obj, NULL);
2165 goto done;
2166 }
2167 else if (PyType_IsSubtype(type, &PyType_Type)) {
2168 status = save_global(self, obj, NULL);
2169 goto done;
2170 }
2171
2172 /* XXX: This part needs some unit tests. */
2173
2174 /* Get a reduction callable, and call it. This may come from
2175 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2176 * or the object's __reduce__ method.
2177 */
2178 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2179 if (reduce_func != NULL) {
2180 /* Here, the reference count of the reduce_func object returned by
2181 PyDict_GetItem needs to be increased to be consistent with the one
2182 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2183 reduce_func at the end of the save() routine.
2184 */
2185 Py_INCREF(reduce_func);
2186 Py_INCREF(obj);
2187 reduce_value = pickler_call(self, reduce_func, obj);
2188 }
2189 else {
2190 static PyObject *reduce_str = NULL;
2191 static PyObject *reduce_ex_str = NULL;
2192
2193 /* Cache the name of the reduce methods. */
2194 if (reduce_str == NULL) {
2195 reduce_str = PyUnicode_InternFromString("__reduce__");
2196 if (reduce_str == NULL)
2197 goto error;
2198 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2199 if (reduce_ex_str == NULL)
2200 goto error;
2201 }
2202
2203 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2204 automatically defined as __reduce__. While this is convenient, this
2205 make it impossible to know which method was actually called. Of
2206 course, this is not a big deal. But still, it would be nice to let
2207 the user know which method was called when something go
2208 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2209 don't actually have to check for a __reduce__ method. */
2210
2211 /* Check for a __reduce_ex__ method. */
2212 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2213 if (reduce_func != NULL) {
2214 PyObject *proto;
2215 proto = PyLong_FromLong(self->proto);
2216 if (proto != NULL) {
2217 reduce_value = pickler_call(self, reduce_func, proto);
2218 }
2219 }
2220 else {
2221 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2222 PyErr_Clear();
2223 else
2224 goto error;
2225 /* Check for a __reduce__ method. */
2226 reduce_func = PyObject_GetAttr(obj, reduce_str);
2227 if (reduce_func != NULL) {
2228 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2229 }
2230 else {
2231 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2232 type->tp_name, obj);
2233 goto error;
2234 }
2235 }
2236 }
2237
2238 if (reduce_value == NULL)
2239 goto error;
2240
2241 if (PyUnicode_Check(reduce_value)) {
2242 status = save_global(self, obj, reduce_value);
2243 goto done;
2244 }
2245
2246 if (!PyTuple_Check(reduce_value)) {
2247 PyErr_SetString(PicklingError,
2248 "__reduce__ must return a string or tuple");
2249 goto error;
2250 }
2251 if (Py_SIZE(reduce_value) < 2 || Py_SIZE(reduce_value) > 5) {
2252 PyErr_SetString(PicklingError, "tuple returned by __reduce__ "
2253 "must contain 2 through 5 elements");
2254 goto error;
2255 }
2256 if (!PyTuple_Check(PyTuple_GET_ITEM(reduce_value, 1))) {
2257 PyErr_SetString(PicklingError, "second item of the tuple "
2258 "returned by __reduce__ must be a tuple");
2259 goto error;
2260 }
2261
2262 status = save_reduce(self, reduce_value, obj);
2263
2264 if (0) {
2265 error:
2266 status = -1;
2267 }
2268 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002269 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002270 Py_XDECREF(memo_key);
2271 Py_XDECREF(reduce_func);
2272 Py_XDECREF(reduce_value);
2273
2274 return status;
2275}
2276
2277static int
2278dump(PicklerObject *self, PyObject *obj)
2279{
2280 const char stop_op = STOP;
2281
2282 if (self->proto >= 2) {
2283 char header[2];
2284
2285 header[0] = PROTO;
2286 assert(self->proto >= 0 && self->proto < 256);
2287 header[1] = (unsigned char)self->proto;
2288 if (pickler_write(self, header, 2) < 0)
2289 return -1;
2290 }
2291
2292 if (save(self, obj, 0) < 0 ||
2293 pickler_write(self, &stop_op, 1) < 0 ||
2294 pickler_write(self, NULL, 0) < 0)
2295 return -1;
2296
2297 return 0;
2298}
2299
2300PyDoc_STRVAR(Pickler_clear_memo_doc,
2301"clear_memo() -> None. Clears the pickler's \"memo\"."
2302"\n"
2303"The memo is the data structure that remembers which objects the\n"
2304"pickler has already seen, so that shared or recursive objects are\n"
2305"pickled by reference and not by value. This method is useful when\n"
2306"re-using picklers.");
2307
2308static PyObject *
2309Pickler_clear_memo(PicklerObject *self)
2310{
2311 if (self->memo)
2312 PyDict_Clear(self->memo);
2313
2314 Py_RETURN_NONE;
2315}
2316
2317PyDoc_STRVAR(Pickler_dump_doc,
2318"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2319
2320static PyObject *
2321Pickler_dump(PicklerObject *self, PyObject *args)
2322{
2323 PyObject *obj;
2324
2325 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2326 return NULL;
2327
2328 if (dump(self, obj) < 0)
2329 return NULL;
2330
2331 Py_RETURN_NONE;
2332}
2333
2334static struct PyMethodDef Pickler_methods[] = {
2335 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2336 Pickler_dump_doc},
2337 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2338 Pickler_clear_memo_doc},
2339 {NULL, NULL} /* sentinel */
2340};
2341
2342static void
2343Pickler_dealloc(PicklerObject *self)
2344{
2345 PyObject_GC_UnTrack(self);
2346
2347 Py_XDECREF(self->write);
2348 Py_XDECREF(self->memo);
2349 Py_XDECREF(self->pers_func);
2350 Py_XDECREF(self->arg);
2351 Py_XDECREF(self->fast_memo);
2352
2353 PyMem_Free(self->write_buf);
2354
2355 Py_TYPE(self)->tp_free((PyObject *)self);
2356}
2357
2358static int
2359Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2360{
2361 Py_VISIT(self->write);
2362 Py_VISIT(self->memo);
2363 Py_VISIT(self->pers_func);
2364 Py_VISIT(self->arg);
2365 Py_VISIT(self->fast_memo);
2366 return 0;
2367}
2368
2369static int
2370Pickler_clear(PicklerObject *self)
2371{
2372 Py_CLEAR(self->write);
2373 Py_CLEAR(self->memo);
2374 Py_CLEAR(self->pers_func);
2375 Py_CLEAR(self->arg);
2376 Py_CLEAR(self->fast_memo);
2377
2378 PyMem_Free(self->write_buf);
2379 self->write_buf = NULL;
2380
2381 return 0;
2382}
2383
2384PyDoc_STRVAR(Pickler_doc,
2385"Pickler(file, protocol=None)"
2386"\n"
2387"This takes a binary file for writing a pickle data stream.\n"
2388"\n"
2389"The optional protocol argument tells the pickler to use the\n"
2390"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2391"protocol is 3; a backward-incompatible protocol designed for\n"
2392"Python 3.0.\n"
2393"\n"
2394"Specifying a negative protocol version selects the highest\n"
2395"protocol version supported. The higher the protocol used, the\n"
2396"more recent the version of Python needed to read the pickle\n"
2397"produced.\n"
2398"\n"
2399"The file argument must have a write() method that accepts a single\n"
2400"bytes argument. It can thus be a file object opened for binary\n"
2401"writing, a io.BytesIO instance, or any other custom object that\n"
2402"meets this interface.\n");
2403
2404static int
2405Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2406{
2407 static char *kwlist[] = {"file", "protocol", 0};
2408 PyObject *file;
2409 PyObject *proto_obj = NULL;
2410 long proto = 0;
2411
2412 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2413 kwlist, &file, &proto_obj))
2414 return -1;
2415
2416 /* In case of multiple __init__() calls, clear previous content. */
2417 if (self->write != NULL)
2418 (void)Pickler_clear(self);
2419
2420 if (proto_obj == NULL || proto_obj == Py_None)
2421 proto = DEFAULT_PROTOCOL;
2422 else
2423 proto = PyLong_AsLong(proto_obj);
2424
2425 if (proto < 0)
2426 proto = HIGHEST_PROTOCOL;
2427 if (proto > HIGHEST_PROTOCOL) {
2428 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2429 HIGHEST_PROTOCOL);
2430 return -1;
2431 }
2432
2433 self->proto = proto;
2434 self->bin = proto > 0;
2435 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002436 self->fast = 0;
2437 self->fast_nesting = 0;
2438 self->fast_memo = NULL;
2439
2440 if (!PyObject_HasAttrString(file, "write")) {
2441 PyErr_SetString(PyExc_TypeError,
2442 "file must have a 'write' attribute");
2443 return -1;
2444 }
2445 self->write = PyObject_GetAttrString(file, "write");
2446 if (self->write == NULL)
2447 return -1;
2448 self->buf_size = 0;
2449 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2450 if (self->write_buf == NULL) {
2451 PyErr_NoMemory();
2452 return -1;
2453 }
2454 self->pers_func = NULL;
2455 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2456 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2457 "persistent_id");
2458 if (self->pers_func == NULL)
2459 return -1;
2460 }
2461 self->memo = PyDict_New();
2462 if (self->memo == NULL)
2463 return -1;
2464
2465 return 0;
2466}
2467
2468static PyObject *
2469Pickler_get_memo(PicklerObject *self)
2470{
2471 if (self->memo == NULL)
2472 PyErr_SetString(PyExc_AttributeError, "memo");
2473 else
2474 Py_INCREF(self->memo);
2475 return self->memo;
2476}
2477
2478static int
2479Pickler_set_memo(PicklerObject *self, PyObject *value)
2480{
2481 PyObject *tmp;
2482
2483 if (value == NULL) {
2484 PyErr_SetString(PyExc_TypeError,
2485 "attribute deletion is not supported");
2486 return -1;
2487 }
2488 if (!PyDict_Check(value)) {
2489 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2490 return -1;
2491 }
2492
2493 tmp = self->memo;
2494 Py_INCREF(value);
2495 self->memo = value;
2496 Py_XDECREF(tmp);
2497
2498 return 0;
2499}
2500
2501static PyObject *
2502Pickler_get_persid(PicklerObject *self)
2503{
2504 if (self->pers_func == NULL)
2505 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2506 else
2507 Py_INCREF(self->pers_func);
2508 return self->pers_func;
2509}
2510
2511static int
2512Pickler_set_persid(PicklerObject *self, PyObject *value)
2513{
2514 PyObject *tmp;
2515
2516 if (value == NULL) {
2517 PyErr_SetString(PyExc_TypeError,
2518 "attribute deletion is not supported");
2519 return -1;
2520 }
2521 if (!PyCallable_Check(value)) {
2522 PyErr_SetString(PyExc_TypeError,
2523 "persistent_id must be a callable taking one argument");
2524 return -1;
2525 }
2526
2527 tmp = self->pers_func;
2528 Py_INCREF(value);
2529 self->pers_func = value;
2530 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2531
2532 return 0;
2533}
2534
2535static PyMemberDef Pickler_members[] = {
2536 {"bin", T_INT, offsetof(PicklerObject, bin)},
2537 {"fast", T_INT, offsetof(PicklerObject, fast)},
2538 {NULL}
2539};
2540
2541static PyGetSetDef Pickler_getsets[] = {
2542 {"memo", (getter)Pickler_get_memo,
2543 (setter)Pickler_set_memo},
2544 {"persistent_id", (getter)Pickler_get_persid,
2545 (setter)Pickler_set_persid},
2546 {NULL}
2547};
2548
2549static PyTypeObject Pickler_Type = {
2550 PyVarObject_HEAD_INIT(NULL, 0)
2551 "_pickle.Pickler" , /*tp_name*/
2552 sizeof(PicklerObject), /*tp_basicsize*/
2553 0, /*tp_itemsize*/
2554 (destructor)Pickler_dealloc, /*tp_dealloc*/
2555 0, /*tp_print*/
2556 0, /*tp_getattr*/
2557 0, /*tp_setattr*/
2558 0, /*tp_compare*/
2559 0, /*tp_repr*/
2560 0, /*tp_as_number*/
2561 0, /*tp_as_sequence*/
2562 0, /*tp_as_mapping*/
2563 0, /*tp_hash*/
2564 0, /*tp_call*/
2565 0, /*tp_str*/
2566 0, /*tp_getattro*/
2567 0, /*tp_setattro*/
2568 0, /*tp_as_buffer*/
2569 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2570 Pickler_doc, /*tp_doc*/
2571 (traverseproc)Pickler_traverse, /*tp_traverse*/
2572 (inquiry)Pickler_clear, /*tp_clear*/
2573 0, /*tp_richcompare*/
2574 0, /*tp_weaklistoffset*/
2575 0, /*tp_iter*/
2576 0, /*tp_iternext*/
2577 Pickler_methods, /*tp_methods*/
2578 Pickler_members, /*tp_members*/
2579 Pickler_getsets, /*tp_getset*/
2580 0, /*tp_base*/
2581 0, /*tp_dict*/
2582 0, /*tp_descr_get*/
2583 0, /*tp_descr_set*/
2584 0, /*tp_dictoffset*/
2585 (initproc)Pickler_init, /*tp_init*/
2586 PyType_GenericAlloc, /*tp_alloc*/
2587 PyType_GenericNew, /*tp_new*/
2588 PyObject_GC_Del, /*tp_free*/
2589 0, /*tp_is_gc*/
2590};
2591
2592/* Temporary helper for calling self.find_class().
2593
2594 XXX: It would be nice to able to avoid Python function call overhead, by
2595 using directly the C version of find_class(), when find_class() is not
2596 overridden by a subclass. Although, this could become rather hackish. A
2597 simpler optimization would be to call the C function when self is not a
2598 subclass instance. */
2599static PyObject *
2600find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2601{
2602 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2603 module_name, global_name);
2604}
2605
2606static int
2607marker(UnpicklerObject *self)
2608{
2609 if (self->num_marks < 1) {
2610 PyErr_SetString(UnpicklingError, "could not find MARK");
2611 return -1;
2612 }
2613
2614 return self->marks[--self->num_marks];
2615}
2616
2617static int
2618load_none(UnpicklerObject *self)
2619{
2620 PDATA_APPEND(self->stack, Py_None, -1);
2621 return 0;
2622}
2623
2624static int
2625bad_readline(void)
2626{
2627 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2628 return -1;
2629}
2630
2631static int
2632load_int(UnpicklerObject *self)
2633{
2634 PyObject *value;
2635 char *endptr, *s;
2636 Py_ssize_t len;
2637 long x;
2638
2639 if ((len = unpickler_readline(self, &s)) < 0)
2640 return -1;
2641 if (len < 2)
2642 return bad_readline();
2643
2644 errno = 0;
2645 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2646 x = strtol(s, &endptr, 0);
2647
2648 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2649 /* Hm, maybe we've got something long. Let's try reading
2650 * it as a Python long object. */
2651 errno = 0;
2652 /* XXX: Same thing about the base here. */
2653 value = PyLong_FromString(s, NULL, 0);
2654 if (value == NULL) {
2655 PyErr_SetString(PyExc_ValueError,
2656 "could not convert string to int");
2657 return -1;
2658 }
2659 }
2660 else {
2661 if (len == 3 && (x == 0 || x == 1)) {
2662 if ((value = PyBool_FromLong(x)) == NULL)
2663 return -1;
2664 }
2665 else {
2666 if ((value = PyLong_FromLong(x)) == NULL)
2667 return -1;
2668 }
2669 }
2670
2671 PDATA_PUSH(self->stack, value, -1);
2672 return 0;
2673}
2674
2675static int
2676load_bool(UnpicklerObject *self, PyObject *boolean)
2677{
2678 assert(boolean == Py_True || boolean == Py_False);
2679 PDATA_APPEND(self->stack, boolean, -1);
2680 return 0;
2681}
2682
/* `bytes` points at `size` bytes of a little-endian integer.  Return its
 * value as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of cross-platform bugs.
 *
 * The value is accumulated in an unsigned long so that shifting a byte
 * with its high bit set into bit 31 never overflows a signed type.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.  Negating the
     * isolated sign bit in unsigned arithmetic yields all-ones above it.
     */
    if (sizeof(long) > 4 && size == 4) {
        x |= -(x & (1UL << 31));
    }

    return (long)x;
}
2709
2710static int
2711load_binintx(UnpicklerObject *self, char *s, int size)
2712{
2713 PyObject *value;
2714 long x;
2715
2716 x = calc_binint(s, size);
2717
2718 if ((value = PyLong_FromLong(x)) == NULL)
2719 return -1;
2720
2721 PDATA_PUSH(self->stack, value, -1);
2722 return 0;
2723}
2724
2725static int
2726load_binint(UnpicklerObject *self)
2727{
2728 char *s;
2729
2730 if (unpickler_read(self, &s, 4) < 0)
2731 return -1;
2732
2733 return load_binintx(self, s, 4);
2734}
2735
2736static int
2737load_binint1(UnpicklerObject *self)
2738{
2739 char *s;
2740
2741 if (unpickler_read(self, &s, 1) < 0)
2742 return -1;
2743
2744 return load_binintx(self, s, 1);
2745}
2746
2747static int
2748load_binint2(UnpicklerObject *self)
2749{
2750 char *s;
2751
2752 if (unpickler_read(self, &s, 2) < 0)
2753 return -1;
2754
2755 return load_binintx(self, s, 2);
2756}
2757
2758static int
2759load_long(UnpicklerObject *self)
2760{
2761 PyObject *value;
2762 char *s;
2763 Py_ssize_t len;
2764
2765 if ((len = unpickler_readline(self, &s)) < 0)
2766 return -1;
2767 if (len < 2)
2768 return bad_readline();
2769
2770 /* XXX: Should the base argument explicitly set to 10? */
2771 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2772 return -1;
2773
2774 PDATA_PUSH(self->stack, value, -1);
2775 return 0;
2776}
2777
2778/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2779 * data following.
2780 */
2781static int
2782load_counted_long(UnpicklerObject *self, int size)
2783{
2784 PyObject *value;
2785 char *nbytes;
2786 char *pdata;
2787
2788 assert(size == 1 || size == 4);
2789 if (unpickler_read(self, &nbytes, size) < 0)
2790 return -1;
2791
2792 size = calc_binint(nbytes, size);
2793 if (size < 0) {
2794 /* Corrupt or hostile pickle -- we never write one like this */
2795 PyErr_SetString(UnpicklingError,
2796 "LONG pickle has negative byte count");
2797 return -1;
2798 }
2799
2800 if (size == 0)
2801 value = PyLong_FromLong(0L);
2802 else {
2803 /* Read the raw little-endian bytes and convert. */
2804 if (unpickler_read(self, &pdata, size) < 0)
2805 return -1;
2806 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2807 1 /* little endian */ , 1 /* signed */ );
2808 }
2809 if (value == NULL)
2810 return -1;
2811 PDATA_PUSH(self->stack, value, -1);
2812 return 0;
2813}
2814
2815static int
2816load_float(UnpicklerObject *self)
2817{
2818 PyObject *value;
2819 char *endptr, *s;
2820 Py_ssize_t len;
2821 double d;
2822
2823 if ((len = unpickler_readline(self, &s)) < 0)
2824 return -1;
2825 if (len < 2)
2826 return bad_readline();
2827
2828 errno = 0;
2829 d = PyOS_ascii_strtod(s, &endptr);
2830
2831 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2832 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2833 return -1;
2834 }
2835
2836 if ((value = PyFloat_FromDouble(d)) == NULL)
2837 return -1;
2838
2839 PDATA_PUSH(self->stack, value, -1);
2840 return 0;
2841}
2842
2843static int
2844load_binfloat(UnpicklerObject *self)
2845{
2846 PyObject *value;
2847 double x;
2848 char *s;
2849
2850 if (unpickler_read(self, &s, 8) < 0)
2851 return -1;
2852
2853 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2854 if (x == -1.0 && PyErr_Occurred())
2855 return -1;
2856
2857 if ((value = PyFloat_FromDouble(x)) == NULL)
2858 return -1;
2859
2860 PDATA_PUSH(self->stack, value, -1);
2861 return 0;
2862}
2863
2864static int
2865load_string(UnpicklerObject *self)
2866{
2867 PyObject *bytes;
2868 PyObject *str = NULL;
2869 Py_ssize_t len;
2870 char *s, *p;
2871
2872 if ((len = unpickler_readline(self, &s)) < 0)
2873 return -1;
2874 if (len < 3)
2875 return bad_readline();
2876 if ((s = strdup(s)) == NULL) {
2877 PyErr_NoMemory();
2878 return -1;
2879 }
2880
2881 /* Strip outermost quotes */
2882 while (s[len - 1] <= ' ')
2883 len--;
2884 if (s[0] == '"' && s[len - 1] == '"') {
2885 s[len - 1] = '\0';
2886 p = s + 1;
2887 len -= 2;
2888 }
2889 else if (s[0] == '\'' && s[len - 1] == '\'') {
2890 s[len - 1] = '\0';
2891 p = s + 1;
2892 len -= 2;
2893 }
2894 else {
2895 free(s);
2896 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2897 return -1;
2898 }
2899
2900 /* Use the PyBytes API to decode the string, since that is what is used
2901 to encode, and then coerce the result to Unicode. */
2902 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2903 free(s);
2904 if (bytes == NULL)
2905 return -1;
2906 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2907 Py_DECREF(bytes);
2908 if (str == NULL)
2909 return -1;
2910
2911 PDATA_PUSH(self->stack, str, -1);
2912 return 0;
2913}
2914
2915static int
2916load_binbytes(UnpicklerObject *self)
2917{
2918 PyObject *bytes;
2919 long x;
2920 char *s;
2921
2922 if (unpickler_read(self, &s, 4) < 0)
2923 return -1;
2924
2925 x = calc_binint(s, 4);
2926 if (x < 0) {
2927 PyErr_SetString(UnpicklingError,
2928 "BINBYTES pickle has negative byte count");
2929 return -1;
2930 }
2931
2932 if (unpickler_read(self, &s, x) < 0)
2933 return -1;
2934 bytes = PyBytes_FromStringAndSize(s, x);
2935 if (bytes == NULL)
2936 return -1;
2937
2938 PDATA_PUSH(self->stack, bytes, -1);
2939 return 0;
2940}
2941
2942static int
2943load_short_binbytes(UnpicklerObject *self)
2944{
2945 PyObject *bytes;
2946 unsigned char x;
2947 char *s;
2948
2949 if (unpickler_read(self, &s, 1) < 0)
2950 return -1;
2951
2952 x = (unsigned char)s[0];
2953
2954 if (unpickler_read(self, &s, x) < 0)
2955 return -1;
2956
2957 bytes = PyBytes_FromStringAndSize(s, x);
2958 if (bytes == NULL)
2959 return -1;
2960
2961 PDATA_PUSH(self->stack, bytes, -1);
2962 return 0;
2963}
2964
2965static int
2966load_binstring(UnpicklerObject *self)
2967{
2968 PyObject *str;
2969 long x;
2970 char *s;
2971
2972 if (unpickler_read(self, &s, 4) < 0)
2973 return -1;
2974
2975 x = calc_binint(s, 4);
2976 if (x < 0) {
2977 PyErr_SetString(UnpicklingError,
2978 "BINSTRING pickle has negative byte count");
2979 return -1;
2980 }
2981
2982 if (unpickler_read(self, &s, x) < 0)
2983 return -1;
2984
2985 /* Convert Python 2.x strings to unicode. */
2986 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
2987 if (str == NULL)
2988 return -1;
2989
2990 PDATA_PUSH(self->stack, str, -1);
2991 return 0;
2992}
2993
2994static int
2995load_short_binstring(UnpicklerObject *self)
2996{
2997 PyObject *str;
2998 unsigned char x;
2999 char *s;
3000
3001 if (unpickler_read(self, &s, 1) < 0)
3002 return -1;
3003
3004 x = (unsigned char)s[0];
3005
3006 if (unpickler_read(self, &s, x) < 0)
3007 return -1;
3008
3009 /* Convert Python 2.x strings to unicode. */
3010 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3011 if (str == NULL)
3012 return -1;
3013
3014 PDATA_PUSH(self->stack, str, -1);
3015 return 0;
3016}
3017
3018static int
3019load_unicode(UnpicklerObject *self)
3020{
3021 PyObject *str;
3022 Py_ssize_t len;
3023 char *s;
3024
3025 if ((len = unpickler_readline(self, &s)) < 0)
3026 return -1;
3027 if (len < 1)
3028 return bad_readline();
3029
3030 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3031 if (str == NULL)
3032 return -1;
3033
3034 PDATA_PUSH(self->stack, str, -1);
3035 return 0;
3036}
3037
3038static int
3039load_binunicode(UnpicklerObject *self)
3040{
3041 PyObject *str;
3042 long size;
3043 char *s;
3044
3045 if (unpickler_read(self, &s, 4) < 0)
3046 return -1;
3047
3048 size = calc_binint(s, 4);
3049 if (size < 0) {
3050 PyErr_SetString(UnpicklingError,
3051 "BINUNICODE pickle has negative byte count");
3052 return -1;
3053 }
3054
3055 if (unpickler_read(self, &s, size) < 0)
3056 return -1;
3057
3058 str = PyUnicode_DecodeUTF8(s, size, NULL);
3059 if (str == NULL)
3060 return -1;
3061
3062 PDATA_PUSH(self->stack, str, -1);
3063 return 0;
3064}
3065
3066static int
3067load_tuple(UnpicklerObject *self)
3068{
3069 PyObject *tuple;
3070 int i;
3071
3072 if ((i = marker(self)) < 0)
3073 return -1;
3074
3075 tuple = Pdata_poptuple(self->stack, i);
3076 if (tuple == NULL)
3077 return -1;
3078 PDATA_PUSH(self->stack, tuple, -1);
3079 return 0;
3080}
3081
3082static int
3083load_counted_tuple(UnpicklerObject *self, int len)
3084{
3085 PyObject *tuple;
3086
3087 tuple = PyTuple_New(len);
3088 if (tuple == NULL)
3089 return -1;
3090
3091 while (--len >= 0) {
3092 PyObject *item;
3093
3094 PDATA_POP(self->stack, item);
3095 if (item == NULL)
3096 return -1;
3097 PyTuple_SET_ITEM(tuple, len, item);
3098 }
3099 PDATA_PUSH(self->stack, tuple, -1);
3100 return 0;
3101}
3102
3103static int
3104load_empty_list(UnpicklerObject *self)
3105{
3106 PyObject *list;
3107
3108 if ((list = PyList_New(0)) == NULL)
3109 return -1;
3110 PDATA_PUSH(self->stack, list, -1);
3111 return 0;
3112}
3113
3114static int
3115load_empty_dict(UnpicklerObject *self)
3116{
3117 PyObject *dict;
3118
3119 if ((dict = PyDict_New()) == NULL)
3120 return -1;
3121 PDATA_PUSH(self->stack, dict, -1);
3122 return 0;
3123}
3124
3125static int
3126load_list(UnpicklerObject *self)
3127{
3128 PyObject *list;
3129 int i;
3130
3131 if ((i = marker(self)) < 0)
3132 return -1;
3133
3134 list = Pdata_poplist(self->stack, i);
3135 if (list == NULL)
3136 return -1;
3137 PDATA_PUSH(self->stack, list, -1);
3138 return 0;
3139}
3140
3141static int
3142load_dict(UnpicklerObject *self)
3143{
3144 PyObject *dict, *key, *value;
3145 int i, j, k;
3146
3147 if ((i = marker(self)) < 0)
3148 return -1;
3149 j = self->stack->length;
3150
3151 if ((dict = PyDict_New()) == NULL)
3152 return -1;
3153
3154 for (k = i + 1; k < j; k += 2) {
3155 key = self->stack->data[k - 1];
3156 value = self->stack->data[k];
3157 if (PyDict_SetItem(dict, key, value) < 0) {
3158 Py_DECREF(dict);
3159 return -1;
3160 }
3161 }
3162 Pdata_clear(self->stack, i);
3163 PDATA_PUSH(self->stack, dict, -1);
3164 return 0;
3165}
3166
3167static PyObject *
3168instantiate(PyObject *cls, PyObject *args)
3169{
3170 PyObject *r = NULL;
3171
3172 /* XXX: The pickle.py module does not create instances this way when the
3173 args tuple is empty. See Unpickler._instantiate(). */
3174 if ((r = PyObject_CallObject(cls, args)))
3175 return r;
3176
3177 /* XXX: Is this still nescessary? */
3178 {
3179 PyObject *tp, *v, *tb, *tmp_value;
3180
3181 PyErr_Fetch(&tp, &v, &tb);
3182 tmp_value = v;
3183 /* NULL occurs when there was a KeyboardInterrupt */
3184 if (tmp_value == NULL)
3185 tmp_value = Py_None;
3186 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3187 Py_XDECREF(v);
3188 v = r;
3189 }
3190 PyErr_Restore(tp, v, tb);
3191 }
3192 return NULL;
3193}
3194
3195static int
3196load_obj(UnpicklerObject *self)
3197{
3198 PyObject *cls, *args, *obj = NULL;
3199 int i;
3200
3201 if ((i = marker(self)) < 0)
3202 return -1;
3203
3204 args = Pdata_poptuple(self->stack, i + 1);
3205 if (args == NULL)
3206 return -1;
3207
3208 PDATA_POP(self->stack, cls);
3209 if (cls) {
3210 obj = instantiate(cls, args);
3211 Py_DECREF(cls);
3212 }
3213 Py_DECREF(args);
3214 if (obj == NULL)
3215 return -1;
3216
3217 PDATA_PUSH(self->stack, obj, -1);
3218 return 0;
3219}
3220
3221static int
3222load_inst(UnpicklerObject *self)
3223{
3224 PyObject *cls = NULL;
3225 PyObject *args = NULL;
3226 PyObject *obj = NULL;
3227 PyObject *module_name;
3228 PyObject *class_name;
3229 Py_ssize_t len;
3230 int i;
3231 char *s;
3232
3233 if ((i = marker(self)) < 0)
3234 return -1;
3235 if ((len = unpickler_readline(self, &s)) < 0)
3236 return -1;
3237 if (len < 2)
3238 return bad_readline();
3239
3240 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3241 identifiers are permitted in Python 3.0, since the INST opcode is only
3242 supported by older protocols on Python 2.x. */
3243 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3244 if (module_name == NULL)
3245 return -1;
3246
3247 if ((len = unpickler_readline(self, &s)) >= 0) {
3248 if (len < 2)
3249 return bad_readline();
3250 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3251 if (class_name == NULL) {
3252 cls = find_class(self, module_name, class_name);
3253 Py_DECREF(class_name);
3254 }
3255 }
3256 Py_DECREF(module_name);
3257
3258 if (cls == NULL)
3259 return -1;
3260
3261 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3262 obj = instantiate(cls, args);
3263 Py_DECREF(args);
3264 }
3265 Py_DECREF(cls);
3266
3267 if (obj == NULL)
3268 return -1;
3269
3270 PDATA_PUSH(self->stack, obj, -1);
3271 return 0;
3272}
3273
3274static int
3275load_newobj(UnpicklerObject *self)
3276{
3277 PyObject *args = NULL;
3278 PyObject *clsraw = NULL;
3279 PyTypeObject *cls; /* clsraw cast to its true type */
3280 PyObject *obj;
3281
3282 /* Stack is ... cls argtuple, and we want to call
3283 * cls.__new__(cls, *argtuple).
3284 */
3285 PDATA_POP(self->stack, args);
3286 if (args == NULL)
3287 goto error;
3288 if (!PyTuple_Check(args)) {
3289 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3290 goto error;
3291 }
3292
3293 PDATA_POP(self->stack, clsraw);
3294 cls = (PyTypeObject *)clsraw;
3295 if (cls == NULL)
3296 goto error;
3297 if (!PyType_Check(cls)) {
3298 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3299 "isn't a type object");
3300 goto error;
3301 }
3302 if (cls->tp_new == NULL) {
3303 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3304 "has NULL tp_new");
3305 goto error;
3306 }
3307
3308 /* Call __new__. */
3309 obj = cls->tp_new(cls, args, NULL);
3310 if (obj == NULL)
3311 goto error;
3312
3313 Py_DECREF(args);
3314 Py_DECREF(clsraw);
3315 PDATA_PUSH(self->stack, obj, -1);
3316 return 0;
3317
3318 error:
3319 Py_XDECREF(args);
3320 Py_XDECREF(clsraw);
3321 return -1;
3322}
3323
3324static int
3325load_global(UnpicklerObject *self)
3326{
3327 PyObject *global = NULL;
3328 PyObject *module_name;
3329 PyObject *global_name;
3330 Py_ssize_t len;
3331 char *s;
3332
3333 if ((len = unpickler_readline(self, &s)) < 0)
3334 return -1;
3335 if (len < 2)
3336 return bad_readline();
3337 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3338 if (!module_name)
3339 return -1;
3340
3341 if ((len = unpickler_readline(self, &s)) >= 0) {
3342 if (len < 2) {
3343 Py_DECREF(module_name);
3344 return bad_readline();
3345 }
3346 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3347 if (global_name) {
3348 global = find_class(self, module_name, global_name);
3349 Py_DECREF(global_name);
3350 }
3351 }
3352 Py_DECREF(module_name);
3353
3354 if (global == NULL)
3355 return -1;
3356 PDATA_PUSH(self->stack, global, -1);
3357 return 0;
3358}
3359
3360static int
3361load_persid(UnpicklerObject *self)
3362{
3363 PyObject *pid;
3364 Py_ssize_t len;
3365 char *s;
3366
3367 if (self->pers_func) {
3368 if ((len = unpickler_readline(self, &s)) < 0)
3369 return -1;
3370 if (len < 2)
3371 return bad_readline();
3372
3373 pid = PyBytes_FromStringAndSize(s, len - 1);
3374 if (pid == NULL)
3375 return -1;
3376
3377 /* Ugh... this does not leak since unpickler_call() steals the
3378 reference to pid first. */
3379 pid = unpickler_call(self, self->pers_func, pid);
3380 if (pid == NULL)
3381 return -1;
3382
3383 PDATA_PUSH(self->stack, pid, -1);
3384 return 0;
3385 }
3386 else {
3387 PyErr_SetString(UnpicklingError,
3388 "A load persistent id instruction was encountered,\n"
3389 "but no persistent_load function was specified.");
3390 return -1;
3391 }
3392}
3393
3394static int
3395load_binpersid(UnpicklerObject *self)
3396{
3397 PyObject *pid;
3398
3399 if (self->pers_func) {
3400 PDATA_POP(self->stack, pid);
3401 if (pid == NULL)
3402 return -1;
3403
3404 /* Ugh... this does not leak since unpickler_call() steals the
3405 reference to pid first. */
3406 pid = unpickler_call(self, self->pers_func, pid);
3407 if (pid == NULL)
3408 return -1;
3409
3410 PDATA_PUSH(self->stack, pid, -1);
3411 return 0;
3412 }
3413 else {
3414 PyErr_SetString(UnpicklingError,
3415 "A load persistent id instruction was encountered,\n"
3416 "but no persistent_load function was specified.");
3417 return -1;
3418 }
3419}
3420
3421static int
3422load_pop(UnpicklerObject *self)
3423{
3424 int len;
3425
3426 if ((len = self->stack->length) <= 0)
3427 return stack_underflow();
3428
3429 /* Note that we split the (pickle.py) stack into two stacks,
3430 * an object stack and a mark stack. We have to be clever and
3431 * pop the right one. We do this by looking at the top of the
3432 * mark stack.
3433 */
3434
3435 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3436 self->num_marks--;
3437 else {
3438 len--;
3439 Py_DECREF(self->stack->data[len]);
3440 self->stack->length = len;
3441 }
3442
3443 return 0;
3444}
3445
3446static int
3447load_pop_mark(UnpicklerObject *self)
3448{
3449 int i;
3450
3451 if ((i = marker(self)) < 0)
3452 return -1;
3453
3454 Pdata_clear(self->stack, i);
3455
3456 return 0;
3457}
3458
3459static int
3460load_dup(UnpicklerObject *self)
3461{
3462 PyObject *last;
3463 int len;
3464
3465 if ((len = self->stack->length) <= 0)
3466 return stack_underflow();
3467 last = self->stack->data[len - 1];
3468 PDATA_APPEND(self->stack, last, -1);
3469 return 0;
3470}
3471
3472static int
3473load_get(UnpicklerObject *self)
3474{
3475 PyObject *key, *value;
3476 Py_ssize_t len;
3477 char *s;
3478
3479 if ((len = unpickler_readline(self, &s)) < 0)
3480 return -1;
3481 if (len < 2)
3482 return bad_readline();
3483
3484 key = PyLong_FromString(s, NULL, 10);
3485 if (key == NULL)
3486 return -1;
3487
3488 value = PyDict_GetItemWithError(self->memo, key);
3489 if (value == NULL) {
3490 if (!PyErr_Occurred())
3491 PyErr_SetObject(PyExc_KeyError, key);
3492 Py_DECREF(key);
3493 return -1;
3494 }
3495 Py_DECREF(key);
3496
3497 PDATA_APPEND(self->stack, value, -1);
3498 return 0;
3499}
3500
3501static int
3502load_binget(UnpicklerObject *self)
3503{
3504 PyObject *key, *value;
3505 char *s;
3506
3507 if (unpickler_read(self, &s, 1) < 0)
3508 return -1;
3509
3510 /* Here, the unsigned cast is necessary to avoid negative values. */
3511 key = PyLong_FromLong((long)(unsigned char)s[0]);
3512 if (key == NULL)
3513 return -1;
3514
3515 value = PyDict_GetItemWithError(self->memo, key);
3516 if (value == NULL) {
3517 if (!PyErr_Occurred())
3518 PyErr_SetObject(PyExc_KeyError, key);
3519 Py_DECREF(key);
3520 return -1;
3521 }
3522 Py_DECREF(key);
3523
3524 PDATA_APPEND(self->stack, value, -1);
3525 return 0;
3526}
3527
3528static int
3529load_long_binget(UnpicklerObject *self)
3530{
3531 PyObject *key, *value;
3532 char *s;
3533 long k;
3534
3535 if (unpickler_read(self, &s, 4) < 0)
3536 return -1;
3537
3538 k = (long)(unsigned char)s[0];
3539 k |= (long)(unsigned char)s[1] << 8;
3540 k |= (long)(unsigned char)s[2] << 16;
3541 k |= (long)(unsigned char)s[3] << 24;
3542
3543 key = PyLong_FromLong(k);
3544 if (key == NULL)
3545 return -1;
3546
3547 value = PyDict_GetItemWithError(self->memo, key);
3548 if (value == NULL) {
3549 if (!PyErr_Occurred())
3550 PyErr_SetObject(PyExc_KeyError, key);
3551 Py_DECREF(key);
3552 return -1;
3553 }
3554 Py_DECREF(key);
3555
3556 PDATA_APPEND(self->stack, value, -1);
3557 return 0;
3558}
3559
3560/* Push an object from the extension registry (EXT[124]). nbytes is
3561 * the number of bytes following the opcode, holding the index (code) value.
3562 */
3563static int
3564load_extension(UnpicklerObject *self, int nbytes)
3565{
3566 char *codebytes; /* the nbytes bytes after the opcode */
3567 long code; /* calc_binint returns long */
3568 PyObject *py_code; /* code as a Python int */
3569 PyObject *obj; /* the object to push */
3570 PyObject *pair; /* (module_name, class_name) */
3571 PyObject *module_name, *class_name;
3572
3573 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3574 if (unpickler_read(self, &codebytes, nbytes) < 0)
3575 return -1;
3576 code = calc_binint(codebytes, nbytes);
3577 if (code <= 0) { /* note that 0 is forbidden */
3578 /* Corrupt or hostile pickle. */
3579 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3580 return -1;
3581 }
3582
3583 /* Look for the code in the cache. */
3584 py_code = PyLong_FromLong(code);
3585 if (py_code == NULL)
3586 return -1;
3587 obj = PyDict_GetItem(extension_cache, py_code);
3588 if (obj != NULL) {
3589 /* Bingo. */
3590 Py_DECREF(py_code);
3591 PDATA_APPEND(self->stack, obj, -1);
3592 return 0;
3593 }
3594
3595 /* Look up the (module_name, class_name) pair. */
3596 pair = PyDict_GetItem(inverted_registry, py_code);
3597 if (pair == NULL) {
3598 Py_DECREF(py_code);
3599 PyErr_Format(PyExc_ValueError, "unregistered extension "
3600 "code %ld", code);
3601 return -1;
3602 }
3603 /* Since the extension registry is manipulable via Python code,
3604 * confirm that pair is really a 2-tuple of strings.
3605 */
3606 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3607 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3608 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3609 Py_DECREF(py_code);
3610 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3611 "isn't a 2-tuple of strings", code);
3612 return -1;
3613 }
3614 /* Load the object. */
3615 obj = find_class(self, module_name, class_name);
3616 if (obj == NULL) {
3617 Py_DECREF(py_code);
3618 return -1;
3619 }
3620 /* Cache code -> obj. */
3621 code = PyDict_SetItem(extension_cache, py_code, obj);
3622 Py_DECREF(py_code);
3623 if (code < 0) {
3624 Py_DECREF(obj);
3625 return -1;
3626 }
3627 PDATA_PUSH(self->stack, obj, -1);
3628 return 0;
3629}
3630
3631static int
3632load_put(UnpicklerObject *self)
3633{
3634 PyObject *key, *value;
3635 Py_ssize_t len;
3636 char *s;
3637 int x;
3638
3639 if ((len = unpickler_readline(self, &s)) < 0)
3640 return -1;
3641 if (len < 2)
3642 return bad_readline();
3643 if ((x = self->stack->length) <= 0)
3644 return stack_underflow();
3645
3646 key = PyLong_FromString(s, NULL, 10);
3647 if (key == NULL)
3648 return -1;
3649 value = self->stack->data[x - 1];
3650
3651 x = PyDict_SetItem(self->memo, key, value);
3652 Py_DECREF(key);
3653 return x;
3654}
3655
3656static int
3657load_binput(UnpicklerObject *self)
3658{
3659 PyObject *key, *value;
3660 char *s;
3661 int x;
3662
3663 if (unpickler_read(self, &s, 1) < 0)
3664 return -1;
3665 if ((x = self->stack->length) <= 0)
3666 return stack_underflow();
3667
3668 key = PyLong_FromLong((long)(unsigned char)s[0]);
3669 if (key == NULL)
3670 return -1;
3671 value = self->stack->data[x - 1];
3672
3673 x = PyDict_SetItem(self->memo, key, value);
3674 Py_DECREF(key);
3675 return x;
3676}
3677
3678static int
3679load_long_binput(UnpicklerObject *self)
3680{
3681 PyObject *key, *value;
3682 long k;
3683 char *s;
3684 int x;
3685
3686 if (unpickler_read(self, &s, 4) < 0)
3687 return -1;
3688 if ((x = self->stack->length) <= 0)
3689 return stack_underflow();
3690
3691 k = (long)(unsigned char)s[0];
3692 k |= (long)(unsigned char)s[1] << 8;
3693 k |= (long)(unsigned char)s[2] << 16;
3694 k |= (long)(unsigned char)s[3] << 24;
3695
3696 key = PyLong_FromLong(k);
3697 if (key == NULL)
3698 return -1;
3699 value = self->stack->data[x - 1];
3700
3701 x = PyDict_SetItem(self->memo, key, value);
3702 Py_DECREF(key);
3703 return x;
3704}
3705
3706static int
3707do_append(UnpicklerObject *self, int x)
3708{
3709 PyObject *value;
3710 PyObject *list;
3711 int len, i;
3712
3713 len = self->stack->length;
3714 if (x > len || x <= 0)
3715 return stack_underflow();
3716 if (len == x) /* nothing to do */
3717 return 0;
3718
3719 list = self->stack->data[x - 1];
3720
3721 if (PyList_Check(list)) {
3722 PyObject *slice;
3723 Py_ssize_t list_len;
3724
3725 slice = Pdata_poplist(self->stack, x);
3726 if (!slice)
3727 return -1;
3728 list_len = PyList_GET_SIZE(list);
3729 i = PyList_SetSlice(list, list_len, list_len, slice);
3730 Py_DECREF(slice);
3731 return i;
3732 }
3733 else {
3734 PyObject *append_func;
3735
3736 append_func = PyObject_GetAttrString(list, "append");
3737 if (append_func == NULL)
3738 return -1;
3739 for (i = x; i < len; i++) {
3740 PyObject *result;
3741
3742 value = self->stack->data[i];
3743 result = unpickler_call(self, append_func, value);
3744 if (result == NULL) {
3745 Pdata_clear(self->stack, i + 1);
3746 self->stack->length = x;
3747 return -1;
3748 }
3749 Py_DECREF(result);
3750 }
3751 self->stack->length = x;
3752 }
3753
3754 return 0;
3755}
3756
3757static int
3758load_append(UnpicklerObject *self)
3759{
3760 return do_append(self, self->stack->length - 1);
3761}
3762
3763static int
3764load_appends(UnpicklerObject *self)
3765{
3766 return do_append(self, marker(self));
3767}
3768
3769static int
3770do_setitems(UnpicklerObject *self, int x)
3771{
3772 PyObject *value, *key;
3773 PyObject *dict;
3774 int len, i;
3775 int status = 0;
3776
3777 len = self->stack->length;
3778 if (x > len || x <= 0)
3779 return stack_underflow();
3780 if (len == x) /* nothing to do */
3781 return 0;
3782 if ((len - x) % 2 != 0) {
3783 /* Currupt or hostile pickle -- we never write one like this. */
3784 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3785 return -1;
3786 }
3787
3788 /* Here, dict does not actually need to be a PyDict; it could be anything
3789 that supports the __setitem__ attribute. */
3790 dict = self->stack->data[x - 1];
3791
3792 for (i = x + 1; i < len; i += 2) {
3793 key = self->stack->data[i - 1];
3794 value = self->stack->data[i];
3795 if (PyObject_SetItem(dict, key, value) < 0) {
3796 status = -1;
3797 break;
3798 }
3799 }
3800
3801 Pdata_clear(self->stack, x);
3802 return status;
3803}
3804
3805static int
3806load_setitem(UnpicklerObject *self)
3807{
3808 return do_setitems(self, self->stack->length - 2);
3809}
3810
3811static int
3812load_setitems(UnpicklerObject *self)
3813{
3814 return do_setitems(self, marker(self));
3815}
3816
3817static int
3818load_build(UnpicklerObject *self)
3819{
3820 PyObject *state, *inst, *slotstate;
3821 PyObject *setstate;
3822 int status = 0;
3823
3824 /* Stack is ... instance, state. We want to leave instance at
3825 * the stack top, possibly mutated via instance.__setstate__(state).
3826 */
3827 if (self->stack->length < 2)
3828 return stack_underflow();
3829
3830 PDATA_POP(self->stack, state);
3831 if (state == NULL)
3832 return -1;
3833
3834 inst = self->stack->data[self->stack->length - 1];
3835
3836 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003837 if (setstate == NULL) {
3838 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3839 PyErr_Clear();
3840 else
3841 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003842 }
3843 else {
3844 PyObject *result;
3845
3846 /* The explicit __setstate__ is responsible for everything. */
3847 result = unpickler_call(self, setstate, state);
3848 Py_DECREF(setstate);
3849 if (result == NULL)
3850 return -1;
3851 Py_DECREF(result);
3852 return 0;
3853 }
3854
3855 /* A default __setstate__. First see whether state embeds a
3856 * slot state dict too (a proto 2 addition).
3857 */
3858 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3859 PyObject *tmp = state;
3860
3861 state = PyTuple_GET_ITEM(tmp, 0);
3862 slotstate = PyTuple_GET_ITEM(tmp, 1);
3863 Py_INCREF(state);
3864 Py_INCREF(slotstate);
3865 Py_DECREF(tmp);
3866 }
3867 else
3868 slotstate = NULL;
3869
3870 /* Set inst.__dict__ from the state dict (if any). */
3871 if (state != Py_None) {
3872 PyObject *dict;
3873
3874 if (!PyDict_Check(state)) {
3875 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3876 goto error;
3877 }
3878 dict = PyObject_GetAttrString(inst, "__dict__");
3879 if (dict == NULL)
3880 goto error;
3881
3882 PyDict_Update(dict, state);
3883 Py_DECREF(dict);
3884 }
3885
3886 /* Also set instance attributes from the slotstate dict (if any). */
3887 if (slotstate != NULL) {
3888 PyObject *d_key, *d_value;
3889 Py_ssize_t i;
3890
3891 if (!PyDict_Check(slotstate)) {
3892 PyErr_SetString(UnpicklingError,
3893 "slot state is not a dictionary");
3894 goto error;
3895 }
3896 i = 0;
3897 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3898 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3899 goto error;
3900 }
3901 }
3902
3903 if (0) {
3904 error:
3905 status = -1;
3906 }
3907
3908 Py_DECREF(state);
3909 Py_XDECREF(slotstate);
3910 return status;
3911}
3912
3913static int
3914load_mark(UnpicklerObject *self)
3915{
3916
3917 /* Note that we split the (pickle.py) stack into two stacks, an
3918 * object stack and a mark stack. Here we push a mark onto the
3919 * mark stack.
3920 */
3921
3922 if ((self->num_marks + 1) >= self->marks_size) {
3923 size_t alloc;
3924 int *marks;
3925
3926 /* Use the size_t type to check for overflow. */
3927 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00003928 if (alloc > PY_SSIZE_T_MAX ||
3929 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003930 PyErr_NoMemory();
3931 return -1;
3932 }
3933
3934 if (self->marks == NULL)
3935 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
3936 else
3937 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
3938 if (marks == NULL) {
3939 PyErr_NoMemory();
3940 return -1;
3941 }
3942 self->marks = marks;
3943 self->marks_size = (Py_ssize_t)alloc;
3944 }
3945
3946 self->marks[self->num_marks++] = self->stack->length;
3947
3948 return 0;
3949}
3950
3951static int
3952load_reduce(UnpicklerObject *self)
3953{
3954 PyObject *callable = NULL;
3955 PyObject *argtup = NULL;
3956 PyObject *obj = NULL;
3957
3958 PDATA_POP(self->stack, argtup);
3959 if (argtup == NULL)
3960 return -1;
3961 PDATA_POP(self->stack, callable);
3962 if (callable) {
3963 obj = instantiate(callable, argtup);
3964 Py_DECREF(callable);
3965 }
3966 Py_DECREF(argtup);
3967
3968 if (obj == NULL)
3969 return -1;
3970
3971 PDATA_PUSH(self->stack, obj, -1);
3972 return 0;
3973}
3974
3975/* Just raises an error if we don't know the protocol specified. PROTO
3976 * is the first opcode for protocols >= 2.
3977 */
3978static int
3979load_proto(UnpicklerObject *self)
3980{
3981 char *s;
3982 int i;
3983
3984 if (unpickler_read(self, &s, 1) < 0)
3985 return -1;
3986
3987 i = (unsigned char)s[0];
3988 if (i <= HIGHEST_PROTOCOL)
3989 return 0;
3990
3991 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
3992 return -1;
3993}
3994
3995static PyObject *
3996load(UnpicklerObject *self)
3997{
3998 PyObject *err;
3999 PyObject *value = NULL;
4000 char *s;
4001
4002 self->num_marks = 0;
4003 if (self->stack->length)
4004 Pdata_clear(self->stack, 0);
4005
4006 /* Convenient macros for the dispatch while-switch loop just below. */
4007#define OP(opcode, load_func) \
4008 case opcode: if (load_func(self) < 0) break; continue;
4009
4010#define OP_ARG(opcode, load_func, arg) \
4011 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4012
4013 while (1) {
4014 if (unpickler_read(self, &s, 1) < 0)
4015 break;
4016
4017 switch ((enum opcode)s[0]) {
4018 OP(NONE, load_none)
4019 OP(BININT, load_binint)
4020 OP(BININT1, load_binint1)
4021 OP(BININT2, load_binint2)
4022 OP(INT, load_int)
4023 OP(LONG, load_long)
4024 OP_ARG(LONG1, load_counted_long, 1)
4025 OP_ARG(LONG4, load_counted_long, 4)
4026 OP(FLOAT, load_float)
4027 OP(BINFLOAT, load_binfloat)
4028 OP(BINBYTES, load_binbytes)
4029 OP(SHORT_BINBYTES, load_short_binbytes)
4030 OP(BINSTRING, load_binstring)
4031 OP(SHORT_BINSTRING, load_short_binstring)
4032 OP(STRING, load_string)
4033 OP(UNICODE, load_unicode)
4034 OP(BINUNICODE, load_binunicode)
4035 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4036 OP_ARG(TUPLE1, load_counted_tuple, 1)
4037 OP_ARG(TUPLE2, load_counted_tuple, 2)
4038 OP_ARG(TUPLE3, load_counted_tuple, 3)
4039 OP(TUPLE, load_tuple)
4040 OP(EMPTY_LIST, load_empty_list)
4041 OP(LIST, load_list)
4042 OP(EMPTY_DICT, load_empty_dict)
4043 OP(DICT, load_dict)
4044 OP(OBJ, load_obj)
4045 OP(INST, load_inst)
4046 OP(NEWOBJ, load_newobj)
4047 OP(GLOBAL, load_global)
4048 OP(APPEND, load_append)
4049 OP(APPENDS, load_appends)
4050 OP(BUILD, load_build)
4051 OP(DUP, load_dup)
4052 OP(BINGET, load_binget)
4053 OP(LONG_BINGET, load_long_binget)
4054 OP(GET, load_get)
4055 OP(MARK, load_mark)
4056 OP(BINPUT, load_binput)
4057 OP(LONG_BINPUT, load_long_binput)
4058 OP(PUT, load_put)
4059 OP(POP, load_pop)
4060 OP(POP_MARK, load_pop_mark)
4061 OP(SETITEM, load_setitem)
4062 OP(SETITEMS, load_setitems)
4063 OP(PERSID, load_persid)
4064 OP(BINPERSID, load_binpersid)
4065 OP(REDUCE, load_reduce)
4066 OP(PROTO, load_proto)
4067 OP_ARG(EXT1, load_extension, 1)
4068 OP_ARG(EXT2, load_extension, 2)
4069 OP_ARG(EXT4, load_extension, 4)
4070 OP_ARG(NEWTRUE, load_bool, Py_True)
4071 OP_ARG(NEWFALSE, load_bool, Py_False)
4072
4073 case STOP:
4074 break;
4075
4076 case '\0':
4077 PyErr_SetNone(PyExc_EOFError);
4078 return NULL;
4079
4080 default:
4081 PyErr_Format(UnpicklingError,
4082 "invalid load key, '%c'.", s[0]);
4083 return NULL;
4084 }
4085
4086 break; /* and we are done! */
4087 }
4088
4089 /* XXX: It is not clear what this is actually for. */
4090 if ((err = PyErr_Occurred())) {
4091 if (err == PyExc_EOFError) {
4092 PyErr_SetNone(PyExc_EOFError);
4093 }
4094 return NULL;
4095 }
4096
4097 PDATA_POP(self->stack, value);
4098 return value;
4099}
4100
4101PyDoc_STRVAR(Unpickler_load_doc,
4102"load() -> object. Load a pickle."
4103"\n"
4104"Read a pickled object representation from the open file object given in\n"
4105"the constructor, and return the reconstituted object hierarchy specified\n"
4106"therein.\n");
4107
4108static PyObject *
4109Unpickler_load(UnpicklerObject *self)
4110{
4111 /* Check whether the Unpickler was initialized correctly. This prevents
4112 segfaulting if a subclass overridden __init__ with a function that does
4113 not call Unpickler.__init__(). Here, we simply ensure that self->read
4114 is not NULL. */
4115 if (self->read == NULL) {
4116 PyErr_Format(UnpicklingError,
4117 "Unpickler.__init__() was not called by %s.__init__()",
4118 Py_TYPE(self)->tp_name);
4119 return NULL;
4120 }
4121
4122 return load(self);
4123}
4124
4125/* The name of find_class() is misleading. In newer pickle protocols, this
4126 function is used for loading any global (i.e., functions), not just
4127 classes. The name is kept only for backward compatibility. */
4128
4129PyDoc_STRVAR(Unpickler_find_class_doc,
4130"find_class(module_name, global_name) -> object.\n"
4131"\n"
4132"Return an object from a specified module, importing the module if\n"
4133"necessary. Subclasses may override this method (e.g. to restrict\n"
4134"unpickling of arbitrary classes and functions).\n"
4135"\n"
4136"This method is called whenever a class or a function object is\n"
4137"needed. Both arguments passed are str objects.\n");
4138
4139static PyObject *
4140Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4141{
4142 PyObject *global;
4143 PyObject *modules_dict;
4144 PyObject *module;
4145 PyObject *module_name, *global_name;
4146
4147 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4148 &module_name, &global_name))
4149 return NULL;
4150
4151 modules_dict = PySys_GetObject("modules");
4152 if (modules_dict == NULL)
4153 return NULL;
4154
4155 module = PyDict_GetItem(modules_dict, module_name);
4156 if (module == NULL) {
4157 module = PyImport_Import(module_name);
4158 if (module == NULL)
4159 return NULL;
4160 global = PyObject_GetAttr(module, global_name);
4161 Py_DECREF(module);
4162 }
4163 else {
4164 global = PyObject_GetAttr(module, global_name);
4165 }
4166 return global;
4167}
4168
4169static struct PyMethodDef Unpickler_methods[] = {
4170 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4171 Unpickler_load_doc},
4172 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4173 Unpickler_find_class_doc},
4174 {NULL, NULL} /* sentinel */
4175};
4176
4177static void
4178Unpickler_dealloc(UnpicklerObject *self)
4179{
4180 PyObject_GC_UnTrack((PyObject *)self);
4181 Py_XDECREF(self->readline);
4182 Py_XDECREF(self->read);
4183 Py_XDECREF(self->memo);
4184 Py_XDECREF(self->stack);
4185 Py_XDECREF(self->pers_func);
4186 Py_XDECREF(self->arg);
4187 Py_XDECREF(self->last_string);
4188
4189 PyMem_Free(self->marks);
4190 free(self->encoding);
4191 free(self->errors);
4192
4193 Py_TYPE(self)->tp_free((PyObject *)self);
4194}
4195
4196static int
4197Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4198{
4199 Py_VISIT(self->readline);
4200 Py_VISIT(self->read);
4201 Py_VISIT(self->memo);
4202 Py_VISIT(self->stack);
4203 Py_VISIT(self->pers_func);
4204 Py_VISIT(self->arg);
4205 Py_VISIT(self->last_string);
4206 return 0;
4207}
4208
4209static int
4210Unpickler_clear(UnpicklerObject *self)
4211{
4212 Py_CLEAR(self->readline);
4213 Py_CLEAR(self->read);
4214 Py_CLEAR(self->memo);
4215 Py_CLEAR(self->stack);
4216 Py_CLEAR(self->pers_func);
4217 Py_CLEAR(self->arg);
4218 Py_CLEAR(self->last_string);
4219
4220 PyMem_Free(self->marks);
4221 self->marks = NULL;
4222 free(self->encoding);
4223 self->encoding = NULL;
4224 free(self->errors);
4225 self->errors = NULL;
4226
4227 return 0;
4228}
4229
4230PyDoc_STRVAR(Unpickler_doc,
4231"Unpickler(file, *, encoding='ASCII', errors='strict')"
4232"\n"
4233"This takes a binary file for reading a pickle data stream.\n"
4234"\n"
4235"The protocol version of the pickle is detected automatically, so no\n"
4236"proto argument is needed.\n"
4237"\n"
4238"The file-like object must have two methods, a read() method\n"
4239"that takes an integer argument, and a readline() method that\n"
4240"requires no arguments. Both methods should return bytes.\n"
4241"Thus file-like object can be a binary file object opened for\n"
4242"reading, a BytesIO object, or any other custom object that\n"
4243"meets this interface.\n"
4244"\n"
4245"Optional keyword arguments are encoding and errors, which are\n"
4246"used to decode 8-bit string instances pickled by Python 2.x.\n"
4247"These default to 'ASCII' and 'strict', respectively.\n");
4248
4249static int
4250Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4251{
4252 static char *kwlist[] = {"file", "encoding", "errors", 0};
4253 PyObject *file;
4254 char *encoding = NULL;
4255 char *errors = NULL;
4256
4257 /* XXX: That is an horrible error message. But, I don't know how to do
4258 better... */
4259 if (Py_SIZE(args) != 1) {
4260 PyErr_Format(PyExc_TypeError,
4261 "%s takes exactly one positional argument (%zd given)",
4262 Py_TYPE(self)->tp_name, Py_SIZE(args));
4263 return -1;
4264 }
4265
4266 /* Arguments parsing needs to be done in the __init__() method to allow
4267 subclasses to define their own __init__() method, which may (or may
4268 not) support Unpickler arguments. However, this means we need to be
4269 extra careful in the other Unpickler methods, since a subclass could
4270 forget to call Unpickler.__init__() thus breaking our internal
4271 invariants. */
4272 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4273 &file, &encoding, &errors))
4274 return -1;
4275
4276 /* In case of multiple __init__() calls, clear previous content. */
4277 if (self->read != NULL)
4278 (void)Unpickler_clear(self);
4279
4280 self->read = PyObject_GetAttrString(file, "read");
4281 self->readline = PyObject_GetAttrString(file, "readline");
4282 if (self->readline == NULL || self->read == NULL)
4283 return -1;
4284
4285 if (encoding == NULL)
4286 encoding = "ASCII";
4287 if (errors == NULL)
4288 errors = "strict";
4289
4290 self->encoding = strdup(encoding);
4291 self->errors = strdup(errors);
4292 if (self->encoding == NULL || self->errors == NULL) {
4293 PyErr_NoMemory();
4294 return -1;
4295 }
4296
4297 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4298 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4299 "persistent_load");
4300 if (self->pers_func == NULL)
4301 return -1;
4302 }
4303 else {
4304 self->pers_func = NULL;
4305 }
4306
4307 self->stack = (Pdata *)Pdata_New();
4308 if (self->stack == NULL)
4309 return -1;
4310
4311 self->memo = PyDict_New();
4312 if (self->memo == NULL)
4313 return -1;
4314
4315 return 0;
4316}
4317
4318static PyObject *
4319Unpickler_get_memo(UnpicklerObject *self)
4320{
4321 if (self->memo == NULL)
4322 PyErr_SetString(PyExc_AttributeError, "memo");
4323 else
4324 Py_INCREF(self->memo);
4325 return self->memo;
4326}
4327
4328static int
4329Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4330{
4331 PyObject *tmp;
4332
4333 if (value == NULL) {
4334 PyErr_SetString(PyExc_TypeError,
4335 "attribute deletion is not supported");
4336 return -1;
4337 }
4338 if (!PyDict_Check(value)) {
4339 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4340 return -1;
4341 }
4342
4343 tmp = self->memo;
4344 Py_INCREF(value);
4345 self->memo = value;
4346 Py_XDECREF(tmp);
4347
4348 return 0;
4349}
4350
4351static PyObject *
4352Unpickler_get_persload(UnpicklerObject *self)
4353{
4354 if (self->pers_func == NULL)
4355 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4356 else
4357 Py_INCREF(self->pers_func);
4358 return self->pers_func;
4359}
4360
4361static int
4362Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4363{
4364 PyObject *tmp;
4365
4366 if (value == NULL) {
4367 PyErr_SetString(PyExc_TypeError,
4368 "attribute deletion is not supported");
4369 return -1;
4370 }
4371 if (!PyCallable_Check(value)) {
4372 PyErr_SetString(PyExc_TypeError,
4373 "persistent_load must be a callable taking "
4374 "one argument");
4375 return -1;
4376 }
4377
4378 tmp = self->pers_func;
4379 Py_INCREF(value);
4380 self->pers_func = value;
4381 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4382
4383 return 0;
4384}
4385
4386static PyGetSetDef Unpickler_getsets[] = {
4387 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4388 {"persistent_load", (getter)Unpickler_get_persload,
4389 (setter)Unpickler_set_persload},
4390 {NULL}
4391};
4392
4393static PyTypeObject Unpickler_Type = {
4394 PyVarObject_HEAD_INIT(NULL, 0)
4395 "_pickle.Unpickler", /*tp_name*/
4396 sizeof(UnpicklerObject), /*tp_basicsize*/
4397 0, /*tp_itemsize*/
4398 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4399 0, /*tp_print*/
4400 0, /*tp_getattr*/
4401 0, /*tp_setattr*/
4402 0, /*tp_compare*/
4403 0, /*tp_repr*/
4404 0, /*tp_as_number*/
4405 0, /*tp_as_sequence*/
4406 0, /*tp_as_mapping*/
4407 0, /*tp_hash*/
4408 0, /*tp_call*/
4409 0, /*tp_str*/
4410 0, /*tp_getattro*/
4411 0, /*tp_setattro*/
4412 0, /*tp_as_buffer*/
4413 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4414 Unpickler_doc, /*tp_doc*/
4415 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4416 (inquiry)Unpickler_clear, /*tp_clear*/
4417 0, /*tp_richcompare*/
4418 0, /*tp_weaklistoffset*/
4419 0, /*tp_iter*/
4420 0, /*tp_iternext*/
4421 Unpickler_methods, /*tp_methods*/
4422 0, /*tp_members*/
4423 Unpickler_getsets, /*tp_getset*/
4424 0, /*tp_base*/
4425 0, /*tp_dict*/
4426 0, /*tp_descr_get*/
4427 0, /*tp_descr_set*/
4428 0, /*tp_dictoffset*/
4429 (initproc)Unpickler_init, /*tp_init*/
4430 PyType_GenericAlloc, /*tp_alloc*/
4431 PyType_GenericNew, /*tp_new*/
4432 PyObject_GC_Del, /*tp_free*/
4433 0, /*tp_is_gc*/
4434};
4435
4436static int
4437init_stuff(void)
4438{
4439 PyObject *copyreg;
4440
4441 copyreg = PyImport_ImportModule("copyreg");
4442 if (!copyreg)
4443 return -1;
4444
4445 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4446 if (!dispatch_table)
4447 goto error;
4448
4449 extension_registry = \
4450 PyObject_GetAttrString(copyreg, "_extension_registry");
4451 if (!extension_registry)
4452 goto error;
4453
4454 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4455 if (!inverted_registry)
4456 goto error;
4457
4458 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4459 if (!extension_cache)
4460 goto error;
4461
4462 Py_DECREF(copyreg);
4463
4464 empty_tuple = PyTuple_New(0);
4465 if (empty_tuple == NULL)
4466 return -1;
4467
4468 two_tuple = PyTuple_New(2);
4469 if (two_tuple == NULL)
4470 return -1;
4471 /* We use this temp container with no regard to refcounts, or to
4472 * keeping containees alive. Exempt from GC, because we don't
4473 * want anything looking at two_tuple() by magic.
4474 */
4475 PyObject_GC_UnTrack(two_tuple);
4476
4477 return 0;
4478
4479 error:
4480 Py_DECREF(copyreg);
4481 return -1;
4482}
4483
4484static struct PyModuleDef _picklemodule = {
4485 PyModuleDef_HEAD_INIT,
4486 "_pickle",
4487 pickle_module_doc,
4488 -1,
4489 NULL,
4490 NULL,
4491 NULL,
4492 NULL,
4493 NULL
4494};
4495
4496PyMODINIT_FUNC
4497PyInit__pickle(void)
4498{
4499 PyObject *m;
4500
4501 if (PyType_Ready(&Unpickler_Type) < 0)
4502 return NULL;
4503 if (PyType_Ready(&Pickler_Type) < 0)
4504 return NULL;
4505 if (PyType_Ready(&Pdata_Type) < 0)
4506 return NULL;
4507
4508 /* Create the module and add the functions. */
4509 m = PyModule_Create(&_picklemodule);
4510 if (m == NULL)
4511 return NULL;
4512
4513 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4514 return NULL;
4515 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4516 return NULL;
4517
4518 /* Initialize the exceptions. */
4519 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4520 if (PickleError == NULL)
4521 return NULL;
4522 PicklingError = \
4523 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4524 if (PicklingError == NULL)
4525 return NULL;
4526 UnpicklingError = \
4527 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4528 if (UnpicklingError == NULL)
4529 return NULL;
4530
4531 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4532 return NULL;
4533 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4534 return NULL;
4535 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4536 return NULL;
4537
4538 if (init_stuff() < 0)
4539 return NULL;
4540
4541 return m;
4542}