blob: d015bafebebeda49b0afefcbdc4cbcb21f661a7f [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
/* Docstring text for this module, bound to the name pickle_module_doc. */
PyDoc_STRVAR(pickle_module_doc,
"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this implementation.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
13
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Every opcode is spelled as a single character constant.
   NOTE(review): the protocol 2 opcodes ('\x80' .. '\x8b') have negative
   values on platforms where plain char is signed -- confirm that every
   consumer compares them against a (plain) char and not an unsigned byte. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each string is a complete INT ('I') record including its trailing newline.
 * The #undef lines drop any TRUE/FALSE macro that was already defined.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
/* Exception classes for pickle.  These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError;
static PyObject *PicklingError;
static PyObject *UnpicklingError;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple. */
static PyObject *empty_tuple;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
/* Internal data type used as the unpickling stack.

   `data` is a PyMem-allocated array of `size` slots, of which the first
   `length` hold object references (see Pdata_New, Pdata_grow and
   Pdata_dealloc). */
typedef struct {
    PyObject_HEAD
    int length;         /* number of initial slots in data currently used */
    int size;           /* number of slots in data allocated */
    PyObject **data;    /* the slot array itself */
} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
/* Type object for Pdata.  Only the name, the instance size and the
   destructor are filled in; every remaining slot is left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the *enclosing function* returns ER.
   NOTE(review): when the push fails the reference to O is not released
   here -- confirm whether callers expect the macro to drop it. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference taken here is given back (otherwise it would
   leak) and the *enclosing function* returns ER.  O is evaluated more than
   once, so it must be free of side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects.  memo_put()
                                   stores {id(obj): (memo_id, obj)}. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached 1-tuple managed by ARG_TUP() and
                                   FREE_ARG_TUP(); can be NULL. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int nesting;                /* Current nesting level, this is to guard
                                   save() from going into infinite recursion
                                   and segfaulting. */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often.  pickler_write() keeps at most
                                   WRITE_BUF_SIZE bytes in it. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with self-referential
                                   objects. */
    int fast_nesting;           /* Nesting counter maintained by
                                   fast_save_enter()/fast_save_leave(); set
                                   to -1 on an error exit. */
    PyObject *fast_memo;        /* Dict of container ids seen beyond
                                   FAST_NESTING_LIMIT, created lazily by
                                   fast_save_enter(). */
} PicklerObject;
323
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the stream being read */
    PyObject *read;             /* read() method of the stream being read */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached 1-tuple managed by ARG_TUP() and
                                   FREE_ARG_TUP(); can be NULL. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object returned
                                   by read() or readline(); it keeps the
                                   buffer handed to callers alive. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
} UnpicklerObject;
347
/* Forward declarations: save() and save_reduce() are called by the save_*()
   helpers that precede their definitions, and the two type objects are
   defined later in the file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
353
354
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once. */

/* Put obj into slot 0 of the cached 1-tuple self->arg, creating the tuple on
   first use and releasing whatever the slot held before.  The reference to
   obj is always consumed: it is stored in the tuple, or dropped when the
   tuple could not be created (self->arg is then still NULL). */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* Give up the cached tuple when something else also holds a reference to it
   (refcount > 1), so that a tuple kept alive elsewhere is never modified by
   a later ARG_TUP().  self->arg must not be NULL here. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
372
373/* A temporary cleaner API for fast single argument function call.
374
375 XXX: Does caching the argument tuple provides any real performance benefits?
376
377 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
378 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
379 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
380 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
381 (i.e, call PyTuple_New() and store the returned value in an array), to save
382 one second (wall clock time). Either ways, the loading time a pickle stream
383 large enough to generate this number of calls would be massively
384 overwhelmed by other factors, like I/O throughput, the GC traversal and
385 object allocation overhead. So, I really doubt these functions provide any
386 real benefits.
387
388 On the other hand, oprofile reports that pickle spends a lot of time in
389 these functions. But, that is probably more related to the function call
390 overhead, than the argument tuple allocation.
391
392 XXX: And, what is the reference behavior of these? Steal, borrow? At first
393 glance, it seems to steal the reference of 'arg' and borrow the reference
394 of 'func'.
395 */
396static PyObject *
397pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
398{
399 PyObject *result = NULL;
400
401 ARG_TUP(self, arg);
402 if (self->arg) {
403 result = PyObject_Call(func, self->arg, NULL);
404 FREE_ARG_TUP(self);
405 }
406 return result;
407}
408
409static PyObject *
410unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
411{
412 PyObject *result = NULL;
413
414 ARG_TUP(self, arg);
415 if (self->arg) {
416 result = PyObject_Call(func, self->arg, NULL);
417 FREE_ARG_TUP(self);
418 }
419 return result;
420}
421
422static Py_ssize_t
423pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
424{
425 PyObject *data, *result;
426
427 if (s == NULL) {
428 if (!(self->buf_size))
429 return 0;
430 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
431 if (data == NULL)
432 return -1;
433 }
434 else {
435 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
436 if (pickler_write(self, NULL, 0) < 0)
437 return -1;
438 }
439
440 if (n > WRITE_BUF_SIZE) {
441 if (!(data = PyBytes_FromStringAndSize(s, n)))
442 return -1;
443 }
444 else {
445 memcpy(self->write_buf + self->buf_size, s, n);
446 self->buf_size += n;
447 return n;
448 }
449 }
450
451 /* object with write method */
452 result = pickler_call(self, self->write, data);
453 if (result == NULL)
454 return -1;
455
456 Py_DECREF(result);
457 self->buf_size = 0;
458 return n;
459}
460
461/* XXX: These read/readline functions ought to be optimized. Buffered I/O
462 might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
464 */
465
466/* Read at least n characters from the input stream and set s to the current
467 reading position. */
468static Py_ssize_t
469unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
470{
471 PyObject *len;
472 PyObject *data;
473
474 len = PyLong_FromSsize_t(n);
475 if (len == NULL)
476 return -1;
477
478 data = unpickler_call(self, self->read, len);
479 if (data == NULL)
480 return -1;
481
482 /* XXX: Should bytearray be supported too? */
483 if (!PyBytes_Check(data)) {
484 PyErr_SetString(PyExc_ValueError,
485 "read() from the underlying stream did not"
486 "return bytes");
487 return -1;
488 }
489
490 Py_XDECREF(self->last_string);
491 self->last_string = data;
492
493 if (!(*s = PyBytes_AS_STRING(data)))
494 return -1;
495
496 return n;
497}
498
499static Py_ssize_t
500unpickler_readline(UnpicklerObject *self, char **s)
501{
502 PyObject *data;
503
504 data = PyObject_CallObject(self->readline, empty_tuple);
505 if (data == NULL)
506 return -1;
507
508 /* XXX: Should bytearray be supported too? */
509 if (!PyBytes_Check(data)) {
510 PyErr_SetString(PyExc_ValueError,
511 "readline() from the underlying stream did not"
512 "return bytes");
513 return -1;
514 }
515
516 Py_XDECREF(self->last_string);
517 self->last_string = data;
518
519 if (!(*s = PyBytes_AS_STRING(data)))
520 return -1;
521
522 return PyBytes_GET_SIZE(data);
523}
524
525/* Generate a GET opcode for an object stored in the memo. The 'key' argument
526 should be the address of the object as returned by PyLong_FromVoidPtr(). */
527static int
528memo_get(PicklerObject *self, PyObject *key)
529{
530 PyObject *value;
531 PyObject *memo_id;
532 long x;
533 char pdata[30];
534 int len;
535
536 value = PyDict_GetItemWithError(self->memo, key);
537 if (value == NULL) {
538 if (!PyErr_Occurred())
539 PyErr_SetObject(PyExc_KeyError, key);
540 return -1;
541 }
542
543 memo_id = PyTuple_GetItem(value, 0);
544 if (memo_id == NULL)
545 return -1;
546
547 if (!PyLong_Check(memo_id)) {
548 PyErr_SetString(PicklingError, "memo id must be an integer");
549 return -1;
550 }
551 x = PyLong_AsLong(memo_id);
552 if (x == -1 && PyErr_Occurred())
553 return -1;
554
555 if (!self->bin) {
556 pdata[0] = GET;
557 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
558 len = (int)strlen(pdata);
559 }
560 else {
561 if (x < 256) {
562 pdata[0] = BINGET;
563 pdata[1] = (unsigned char)(x & 0xff);
564 len = 2;
565 }
566 else if (x <= 0xffffffffL) {
567 pdata[0] = LONG_BINGET;
568 pdata[1] = (unsigned char)(x & 0xff);
569 pdata[2] = (unsigned char)((x >> 8) & 0xff);
570 pdata[3] = (unsigned char)((x >> 16) & 0xff);
571 pdata[4] = (unsigned char)((x >> 24) & 0xff);
572 len = 5;
573 }
574 else { /* unlikely */
575 PyErr_SetString(PicklingError,
576 "memo id too large for LONG_BINGET");
577 return -1;
578 }
579 }
580
581 if (pickler_write(self, pdata, len) < 0)
582 return -1;
583
584 return 0;
585}
586
587/* Store an object in the memo, assign it a new unique ID based on the number
588 of objects currently stored in the memo and generate a PUT opcode. */
589static int
590memo_put(PicklerObject *self, PyObject *obj)
591{
592 PyObject *key = NULL;
593 PyObject *memo_id = NULL;
594 PyObject *tuple = NULL;
595 long x;
596 char pdata[30];
597 int len;
598 int status = 0;
599
600 if (self->fast)
601 return 0;
602
603 key = PyLong_FromVoidPtr(obj);
604 if (key == NULL)
605 goto error;
606 if ((x = PyDict_Size(self->memo)) < 0)
607 goto error;
608 memo_id = PyLong_FromLong(x);
609 if (memo_id == NULL)
610 goto error;
611 tuple = PyTuple_New(2);
612 if (tuple == NULL)
613 goto error;
614
615 Py_INCREF(memo_id);
616 PyTuple_SET_ITEM(tuple, 0, memo_id);
617 Py_INCREF(obj);
618 PyTuple_SET_ITEM(tuple, 1, obj);
619 if (PyDict_SetItem(self->memo, key, tuple) < 0)
620 goto error;
621
622 if (!self->bin) {
623 pdata[0] = PUT;
624 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
625 len = strlen(pdata);
626 }
627 else {
628 if (x < 256) {
629 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000630 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000631 len = 2;
632 }
633 else if (x <= 0xffffffffL) {
634 pdata[0] = LONG_BINPUT;
635 pdata[1] = (unsigned char)(x & 0xff);
636 pdata[2] = (unsigned char)((x >> 8) & 0xff);
637 pdata[3] = (unsigned char)((x >> 16) & 0xff);
638 pdata[4] = (unsigned char)((x >> 24) & 0xff);
639 len = 5;
640 }
641 else { /* unlikely */
642 PyErr_SetString(PicklingError,
643 "memo id too large for LONG_BINPUT");
644 return -1;
645 }
646 }
647
648 if (pickler_write(self, pdata, len) < 0)
649 goto error;
650
651 if (0) {
652 error:
653 status = -1;
654 }
655
656 Py_XDECREF(key);
657 Py_XDECREF(memo_id);
658 Py_XDECREF(tuple);
659
660 return status;
661}
662
663static PyObject *
664whichmodule(PyObject *global, PyObject *global_name)
665{
666 Py_ssize_t i, j;
667 static PyObject *module_str = NULL;
668 static PyObject *main_str = NULL;
669 PyObject *module_name;
670 PyObject *modules_dict;
671 PyObject *module;
672 PyObject *obj;
673
674 if (module_str == NULL) {
675 module_str = PyUnicode_InternFromString("__module__");
676 if (module_str == NULL)
677 return NULL;
678 main_str = PyUnicode_InternFromString("__main__");
679 if (main_str == NULL)
680 return NULL;
681 }
682
683 module_name = PyObject_GetAttr(global, module_str);
684
685 /* In some rare cases (e.g., random.getrandbits), __module__ can be
686 None. If it is so, then search sys.modules for the module of
687 global. */
688 if (module_name == Py_None) {
689 Py_DECREF(module_name);
690 goto search;
691 }
692
693 if (module_name) {
694 return module_name;
695 }
696 if (PyErr_ExceptionMatches(PyExc_AttributeError))
697 PyErr_Clear();
698 else
699 return NULL;
700
701 search:
702 modules_dict = PySys_GetObject("modules");
703 if (modules_dict == NULL)
704 return NULL;
705
706 i = 0;
707 module_name = NULL;
708 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
709 if (PyObject_Compare(module_name, main_str) == 0)
710 continue;
711
712 obj = PyObject_GetAttr(module, global_name);
713 if (obj == NULL) {
714 if (PyErr_ExceptionMatches(PyExc_AttributeError))
715 PyErr_Clear();
716 else
717 return NULL;
718 continue;
719 }
720
721 if (obj != global) {
722 Py_DECREF(obj);
723 continue;
724 }
725
726 Py_DECREF(obj);
727 break;
728 }
729
730 /* If no module is found, use __main__. */
731 if (!j) {
732 module_name = main_str;
733 }
734
735 Py_INCREF(module_name);
736 return module_name;
737}
738
739/* fast_save_enter() and fast_save_leave() are guards against recursive
740 objects when Pickler is used with the "fast mode" (i.e., with object
741 memoization disabled). If the nesting of a list or dict object exceed
742 FAST_NESTING_LIMIT, these guards will start keeping an internal
743 reference to the seen list or dict objects and check whether these objects
744 are recursive. These are not strictly necessary, since save() has a
745 hard-coded recursion limit, but they give a nicer error message than the
746 typical RuntimeError. */
747static int
748fast_save_enter(PicklerObject *self, PyObject *obj)
749{
750 /* if fast_nesting < 0, we're doing an error exit. */
751 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
752 PyObject *key = NULL;
753 if (self->fast_memo == NULL) {
754 self->fast_memo = PyDict_New();
755 if (self->fast_memo == NULL) {
756 self->fast_nesting = -1;
757 return 0;
758 }
759 }
760 key = PyLong_FromVoidPtr(obj);
761 if (key == NULL)
762 return 0;
763 if (PyDict_GetItem(self->fast_memo, key)) {
764 Py_DECREF(key);
765 PyErr_Format(PyExc_ValueError,
766 "fast mode: can't pickle cyclic objects "
767 "including object type %.200s at %p",
768 obj->ob_type->tp_name, obj);
769 self->fast_nesting = -1;
770 return 0;
771 }
772 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
773 Py_DECREF(key);
774 self->fast_nesting = -1;
775 return 0;
776 }
777 Py_DECREF(key);
778 }
779 return 1;
780}
781
782static int
783fast_save_leave(PicklerObject *self, PyObject *obj)
784{
785 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
786 PyObject *key = PyLong_FromVoidPtr(obj);
787 if (key == NULL)
788 return 0;
789 if (PyDict_DelItem(self->fast_memo, key) < 0) {
790 Py_DECREF(key);
791 return 0;
792 }
793 Py_DECREF(key);
794 }
795 return 1;
796}
797
798static int
799save_none(PicklerObject *self, PyObject *obj)
800{
801 const char none_op = NONE;
802 if (pickler_write(self, &none_op, 1) < 0)
803 return -1;
804
805 return 0;
806}
807
808static int
809save_bool(PicklerObject *self, PyObject *obj)
810{
811 static const char *buf[2] = { FALSE, TRUE };
812 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
813 int p = (obj == Py_True);
814
815 if (self->proto >= 2) {
816 const char bool_op = p ? NEWTRUE : NEWFALSE;
817 if (pickler_write(self, &bool_op, 1) < 0)
818 return -1;
819 }
820 else if (pickler_write(self, buf[p], len[p]) < 0)
821 return -1;
822
823 return 0;
824}
825
826static int
827save_int(PicklerObject *self, long x)
828{
829 char pdata[32];
830 int len = 0;
831
832 if (!self->bin
833#if SIZEOF_LONG > 4
834 || x > 0x7fffffffL || x < -0x80000000L
835#endif
836 ) {
837 /* Text-mode pickle, or long too big to fit in the 4-byte
838 * signed BININT format: store as a string.
839 */
840 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
841 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
842 if (pickler_write(self, pdata, strlen(pdata)) < 0)
843 return -1;
844 }
845 else {
846 /* Binary pickle and x fits in a signed 4-byte int. */
847 pdata[1] = (unsigned char)(x & 0xff);
848 pdata[2] = (unsigned char)((x >> 8) & 0xff);
849 pdata[3] = (unsigned char)((x >> 16) & 0xff);
850 pdata[4] = (unsigned char)((x >> 24) & 0xff);
851
852 if ((pdata[4] == 0) && (pdata[3] == 0)) {
853 if (pdata[2] == 0) {
854 pdata[0] = BININT1;
855 len = 2;
856 }
857 else {
858 pdata[0] = BININT2;
859 len = 3;
860 }
861 }
862 else {
863 pdata[0] = BININT;
864 len = 5;
865 }
866
867 if (pickler_write(self, pdata, len) < 0)
868 return -1;
869 }
870
871 return 0;
872}
873
874static int
875save_long(PicklerObject *self, PyObject *obj)
876{
877 PyObject *repr = NULL;
878 Py_ssize_t size;
879 long val = PyLong_AsLong(obj);
880 int status = 0;
881
882 const char long_op = LONG;
883
884 if (val == -1 && PyErr_Occurred()) {
885 /* out of range for int pickling */
886 PyErr_Clear();
887 }
888 else
889 return save_int(self, val);
890
891 if (self->proto >= 2) {
892 /* Linear-time pickling. */
893 size_t nbits;
894 size_t nbytes;
895 unsigned char *pdata;
896 char header[5];
897 int i;
898 int sign = _PyLong_Sign(obj);
899
900 if (sign == 0) {
901 header[0] = LONG1;
902 header[1] = 0; /* It's 0 -- an empty bytestring. */
903 if (pickler_write(self, header, 2) < 0)
904 goto error;
905 return 0;
906 }
907 nbits = _PyLong_NumBits(obj);
908 if (nbits == (size_t)-1 && PyErr_Occurred())
909 goto error;
910 /* How many bytes do we need? There are nbits >> 3 full
911 * bytes of data, and nbits & 7 leftover bits. If there
912 * are any leftover bits, then we clearly need another
913 * byte. Wnat's not so obvious is that we *probably*
914 * need another byte even if there aren't any leftovers:
915 * the most-significant bit of the most-significant byte
916 * acts like a sign bit, and it's usually got a sense
917 * opposite of the one we need. The exception is longs
918 * of the form -(2**(8*j-1)) for j > 0. Such a long is
919 * its own 256's-complement, so has the right sign bit
920 * even without the extra byte. That's a pain to check
921 * for in advance, though, so we always grab an extra
922 * byte at the start, and cut it back later if possible.
923 */
924 nbytes = (nbits >> 3) + 1;
925 if (nbytes > INT_MAX) {
926 PyErr_SetString(PyExc_OverflowError,
927 "long too large to pickle");
928 goto error;
929 }
930 repr = PyUnicode_FromStringAndSize(NULL, (int)nbytes);
931 if (repr == NULL)
932 goto error;
933 pdata = (unsigned char *)PyUnicode_AsString(repr);
934 i = _PyLong_AsByteArray((PyLongObject *)obj,
935 pdata, nbytes,
936 1 /* little endian */ , 1 /* signed */ );
937 if (i < 0)
938 goto error;
939 /* If the long is negative, this may be a byte more than
940 * needed. This is so iff the MSB is all redundant sign
941 * bits.
942 */
943 if (sign < 0 &&
944 nbytes > 1 &&
945 pdata[nbytes - 1] == 0xff &&
946 (pdata[nbytes - 2] & 0x80) != 0) {
947 nbytes--;
948 }
949
950 if (nbytes < 256) {
951 header[0] = LONG1;
952 header[1] = (unsigned char)nbytes;
953 size = 2;
954 }
955 else {
956 header[0] = LONG4;
957 size = (int)nbytes;
958 for (i = 1; i < 5; i++) {
959 header[i] = (unsigned char)(size & 0xff);
960 size >>= 8;
961 }
962 size = 5;
963 }
964 if (pickler_write(self, header, size) < 0 ||
965 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
966 goto error;
967 }
968 else {
969 char *string;
970
971 /* proto < 2: write the repr and newline. This is quadratic-time
972 (in the number of digits), in both directions. */
973
974 repr = PyObject_Repr(obj);
975 if (repr == NULL)
976 goto error;
977
978 string = PyUnicode_AsStringAndSize(repr, &size);
979 if (string == NULL)
980 goto error;
981
982 if (pickler_write(self, &long_op, 1) < 0 ||
983 pickler_write(self, string, size) < 0 ||
984 pickler_write(self, "\n", 1) < 0)
985 goto error;
986 }
987
988 if (0) {
989 error:
990 status = -1;
991 }
992 Py_XDECREF(repr);
993
994 return status;
995}
996
997static int
998save_float(PicklerObject *self, PyObject *obj)
999{
1000 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1001
1002 if (self->bin) {
1003 char pdata[9];
1004 pdata[0] = BINFLOAT;
1005 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1006 return -1;
1007 if (pickler_write(self, pdata, 9) < 0)
1008 return -1;
1009 }
1010 else {
1011 char pdata[250];
1012 pdata[0] = FLOAT;
1013 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1014 /* Extend the formatted string with a newline character */
1015 strcat(pdata, "\n");
1016
1017 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1018 return -1;
1019 }
1020
1021 return 0;
1022}
1023
1024static int
1025save_bytes(PicklerObject *self, PyObject *obj)
1026{
1027 if (self->proto < 3) {
1028 /* Older pickle protocols do not have an opcode for pickling bytes
1029 objects. Therefore, we need to fake the copy protocol (i.e.,
1030 the __reduce__ method) to permit bytes object unpickling. */
1031 PyObject *reduce_value = NULL;
1032 PyObject *bytelist = NULL;
1033 int status;
1034
1035 bytelist = PySequence_List(obj);
1036 if (bytelist == NULL)
1037 return -1;
1038
1039 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1040 bytelist);
1041 if (reduce_value == NULL) {
1042 Py_DECREF(bytelist);
1043 return -1;
1044 }
1045
1046 /* save_reduce() will memoize the object automatically. */
1047 status = save_reduce(self, reduce_value, obj);
1048 Py_DECREF(reduce_value);
1049 Py_DECREF(bytelist);
1050 return status;
1051 }
1052 else {
1053 Py_ssize_t size;
1054 char header[5];
1055 int len;
1056
1057 size = PyBytes_Size(obj);
1058 if (size < 0)
1059 return -1;
1060
1061 if (size < 256) {
1062 header[0] = SHORT_BINBYTES;
1063 header[1] = (unsigned char)size;
1064 len = 2;
1065 }
1066 else if (size <= 0xffffffffL) {
1067 header[0] = BINBYTES;
1068 header[1] = (unsigned char)(size & 0xff);
1069 header[2] = (unsigned char)((size >> 8) & 0xff);
1070 header[3] = (unsigned char)((size >> 16) & 0xff);
1071 header[4] = (unsigned char)((size >> 24) & 0xff);
1072 len = 5;
1073 }
1074 else {
1075 return -1; /* string too large */
1076 }
1077
1078 if (pickler_write(self, header, len) < 0)
1079 return -1;
1080
1081 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1082 return -1;
1083
1084 if (memo_put(self, obj) < 0)
1085 return -1;
1086
1087 return 0;
1088 }
1089}
1090
1091/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1092 backslash and newline characters to \uXXXX escapes. */
1093static PyObject *
1094raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1095{
1096 PyObject *repr, *result;
1097 char *p;
1098 char *q;
1099
1100 static const char *hexdigits = "0123456789abcdef";
1101
1102#ifdef Py_UNICODE_WIDE
1103 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1104#else
1105 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1106#endif
1107 if (repr == NULL)
1108 return NULL;
1109 if (size == 0)
1110 goto done;
1111
1112 p = q = PyBytes_AS_STRING(repr);
1113 while (size-- > 0) {
1114 Py_UNICODE ch = *s++;
1115#ifdef Py_UNICODE_WIDE
1116 /* Map 32-bit characters to '\Uxxxxxxxx' */
1117 if (ch >= 0x10000) {
1118 *p++ = '\\';
1119 *p++ = 'U';
1120 *p++ = hexdigits[(ch >> 28) & 0xf];
1121 *p++ = hexdigits[(ch >> 24) & 0xf];
1122 *p++ = hexdigits[(ch >> 20) & 0xf];
1123 *p++ = hexdigits[(ch >> 16) & 0xf];
1124 *p++ = hexdigits[(ch >> 12) & 0xf];
1125 *p++ = hexdigits[(ch >> 8) & 0xf];
1126 *p++ = hexdigits[(ch >> 4) & 0xf];
1127 *p++ = hexdigits[ch & 15];
1128 }
1129 else
1130#endif
1131 /* Map 16-bit characters to '\uxxxx' */
1132 if (ch >= 256 || ch == '\\' || ch == '\n') {
1133 *p++ = '\\';
1134 *p++ = 'u';
1135 *p++ = hexdigits[(ch >> 12) & 0xf];
1136 *p++ = hexdigits[(ch >> 8) & 0xf];
1137 *p++ = hexdigits[(ch >> 4) & 0xf];
1138 *p++ = hexdigits[ch & 15];
1139 }
1140 /* Copy everything else as-is */
1141 else
1142 *p++ = (char) ch;
1143 }
1144 size = p - q;
1145
1146 done:
1147 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1148 Py_DECREF(repr);
1149 return result;
1150}
1151
1152static int
1153save_unicode(PicklerObject *self, PyObject *obj)
1154{
1155 Py_ssize_t size;
1156 PyObject *encoded = NULL;
1157
1158 if (self->bin) {
1159 char pdata[5];
1160
1161 encoded = PyUnicode_AsUTF8String(obj);
1162 if (encoded == NULL)
1163 goto error;
1164
1165 size = PyBytes_GET_SIZE(encoded);
1166 if (size < 0 || size > 0xffffffffL)
1167 goto error; /* string too large */
1168
1169 pdata[0] = BINUNICODE;
1170 pdata[1] = (unsigned char)(size & 0xff);
1171 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1172 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1173 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1174
1175 if (pickler_write(self, pdata, 5) < 0)
1176 goto error;
1177
1178 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1179 goto error;
1180 }
1181 else {
1182 const char unicode_op = UNICODE;
1183
1184 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1185 PyUnicode_GET_SIZE(obj));
1186 if (encoded == NULL)
1187 goto error;
1188
1189 if (pickler_write(self, &unicode_op, 1) < 0)
1190 goto error;
1191
1192 size = PyBytes_GET_SIZE(encoded);
1193 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1194 goto error;
1195
1196 if (pickler_write(self, "\n", 1) < 0)
1197 goto error;
1198 }
1199 if (memo_put(self, obj) < 0)
1200 goto error;
1201
1202 Py_DECREF(encoded);
1203 return 0;
1204
1205 error:
1206 Py_XDECREF(encoded);
1207 return -1;
1208}
1209
1210/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1211static int
1212store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1213{
1214 int i;
1215
1216 assert(PyTuple_Size(t) == len);
1217
1218 for (i = 0; i < len; i++) {
1219 PyObject *element = PyTuple_GET_ITEM(t, i);
1220
1221 if (element == NULL)
1222 return -1;
1223 if (save(self, element, 0) < 0)
1224 return -1;
1225 }
1226
1227 return 0;
1228}
1229
1230/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1231 * used across protocols to minimize the space needed to pickle them.
1232 * Tuples are also the only builtin immutable type that can be recursive
1233 * (a tuple can be reached from itself), and that requires some subtle
1234 * magic so that it works in all cases. IOW, this is a long routine.
1235 */
1236static int
1237save_tuple(PicklerObject *self, PyObject *obj)
1238{
1239 PyObject *memo_key = NULL;
1240 int len, i;
1241 int status = 0;
1242
1243 const char mark_op = MARK;
1244 const char tuple_op = TUPLE;
1245 const char pop_op = POP;
1246 const char pop_mark_op = POP_MARK;
1247 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1248
1249 if ((len = PyTuple_Size(obj)) < 0)
1250 return -1;
1251
1252 if (len == 0) {
1253 char pdata[2];
1254
1255 if (self->proto) {
1256 pdata[0] = EMPTY_TUPLE;
1257 len = 1;
1258 }
1259 else {
1260 pdata[0] = MARK;
1261 pdata[1] = TUPLE;
1262 len = 2;
1263 }
1264 if (pickler_write(self, pdata, len) < 0)
1265 return -1;
1266 return 0;
1267 }
1268
1269 /* id(tuple) isn't in the memo now. If it shows up there after
1270 * saving the tuple elements, the tuple must be recursive, in
1271 * which case we'll pop everything we put on the stack, and fetch
1272 * its value from the memo.
1273 */
1274 memo_key = PyLong_FromVoidPtr(obj);
1275 if (memo_key == NULL)
1276 return -1;
1277
1278 if (len <= 3 && self->proto >= 2) {
1279 /* Use TUPLE{1,2,3} opcodes. */
1280 if (store_tuple_elements(self, obj, len) < 0)
1281 goto error;
1282
1283 if (PyDict_GetItem(self->memo, memo_key)) {
1284 /* pop the len elements */
1285 for (i = 0; i < len; i++)
1286 if (pickler_write(self, &pop_op, 1) < 0)
1287 goto error;
1288 /* fetch from memo */
1289 if (memo_get(self, memo_key) < 0)
1290 goto error;
1291
1292 Py_DECREF(memo_key);
1293 return 0;
1294 }
1295 else { /* Not recursive. */
1296 if (pickler_write(self, len2opcode + len, 1) < 0)
1297 goto error;
1298 }
1299 goto memoize;
1300 }
1301
1302 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1303 * Generate MARK e1 e2 ... TUPLE
1304 */
1305 if (pickler_write(self, &mark_op, 1) < 0)
1306 goto error;
1307
1308 if (store_tuple_elements(self, obj, len) < 0)
1309 goto error;
1310
1311 if (PyDict_GetItem(self->memo, memo_key)) {
1312 /* pop the stack stuff we pushed */
1313 if (self->bin) {
1314 if (pickler_write(self, &pop_mark_op, 1) < 0)
1315 goto error;
1316 }
1317 else {
1318 /* Note that we pop one more than len, to remove
1319 * the MARK too.
1320 */
1321 for (i = 0; i <= len; i++)
1322 if (pickler_write(self, &pop_op, 1) < 0)
1323 goto error;
1324 }
1325 /* fetch from memo */
1326 if (memo_get(self, memo_key) < 0)
1327 goto error;
1328
1329 Py_DECREF(memo_key);
1330 return 0;
1331 }
1332 else { /* Not recursive. */
1333 if (pickler_write(self, &tuple_op, 1) < 0)
1334 goto error;
1335 }
1336
1337 memoize:
1338 if (memo_put(self, obj) < 0)
1339 goto error;
1340
1341 if (0) {
1342 error:
1343 status = -1;
1344 }
1345
1346 Py_DECREF(memo_key);
1347 return status;
1348}
1349
1350/* iter is an iterator giving items, and we batch up chunks of
1351 * MARK item item ... item APPENDS
1352 * opcode sequences. Calling code should have arranged to first create an
1353 * empty list, or list-like object, for the APPENDS to operate on.
1354 * Returns 0 on success, <0 on error.
1355 */
1356static int
1357batch_list(PicklerObject *self, PyObject *iter)
1358{
1359 PyObject *obj;
1360 PyObject *slice[BATCHSIZE];
1361 int i, n;
1362
1363 const char mark_op = MARK;
1364 const char append_op = APPEND;
1365 const char appends_op = APPENDS;
1366
1367 assert(iter != NULL);
1368
1369 /* XXX: I think this function could be made faster by avoiding the
1370 iterator interface and fetching objects directly from list using
1371 PyList_GET_ITEM.
1372 */
1373
1374 if (self->proto == 0) {
1375 /* APPENDS isn't available; do one at a time. */
1376 for (;;) {
1377 obj = PyIter_Next(iter);
1378 if (obj == NULL) {
1379 if (PyErr_Occurred())
1380 return -1;
1381 break;
1382 }
1383 i = save(self, obj, 0);
1384 Py_DECREF(obj);
1385 if (i < 0)
1386 return -1;
1387 if (pickler_write(self, &append_op, 1) < 0)
1388 return -1;
1389 }
1390 return 0;
1391 }
1392
1393 /* proto > 0: write in batches of BATCHSIZE. */
1394 do {
1395 /* Get next group of (no more than) BATCHSIZE elements. */
1396 for (n = 0; n < BATCHSIZE; n++) {
1397 obj = PyIter_Next(iter);
1398 if (obj == NULL) {
1399 if (PyErr_Occurred())
1400 goto error;
1401 break;
1402 }
1403 slice[n] = obj;
1404 }
1405
1406 if (n > 1) {
1407 /* Pump out MARK, slice[0:n], APPENDS. */
1408 if (pickler_write(self, &mark_op, 1) < 0)
1409 goto error;
1410 for (i = 0; i < n; i++) {
1411 if (save(self, slice[i], 0) < 0)
1412 goto error;
1413 }
1414 if (pickler_write(self, &appends_op, 1) < 0)
1415 goto error;
1416 }
1417 else if (n == 1) {
1418 if (save(self, slice[0], 0) < 0 ||
1419 pickler_write(self, &append_op, 1) < 0)
1420 goto error;
1421 }
1422
1423 for (i = 0; i < n; i++) {
1424 Py_DECREF(slice[i]);
1425 }
1426 } while (n == BATCHSIZE);
1427 return 0;
1428
1429 error:
1430 while (--n >= 0) {
1431 Py_DECREF(slice[n]);
1432 }
1433 return -1;
1434}
1435
1436static int
1437save_list(PicklerObject *self, PyObject *obj)
1438{
1439 PyObject *iter;
1440 char header[3];
1441 int len;
1442 int status = 0;
1443
1444 if (self->fast && !fast_save_enter(self, obj))
1445 goto error;
1446
1447 /* Create an empty list. */
1448 if (self->bin) {
1449 header[0] = EMPTY_LIST;
1450 len = 1;
1451 }
1452 else {
1453 header[0] = MARK;
1454 header[1] = LIST;
1455 len = 2;
1456 }
1457
1458 if (pickler_write(self, header, len) < 0)
1459 goto error;
1460
1461 /* Get list length, and bow out early if empty. */
1462 if ((len = PyList_Size(obj)) < 0)
1463 goto error;
1464
1465 if (memo_put(self, obj) < 0)
1466 goto error;
1467
1468 if (len != 0) {
1469 /* Save the list elements. */
1470 iter = PyObject_GetIter(obj);
1471 if (iter == NULL)
1472 goto error;
1473 status = batch_list(self, iter);
1474 Py_DECREF(iter);
1475 }
1476
1477 if (0) {
1478 error:
1479 status = -1;
1480 }
1481
1482 if (self->fast && !fast_save_leave(self, obj))
1483 status = -1;
1484
1485 return status;
1486}
1487
1488/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1489 * MARK key value ... key value SETITEMS
1490 * opcode sequences. Calling code should have arranged to first create an
1491 * empty dict, or dict-like object, for the SETITEMS to operate on.
1492 * Returns 0 on success, <0 on error.
1493 *
1494 * This is very much like batch_list(). The difference between saving
1495 * elements directly, and picking apart two-tuples, is so long-winded at
1496 * the C level, though, that attempts to combine these routines were too
1497 * ugly to bear.
1498 */
1499static int
1500batch_dict(PicklerObject *self, PyObject *iter)
1501{
1502 PyObject *obj;
1503 PyObject *slice[BATCHSIZE];
1504 int i, n;
1505
1506 const char mark_op = MARK;
1507 const char setitem_op = SETITEM;
1508 const char setitems_op = SETITEMS;
1509
1510 assert(iter != NULL);
1511
1512 if (self->proto == 0) {
1513 /* SETITEMS isn't available; do one at a time. */
1514 for (;;) {
1515 obj = PyIter_Next(iter);
1516 if (obj == NULL) {
1517 if (PyErr_Occurred())
1518 return -1;
1519 break;
1520 }
1521 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1522 PyErr_SetString(PyExc_TypeError, "dict items "
1523 "iterator must return 2-tuples");
1524 return -1;
1525 }
1526 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1527 if (i >= 0)
1528 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1529 Py_DECREF(obj);
1530 if (i < 0)
1531 return -1;
1532 if (pickler_write(self, &setitem_op, 1) < 0)
1533 return -1;
1534 }
1535 return 0;
1536 }
1537
1538 /* proto > 0: write in batches of BATCHSIZE. */
1539 do {
1540 /* Get next group of (no more than) BATCHSIZE elements. */
1541 for (n = 0; n < BATCHSIZE; n++) {
1542 obj = PyIter_Next(iter);
1543 if (obj == NULL) {
1544 if (PyErr_Occurred())
1545 goto error;
1546 break;
1547 }
1548 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1549 PyErr_SetString(PyExc_TypeError, "dict items "
1550 "iterator must return 2-tuples");
1551 goto error;
1552 }
1553 slice[n] = obj;
1554 }
1555
1556 if (n > 1) {
1557 /* Pump out MARK, slice[0:n], SETITEMS. */
1558 if (pickler_write(self, &mark_op, 1) < 0)
1559 goto error;
1560 for (i = 0; i < n; i++) {
1561 obj = slice[i];
1562 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1563 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1564 goto error;
1565 }
1566 if (pickler_write(self, &setitems_op, 1) < 0)
1567 goto error;
1568 }
1569 else if (n == 1) {
1570 obj = slice[0];
1571 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1572 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0 ||
1573 pickler_write(self, &setitem_op, 1) < 0)
1574 goto error;
1575 }
1576
1577 for (i = 0; i < n; i++) {
1578 Py_DECREF(slice[i]);
1579 }
1580 } while (n == BATCHSIZE);
1581 return 0;
1582
1583 error:
1584 while (--n >= 0) {
1585 Py_DECREF(slice[n]);
1586 }
1587 return -1;
1588}
1589
1590static int
1591save_dict(PicklerObject *self, PyObject *obj)
1592{
1593 PyObject *items, *iter;
1594 char header[3];
1595 int len;
1596 int status = 0;
1597
1598 if (self->fast && !fast_save_enter(self, obj))
1599 goto error;
1600
1601 /* Create an empty dict. */
1602 if (self->bin) {
1603 header[0] = EMPTY_DICT;
1604 len = 1;
1605 }
1606 else {
1607 header[0] = MARK;
1608 header[1] = DICT;
1609 len = 2;
1610 }
1611
1612 if (pickler_write(self, header, len) < 0)
1613 goto error;
1614
1615 /* Get dict size, and bow out early if empty. */
1616 if ((len = PyDict_Size(obj)) < 0)
1617 goto error;
1618
1619 if (memo_put(self, obj) < 0)
1620 goto error;
1621
1622 if (len != 0) {
1623 /* Save the dict items. */
1624 items = PyObject_CallMethod(obj, "items", "()");
1625 if (items == NULL)
1626 goto error;
1627 iter = PyObject_GetIter(items);
1628 Py_DECREF(items);
1629 if (iter == NULL)
1630 goto error;
1631 status = batch_dict(self, iter);
1632 Py_DECREF(iter);
1633 }
1634
1635 if (0) {
1636 error:
1637 status = -1;
1638 }
1639
1640 if (self->fast && !fast_save_leave(self, obj))
1641 status = -1;
1642
1643 return status;
1644}
1645
1646static int
1647save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1648{
1649 static PyObject *name_str = NULL;
1650 PyObject *global_name = NULL;
1651 PyObject *module_name = NULL;
1652 PyObject *module = NULL;
1653 PyObject *cls;
1654 int status = 0;
1655
1656 const char global_op = GLOBAL;
1657
1658 if (name_str == NULL) {
1659 name_str = PyUnicode_InternFromString("__name__");
1660 if (name_str == NULL)
1661 goto error;
1662 }
1663
1664 if (name) {
1665 global_name = name;
1666 Py_INCREF(global_name);
1667 }
1668 else {
1669 global_name = PyObject_GetAttr(obj, name_str);
1670 if (global_name == NULL)
1671 goto error;
1672 }
1673
1674 module_name = whichmodule(obj, global_name);
1675 if (module_name == NULL)
1676 goto error;
1677
1678 /* XXX: Change to use the import C API directly with level=0 to disallow
1679 relative imports.
1680
1681 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1682 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1683 custom import functions (IMHO, this would be a nice security
1684 feature). The import C API would need to be extended to support the
1685 extra parameters of __import__ to fix that. */
1686 module = PyImport_Import(module_name);
1687 if (module == NULL) {
1688 PyErr_Format(PicklingError,
1689 "Can't pickle %R: import of module %R failed",
1690 obj, module_name);
1691 goto error;
1692 }
1693 cls = PyObject_GetAttr(module, global_name);
1694 if (cls == NULL) {
1695 PyErr_Format(PicklingError,
1696 "Can't pickle %R: attribute lookup %S.%S failed",
1697 obj, module_name, global_name);
1698 goto error;
1699 }
1700 if (cls != obj) {
1701 Py_DECREF(cls);
1702 PyErr_Format(PicklingError,
1703 "Can't pickle %R: it's not the same object as %S.%S",
1704 obj, module_name, global_name);
1705 goto error;
1706 }
1707 Py_DECREF(cls);
1708
1709 if (self->proto >= 2) {
1710 /* See whether this is in the extension registry, and if
1711 * so generate an EXT opcode.
1712 */
1713 PyObject *code_obj; /* extension code as Python object */
1714 long code; /* extension code as C value */
1715 char pdata[5];
1716 int n;
1717
1718 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1719 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1720 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1721 /* The object is not registered in the extension registry.
1722 This is the most likely code path. */
1723 if (code_obj == NULL)
1724 goto gen_global;
1725
1726 /* XXX: pickle.py doesn't check neither the type, nor the range
1727 of the value returned by the extension_registry. It should for
1728 consistency. */
1729
1730 /* Verify code_obj has the right type and value. */
1731 if (!PyLong_Check(code_obj)) {
1732 PyErr_Format(PicklingError,
1733 "Can't pickle %R: extension code %R isn't an integer",
1734 obj, code_obj);
1735 goto error;
1736 }
1737 code = PyLong_AS_LONG(code_obj);
1738 if (code <= 0 || code > 0x7fffffffL) {
1739 PyErr_Format(PicklingError,
1740 "Can't pickle %R: extension code %ld is out of range",
1741 obj, code);
1742 goto error;
1743 }
1744
1745 /* Generate an EXT opcode. */
1746 if (code <= 0xff) {
1747 pdata[0] = EXT1;
1748 pdata[1] = (unsigned char)code;
1749 n = 2;
1750 }
1751 else if (code <= 0xffff) {
1752 pdata[0] = EXT2;
1753 pdata[1] = (unsigned char)(code & 0xff);
1754 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1755 n = 3;
1756 }
1757 else {
1758 pdata[0] = EXT4;
1759 pdata[1] = (unsigned char)(code & 0xff);
1760 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1761 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1762 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1763 n = 5;
1764 }
1765
1766 if (pickler_write(self, pdata, n) < 0)
1767 goto error;
1768 }
1769 else {
1770 /* Generate a normal global opcode if we are using a pickle
1771 protocol <= 2, or if the object is not registered in the
1772 extension registry. */
1773 PyObject *encoded;
1774 PyObject *(*unicode_encoder)(PyObject *);
1775
1776 gen_global:
1777 if (pickler_write(self, &global_op, 1) < 0)
1778 goto error;
1779
1780 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1781 the module name and the global name using UTF-8. We do so only when
1782 we are using the pickle protocol newer than version 3. This is to
1783 ensure compatibility with older Unpickler running on Python 2.x. */
1784 if (self->proto >= 3) {
1785 unicode_encoder = PyUnicode_AsUTF8String;
1786 }
1787 else {
1788 unicode_encoder = PyUnicode_AsASCIIString;
1789 }
1790
1791 /* Save the name of the module. */
1792 encoded = unicode_encoder(module_name);
1793 if (encoded == NULL) {
1794 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1795 PyErr_Format(PicklingError,
1796 "can't pickle module identifier '%S' using "
1797 "pickle protocol %i", module_name, self->proto);
1798 goto error;
1799 }
1800 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1801 PyBytes_GET_SIZE(encoded)) < 0) {
1802 Py_DECREF(encoded);
1803 goto error;
1804 }
1805 Py_DECREF(encoded);
1806 if(pickler_write(self, "\n", 1) < 0)
1807 goto error;
1808
1809 /* Save the name of the module. */
1810 encoded = unicode_encoder(global_name);
1811 if (encoded == NULL) {
1812 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1813 PyErr_Format(PicklingError,
1814 "can't pickle global identifier '%S' using "
1815 "pickle protocol %i", global_name, self->proto);
1816 goto error;
1817 }
1818 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1819 PyBytes_GET_SIZE(encoded)) < 0) {
1820 Py_DECREF(encoded);
1821 goto error;
1822 }
1823 Py_DECREF(encoded);
1824 if(pickler_write(self, "\n", 1) < 0)
1825 goto error;
1826
1827 /* Memoize the object. */
1828 if (memo_put(self, obj) < 0)
1829 goto error;
1830 }
1831
1832 if (0) {
1833 error:
1834 status = -1;
1835 }
1836 Py_XDECREF(module_name);
1837 Py_XDECREF(global_name);
1838 Py_XDECREF(module);
1839
1840 return status;
1841}
1842
1843static int
1844save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1845{
1846 PyObject *pid = NULL;
1847 int status = 0;
1848
1849 const char persid_op = PERSID;
1850 const char binpersid_op = BINPERSID;
1851
1852 Py_INCREF(obj);
1853 pid = pickler_call(self, func, obj);
1854 if (pid == NULL)
1855 return -1;
1856
1857 if (pid != Py_None) {
1858 if (self->bin) {
1859 if (save(self, pid, 1) < 0 ||
1860 pickler_write(self, &binpersid_op, 1) < 0)
1861 goto error;
1862 }
1863 else {
1864 PyObject *pid_str = NULL;
1865 char *pid_ascii_bytes;
1866 Py_ssize_t size;
1867
1868 pid_str = PyObject_Str(pid);
1869 if (pid_str == NULL)
1870 goto error;
1871
1872 /* XXX: Should it check whether the persistent id only contains
1873 ASCII characters? And what if the pid contains embedded
1874 newlines? */
1875 pid_ascii_bytes = PyUnicode_AsStringAndSize(pid_str, &size);
1876 Py_DECREF(pid_str);
1877 if (pid_ascii_bytes == NULL)
1878 goto error;
1879
1880 if (pickler_write(self, &persid_op, 1) < 0 ||
1881 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1882 pickler_write(self, "\n", 1) < 0)
1883 goto error;
1884 }
1885 status = 1;
1886 }
1887
1888 if (0) {
1889 error:
1890 status = -1;
1891 }
1892 Py_XDECREF(pid);
1893
1894 return status;
1895}
1896
1897/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1898 * appropriate __reduce__ method for obj.
1899 */
1900static int
1901save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1902{
1903 PyObject *callable;
1904 PyObject *argtup;
1905 PyObject *state = NULL;
1906 PyObject *listitems = NULL;
1907 PyObject *dictitems = NULL;
1908
1909 int use_newobj = self->proto >= 2;
1910
1911 const char reduce_op = REDUCE;
1912 const char build_op = BUILD;
1913 const char newobj_op = NEWOBJ;
1914
1915 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1916 &callable, &argtup, &state, &listitems, &dictitems))
1917 return -1;
1918
1919 if (!PyCallable_Check(callable)) {
1920 PyErr_SetString(PicklingError,
1921 "first argument of save_reduce() must be callable");
1922 return -1;
1923 }
1924 if (!PyTuple_Check(argtup)) {
1925 PyErr_SetString(PicklingError,
1926 "second argument of save_reduce() must be a tuple");
1927 return -1;
1928 }
1929
1930 if (state == Py_None)
1931 state = NULL;
1932 if (listitems == Py_None)
1933 listitems = NULL;
1934 if (dictitems == Py_None)
1935 dictitems = NULL;
1936
1937 /* Protocol 2 special case: if callable's name is __newobj__, use
1938 NEWOBJ. */
1939 if (use_newobj) {
1940 static PyObject *newobj_str = NULL;
1941 PyObject *name_str;
1942
1943 if (newobj_str == NULL) {
1944 newobj_str = PyUnicode_InternFromString("__newobj__");
1945 }
1946
1947 name_str = PyObject_GetAttrString(callable, "__name__");
1948 if (name_str == NULL) {
1949 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1950 PyErr_Clear();
1951 else
1952 return -1;
1953 use_newobj = 0;
1954 }
1955 else {
1956 use_newobj = PyUnicode_Check(name_str) &&
1957 PyUnicode_Compare(name_str, newobj_str) == 0;
1958 Py_DECREF(name_str);
1959 }
1960 }
1961 if (use_newobj) {
1962 PyObject *cls;
1963 PyObject *newargtup;
1964 PyObject *obj_class;
1965 int p;
1966
1967 /* Sanity checks. */
1968 if (Py_SIZE(argtup) < 1) {
1969 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
1970 return -1;
1971 }
1972
1973 cls = PyTuple_GET_ITEM(argtup, 0);
1974 if (!PyObject_HasAttrString(cls, "__new__")) {
1975 PyErr_SetString(PicklingError, "args[0] from "
1976 "__newobj__ args has no __new__");
1977 return -1;
1978 }
1979
1980 if (obj != NULL) {
1981 obj_class = PyObject_GetAttrString(obj, "__class__");
1982 if (obj_class == NULL) {
1983 if (PyErr_ExceptionMatches(PyExc_AttributeError))
1984 PyErr_Clear();
1985 else
1986 return -1;
1987 }
1988 p = obj_class != cls; /* true iff a problem */
1989 Py_DECREF(obj_class);
1990 if (p) {
1991 PyErr_SetString(PicklingError, "args[0] from "
1992 "__newobj__ args has the wrong class");
1993 return -1;
1994 }
1995 }
1996 /* XXX: These calls save() are prone to infinite recursion. Imagine
1997 what happen if the value returned by the __reduce__() method of
1998 some extension type contains another object of the same type. Ouch!
1999
2000 Here is a quick example, that I ran into, to illustrate what I
2001 mean:
2002
2003 >>> import pickle, copyreg
2004 >>> copyreg.dispatch_table.pop(complex)
2005 >>> pickle.dumps(1+2j)
2006 Traceback (most recent call last):
2007 ...
2008 RuntimeError: maximum recursion depth exceeded
2009
2010 Removing the complex class from copyreg.dispatch_table made the
2011 __reduce_ex__() method emit another complex object:
2012
2013 >>> (1+1j).__reduce_ex__(2)
2014 (<function __newobj__ at 0xb7b71c3c>,
2015 (<class 'complex'>, (1+1j)), None, None, None)
2016
2017 Thus when save() was called on newargstup (the 2nd item) recursion
2018 ensued. Of course, the bug was in the complex class which had a
2019 broken __getnewargs__() that emitted another complex object. But,
2020 the point, here, is it is quite easy to end up with a broken reduce
2021 function. */
2022
2023 /* Save the class and its __new__ arguments. */
2024 if (save(self, cls, 0) < 0)
2025 return -1;
2026
2027 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2028 if (newargtup == NULL)
2029 return -1;
2030
2031 p = save(self, newargtup, 0);
2032 Py_DECREF(newargtup);
2033 if (p < 0)
2034 return -1;
2035
2036 /* Add NEWOBJ opcode. */
2037 if (pickler_write(self, &newobj_op, 1) < 0)
2038 return -1;
2039 }
2040 else { /* Not using NEWOBJ. */
2041 if (save(self, callable, 0) < 0 ||
2042 save(self, argtup, 0) < 0 ||
2043 pickler_write(self, &reduce_op, 1) < 0)
2044 return -1;
2045 }
2046
2047 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2048 the caller do not want to memoize the object. Not particularly useful,
2049 but that is to mimic the behavior save_reduce() in pickle.py when
2050 obj is None. */
2051 if (obj && memo_put(self, obj) < 0)
2052 return -1;
2053
2054 if (listitems && batch_list(self, listitems) < 0)
2055 return -1;
2056
2057 if (dictitems && batch_dict(self, dictitems) < 0)
2058 return -1;
2059
2060 if (state) {
2061 if (save(self, state, 0) < 0 ||
2062 pickler_write(self, &build_op, 1) < 0)
2063 return -1;
2064 }
2065
2066 return 0;
2067}
2068
2069static int
2070save(PicklerObject *self, PyObject *obj, int pers_save)
2071{
2072 PyTypeObject *type;
2073 PyObject *reduce_func = NULL;
2074 PyObject *reduce_value = NULL;
2075 PyObject *memo_key = NULL;
2076 int status = 0;
2077
2078 /* XXX: Use Py_EnterRecursiveCall()? */
2079 if (++self->nesting > Py_GetRecursionLimit()) {
2080 PyErr_SetString(PyExc_RuntimeError,
2081 "maximum recursion depth exceeded");
2082 goto error;
2083 }
2084
2085 /* The extra pers_save argument is necessary to avoid calling save_pers()
2086 on its returned object. */
2087 if (!pers_save && self->pers_func) {
2088 /* save_pers() returns:
2089 -1 to signal an error;
2090 0 if it did nothing successfully;
2091 1 if a persistent id was saved.
2092 */
2093 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2094 goto done;
2095 }
2096
2097 type = Py_TYPE(obj);
2098
2099 /* XXX: The old cPickle had an optimization that used switch-case
2100 statement dispatching on the first letter of the type name. It was
2101 probably not a bad idea after all. If benchmarks shows that particular
2102 optimization had some real benefits, it would be nice to add it
2103 back. */
2104
2105 /* Atom types; these aren't memoized, so don't check the memo. */
2106
2107 if (obj == Py_None) {
2108 status = save_none(self, obj);
2109 goto done;
2110 }
2111 else if (obj == Py_False || obj == Py_True) {
2112 status = save_bool(self, obj);
2113 goto done;
2114 }
2115 else if (type == &PyLong_Type) {
2116 status = save_long(self, obj);
2117 goto done;
2118 }
2119 else if (type == &PyFloat_Type) {
2120 status = save_float(self, obj);
2121 goto done;
2122 }
2123
2124 /* Check the memo to see if it has the object. If so, generate
2125 a GET (or BINGET) opcode, instead of pickling the object
2126 once again. */
2127 memo_key = PyLong_FromVoidPtr(obj);
2128 if (memo_key == NULL)
2129 goto error;
2130 if (PyDict_GetItem(self->memo, memo_key)) {
2131 if (memo_get(self, memo_key) < 0)
2132 goto error;
2133 goto done;
2134 }
2135
2136 if (type == &PyBytes_Type) {
2137 status = save_bytes(self, obj);
2138 goto done;
2139 }
2140 else if (type == &PyUnicode_Type) {
2141 status = save_unicode(self, obj);
2142 goto done;
2143 }
2144 else if (type == &PyDict_Type) {
2145 status = save_dict(self, obj);
2146 goto done;
2147 }
2148 else if (type == &PyList_Type) {
2149 status = save_list(self, obj);
2150 goto done;
2151 }
2152 else if (type == &PyTuple_Type) {
2153 status = save_tuple(self, obj);
2154 goto done;
2155 }
2156 else if (type == &PyType_Type) {
2157 status = save_global(self, obj, NULL);
2158 goto done;
2159 }
2160 else if (type == &PyFunction_Type) {
2161 status = save_global(self, obj, NULL);
2162 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2163 /* fall back to reduce */
2164 PyErr_Clear();
2165 }
2166 else {
2167 goto done;
2168 }
2169 }
2170 else if (type == &PyCFunction_Type) {
2171 status = save_global(self, obj, NULL);
2172 goto done;
2173 }
2174 else if (PyType_IsSubtype(type, &PyType_Type)) {
2175 status = save_global(self, obj, NULL);
2176 goto done;
2177 }
2178
2179 /* XXX: This part needs some unit tests. */
2180
2181 /* Get a reduction callable, and call it. This may come from
2182 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2183 * or the object's __reduce__ method.
2184 */
2185 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2186 if (reduce_func != NULL) {
2187 /* Here, the reference count of the reduce_func object returned by
2188 PyDict_GetItem needs to be increased to be consistent with the one
2189 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2190 reduce_func at the end of the save() routine.
2191 */
2192 Py_INCREF(reduce_func);
2193 Py_INCREF(obj);
2194 reduce_value = pickler_call(self, reduce_func, obj);
2195 }
2196 else {
2197 static PyObject *reduce_str = NULL;
2198 static PyObject *reduce_ex_str = NULL;
2199
2200 /* Cache the name of the reduce methods. */
2201 if (reduce_str == NULL) {
2202 reduce_str = PyUnicode_InternFromString("__reduce__");
2203 if (reduce_str == NULL)
2204 goto error;
2205 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2206 if (reduce_ex_str == NULL)
2207 goto error;
2208 }
2209
2210 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2211 automatically defined as __reduce__. While this is convenient, this
2212 make it impossible to know which method was actually called. Of
2213 course, this is not a big deal. But still, it would be nice to let
2214 the user know which method was called when something go
2215 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2216 don't actually have to check for a __reduce__ method. */
2217
2218 /* Check for a __reduce_ex__ method. */
2219 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2220 if (reduce_func != NULL) {
2221 PyObject *proto;
2222 proto = PyLong_FromLong(self->proto);
2223 if (proto != NULL) {
2224 reduce_value = pickler_call(self, reduce_func, proto);
2225 }
2226 }
2227 else {
2228 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2229 PyErr_Clear();
2230 else
2231 goto error;
2232 /* Check for a __reduce__ method. */
2233 reduce_func = PyObject_GetAttr(obj, reduce_str);
2234 if (reduce_func != NULL) {
2235 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2236 }
2237 else {
2238 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2239 type->tp_name, obj);
2240 goto error;
2241 }
2242 }
2243 }
2244
2245 if (reduce_value == NULL)
2246 goto error;
2247
2248 if (PyUnicode_Check(reduce_value)) {
2249 status = save_global(self, obj, reduce_value);
2250 goto done;
2251 }
2252
2253 if (!PyTuple_Check(reduce_value)) {
2254 PyErr_SetString(PicklingError,
2255 "__reduce__ must return a string or tuple");
2256 goto error;
2257 }
2258 if (Py_SIZE(reduce_value) < 2 || Py_SIZE(reduce_value) > 5) {
2259 PyErr_SetString(PicklingError, "tuple returned by __reduce__ "
2260 "must contain 2 through 5 elements");
2261 goto error;
2262 }
2263 if (!PyTuple_Check(PyTuple_GET_ITEM(reduce_value, 1))) {
2264 PyErr_SetString(PicklingError, "second item of the tuple "
2265 "returned by __reduce__ must be a tuple");
2266 goto error;
2267 }
2268
2269 status = save_reduce(self, reduce_value, obj);
2270
2271 if (0) {
2272 error:
2273 status = -1;
2274 }
2275 done:
2276 self->nesting--;
2277 Py_XDECREF(memo_key);
2278 Py_XDECREF(reduce_func);
2279 Py_XDECREF(reduce_value);
2280
2281 return status;
2282}
2283
2284static int
2285dump(PicklerObject *self, PyObject *obj)
2286{
2287 const char stop_op = STOP;
2288
2289 if (self->proto >= 2) {
2290 char header[2];
2291
2292 header[0] = PROTO;
2293 assert(self->proto >= 0 && self->proto < 256);
2294 header[1] = (unsigned char)self->proto;
2295 if (pickler_write(self, header, 2) < 0)
2296 return -1;
2297 }
2298
2299 if (save(self, obj, 0) < 0 ||
2300 pickler_write(self, &stop_op, 1) < 0 ||
2301 pickler_write(self, NULL, 0) < 0)
2302 return -1;
2303
2304 return 0;
2305}
2306
2307PyDoc_STRVAR(Pickler_clear_memo_doc,
2308"clear_memo() -> None. Clears the pickler's \"memo\"."
2309"\n"
2310"The memo is the data structure that remembers which objects the\n"
2311"pickler has already seen, so that shared or recursive objects are\n"
2312"pickled by reference and not by value. This method is useful when\n"
2313"re-using picklers.");
2314
2315static PyObject *
2316Pickler_clear_memo(PicklerObject *self)
2317{
2318 if (self->memo)
2319 PyDict_Clear(self->memo);
2320
2321 Py_RETURN_NONE;
2322}
2323
2324PyDoc_STRVAR(Pickler_dump_doc,
2325"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2326
2327static PyObject *
2328Pickler_dump(PicklerObject *self, PyObject *args)
2329{
2330 PyObject *obj;
2331
2332 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2333 return NULL;
2334
2335 if (dump(self, obj) < 0)
2336 return NULL;
2337
2338 Py_RETURN_NONE;
2339}
2340
2341static struct PyMethodDef Pickler_methods[] = {
2342 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2343 Pickler_dump_doc},
2344 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2345 Pickler_clear_memo_doc},
2346 {NULL, NULL} /* sentinel */
2347};
2348
2349static void
2350Pickler_dealloc(PicklerObject *self)
2351{
2352 PyObject_GC_UnTrack(self);
2353
2354 Py_XDECREF(self->write);
2355 Py_XDECREF(self->memo);
2356 Py_XDECREF(self->pers_func);
2357 Py_XDECREF(self->arg);
2358 Py_XDECREF(self->fast_memo);
2359
2360 PyMem_Free(self->write_buf);
2361
2362 Py_TYPE(self)->tp_free((PyObject *)self);
2363}
2364
2365static int
2366Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2367{
2368 Py_VISIT(self->write);
2369 Py_VISIT(self->memo);
2370 Py_VISIT(self->pers_func);
2371 Py_VISIT(self->arg);
2372 Py_VISIT(self->fast_memo);
2373 return 0;
2374}
2375
/* tp_clear for Pickler objects: drop all object references held by the
   pickler and release its write buffer.  Also called from Pickler_init()
   when __init__() runs again on an already initialized pickler.
   Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->write);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->fast_memo);

    PyMem_Free(self->write_buf);
    /* Reset the pointer so that a later Pickler_dealloc() or Pickler_clear()
       does not free the same buffer twice. */
    self->write_buf = NULL;

    return 0;
}
2390
/* Docstring of the Pickler type (installed in the tp_doc slot of
   Pickler_Type). */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported. The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
2410
2411static int
2412Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2413{
2414 static char *kwlist[] = {"file", "protocol", 0};
2415 PyObject *file;
2416 PyObject *proto_obj = NULL;
2417 long proto = 0;
2418
2419 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2420 kwlist, &file, &proto_obj))
2421 return -1;
2422
2423 /* In case of multiple __init__() calls, clear previous content. */
2424 if (self->write != NULL)
2425 (void)Pickler_clear(self);
2426
2427 if (proto_obj == NULL || proto_obj == Py_None)
2428 proto = DEFAULT_PROTOCOL;
2429 else
2430 proto = PyLong_AsLong(proto_obj);
2431
2432 if (proto < 0)
2433 proto = HIGHEST_PROTOCOL;
2434 if (proto > HIGHEST_PROTOCOL) {
2435 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2436 HIGHEST_PROTOCOL);
2437 return -1;
2438 }
2439
2440 self->proto = proto;
2441 self->bin = proto > 0;
2442 self->arg = NULL;
2443 self->nesting = 0;
2444 self->fast = 0;
2445 self->fast_nesting = 0;
2446 self->fast_memo = NULL;
2447
2448 if (!PyObject_HasAttrString(file, "write")) {
2449 PyErr_SetString(PyExc_TypeError,
2450 "file must have a 'write' attribute");
2451 return -1;
2452 }
2453 self->write = PyObject_GetAttrString(file, "write");
2454 if (self->write == NULL)
2455 return -1;
2456 self->buf_size = 0;
2457 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2458 if (self->write_buf == NULL) {
2459 PyErr_NoMemory();
2460 return -1;
2461 }
2462 self->pers_func = NULL;
2463 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2464 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2465 "persistent_id");
2466 if (self->pers_func == NULL)
2467 return -1;
2468 }
2469 self->memo = PyDict_New();
2470 if (self->memo == NULL)
2471 return -1;
2472
2473 return 0;
2474}
2475
2476static PyObject *
2477Pickler_get_memo(PicklerObject *self)
2478{
2479 if (self->memo == NULL)
2480 PyErr_SetString(PyExc_AttributeError, "memo");
2481 else
2482 Py_INCREF(self->memo);
2483 return self->memo;
2484}
2485
2486static int
2487Pickler_set_memo(PicklerObject *self, PyObject *value)
2488{
2489 PyObject *tmp;
2490
2491 if (value == NULL) {
2492 PyErr_SetString(PyExc_TypeError,
2493 "attribute deletion is not supported");
2494 return -1;
2495 }
2496 if (!PyDict_Check(value)) {
2497 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2498 return -1;
2499 }
2500
2501 tmp = self->memo;
2502 Py_INCREF(value);
2503 self->memo = value;
2504 Py_XDECREF(tmp);
2505
2506 return 0;
2507}
2508
2509static PyObject *
2510Pickler_get_persid(PicklerObject *self)
2511{
2512 if (self->pers_func == NULL)
2513 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2514 else
2515 Py_INCREF(self->pers_func);
2516 return self->pers_func;
2517}
2518
2519static int
2520Pickler_set_persid(PicklerObject *self, PyObject *value)
2521{
2522 PyObject *tmp;
2523
2524 if (value == NULL) {
2525 PyErr_SetString(PyExc_TypeError,
2526 "attribute deletion is not supported");
2527 return -1;
2528 }
2529 if (!PyCallable_Check(value)) {
2530 PyErr_SetString(PyExc_TypeError,
2531 "persistent_id must be a callable taking one argument");
2532 return -1;
2533 }
2534
2535 tmp = self->pers_func;
2536 Py_INCREF(value);
2537 self->pers_func = value;
2538 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2539
2540 return 0;
2541}
2542
/* Plain struct members exposed as Python attributes: `bin` and `fast`
   map directly onto the int fields of the same name in PicklerObject. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}
};
2548
/* Computed attributes of Pickler objects: `memo` and `persistent_id` go
   through getter/setter functions so that the values can be type-checked
   and reference-counted on assignment. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo", (getter)Pickler_get_memo,
             (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
2556
/* Type object for _pickle.Pickler.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and takes part in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC), which is why tp_traverse, tp_clear and the GC
   variant of tp_free are filled in. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2599
2600/* Temporary helper for calling self.find_class().
2601
2602 XXX: It would be nice to able to avoid Python function call overhead, by
2603 using directly the C version of find_class(), when find_class() is not
2604 overridden by a subclass. Although, this could become rather hackish. A
2605 simpler optimization would be to call the C function when self is not a
2606 subclass instance. */
2607static PyObject *
2608find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2609{
2610 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2611 module_name, global_name);
2612}
2613
2614static int
2615marker(UnpicklerObject *self)
2616{
2617 if (self->num_marks < 1) {
2618 PyErr_SetString(UnpicklingError, "could not find MARK");
2619 return -1;
2620 }
2621
2622 return self->marks[--self->num_marks];
2623}
2624
2625static int
2626load_none(UnpicklerObject *self)
2627{
2628 PDATA_APPEND(self->stack, Py_None, -1);
2629 return 0;
2630}
2631
2632static int
2633bad_readline(void)
2634{
2635 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2636 return -1;
2637}
2638
2639static int
2640load_int(UnpicklerObject *self)
2641{
2642 PyObject *value;
2643 char *endptr, *s;
2644 Py_ssize_t len;
2645 long x;
2646
2647 if ((len = unpickler_readline(self, &s)) < 0)
2648 return -1;
2649 if (len < 2)
2650 return bad_readline();
2651
2652 errno = 0;
2653 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2654 x = strtol(s, &endptr, 0);
2655
2656 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2657 /* Hm, maybe we've got something long. Let's try reading
2658 * it as a Python long object. */
2659 errno = 0;
2660 /* XXX: Same thing about the base here. */
2661 value = PyLong_FromString(s, NULL, 0);
2662 if (value == NULL) {
2663 PyErr_SetString(PyExc_ValueError,
2664 "could not convert string to int");
2665 return -1;
2666 }
2667 }
2668 else {
2669 if (len == 3 && (x == 0 || x == 1)) {
2670 if ((value = PyBool_FromLong(x)) == NULL)
2671 return -1;
2672 }
2673 else {
2674 if ((value = PyLong_FromLong(x)) == NULL)
2675 return -1;
2676 }
2677 }
2678
2679 PDATA_PUSH(self->stack, value, -1);
2680 return 0;
2681}
2682
2683static int
2684load_bool(UnpicklerObject *self, PyObject *boolean)
2685{
2686 assert(boolean == Py_True || boolean == Py_False);
2687 PDATA_APPEND(self->stack, boolean, -1);
2688 return 0;
2689}
2690
/* `bytes` holds `size` bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure: when size is 1 or 2 the value is unsigned, but
 * when size is 4 it is a signed 32-bit quantity.  This is an historical
 * source of cross-platform bugs.
 *
 * The value is assembled in an unsigned long so that shifting a byte with
 * its top bit set into bit 31 is well defined even where long is only
 * 32 bits wide; a signed accumulator would overflow there.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed, so on a box with longs bigger than 4 bytes we need
     * to extend a BININT's sign bit to the full width.
     */
    if (sizeof(long) > 4 && size == 4 && (x & 0x80000000UL) != 0) {
        x |= ~0xFFFFFFFFUL;
    }

    /* With a 32-bit long this conversion is what turns a set bit 31 into
     * a negative result. */
    return (long)x;
}
2717
2718static int
2719load_binintx(UnpicklerObject *self, char *s, int size)
2720{
2721 PyObject *value;
2722 long x;
2723
2724 x = calc_binint(s, size);
2725
2726 if ((value = PyLong_FromLong(x)) == NULL)
2727 return -1;
2728
2729 PDATA_PUSH(self->stack, value, -1);
2730 return 0;
2731}
2732
2733static int
2734load_binint(UnpicklerObject *self)
2735{
2736 char *s;
2737
2738 if (unpickler_read(self, &s, 4) < 0)
2739 return -1;
2740
2741 return load_binintx(self, s, 4);
2742}
2743
2744static int
2745load_binint1(UnpicklerObject *self)
2746{
2747 char *s;
2748
2749 if (unpickler_read(self, &s, 1) < 0)
2750 return -1;
2751
2752 return load_binintx(self, s, 1);
2753}
2754
2755static int
2756load_binint2(UnpicklerObject *self)
2757{
2758 char *s;
2759
2760 if (unpickler_read(self, &s, 2) < 0)
2761 return -1;
2762
2763 return load_binintx(self, s, 2);
2764}
2765
2766static int
2767load_long(UnpicklerObject *self)
2768{
2769 PyObject *value;
2770 char *s;
2771 Py_ssize_t len;
2772
2773 if ((len = unpickler_readline(self, &s)) < 0)
2774 return -1;
2775 if (len < 2)
2776 return bad_readline();
2777
2778 /* XXX: Should the base argument explicitly set to 10? */
2779 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2780 return -1;
2781
2782 PDATA_PUSH(self->stack, value, -1);
2783 return 0;
2784}
2785
2786/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2787 * data following.
2788 */
2789static int
2790load_counted_long(UnpicklerObject *self, int size)
2791{
2792 PyObject *value;
2793 char *nbytes;
2794 char *pdata;
2795
2796 assert(size == 1 || size == 4);
2797 if (unpickler_read(self, &nbytes, size) < 0)
2798 return -1;
2799
2800 size = calc_binint(nbytes, size);
2801 if (size < 0) {
2802 /* Corrupt or hostile pickle -- we never write one like this */
2803 PyErr_SetString(UnpicklingError,
2804 "LONG pickle has negative byte count");
2805 return -1;
2806 }
2807
2808 if (size == 0)
2809 value = PyLong_FromLong(0L);
2810 else {
2811 /* Read the raw little-endian bytes and convert. */
2812 if (unpickler_read(self, &pdata, size) < 0)
2813 return -1;
2814 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2815 1 /* little endian */ , 1 /* signed */ );
2816 }
2817 if (value == NULL)
2818 return -1;
2819 PDATA_PUSH(self->stack, value, -1);
2820 return 0;
2821}
2822
2823static int
2824load_float(UnpicklerObject *self)
2825{
2826 PyObject *value;
2827 char *endptr, *s;
2828 Py_ssize_t len;
2829 double d;
2830
2831 if ((len = unpickler_readline(self, &s)) < 0)
2832 return -1;
2833 if (len < 2)
2834 return bad_readline();
2835
2836 errno = 0;
2837 d = PyOS_ascii_strtod(s, &endptr);
2838
2839 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2840 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2841 return -1;
2842 }
2843
2844 if ((value = PyFloat_FromDouble(d)) == NULL)
2845 return -1;
2846
2847 PDATA_PUSH(self->stack, value, -1);
2848 return 0;
2849}
2850
2851static int
2852load_binfloat(UnpicklerObject *self)
2853{
2854 PyObject *value;
2855 double x;
2856 char *s;
2857
2858 if (unpickler_read(self, &s, 8) < 0)
2859 return -1;
2860
2861 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2862 if (x == -1.0 && PyErr_Occurred())
2863 return -1;
2864
2865 if ((value = PyFloat_FromDouble(x)) == NULL)
2866 return -1;
2867
2868 PDATA_PUSH(self->stack, value, -1);
2869 return 0;
2870}
2871
2872static int
2873load_string(UnpicklerObject *self)
2874{
2875 PyObject *bytes;
2876 PyObject *str = NULL;
2877 Py_ssize_t len;
2878 char *s, *p;
2879
2880 if ((len = unpickler_readline(self, &s)) < 0)
2881 return -1;
2882 if (len < 3)
2883 return bad_readline();
2884 if ((s = strdup(s)) == NULL) {
2885 PyErr_NoMemory();
2886 return -1;
2887 }
2888
2889 /* Strip outermost quotes */
2890 while (s[len - 1] <= ' ')
2891 len--;
2892 if (s[0] == '"' && s[len - 1] == '"') {
2893 s[len - 1] = '\0';
2894 p = s + 1;
2895 len -= 2;
2896 }
2897 else if (s[0] == '\'' && s[len - 1] == '\'') {
2898 s[len - 1] = '\0';
2899 p = s + 1;
2900 len -= 2;
2901 }
2902 else {
2903 free(s);
2904 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2905 return -1;
2906 }
2907
2908 /* Use the PyBytes API to decode the string, since that is what is used
2909 to encode, and then coerce the result to Unicode. */
2910 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2911 free(s);
2912 if (bytes == NULL)
2913 return -1;
2914 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2915 Py_DECREF(bytes);
2916 if (str == NULL)
2917 return -1;
2918
2919 PDATA_PUSH(self->stack, str, -1);
2920 return 0;
2921}
2922
2923static int
2924load_binbytes(UnpicklerObject *self)
2925{
2926 PyObject *bytes;
2927 long x;
2928 char *s;
2929
2930 if (unpickler_read(self, &s, 4) < 0)
2931 return -1;
2932
2933 x = calc_binint(s, 4);
2934 if (x < 0) {
2935 PyErr_SetString(UnpicklingError,
2936 "BINBYTES pickle has negative byte count");
2937 return -1;
2938 }
2939
2940 if (unpickler_read(self, &s, x) < 0)
2941 return -1;
2942 bytes = PyBytes_FromStringAndSize(s, x);
2943 if (bytes == NULL)
2944 return -1;
2945
2946 PDATA_PUSH(self->stack, bytes, -1);
2947 return 0;
2948}
2949
2950static int
2951load_short_binbytes(UnpicklerObject *self)
2952{
2953 PyObject *bytes;
2954 unsigned char x;
2955 char *s;
2956
2957 if (unpickler_read(self, &s, 1) < 0)
2958 return -1;
2959
2960 x = (unsigned char)s[0];
2961
2962 if (unpickler_read(self, &s, x) < 0)
2963 return -1;
2964
2965 bytes = PyBytes_FromStringAndSize(s, x);
2966 if (bytes == NULL)
2967 return -1;
2968
2969 PDATA_PUSH(self->stack, bytes, -1);
2970 return 0;
2971}
2972
2973static int
2974load_binstring(UnpicklerObject *self)
2975{
2976 PyObject *str;
2977 long x;
2978 char *s;
2979
2980 if (unpickler_read(self, &s, 4) < 0)
2981 return -1;
2982
2983 x = calc_binint(s, 4);
2984 if (x < 0) {
2985 PyErr_SetString(UnpicklingError,
2986 "BINSTRING pickle has negative byte count");
2987 return -1;
2988 }
2989
2990 if (unpickler_read(self, &s, x) < 0)
2991 return -1;
2992
2993 /* Convert Python 2.x strings to unicode. */
2994 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
2995 if (str == NULL)
2996 return -1;
2997
2998 PDATA_PUSH(self->stack, str, -1);
2999 return 0;
3000}
3001
3002static int
3003load_short_binstring(UnpicklerObject *self)
3004{
3005 PyObject *str;
3006 unsigned char x;
3007 char *s;
3008
3009 if (unpickler_read(self, &s, 1) < 0)
3010 return -1;
3011
3012 x = (unsigned char)s[0];
3013
3014 if (unpickler_read(self, &s, x) < 0)
3015 return -1;
3016
3017 /* Convert Python 2.x strings to unicode. */
3018 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3019 if (str == NULL)
3020 return -1;
3021
3022 PDATA_PUSH(self->stack, str, -1);
3023 return 0;
3024}
3025
3026static int
3027load_unicode(UnpicklerObject *self)
3028{
3029 PyObject *str;
3030 Py_ssize_t len;
3031 char *s;
3032
3033 if ((len = unpickler_readline(self, &s)) < 0)
3034 return -1;
3035 if (len < 1)
3036 return bad_readline();
3037
3038 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3039 if (str == NULL)
3040 return -1;
3041
3042 PDATA_PUSH(self->stack, str, -1);
3043 return 0;
3044}
3045
3046static int
3047load_binunicode(UnpicklerObject *self)
3048{
3049 PyObject *str;
3050 long size;
3051 char *s;
3052
3053 if (unpickler_read(self, &s, 4) < 0)
3054 return -1;
3055
3056 size = calc_binint(s, 4);
3057 if (size < 0) {
3058 PyErr_SetString(UnpicklingError,
3059 "BINUNICODE pickle has negative byte count");
3060 return -1;
3061 }
3062
3063 if (unpickler_read(self, &s, size) < 0)
3064 return -1;
3065
3066 str = PyUnicode_DecodeUTF8(s, size, NULL);
3067 if (str == NULL)
3068 return -1;
3069
3070 PDATA_PUSH(self->stack, str, -1);
3071 return 0;
3072}
3073
3074static int
3075load_tuple(UnpicklerObject *self)
3076{
3077 PyObject *tuple;
3078 int i;
3079
3080 if ((i = marker(self)) < 0)
3081 return -1;
3082
3083 tuple = Pdata_poptuple(self->stack, i);
3084 if (tuple == NULL)
3085 return -1;
3086 PDATA_PUSH(self->stack, tuple, -1);
3087 return 0;
3088}
3089
3090static int
3091load_counted_tuple(UnpicklerObject *self, int len)
3092{
3093 PyObject *tuple;
3094
3095 tuple = PyTuple_New(len);
3096 if (tuple == NULL)
3097 return -1;
3098
3099 while (--len >= 0) {
3100 PyObject *item;
3101
3102 PDATA_POP(self->stack, item);
3103 if (item == NULL)
3104 return -1;
3105 PyTuple_SET_ITEM(tuple, len, item);
3106 }
3107 PDATA_PUSH(self->stack, tuple, -1);
3108 return 0;
3109}
3110
3111static int
3112load_empty_list(UnpicklerObject *self)
3113{
3114 PyObject *list;
3115
3116 if ((list = PyList_New(0)) == NULL)
3117 return -1;
3118 PDATA_PUSH(self->stack, list, -1);
3119 return 0;
3120}
3121
3122static int
3123load_empty_dict(UnpicklerObject *self)
3124{
3125 PyObject *dict;
3126
3127 if ((dict = PyDict_New()) == NULL)
3128 return -1;
3129 PDATA_PUSH(self->stack, dict, -1);
3130 return 0;
3131}
3132
3133static int
3134load_list(UnpicklerObject *self)
3135{
3136 PyObject *list;
3137 int i;
3138
3139 if ((i = marker(self)) < 0)
3140 return -1;
3141
3142 list = Pdata_poplist(self->stack, i);
3143 if (list == NULL)
3144 return -1;
3145 PDATA_PUSH(self->stack, list, -1);
3146 return 0;
3147}
3148
3149static int
3150load_dict(UnpicklerObject *self)
3151{
3152 PyObject *dict, *key, *value;
3153 int i, j, k;
3154
3155 if ((i = marker(self)) < 0)
3156 return -1;
3157 j = self->stack->length;
3158
3159 if ((dict = PyDict_New()) == NULL)
3160 return -1;
3161
3162 for (k = i + 1; k < j; k += 2) {
3163 key = self->stack->data[k - 1];
3164 value = self->stack->data[k];
3165 if (PyDict_SetItem(dict, key, value) < 0) {
3166 Py_DECREF(dict);
3167 return -1;
3168 }
3169 }
3170 Pdata_clear(self->stack, i);
3171 PDATA_PUSH(self->stack, dict, -1);
3172 return 0;
3173}
3174
3175static PyObject *
3176instantiate(PyObject *cls, PyObject *args)
3177{
3178 PyObject *r = NULL;
3179
3180 /* XXX: The pickle.py module does not create instances this way when the
3181 args tuple is empty. See Unpickler._instantiate(). */
3182 if ((r = PyObject_CallObject(cls, args)))
3183 return r;
3184
3185 /* XXX: Is this still nescessary? */
3186 {
3187 PyObject *tp, *v, *tb, *tmp_value;
3188
3189 PyErr_Fetch(&tp, &v, &tb);
3190 tmp_value = v;
3191 /* NULL occurs when there was a KeyboardInterrupt */
3192 if (tmp_value == NULL)
3193 tmp_value = Py_None;
3194 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3195 Py_XDECREF(v);
3196 v = r;
3197 }
3198 PyErr_Restore(tp, v, tb);
3199 }
3200 return NULL;
3201}
3202
3203static int
3204load_obj(UnpicklerObject *self)
3205{
3206 PyObject *cls, *args, *obj = NULL;
3207 int i;
3208
3209 if ((i = marker(self)) < 0)
3210 return -1;
3211
3212 args = Pdata_poptuple(self->stack, i + 1);
3213 if (args == NULL)
3214 return -1;
3215
3216 PDATA_POP(self->stack, cls);
3217 if (cls) {
3218 obj = instantiate(cls, args);
3219 Py_DECREF(cls);
3220 }
3221 Py_DECREF(args);
3222 if (obj == NULL)
3223 return -1;
3224
3225 PDATA_PUSH(self->stack, obj, -1);
3226 return 0;
3227}
3228
3229static int
3230load_inst(UnpicklerObject *self)
3231{
3232 PyObject *cls = NULL;
3233 PyObject *args = NULL;
3234 PyObject *obj = NULL;
3235 PyObject *module_name;
3236 PyObject *class_name;
3237 Py_ssize_t len;
3238 int i;
3239 char *s;
3240
3241 if ((i = marker(self)) < 0)
3242 return -1;
3243 if ((len = unpickler_readline(self, &s)) < 0)
3244 return -1;
3245 if (len < 2)
3246 return bad_readline();
3247
3248 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3249 identifiers are permitted in Python 3.0, since the INST opcode is only
3250 supported by older protocols on Python 2.x. */
3251 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3252 if (module_name == NULL)
3253 return -1;
3254
3255 if ((len = unpickler_readline(self, &s)) >= 0) {
3256 if (len < 2)
3257 return bad_readline();
3258 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3259 if (class_name == NULL) {
3260 cls = find_class(self, module_name, class_name);
3261 Py_DECREF(class_name);
3262 }
3263 }
3264 Py_DECREF(module_name);
3265
3266 if (cls == NULL)
3267 return -1;
3268
3269 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3270 obj = instantiate(cls, args);
3271 Py_DECREF(args);
3272 }
3273 Py_DECREF(cls);
3274
3275 if (obj == NULL)
3276 return -1;
3277
3278 PDATA_PUSH(self->stack, obj, -1);
3279 return 0;
3280}
3281
3282static int
3283load_newobj(UnpicklerObject *self)
3284{
3285 PyObject *args = NULL;
3286 PyObject *clsraw = NULL;
3287 PyTypeObject *cls; /* clsraw cast to its true type */
3288 PyObject *obj;
3289
3290 /* Stack is ... cls argtuple, and we want to call
3291 * cls.__new__(cls, *argtuple).
3292 */
3293 PDATA_POP(self->stack, args);
3294 if (args == NULL)
3295 goto error;
3296 if (!PyTuple_Check(args)) {
3297 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3298 goto error;
3299 }
3300
3301 PDATA_POP(self->stack, clsraw);
3302 cls = (PyTypeObject *)clsraw;
3303 if (cls == NULL)
3304 goto error;
3305 if (!PyType_Check(cls)) {
3306 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3307 "isn't a type object");
3308 goto error;
3309 }
3310 if (cls->tp_new == NULL) {
3311 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3312 "has NULL tp_new");
3313 goto error;
3314 }
3315
3316 /* Call __new__. */
3317 obj = cls->tp_new(cls, args, NULL);
3318 if (obj == NULL)
3319 goto error;
3320
3321 Py_DECREF(args);
3322 Py_DECREF(clsraw);
3323 PDATA_PUSH(self->stack, obj, -1);
3324 return 0;
3325
3326 error:
3327 Py_XDECREF(args);
3328 Py_XDECREF(clsraw);
3329 return -1;
3330}
3331
3332static int
3333load_global(UnpicklerObject *self)
3334{
3335 PyObject *global = NULL;
3336 PyObject *module_name;
3337 PyObject *global_name;
3338 Py_ssize_t len;
3339 char *s;
3340
3341 if ((len = unpickler_readline(self, &s)) < 0)
3342 return -1;
3343 if (len < 2)
3344 return bad_readline();
3345 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3346 if (!module_name)
3347 return -1;
3348
3349 if ((len = unpickler_readline(self, &s)) >= 0) {
3350 if (len < 2) {
3351 Py_DECREF(module_name);
3352 return bad_readline();
3353 }
3354 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3355 if (global_name) {
3356 global = find_class(self, module_name, global_name);
3357 Py_DECREF(global_name);
3358 }
3359 }
3360 Py_DECREF(module_name);
3361
3362 if (global == NULL)
3363 return -1;
3364 PDATA_PUSH(self->stack, global, -1);
3365 return 0;
3366}
3367
3368static int
3369load_persid(UnpicklerObject *self)
3370{
3371 PyObject *pid;
3372 Py_ssize_t len;
3373 char *s;
3374
3375 if (self->pers_func) {
3376 if ((len = unpickler_readline(self, &s)) < 0)
3377 return -1;
3378 if (len < 2)
3379 return bad_readline();
3380
3381 pid = PyBytes_FromStringAndSize(s, len - 1);
3382 if (pid == NULL)
3383 return -1;
3384
3385 /* Ugh... this does not leak since unpickler_call() steals the
3386 reference to pid first. */
3387 pid = unpickler_call(self, self->pers_func, pid);
3388 if (pid == NULL)
3389 return -1;
3390
3391 PDATA_PUSH(self->stack, pid, -1);
3392 return 0;
3393 }
3394 else {
3395 PyErr_SetString(UnpicklingError,
3396 "A load persistent id instruction was encountered,\n"
3397 "but no persistent_load function was specified.");
3398 return -1;
3399 }
3400}
3401
3402static int
3403load_binpersid(UnpicklerObject *self)
3404{
3405 PyObject *pid;
3406
3407 if (self->pers_func) {
3408 PDATA_POP(self->stack, pid);
3409 if (pid == NULL)
3410 return -1;
3411
3412 /* Ugh... this does not leak since unpickler_call() steals the
3413 reference to pid first. */
3414 pid = unpickler_call(self, self->pers_func, pid);
3415 if (pid == NULL)
3416 return -1;
3417
3418 PDATA_PUSH(self->stack, pid, -1);
3419 return 0;
3420 }
3421 else {
3422 PyErr_SetString(UnpicklingError,
3423 "A load persistent id instruction was encountered,\n"
3424 "but no persistent_load function was specified.");
3425 return -1;
3426 }
3427}
3428
3429static int
3430load_pop(UnpicklerObject *self)
3431{
3432 int len;
3433
3434 if ((len = self->stack->length) <= 0)
3435 return stack_underflow();
3436
3437 /* Note that we split the (pickle.py) stack into two stacks,
3438 * an object stack and a mark stack. We have to be clever and
3439 * pop the right one. We do this by looking at the top of the
3440 * mark stack.
3441 */
3442
3443 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3444 self->num_marks--;
3445 else {
3446 len--;
3447 Py_DECREF(self->stack->data[len]);
3448 self->stack->length = len;
3449 }
3450
3451 return 0;
3452}
3453
3454static int
3455load_pop_mark(UnpicklerObject *self)
3456{
3457 int i;
3458
3459 if ((i = marker(self)) < 0)
3460 return -1;
3461
3462 Pdata_clear(self->stack, i);
3463
3464 return 0;
3465}
3466
3467static int
3468load_dup(UnpicklerObject *self)
3469{
3470 PyObject *last;
3471 int len;
3472
3473 if ((len = self->stack->length) <= 0)
3474 return stack_underflow();
3475 last = self->stack->data[len - 1];
3476 PDATA_APPEND(self->stack, last, -1);
3477 return 0;
3478}
3479
3480static int
3481load_get(UnpicklerObject *self)
3482{
3483 PyObject *key, *value;
3484 Py_ssize_t len;
3485 char *s;
3486
3487 if ((len = unpickler_readline(self, &s)) < 0)
3488 return -1;
3489 if (len < 2)
3490 return bad_readline();
3491
3492 key = PyLong_FromString(s, NULL, 10);
3493 if (key == NULL)
3494 return -1;
3495
3496 value = PyDict_GetItemWithError(self->memo, key);
3497 if (value == NULL) {
3498 if (!PyErr_Occurred())
3499 PyErr_SetObject(PyExc_KeyError, key);
3500 Py_DECREF(key);
3501 return -1;
3502 }
3503 Py_DECREF(key);
3504
3505 PDATA_APPEND(self->stack, value, -1);
3506 return 0;
3507}
3508
3509static int
3510load_binget(UnpicklerObject *self)
3511{
3512 PyObject *key, *value;
3513 char *s;
3514
3515 if (unpickler_read(self, &s, 1) < 0)
3516 return -1;
3517
3518 /* Here, the unsigned cast is necessary to avoid negative values. */
3519 key = PyLong_FromLong((long)(unsigned char)s[0]);
3520 if (key == NULL)
3521 return -1;
3522
3523 value = PyDict_GetItemWithError(self->memo, key);
3524 if (value == NULL) {
3525 if (!PyErr_Occurred())
3526 PyErr_SetObject(PyExc_KeyError, key);
3527 Py_DECREF(key);
3528 return -1;
3529 }
3530 Py_DECREF(key);
3531
3532 PDATA_APPEND(self->stack, value, -1);
3533 return 0;
3534}
3535
3536static int
3537load_long_binget(UnpicklerObject *self)
3538{
3539 PyObject *key, *value;
3540 char *s;
3541 long k;
3542
3543 if (unpickler_read(self, &s, 4) < 0)
3544 return -1;
3545
3546 k = (long)(unsigned char)s[0];
3547 k |= (long)(unsigned char)s[1] << 8;
3548 k |= (long)(unsigned char)s[2] << 16;
3549 k |= (long)(unsigned char)s[3] << 24;
3550
3551 key = PyLong_FromLong(k);
3552 if (key == NULL)
3553 return -1;
3554
3555 value = PyDict_GetItemWithError(self->memo, key);
3556 if (value == NULL) {
3557 if (!PyErr_Occurred())
3558 PyErr_SetObject(PyExc_KeyError, key);
3559 Py_DECREF(key);
3560 return -1;
3561 }
3562 Py_DECREF(key);
3563
3564 PDATA_APPEND(self->stack, value, -1);
3565 return 0;
3566}
3567
3568/* Push an object from the extension registry (EXT[124]). nbytes is
3569 * the number of bytes following the opcode, holding the index (code) value.
3570 */
3571static int
3572load_extension(UnpicklerObject *self, int nbytes)
3573{
3574 char *codebytes; /* the nbytes bytes after the opcode */
3575 long code; /* calc_binint returns long */
3576 PyObject *py_code; /* code as a Python int */
3577 PyObject *obj; /* the object to push */
3578 PyObject *pair; /* (module_name, class_name) */
3579 PyObject *module_name, *class_name;
3580
3581 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3582 if (unpickler_read(self, &codebytes, nbytes) < 0)
3583 return -1;
3584 code = calc_binint(codebytes, nbytes);
3585 if (code <= 0) { /* note that 0 is forbidden */
3586 /* Corrupt or hostile pickle. */
3587 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3588 return -1;
3589 }
3590
3591 /* Look for the code in the cache. */
3592 py_code = PyLong_FromLong(code);
3593 if (py_code == NULL)
3594 return -1;
3595 obj = PyDict_GetItem(extension_cache, py_code);
3596 if (obj != NULL) {
3597 /* Bingo. */
3598 Py_DECREF(py_code);
3599 PDATA_APPEND(self->stack, obj, -1);
3600 return 0;
3601 }
3602
3603 /* Look up the (module_name, class_name) pair. */
3604 pair = PyDict_GetItem(inverted_registry, py_code);
3605 if (pair == NULL) {
3606 Py_DECREF(py_code);
3607 PyErr_Format(PyExc_ValueError, "unregistered extension "
3608 "code %ld", code);
3609 return -1;
3610 }
3611 /* Since the extension registry is manipulable via Python code,
3612 * confirm that pair is really a 2-tuple of strings.
3613 */
3614 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3615 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3616 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3617 Py_DECREF(py_code);
3618 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3619 "isn't a 2-tuple of strings", code);
3620 return -1;
3621 }
3622 /* Load the object. */
3623 obj = find_class(self, module_name, class_name);
3624 if (obj == NULL) {
3625 Py_DECREF(py_code);
3626 return -1;
3627 }
3628 /* Cache code -> obj. */
3629 code = PyDict_SetItem(extension_cache, py_code, obj);
3630 Py_DECREF(py_code);
3631 if (code < 0) {
3632 Py_DECREF(obj);
3633 return -1;
3634 }
3635 PDATA_PUSH(self->stack, obj, -1);
3636 return 0;
3637}
3638
3639static int
3640load_put(UnpicklerObject *self)
3641{
3642 PyObject *key, *value;
3643 Py_ssize_t len;
3644 char *s;
3645 int x;
3646
3647 if ((len = unpickler_readline(self, &s)) < 0)
3648 return -1;
3649 if (len < 2)
3650 return bad_readline();
3651 if ((x = self->stack->length) <= 0)
3652 return stack_underflow();
3653
3654 key = PyLong_FromString(s, NULL, 10);
3655 if (key == NULL)
3656 return -1;
3657 value = self->stack->data[x - 1];
3658
3659 x = PyDict_SetItem(self->memo, key, value);
3660 Py_DECREF(key);
3661 return x;
3662}
3663
3664static int
3665load_binput(UnpicklerObject *self)
3666{
3667 PyObject *key, *value;
3668 char *s;
3669 int x;
3670
3671 if (unpickler_read(self, &s, 1) < 0)
3672 return -1;
3673 if ((x = self->stack->length) <= 0)
3674 return stack_underflow();
3675
3676 key = PyLong_FromLong((long)(unsigned char)s[0]);
3677 if (key == NULL)
3678 return -1;
3679 value = self->stack->data[x - 1];
3680
3681 x = PyDict_SetItem(self->memo, key, value);
3682 Py_DECREF(key);
3683 return x;
3684}
3685
3686static int
3687load_long_binput(UnpicklerObject *self)
3688{
3689 PyObject *key, *value;
3690 long k;
3691 char *s;
3692 int x;
3693
3694 if (unpickler_read(self, &s, 4) < 0)
3695 return -1;
3696 if ((x = self->stack->length) <= 0)
3697 return stack_underflow();
3698
3699 k = (long)(unsigned char)s[0];
3700 k |= (long)(unsigned char)s[1] << 8;
3701 k |= (long)(unsigned char)s[2] << 16;
3702 k |= (long)(unsigned char)s[3] << 24;
3703
3704 key = PyLong_FromLong(k);
3705 if (key == NULL)
3706 return -1;
3707 value = self->stack->data[x - 1];
3708
3709 x = PyDict_SetItem(self->memo, key, value);
3710 Py_DECREF(key);
3711 return x;
3712}
3713
3714static int
3715do_append(UnpicklerObject *self, int x)
3716{
3717 PyObject *value;
3718 PyObject *list;
3719 int len, i;
3720
3721 len = self->stack->length;
3722 if (x > len || x <= 0)
3723 return stack_underflow();
3724 if (len == x) /* nothing to do */
3725 return 0;
3726
3727 list = self->stack->data[x - 1];
3728
3729 if (PyList_Check(list)) {
3730 PyObject *slice;
3731 Py_ssize_t list_len;
3732
3733 slice = Pdata_poplist(self->stack, x);
3734 if (!slice)
3735 return -1;
3736 list_len = PyList_GET_SIZE(list);
3737 i = PyList_SetSlice(list, list_len, list_len, slice);
3738 Py_DECREF(slice);
3739 return i;
3740 }
3741 else {
3742 PyObject *append_func;
3743
3744 append_func = PyObject_GetAttrString(list, "append");
3745 if (append_func == NULL)
3746 return -1;
3747 for (i = x; i < len; i++) {
3748 PyObject *result;
3749
3750 value = self->stack->data[i];
3751 result = unpickler_call(self, append_func, value);
3752 if (result == NULL) {
3753 Pdata_clear(self->stack, i + 1);
3754 self->stack->length = x;
3755 return -1;
3756 }
3757 Py_DECREF(result);
3758 }
3759 self->stack->length = x;
3760 }
3761
3762 return 0;
3763}
3764
3765static int
3766load_append(UnpicklerObject *self)
3767{
3768 return do_append(self, self->stack->length - 1);
3769}
3770
3771static int
3772load_appends(UnpicklerObject *self)
3773{
3774 return do_append(self, marker(self));
3775}
3776
3777static int
3778do_setitems(UnpicklerObject *self, int x)
3779{
3780 PyObject *value, *key;
3781 PyObject *dict;
3782 int len, i;
3783 int status = 0;
3784
3785 len = self->stack->length;
3786 if (x > len || x <= 0)
3787 return stack_underflow();
3788 if (len == x) /* nothing to do */
3789 return 0;
3790 if ((len - x) % 2 != 0) {
3791 /* Currupt or hostile pickle -- we never write one like this. */
3792 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3793 return -1;
3794 }
3795
3796 /* Here, dict does not actually need to be a PyDict; it could be anything
3797 that supports the __setitem__ attribute. */
3798 dict = self->stack->data[x - 1];
3799
3800 for (i = x + 1; i < len; i += 2) {
3801 key = self->stack->data[i - 1];
3802 value = self->stack->data[i];
3803 if (PyObject_SetItem(dict, key, value) < 0) {
3804 status = -1;
3805 break;
3806 }
3807 }
3808
3809 Pdata_clear(self->stack, x);
3810 return status;
3811}
3812
3813static int
3814load_setitem(UnpicklerObject *self)
3815{
3816 return do_setitems(self, self->stack->length - 2);
3817}
3818
3819static int
3820load_setitems(UnpicklerObject *self)
3821{
3822 return do_setitems(self, marker(self));
3823}
3824
3825static int
3826load_build(UnpicklerObject *self)
3827{
3828 PyObject *state, *inst, *slotstate;
3829 PyObject *setstate;
3830 int status = 0;
3831
3832 /* Stack is ... instance, state. We want to leave instance at
3833 * the stack top, possibly mutated via instance.__setstate__(state).
3834 */
3835 if (self->stack->length < 2)
3836 return stack_underflow();
3837
3838 PDATA_POP(self->stack, state);
3839 if (state == NULL)
3840 return -1;
3841
3842 inst = self->stack->data[self->stack->length - 1];
3843
3844 setstate = PyObject_GetAttrString(inst, "__setstate__");
3845 if (setstate == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) {
3846 PyErr_Clear();
3847 }
3848 else {
3849 PyObject *result;
3850
3851 /* The explicit __setstate__ is responsible for everything. */
3852 result = unpickler_call(self, setstate, state);
3853 Py_DECREF(setstate);
3854 if (result == NULL)
3855 return -1;
3856 Py_DECREF(result);
3857 return 0;
3858 }
3859
3860 /* A default __setstate__. First see whether state embeds a
3861 * slot state dict too (a proto 2 addition).
3862 */
3863 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3864 PyObject *tmp = state;
3865
3866 state = PyTuple_GET_ITEM(tmp, 0);
3867 slotstate = PyTuple_GET_ITEM(tmp, 1);
3868 Py_INCREF(state);
3869 Py_INCREF(slotstate);
3870 Py_DECREF(tmp);
3871 }
3872 else
3873 slotstate = NULL;
3874
3875 /* Set inst.__dict__ from the state dict (if any). */
3876 if (state != Py_None) {
3877 PyObject *dict;
3878
3879 if (!PyDict_Check(state)) {
3880 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3881 goto error;
3882 }
3883 dict = PyObject_GetAttrString(inst, "__dict__");
3884 if (dict == NULL)
3885 goto error;
3886
3887 PyDict_Update(dict, state);
3888 Py_DECREF(dict);
3889 }
3890
3891 /* Also set instance attributes from the slotstate dict (if any). */
3892 if (slotstate != NULL) {
3893 PyObject *d_key, *d_value;
3894 Py_ssize_t i;
3895
3896 if (!PyDict_Check(slotstate)) {
3897 PyErr_SetString(UnpicklingError,
3898 "slot state is not a dictionary");
3899 goto error;
3900 }
3901 i = 0;
3902 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3903 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3904 goto error;
3905 }
3906 }
3907
3908 if (0) {
3909 error:
3910 status = -1;
3911 }
3912
3913 Py_DECREF(state);
3914 Py_XDECREF(slotstate);
3915 return status;
3916}
3917
3918static int
3919load_mark(UnpicklerObject *self)
3920{
3921
3922 /* Note that we split the (pickle.py) stack into two stacks, an
3923 * object stack and a mark stack. Here we push a mark onto the
3924 * mark stack.
3925 */
3926
3927 if ((self->num_marks + 1) >= self->marks_size) {
3928 size_t alloc;
3929 int *marks;
3930
3931 /* Use the size_t type to check for overflow. */
3932 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00003933 if (alloc > PY_SSIZE_T_MAX ||
3934 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003935 PyErr_NoMemory();
3936 return -1;
3937 }
3938
3939 if (self->marks == NULL)
3940 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
3941 else
3942 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
3943 if (marks == NULL) {
3944 PyErr_NoMemory();
3945 return -1;
3946 }
3947 self->marks = marks;
3948 self->marks_size = (Py_ssize_t)alloc;
3949 }
3950
3951 self->marks[self->num_marks++] = self->stack->length;
3952
3953 return 0;
3954}
3955
3956static int
3957load_reduce(UnpicklerObject *self)
3958{
3959 PyObject *callable = NULL;
3960 PyObject *argtup = NULL;
3961 PyObject *obj = NULL;
3962
3963 PDATA_POP(self->stack, argtup);
3964 if (argtup == NULL)
3965 return -1;
3966 PDATA_POP(self->stack, callable);
3967 if (callable) {
3968 obj = instantiate(callable, argtup);
3969 Py_DECREF(callable);
3970 }
3971 Py_DECREF(argtup);
3972
3973 if (obj == NULL)
3974 return -1;
3975
3976 PDATA_PUSH(self->stack, obj, -1);
3977 return 0;
3978}
3979
3980/* Just raises an error if we don't know the protocol specified. PROTO
3981 * is the first opcode for protocols >= 2.
3982 */
3983static int
3984load_proto(UnpicklerObject *self)
3985{
3986 char *s;
3987 int i;
3988
3989 if (unpickler_read(self, &s, 1) < 0)
3990 return -1;
3991
3992 i = (unsigned char)s[0];
3993 if (i <= HIGHEST_PROTOCOL)
3994 return 0;
3995
3996 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
3997 return -1;
3998}
3999
4000static PyObject *
4001load(UnpicklerObject *self)
4002{
4003 PyObject *err;
4004 PyObject *value = NULL;
4005 char *s;
4006
4007 self->num_marks = 0;
4008 if (self->stack->length)
4009 Pdata_clear(self->stack, 0);
4010
4011 /* Convenient macros for the dispatch while-switch loop just below. */
4012#define OP(opcode, load_func) \
4013 case opcode: if (load_func(self) < 0) break; continue;
4014
4015#define OP_ARG(opcode, load_func, arg) \
4016 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4017
4018 while (1) {
4019 if (unpickler_read(self, &s, 1) < 0)
4020 break;
4021
4022 switch ((enum opcode)s[0]) {
4023 OP(NONE, load_none)
4024 OP(BININT, load_binint)
4025 OP(BININT1, load_binint1)
4026 OP(BININT2, load_binint2)
4027 OP(INT, load_int)
4028 OP(LONG, load_long)
4029 OP_ARG(LONG1, load_counted_long, 1)
4030 OP_ARG(LONG4, load_counted_long, 4)
4031 OP(FLOAT, load_float)
4032 OP(BINFLOAT, load_binfloat)
4033 OP(BINBYTES, load_binbytes)
4034 OP(SHORT_BINBYTES, load_short_binbytes)
4035 OP(BINSTRING, load_binstring)
4036 OP(SHORT_BINSTRING, load_short_binstring)
4037 OP(STRING, load_string)
4038 OP(UNICODE, load_unicode)
4039 OP(BINUNICODE, load_binunicode)
4040 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4041 OP_ARG(TUPLE1, load_counted_tuple, 1)
4042 OP_ARG(TUPLE2, load_counted_tuple, 2)
4043 OP_ARG(TUPLE3, load_counted_tuple, 3)
4044 OP(TUPLE, load_tuple)
4045 OP(EMPTY_LIST, load_empty_list)
4046 OP(LIST, load_list)
4047 OP(EMPTY_DICT, load_empty_dict)
4048 OP(DICT, load_dict)
4049 OP(OBJ, load_obj)
4050 OP(INST, load_inst)
4051 OP(NEWOBJ, load_newobj)
4052 OP(GLOBAL, load_global)
4053 OP(APPEND, load_append)
4054 OP(APPENDS, load_appends)
4055 OP(BUILD, load_build)
4056 OP(DUP, load_dup)
4057 OP(BINGET, load_binget)
4058 OP(LONG_BINGET, load_long_binget)
4059 OP(GET, load_get)
4060 OP(MARK, load_mark)
4061 OP(BINPUT, load_binput)
4062 OP(LONG_BINPUT, load_long_binput)
4063 OP(PUT, load_put)
4064 OP(POP, load_pop)
4065 OP(POP_MARK, load_pop_mark)
4066 OP(SETITEM, load_setitem)
4067 OP(SETITEMS, load_setitems)
4068 OP(PERSID, load_persid)
4069 OP(BINPERSID, load_binpersid)
4070 OP(REDUCE, load_reduce)
4071 OP(PROTO, load_proto)
4072 OP_ARG(EXT1, load_extension, 1)
4073 OP_ARG(EXT2, load_extension, 2)
4074 OP_ARG(EXT4, load_extension, 4)
4075 OP_ARG(NEWTRUE, load_bool, Py_True)
4076 OP_ARG(NEWFALSE, load_bool, Py_False)
4077
4078 case STOP:
4079 break;
4080
4081 case '\0':
4082 PyErr_SetNone(PyExc_EOFError);
4083 return NULL;
4084
4085 default:
4086 PyErr_Format(UnpicklingError,
4087 "invalid load key, '%c'.", s[0]);
4088 return NULL;
4089 }
4090
4091 break; /* and we are done! */
4092 }
4093
4094 /* XXX: It is not clear what this is actually for. */
4095 if ((err = PyErr_Occurred())) {
4096 if (err == PyExc_EOFError) {
4097 PyErr_SetNone(PyExc_EOFError);
4098 }
4099 return NULL;
4100 }
4101
4102 PDATA_POP(self->stack, value);
4103 return value;
4104}
4105
4106PyDoc_STRVAR(Unpickler_load_doc,
4107"load() -> object. Load a pickle."
4108"\n"
4109"Read a pickled object representation from the open file object given in\n"
4110"the constructor, and return the reconstituted object hierarchy specified\n"
4111"therein.\n");
4112
4113static PyObject *
4114Unpickler_load(UnpicklerObject *self)
4115{
4116 /* Check whether the Unpickler was initialized correctly. This prevents
4117 segfaulting if a subclass overridden __init__ with a function that does
4118 not call Unpickler.__init__(). Here, we simply ensure that self->read
4119 is not NULL. */
4120 if (self->read == NULL) {
4121 PyErr_Format(UnpicklingError,
4122 "Unpickler.__init__() was not called by %s.__init__()",
4123 Py_TYPE(self)->tp_name);
4124 return NULL;
4125 }
4126
4127 return load(self);
4128}
4129
4130/* The name of find_class() is misleading. In newer pickle protocols, this
4131 function is used for loading any global (i.e., functions), not just
4132 classes. The name is kept only for backward compatibility. */
4133
4134PyDoc_STRVAR(Unpickler_find_class_doc,
4135"find_class(module_name, global_name) -> object.\n"
4136"\n"
4137"Return an object from a specified module, importing the module if\n"
4138"necessary. Subclasses may override this method (e.g. to restrict\n"
4139"unpickling of arbitrary classes and functions).\n"
4140"\n"
4141"This method is called whenever a class or a function object is\n"
4142"needed. Both arguments passed are str objects.\n");
4143
4144static PyObject *
4145Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4146{
4147 PyObject *global;
4148 PyObject *modules_dict;
4149 PyObject *module;
4150 PyObject *module_name, *global_name;
4151
4152 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4153 &module_name, &global_name))
4154 return NULL;
4155
4156 modules_dict = PySys_GetObject("modules");
4157 if (modules_dict == NULL)
4158 return NULL;
4159
4160 module = PyDict_GetItem(modules_dict, module_name);
4161 if (module == NULL) {
4162 module = PyImport_Import(module_name);
4163 if (module == NULL)
4164 return NULL;
4165 global = PyObject_GetAttr(module, global_name);
4166 Py_DECREF(module);
4167 }
4168 else {
4169 global = PyObject_GetAttr(module, global_name);
4170 }
4171 return global;
4172}
4173
4174static struct PyMethodDef Unpickler_methods[] = {
4175 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4176 Unpickler_load_doc},
4177 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4178 Unpickler_find_class_doc},
4179 {NULL, NULL} /* sentinel */
4180};
4181
4182static void
4183Unpickler_dealloc(UnpicklerObject *self)
4184{
4185 PyObject_GC_UnTrack((PyObject *)self);
4186 Py_XDECREF(self->readline);
4187 Py_XDECREF(self->read);
4188 Py_XDECREF(self->memo);
4189 Py_XDECREF(self->stack);
4190 Py_XDECREF(self->pers_func);
4191 Py_XDECREF(self->arg);
4192 Py_XDECREF(self->last_string);
4193
4194 PyMem_Free(self->marks);
4195 free(self->encoding);
4196 free(self->errors);
4197
4198 Py_TYPE(self)->tp_free((PyObject *)self);
4199}
4200
4201static int
4202Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4203{
4204 Py_VISIT(self->readline);
4205 Py_VISIT(self->read);
4206 Py_VISIT(self->memo);
4207 Py_VISIT(self->stack);
4208 Py_VISIT(self->pers_func);
4209 Py_VISIT(self->arg);
4210 Py_VISIT(self->last_string);
4211 return 0;
4212}
4213
4214static int
4215Unpickler_clear(UnpicklerObject *self)
4216{
4217 Py_CLEAR(self->readline);
4218 Py_CLEAR(self->read);
4219 Py_CLEAR(self->memo);
4220 Py_CLEAR(self->stack);
4221 Py_CLEAR(self->pers_func);
4222 Py_CLEAR(self->arg);
4223 Py_CLEAR(self->last_string);
4224
4225 PyMem_Free(self->marks);
4226 self->marks = NULL;
4227 free(self->encoding);
4228 self->encoding = NULL;
4229 free(self->errors);
4230 self->errors = NULL;
4231
4232 return 0;
4233}
4234
4235PyDoc_STRVAR(Unpickler_doc,
4236"Unpickler(file, *, encoding='ASCII', errors='strict')"
4237"\n"
4238"This takes a binary file for reading a pickle data stream.\n"
4239"\n"
4240"The protocol version of the pickle is detected automatically, so no\n"
4241"proto argument is needed.\n"
4242"\n"
4243"The file-like object must have two methods, a read() method\n"
4244"that takes an integer argument, and a readline() method that\n"
4245"requires no arguments. Both methods should return bytes.\n"
4246"Thus file-like object can be a binary file object opened for\n"
4247"reading, a BytesIO object, or any other custom object that\n"
4248"meets this interface.\n"
4249"\n"
4250"Optional keyword arguments are encoding and errors, which are\n"
4251"used to decode 8-bit string instances pickled by Python 2.x.\n"
4252"These default to 'ASCII' and 'strict', respectively.\n");
4253
4254static int
4255Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4256{
4257 static char *kwlist[] = {"file", "encoding", "errors", 0};
4258 PyObject *file;
4259 char *encoding = NULL;
4260 char *errors = NULL;
4261
4262 /* XXX: That is an horrible error message. But, I don't know how to do
4263 better... */
4264 if (Py_SIZE(args) != 1) {
4265 PyErr_Format(PyExc_TypeError,
4266 "%s takes exactly one positional argument (%zd given)",
4267 Py_TYPE(self)->tp_name, Py_SIZE(args));
4268 return -1;
4269 }
4270
4271 /* Arguments parsing needs to be done in the __init__() method to allow
4272 subclasses to define their own __init__() method, which may (or may
4273 not) support Unpickler arguments. However, this means we need to be
4274 extra careful in the other Unpickler methods, since a subclass could
4275 forget to call Unpickler.__init__() thus breaking our internal
4276 invariants. */
4277 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4278 &file, &encoding, &errors))
4279 return -1;
4280
4281 /* In case of multiple __init__() calls, clear previous content. */
4282 if (self->read != NULL)
4283 (void)Unpickler_clear(self);
4284
4285 self->read = PyObject_GetAttrString(file, "read");
4286 self->readline = PyObject_GetAttrString(file, "readline");
4287 if (self->readline == NULL || self->read == NULL)
4288 return -1;
4289
4290 if (encoding == NULL)
4291 encoding = "ASCII";
4292 if (errors == NULL)
4293 errors = "strict";
4294
4295 self->encoding = strdup(encoding);
4296 self->errors = strdup(errors);
4297 if (self->encoding == NULL || self->errors == NULL) {
4298 PyErr_NoMemory();
4299 return -1;
4300 }
4301
4302 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4303 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4304 "persistent_load");
4305 if (self->pers_func == NULL)
4306 return -1;
4307 }
4308 else {
4309 self->pers_func = NULL;
4310 }
4311
4312 self->stack = (Pdata *)Pdata_New();
4313 if (self->stack == NULL)
4314 return -1;
4315
4316 self->memo = PyDict_New();
4317 if (self->memo == NULL)
4318 return -1;
4319
4320 return 0;
4321}
4322
4323static PyObject *
4324Unpickler_get_memo(UnpicklerObject *self)
4325{
4326 if (self->memo == NULL)
4327 PyErr_SetString(PyExc_AttributeError, "memo");
4328 else
4329 Py_INCREF(self->memo);
4330 return self->memo;
4331}
4332
4333static int
4334Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4335{
4336 PyObject *tmp;
4337
4338 if (value == NULL) {
4339 PyErr_SetString(PyExc_TypeError,
4340 "attribute deletion is not supported");
4341 return -1;
4342 }
4343 if (!PyDict_Check(value)) {
4344 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4345 return -1;
4346 }
4347
4348 tmp = self->memo;
4349 Py_INCREF(value);
4350 self->memo = value;
4351 Py_XDECREF(tmp);
4352
4353 return 0;
4354}
4355
4356static PyObject *
4357Unpickler_get_persload(UnpicklerObject *self)
4358{
4359 if (self->pers_func == NULL)
4360 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4361 else
4362 Py_INCREF(self->pers_func);
4363 return self->pers_func;
4364}
4365
4366static int
4367Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4368{
4369 PyObject *tmp;
4370
4371 if (value == NULL) {
4372 PyErr_SetString(PyExc_TypeError,
4373 "attribute deletion is not supported");
4374 return -1;
4375 }
4376 if (!PyCallable_Check(value)) {
4377 PyErr_SetString(PyExc_TypeError,
4378 "persistent_load must be a callable taking "
4379 "one argument");
4380 return -1;
4381 }
4382
4383 tmp = self->pers_func;
4384 Py_INCREF(value);
4385 self->pers_func = value;
4386 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4387
4388 return 0;
4389}
4390
4391static PyGetSetDef Unpickler_getsets[] = {
4392 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4393 {"persistent_load", (getter)Unpickler_get_persload,
4394 (setter)Unpickler_set_persload},
4395 {NULL}
4396};
4397
4398static PyTypeObject Unpickler_Type = {
4399 PyVarObject_HEAD_INIT(NULL, 0)
4400 "_pickle.Unpickler", /*tp_name*/
4401 sizeof(UnpicklerObject), /*tp_basicsize*/
4402 0, /*tp_itemsize*/
4403 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4404 0, /*tp_print*/
4405 0, /*tp_getattr*/
4406 0, /*tp_setattr*/
4407 0, /*tp_compare*/
4408 0, /*tp_repr*/
4409 0, /*tp_as_number*/
4410 0, /*tp_as_sequence*/
4411 0, /*tp_as_mapping*/
4412 0, /*tp_hash*/
4413 0, /*tp_call*/
4414 0, /*tp_str*/
4415 0, /*tp_getattro*/
4416 0, /*tp_setattro*/
4417 0, /*tp_as_buffer*/
4418 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4419 Unpickler_doc, /*tp_doc*/
4420 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4421 (inquiry)Unpickler_clear, /*tp_clear*/
4422 0, /*tp_richcompare*/
4423 0, /*tp_weaklistoffset*/
4424 0, /*tp_iter*/
4425 0, /*tp_iternext*/
4426 Unpickler_methods, /*tp_methods*/
4427 0, /*tp_members*/
4428 Unpickler_getsets, /*tp_getset*/
4429 0, /*tp_base*/
4430 0, /*tp_dict*/
4431 0, /*tp_descr_get*/
4432 0, /*tp_descr_set*/
4433 0, /*tp_dictoffset*/
4434 (initproc)Unpickler_init, /*tp_init*/
4435 PyType_GenericAlloc, /*tp_alloc*/
4436 PyType_GenericNew, /*tp_new*/
4437 PyObject_GC_Del, /*tp_free*/
4438 0, /*tp_is_gc*/
4439};
4440
4441static int
4442init_stuff(void)
4443{
4444 PyObject *copyreg;
4445
4446 copyreg = PyImport_ImportModule("copyreg");
4447 if (!copyreg)
4448 return -1;
4449
4450 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4451 if (!dispatch_table)
4452 goto error;
4453
4454 extension_registry = \
4455 PyObject_GetAttrString(copyreg, "_extension_registry");
4456 if (!extension_registry)
4457 goto error;
4458
4459 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4460 if (!inverted_registry)
4461 goto error;
4462
4463 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4464 if (!extension_cache)
4465 goto error;
4466
4467 Py_DECREF(copyreg);
4468
4469 empty_tuple = PyTuple_New(0);
4470 if (empty_tuple == NULL)
4471 return -1;
4472
4473 two_tuple = PyTuple_New(2);
4474 if (two_tuple == NULL)
4475 return -1;
4476 /* We use this temp container with no regard to refcounts, or to
4477 * keeping containees alive. Exempt from GC, because we don't
4478 * want anything looking at two_tuple() by magic.
4479 */
4480 PyObject_GC_UnTrack(two_tuple);
4481
4482 return 0;
4483
4484 error:
4485 Py_DECREF(copyreg);
4486 return -1;
4487}
4488
4489static struct PyModuleDef _picklemodule = {
4490 PyModuleDef_HEAD_INIT,
4491 "_pickle",
4492 pickle_module_doc,
4493 -1,
4494 NULL,
4495 NULL,
4496 NULL,
4497 NULL,
4498 NULL
4499};
4500
4501PyMODINIT_FUNC
4502PyInit__pickle(void)
4503{
4504 PyObject *m;
4505
4506 if (PyType_Ready(&Unpickler_Type) < 0)
4507 return NULL;
4508 if (PyType_Ready(&Pickler_Type) < 0)
4509 return NULL;
4510 if (PyType_Ready(&Pdata_Type) < 0)
4511 return NULL;
4512
4513 /* Create the module and add the functions. */
4514 m = PyModule_Create(&_picklemodule);
4515 if (m == NULL)
4516 return NULL;
4517
4518 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4519 return NULL;
4520 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4521 return NULL;
4522
4523 /* Initialize the exceptions. */
4524 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4525 if (PickleError == NULL)
4526 return NULL;
4527 PicklingError = \
4528 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4529 if (PicklingError == NULL)
4530 return NULL;
4531 UnpicklingError = \
4532 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4533 if (UnpicklingError == NULL)
4534 return NULL;
4535
4536 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4537 return NULL;
4538 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4539 return NULL;
4540 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4541 return NULL;
4542
4543 if (init_stuff() < 0)
4544 return NULL;
4545
4546 return m;
4547}