blob: c1facd8381398687425ac4254bf165b2e6ee2f54 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version numbers known to this module.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol number defined in this file */
    DEFAULT_PROTOCOL = 3    /* NOTE(review): presumably the protocol used when
                               the caller does not choose one; the use site is
                               outside this view -- confirm */
};
12
13
/* Pickle opcodes.  These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each enumerator is the one-byte tag that the save_*() routines store as
   the first byte of an instruction (e.g. pdata[0] = GET).  They are spelled
   as character constants so they can be compared with pickle.py directly.

   NOTE(review): '\x80' .. '\x8b' have the high bit set, so as plain
   character constants their int value is implementation-defined (negative
   where char is signed).  The code in this file only stores them into char
   buffers; keep that in mind before comparing them against unsigned bytes. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Each string is a complete text-mode INT instruction ('I', the digits and
 * the terminating newline).  save_bool() writes sizeof(...) - 1 bytes of it,
 * i.e. everything except the C string terminator.  Any earlier definition of
 * TRUE/FALSE (e.g. from a system header) is discarded first.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the Pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures (see fast_save_enter()
       and fast_save_leave()). */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream.
       pickler_write() sends any single chunk larger than this straight to
       write() instead of buffering it. */
    WRITE_BUF_SIZE = 256,
};
103
/* Exception classes for pickle. These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used.
   They are NULL until initialised; the initialisation code is outside this
   part of the file. */
static PyObject *PickleError;
static PyObject *PicklingError;
static PyObject *UnpicklingError;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple.  Used, for instance, as the argument
   tuple when unpickler_readline() calls readline(). */
static PyObject *empty_tuple;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
/* Internal data type used as the unpickling stack.
 *
 * It is a growable array of object pointers: slots [0, length) of 'data'
 * are in use and 'size' slots are allocated.  Pdata_dealloc() releases one
 * reference for every slot in use, i.e. the stack owns a reference to each
 * object it holds.
 */
typedef struct {
    PyObject_HEAD
    int length;         /* number of initial slots in data currently used */
    int size;           /* number of slots in data allocated */
    PyObject **data;    /* slot array, allocated with PyMem_Malloc() and
                           resized with PyMem_Realloc() */
} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
/* Type object for Pdata.  Only the name, the instance size and the
   deallocator are filled in; every other slot is left zero-initialised. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   D is a Pdata*, O the object and ER the value that the *enclosing function*
   returns when the push fails (the macro contains a return statement).
   Note that on failure O is not released by the macro. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   Same parameters as PDATA_PUSH.  The reference is added before the push is
   attempted and is not dropped again if the push fails. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keeps track of the seen
                                   objects to support self-referential objects
                                   pickling.  memo_put() stores entries as
                                   {address: (memo_id, object)}. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached one-element argument tuple managed
                                   by ARG_TUP()/FREE_ARG_TUP() for
                                   pickler_call(); NULL until first use. */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with
                                   self-referential objects. */
    int fast_nesting;           /* Nesting depth maintained by
                                   fast_save_enter()/fast_save_leave(); set to
                                   -1 by fast_save_enter() on error. */
    PyObject *fast_memo;        /* Dictionary keyed by object address, created
                                   lazily by fast_save_enter() once the
                                   nesting reaches FAST_NESTING_LIMIT. */
} PicklerObject;
320
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the input stream */
    PyObject *read;             /* read() method of the input stream */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached one-element argument tuple managed
                                   by ARG_TUP()/FREE_ARG_TUP() for
                                   unpickler_call(); NULL until first use. */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object returned
                                   by read() or readline().  The char pointer
                                   handed out by unpickler_read() and
                                   unpickler_readline() points into it. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of errors handling scheme to use when
                                   decoding strings. The default value is
                                   "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
} UnpicklerObject;
344
/* Forward declarations.  save() and save_reduce() are called by the save_*()
   helpers below before their definitions appear; the two type objects are
   defined further down in the file. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
350
351
/* Helpers for creating the argument tuple passed to functions. This has the
   performance advantage of calling PyTuple_New() only once. */

/* ARG_TUP(self, obj): make (self)->arg a one-element tuple holding obj.
   The tuple is created on first use and reused afterwards; whatever it held
   before is released.  The reference to obj is always consumed: it is either
   stored in the tuple or, if the tuple cannot be created, dropped.  After the
   macro, (self)->arg == NULL means that the allocation failed. */
#define ARG_TUP(self, obj) do {                             \
    if ((self)->arg || ((self)->arg=PyTuple_New(1))) {      \
        Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
        PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
    }                                                       \
    else {                                                  \
        Py_DECREF((obj));                                   \
    }                                                       \
  } while (0)

/* FREE_ARG_TUP(self): give up the cached tuple when something else still
   holds a reference to it (reference count above 1), so that the next
   ARG_TUP() creates a fresh one.  Must only be used while (self)->arg is
   not NULL. */
#define FREE_ARG_TUP(self) do {                 \
    if ((self)->arg->ob_refcnt > 1)             \
        Py_CLEAR((self)->arg);                  \
  } while (0)
369
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefits?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e., call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e., call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But that is probably more related to the function call
   overhead than to the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'.
 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
489 return -1;
490 }
491
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000492 if (PyBytes_GET_SIZE(data) != n) {
493 PyErr_SetNone(PyExc_EOFError);
494 return -1;
495 }
496
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000497 Py_XDECREF(self->last_string);
498 self->last_string = data;
499
500 if (!(*s = PyBytes_AS_STRING(data)))
501 return -1;
502
503 return n;
504}
505
506static Py_ssize_t
507unpickler_readline(UnpicklerObject *self, char **s)
508{
509 PyObject *data;
510
511 data = PyObject_CallObject(self->readline, empty_tuple);
512 if (data == NULL)
513 return -1;
514
515 /* XXX: Should bytearray be supported too? */
516 if (!PyBytes_Check(data)) {
517 PyErr_SetString(PyExc_ValueError,
518 "readline() from the underlying stream did not"
519 "return bytes");
520 return -1;
521 }
522
523 Py_XDECREF(self->last_string);
524 self->last_string = data;
525
526 if (!(*s = PyBytes_AS_STRING(data)))
527 return -1;
528
529 return PyBytes_GET_SIZE(data);
530}
531
532/* Generate a GET opcode for an object stored in the memo. The 'key' argument
533 should be the address of the object as returned by PyLong_FromVoidPtr(). */
534static int
535memo_get(PicklerObject *self, PyObject *key)
536{
537 PyObject *value;
538 PyObject *memo_id;
539 long x;
540 char pdata[30];
541 int len;
542
543 value = PyDict_GetItemWithError(self->memo, key);
544 if (value == NULL) {
545 if (!PyErr_Occurred())
546 PyErr_SetObject(PyExc_KeyError, key);
547 return -1;
548 }
549
550 memo_id = PyTuple_GetItem(value, 0);
551 if (memo_id == NULL)
552 return -1;
553
554 if (!PyLong_Check(memo_id)) {
555 PyErr_SetString(PicklingError, "memo id must be an integer");
556 return -1;
557 }
558 x = PyLong_AsLong(memo_id);
559 if (x == -1 && PyErr_Occurred())
560 return -1;
561
562 if (!self->bin) {
563 pdata[0] = GET;
564 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
565 len = (int)strlen(pdata);
566 }
567 else {
568 if (x < 256) {
569 pdata[0] = BINGET;
570 pdata[1] = (unsigned char)(x & 0xff);
571 len = 2;
572 }
573 else if (x <= 0xffffffffL) {
574 pdata[0] = LONG_BINGET;
575 pdata[1] = (unsigned char)(x & 0xff);
576 pdata[2] = (unsigned char)((x >> 8) & 0xff);
577 pdata[3] = (unsigned char)((x >> 16) & 0xff);
578 pdata[4] = (unsigned char)((x >> 24) & 0xff);
579 len = 5;
580 }
581 else { /* unlikely */
582 PyErr_SetString(PicklingError,
583 "memo id too large for LONG_BINGET");
584 return -1;
585 }
586 }
587
588 if (pickler_write(self, pdata, len) < 0)
589 return -1;
590
591 return 0;
592}
593
594/* Store an object in the memo, assign it a new unique ID based on the number
595 of objects currently stored in the memo and generate a PUT opcode. */
596static int
597memo_put(PicklerObject *self, PyObject *obj)
598{
599 PyObject *key = NULL;
600 PyObject *memo_id = NULL;
601 PyObject *tuple = NULL;
602 long x;
603 char pdata[30];
604 int len;
605 int status = 0;
606
607 if (self->fast)
608 return 0;
609
610 key = PyLong_FromVoidPtr(obj);
611 if (key == NULL)
612 goto error;
613 if ((x = PyDict_Size(self->memo)) < 0)
614 goto error;
615 memo_id = PyLong_FromLong(x);
616 if (memo_id == NULL)
617 goto error;
618 tuple = PyTuple_New(2);
619 if (tuple == NULL)
620 goto error;
621
622 Py_INCREF(memo_id);
623 PyTuple_SET_ITEM(tuple, 0, memo_id);
624 Py_INCREF(obj);
625 PyTuple_SET_ITEM(tuple, 1, obj);
626 if (PyDict_SetItem(self->memo, key, tuple) < 0)
627 goto error;
628
629 if (!self->bin) {
630 pdata[0] = PUT;
631 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
632 len = strlen(pdata);
633 }
634 else {
635 if (x < 256) {
636 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000637 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000638 len = 2;
639 }
640 else if (x <= 0xffffffffL) {
641 pdata[0] = LONG_BINPUT;
642 pdata[1] = (unsigned char)(x & 0xff);
643 pdata[2] = (unsigned char)((x >> 8) & 0xff);
644 pdata[3] = (unsigned char)((x >> 16) & 0xff);
645 pdata[4] = (unsigned char)((x >> 24) & 0xff);
646 len = 5;
647 }
648 else { /* unlikely */
649 PyErr_SetString(PicklingError,
650 "memo id too large for LONG_BINPUT");
651 return -1;
652 }
653 }
654
655 if (pickler_write(self, pdata, len) < 0)
656 goto error;
657
658 if (0) {
659 error:
660 status = -1;
661 }
662
663 Py_XDECREF(key);
664 Py_XDECREF(memo_id);
665 Py_XDECREF(tuple);
666
667 return status;
668}
669
670static PyObject *
671whichmodule(PyObject *global, PyObject *global_name)
672{
673 Py_ssize_t i, j;
674 static PyObject *module_str = NULL;
675 static PyObject *main_str = NULL;
676 PyObject *module_name;
677 PyObject *modules_dict;
678 PyObject *module;
679 PyObject *obj;
680
681 if (module_str == NULL) {
682 module_str = PyUnicode_InternFromString("__module__");
683 if (module_str == NULL)
684 return NULL;
685 main_str = PyUnicode_InternFromString("__main__");
686 if (main_str == NULL)
687 return NULL;
688 }
689
690 module_name = PyObject_GetAttr(global, module_str);
691
692 /* In some rare cases (e.g., random.getrandbits), __module__ can be
693 None. If it is so, then search sys.modules for the module of
694 global. */
695 if (module_name == Py_None) {
696 Py_DECREF(module_name);
697 goto search;
698 }
699
700 if (module_name) {
701 return module_name;
702 }
703 if (PyErr_ExceptionMatches(PyExc_AttributeError))
704 PyErr_Clear();
705 else
706 return NULL;
707
708 search:
709 modules_dict = PySys_GetObject("modules");
710 if (modules_dict == NULL)
711 return NULL;
712
713 i = 0;
714 module_name = NULL;
715 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
716 if (PyObject_Compare(module_name, main_str) == 0)
717 continue;
718
719 obj = PyObject_GetAttr(module, global_name);
720 if (obj == NULL) {
721 if (PyErr_ExceptionMatches(PyExc_AttributeError))
722 PyErr_Clear();
723 else
724 return NULL;
725 continue;
726 }
727
728 if (obj != global) {
729 Py_DECREF(obj);
730 continue;
731 }
732
733 Py_DECREF(obj);
734 break;
735 }
736
737 /* If no module is found, use __main__. */
738 if (!j) {
739 module_name = main_str;
740 }
741
742 Py_INCREF(module_name);
743 return module_name;
744}
745
746/* fast_save_enter() and fast_save_leave() are guards against recursive
747 objects when Pickler is used with the "fast mode" (i.e., with object
748 memoization disabled). If the nesting of a list or dict object exceed
749 FAST_NESTING_LIMIT, these guards will start keeping an internal
750 reference to the seen list or dict objects and check whether these objects
751 are recursive. These are not strictly necessary, since save() has a
752 hard-coded recursion limit, but they give a nicer error message than the
753 typical RuntimeError. */
754static int
755fast_save_enter(PicklerObject *self, PyObject *obj)
756{
757 /* if fast_nesting < 0, we're doing an error exit. */
758 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
759 PyObject *key = NULL;
760 if (self->fast_memo == NULL) {
761 self->fast_memo = PyDict_New();
762 if (self->fast_memo == NULL) {
763 self->fast_nesting = -1;
764 return 0;
765 }
766 }
767 key = PyLong_FromVoidPtr(obj);
768 if (key == NULL)
769 return 0;
770 if (PyDict_GetItem(self->fast_memo, key)) {
771 Py_DECREF(key);
772 PyErr_Format(PyExc_ValueError,
773 "fast mode: can't pickle cyclic objects "
774 "including object type %.200s at %p",
775 obj->ob_type->tp_name, obj);
776 self->fast_nesting = -1;
777 return 0;
778 }
779 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
780 Py_DECREF(key);
781 self->fast_nesting = -1;
782 return 0;
783 }
784 Py_DECREF(key);
785 }
786 return 1;
787}
788
789static int
790fast_save_leave(PicklerObject *self, PyObject *obj)
791{
792 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
793 PyObject *key = PyLong_FromVoidPtr(obj);
794 if (key == NULL)
795 return 0;
796 if (PyDict_DelItem(self->fast_memo, key) < 0) {
797 Py_DECREF(key);
798 return 0;
799 }
800 Py_DECREF(key);
801 }
802 return 1;
803}
804
805static int
806save_none(PicklerObject *self, PyObject *obj)
807{
808 const char none_op = NONE;
809 if (pickler_write(self, &none_op, 1) < 0)
810 return -1;
811
812 return 0;
813}
814
815static int
816save_bool(PicklerObject *self, PyObject *obj)
817{
818 static const char *buf[2] = { FALSE, TRUE };
819 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
820 int p = (obj == Py_True);
821
822 if (self->proto >= 2) {
823 const char bool_op = p ? NEWTRUE : NEWFALSE;
824 if (pickler_write(self, &bool_op, 1) < 0)
825 return -1;
826 }
827 else if (pickler_write(self, buf[p], len[p]) < 0)
828 return -1;
829
830 return 0;
831}
832
833static int
834save_int(PicklerObject *self, long x)
835{
836 char pdata[32];
837 int len = 0;
838
839 if (!self->bin
840#if SIZEOF_LONG > 4
841 || x > 0x7fffffffL || x < -0x80000000L
842#endif
843 ) {
844 /* Text-mode pickle, or long too big to fit in the 4-byte
845 * signed BININT format: store as a string.
846 */
847 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
848 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
849 if (pickler_write(self, pdata, strlen(pdata)) < 0)
850 return -1;
851 }
852 else {
853 /* Binary pickle and x fits in a signed 4-byte int. */
854 pdata[1] = (unsigned char)(x & 0xff);
855 pdata[2] = (unsigned char)((x >> 8) & 0xff);
856 pdata[3] = (unsigned char)((x >> 16) & 0xff);
857 pdata[4] = (unsigned char)((x >> 24) & 0xff);
858
859 if ((pdata[4] == 0) && (pdata[3] == 0)) {
860 if (pdata[2] == 0) {
861 pdata[0] = BININT1;
862 len = 2;
863 }
864 else {
865 pdata[0] = BININT2;
866 len = 3;
867 }
868 }
869 else {
870 pdata[0] = BININT;
871 len = 5;
872 }
873
874 if (pickler_write(self, pdata, len) < 0)
875 return -1;
876 }
877
878 return 0;
879}
880
/* Pickle a Python int of any size.
 *
 * Values that fit in a C long are delegated to save_int().  Larger values
 * are written, for protocol 2 and up, as LONG1/LONG4 followed by the
 * little-endian two's-complement bytes of the number; for older protocols
 * as LONG followed by repr(obj) and a newline.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (pickler_write(self, header, 2) < 0)
                goto error;
            /* Nothing has been allocated yet, so no cleanup is needed. */
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        /* 'repr' doubles as the scratch buffer for the raw bytes. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* LONG1: one length byte. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* LONG4: four length bytes, least significant first.  'size'
               is used as scratch while the length is split into bytes and
               is then set to the header length. */
            header[0] = LONG4;
            size = (int)nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            size = 5;
        }
        if (pickler_write(self, header, size) < 0 ||
            pickler_write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2: write the repr and newline.  This is quadratic-time
           (in the number of digits), in both directions. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (pickler_write(self, &long_op, 1) < 0 ||
            pickler_write(self, string, size) < 0 ||
            pickler_write(self, "\n", 1) < 0)
            goto error;
    }

    /* The error label is only reachable through goto; the normal path skips
       the assignment and falls through to the shared cleanup. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1003
1004static int
1005save_float(PicklerObject *self, PyObject *obj)
1006{
1007 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1008
1009 if (self->bin) {
1010 char pdata[9];
1011 pdata[0] = BINFLOAT;
1012 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1013 return -1;
1014 if (pickler_write(self, pdata, 9) < 0)
1015 return -1;
1016 }
1017 else {
1018 char pdata[250];
1019 pdata[0] = FLOAT;
1020 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1021 /* Extend the formatted string with a newline character */
1022 strcat(pdata, "\n");
1023
1024 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1025 return -1;
1026 }
1027
1028 return 0;
1029}
1030
1031static int
1032save_bytes(PicklerObject *self, PyObject *obj)
1033{
1034 if (self->proto < 3) {
1035 /* Older pickle protocols do not have an opcode for pickling bytes
1036 objects. Therefore, we need to fake the copy protocol (i.e.,
1037 the __reduce__ method) to permit bytes object unpickling. */
1038 PyObject *reduce_value = NULL;
1039 PyObject *bytelist = NULL;
1040 int status;
1041
1042 bytelist = PySequence_List(obj);
1043 if (bytelist == NULL)
1044 return -1;
1045
1046 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1047 bytelist);
1048 if (reduce_value == NULL) {
1049 Py_DECREF(bytelist);
1050 return -1;
1051 }
1052
1053 /* save_reduce() will memoize the object automatically. */
1054 status = save_reduce(self, reduce_value, obj);
1055 Py_DECREF(reduce_value);
1056 Py_DECREF(bytelist);
1057 return status;
1058 }
1059 else {
1060 Py_ssize_t size;
1061 char header[5];
1062 int len;
1063
1064 size = PyBytes_Size(obj);
1065 if (size < 0)
1066 return -1;
1067
1068 if (size < 256) {
1069 header[0] = SHORT_BINBYTES;
1070 header[1] = (unsigned char)size;
1071 len = 2;
1072 }
1073 else if (size <= 0xffffffffL) {
1074 header[0] = BINBYTES;
1075 header[1] = (unsigned char)(size & 0xff);
1076 header[2] = (unsigned char)((size >> 8) & 0xff);
1077 header[3] = (unsigned char)((size >> 16) & 0xff);
1078 header[4] = (unsigned char)((size >> 24) & 0xff);
1079 len = 5;
1080 }
1081 else {
1082 return -1; /* string too large */
1083 }
1084
1085 if (pickler_write(self, header, len) < 0)
1086 return -1;
1087
1088 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1089 return -1;
1090
1091 if (memo_put(self, obj) < 0)
1092 return -1;
1093
1094 return 0;
1095 }
1096}
1097
1098/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1099 backslash and newline characters to \uXXXX escapes. */
1100static PyObject *
1101raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1102{
1103 PyObject *repr, *result;
1104 char *p;
1105 char *q;
1106
1107 static const char *hexdigits = "0123456789abcdef";
1108
1109#ifdef Py_UNICODE_WIDE
1110 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1111#else
1112 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1113#endif
1114 if (repr == NULL)
1115 return NULL;
1116 if (size == 0)
1117 goto done;
1118
1119 p = q = PyBytes_AS_STRING(repr);
1120 while (size-- > 0) {
1121 Py_UNICODE ch = *s++;
1122#ifdef Py_UNICODE_WIDE
1123 /* Map 32-bit characters to '\Uxxxxxxxx' */
1124 if (ch >= 0x10000) {
1125 *p++ = '\\';
1126 *p++ = 'U';
1127 *p++ = hexdigits[(ch >> 28) & 0xf];
1128 *p++ = hexdigits[(ch >> 24) & 0xf];
1129 *p++ = hexdigits[(ch >> 20) & 0xf];
1130 *p++ = hexdigits[(ch >> 16) & 0xf];
1131 *p++ = hexdigits[(ch >> 12) & 0xf];
1132 *p++ = hexdigits[(ch >> 8) & 0xf];
1133 *p++ = hexdigits[(ch >> 4) & 0xf];
1134 *p++ = hexdigits[ch & 15];
1135 }
1136 else
1137#endif
1138 /* Map 16-bit characters to '\uxxxx' */
1139 if (ch >= 256 || ch == '\\' || ch == '\n') {
1140 *p++ = '\\';
1141 *p++ = 'u';
1142 *p++ = hexdigits[(ch >> 12) & 0xf];
1143 *p++ = hexdigits[(ch >> 8) & 0xf];
1144 *p++ = hexdigits[(ch >> 4) & 0xf];
1145 *p++ = hexdigits[ch & 15];
1146 }
1147 /* Copy everything else as-is */
1148 else
1149 *p++ = (char) ch;
1150 }
1151 size = p - q;
1152
1153 done:
1154 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1155 Py_DECREF(repr);
1156 return result;
1157}
1158
1159static int
1160save_unicode(PicklerObject *self, PyObject *obj)
1161{
1162 Py_ssize_t size;
1163 PyObject *encoded = NULL;
1164
1165 if (self->bin) {
1166 char pdata[5];
1167
1168 encoded = PyUnicode_AsUTF8String(obj);
1169 if (encoded == NULL)
1170 goto error;
1171
1172 size = PyBytes_GET_SIZE(encoded);
1173 if (size < 0 || size > 0xffffffffL)
1174 goto error; /* string too large */
1175
1176 pdata[0] = BINUNICODE;
1177 pdata[1] = (unsigned char)(size & 0xff);
1178 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1179 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1180 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1181
1182 if (pickler_write(self, pdata, 5) < 0)
1183 goto error;
1184
1185 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1186 goto error;
1187 }
1188 else {
1189 const char unicode_op = UNICODE;
1190
1191 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1192 PyUnicode_GET_SIZE(obj));
1193 if (encoded == NULL)
1194 goto error;
1195
1196 if (pickler_write(self, &unicode_op, 1) < 0)
1197 goto error;
1198
1199 size = PyBytes_GET_SIZE(encoded);
1200 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1201 goto error;
1202
1203 if (pickler_write(self, "\n", 1) < 0)
1204 goto error;
1205 }
1206 if (memo_put(self, obj) < 0)
1207 goto error;
1208
1209 Py_DECREF(encoded);
1210 return 0;
1211
1212 error:
1213 Py_XDECREF(encoded);
1214 return -1;
1215}
1216
1217/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1218static int
1219store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1220{
1221 int i;
1222
1223 assert(PyTuple_Size(t) == len);
1224
1225 for (i = 0; i < len; i++) {
1226 PyObject *element = PyTuple_GET_ITEM(t, i);
1227
1228 if (element == NULL)
1229 return -1;
1230 if (save(self, element, 0) < 0)
1231 return -1;
1232 }
1233
1234 return 0;
1235}
1236
1237/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1238 * used across protocols to minimize the space needed to pickle them.
1239 * Tuples are also the only builtin immutable type that can be recursive
1240 * (a tuple can be reached from itself), and that requires some subtle
1241 * magic so that it works in all cases. IOW, this is a long routine.
1242 */
1243static int
1244save_tuple(PicklerObject *self, PyObject *obj)
1245{
1246 PyObject *memo_key = NULL;
1247 int len, i;
1248 int status = 0;
1249
1250 const char mark_op = MARK;
1251 const char tuple_op = TUPLE;
1252 const char pop_op = POP;
1253 const char pop_mark_op = POP_MARK;
1254 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1255
1256 if ((len = PyTuple_Size(obj)) < 0)
1257 return -1;
1258
1259 if (len == 0) {
1260 char pdata[2];
1261
1262 if (self->proto) {
1263 pdata[0] = EMPTY_TUPLE;
1264 len = 1;
1265 }
1266 else {
1267 pdata[0] = MARK;
1268 pdata[1] = TUPLE;
1269 len = 2;
1270 }
1271 if (pickler_write(self, pdata, len) < 0)
1272 return -1;
1273 return 0;
1274 }
1275
1276 /* id(tuple) isn't in the memo now. If it shows up there after
1277 * saving the tuple elements, the tuple must be recursive, in
1278 * which case we'll pop everything we put on the stack, and fetch
1279 * its value from the memo.
1280 */
1281 memo_key = PyLong_FromVoidPtr(obj);
1282 if (memo_key == NULL)
1283 return -1;
1284
1285 if (len <= 3 && self->proto >= 2) {
1286 /* Use TUPLE{1,2,3} opcodes. */
1287 if (store_tuple_elements(self, obj, len) < 0)
1288 goto error;
1289
1290 if (PyDict_GetItem(self->memo, memo_key)) {
1291 /* pop the len elements */
1292 for (i = 0; i < len; i++)
1293 if (pickler_write(self, &pop_op, 1) < 0)
1294 goto error;
1295 /* fetch from memo */
1296 if (memo_get(self, memo_key) < 0)
1297 goto error;
1298
1299 Py_DECREF(memo_key);
1300 return 0;
1301 }
1302 else { /* Not recursive. */
1303 if (pickler_write(self, len2opcode + len, 1) < 0)
1304 goto error;
1305 }
1306 goto memoize;
1307 }
1308
1309 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1310 * Generate MARK e1 e2 ... TUPLE
1311 */
1312 if (pickler_write(self, &mark_op, 1) < 0)
1313 goto error;
1314
1315 if (store_tuple_elements(self, obj, len) < 0)
1316 goto error;
1317
1318 if (PyDict_GetItem(self->memo, memo_key)) {
1319 /* pop the stack stuff we pushed */
1320 if (self->bin) {
1321 if (pickler_write(self, &pop_mark_op, 1) < 0)
1322 goto error;
1323 }
1324 else {
1325 /* Note that we pop one more than len, to remove
1326 * the MARK too.
1327 */
1328 for (i = 0; i <= len; i++)
1329 if (pickler_write(self, &pop_op, 1) < 0)
1330 goto error;
1331 }
1332 /* fetch from memo */
1333 if (memo_get(self, memo_key) < 0)
1334 goto error;
1335
1336 Py_DECREF(memo_key);
1337 return 0;
1338 }
1339 else { /* Not recursive. */
1340 if (pickler_write(self, &tuple_op, 1) < 0)
1341 goto error;
1342 }
1343
1344 memoize:
1345 if (memo_put(self, obj) < 0)
1346 goto error;
1347
1348 if (0) {
1349 error:
1350 status = -1;
1351 }
1352
1353 Py_DECREF(memo_key);
1354 return status;
1355}
1356
1357/* iter is an iterator giving items, and we batch up chunks of
1358 * MARK item item ... item APPENDS
1359 * opcode sequences. Calling code should have arranged to first create an
1360 * empty list, or list-like object, for the APPENDS to operate on.
1361 * Returns 0 on success, <0 on error.
1362 */
1363static int
1364batch_list(PicklerObject *self, PyObject *iter)
1365{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001366 PyObject *obj = NULL;
1367 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001368 int i, n;
1369
1370 const char mark_op = MARK;
1371 const char append_op = APPEND;
1372 const char appends_op = APPENDS;
1373
1374 assert(iter != NULL);
1375
1376 /* XXX: I think this function could be made faster by avoiding the
1377 iterator interface and fetching objects directly from list using
1378 PyList_GET_ITEM.
1379 */
1380
1381 if (self->proto == 0) {
1382 /* APPENDS isn't available; do one at a time. */
1383 for (;;) {
1384 obj = PyIter_Next(iter);
1385 if (obj == NULL) {
1386 if (PyErr_Occurred())
1387 return -1;
1388 break;
1389 }
1390 i = save(self, obj, 0);
1391 Py_DECREF(obj);
1392 if (i < 0)
1393 return -1;
1394 if (pickler_write(self, &append_op, 1) < 0)
1395 return -1;
1396 }
1397 return 0;
1398 }
1399
1400 /* proto > 0: write in batches of BATCHSIZE. */
1401 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001402 /* Get first item */
1403 firstitem = PyIter_Next(iter);
1404 if (firstitem == NULL) {
1405 if (PyErr_Occurred())
1406 goto error;
1407
1408 /* nothing more to add */
1409 break;
1410 }
1411
1412 /* Try to get a second item */
1413 obj = PyIter_Next(iter);
1414 if (obj == NULL) {
1415 if (PyErr_Occurred())
1416 goto error;
1417
1418 /* Only one item to write */
1419 if (save(self, firstitem, 0) < 0)
1420 goto error;
1421 if (pickler_write(self, &append_op, 1) < 0)
1422 goto error;
1423 Py_CLEAR(firstitem);
1424 break;
1425 }
1426
1427 /* More than one item to write */
1428
1429 /* Pump out MARK, items, APPENDS. */
1430 if (pickler_write(self, &mark_op, 1) < 0)
1431 goto error;
1432
1433 if (save(self, firstitem, 0) < 0)
1434 goto error;
1435 Py_CLEAR(firstitem);
1436 n = 1;
1437
1438 /* Fetch and save up to BATCHSIZE items */
1439 while (obj) {
1440 if (save(self, obj, 0) < 0)
1441 goto error;
1442 Py_CLEAR(obj);
1443 n += 1;
1444
1445 if (n == BATCHSIZE)
1446 break;
1447
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001448 obj = PyIter_Next(iter);
1449 if (obj == NULL) {
1450 if (PyErr_Occurred())
1451 goto error;
1452 break;
1453 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001454 }
1455
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001456 if (pickler_write(self, &appends_op, 1) < 0)
1457 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001458
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001459 } while (n == BATCHSIZE);
1460 return 0;
1461
1462 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001463 Py_XDECREF(firstitem);
1464 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001465 return -1;
1466}
1467
1468static int
1469save_list(PicklerObject *self, PyObject *obj)
1470{
1471 PyObject *iter;
1472 char header[3];
1473 int len;
1474 int status = 0;
1475
1476 if (self->fast && !fast_save_enter(self, obj))
1477 goto error;
1478
1479 /* Create an empty list. */
1480 if (self->bin) {
1481 header[0] = EMPTY_LIST;
1482 len = 1;
1483 }
1484 else {
1485 header[0] = MARK;
1486 header[1] = LIST;
1487 len = 2;
1488 }
1489
1490 if (pickler_write(self, header, len) < 0)
1491 goto error;
1492
1493 /* Get list length, and bow out early if empty. */
1494 if ((len = PyList_Size(obj)) < 0)
1495 goto error;
1496
1497 if (memo_put(self, obj) < 0)
1498 goto error;
1499
1500 if (len != 0) {
1501 /* Save the list elements. */
1502 iter = PyObject_GetIter(obj);
1503 if (iter == NULL)
1504 goto error;
1505 status = batch_list(self, iter);
1506 Py_DECREF(iter);
1507 }
1508
1509 if (0) {
1510 error:
1511 status = -1;
1512 }
1513
1514 if (self->fast && !fast_save_leave(self, obj))
1515 status = -1;
1516
1517 return status;
1518}
1519
1520/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1521 * MARK key value ... key value SETITEMS
1522 * opcode sequences. Calling code should have arranged to first create an
1523 * empty dict, or dict-like object, for the SETITEMS to operate on.
1524 * Returns 0 on success, <0 on error.
1525 *
1526 * This is very much like batch_list(). The difference between saving
1527 * elements directly, and picking apart two-tuples, is so long-winded at
1528 * the C level, though, that attempts to combine these routines were too
1529 * ugly to bear.
1530 */
1531static int
1532batch_dict(PicklerObject *self, PyObject *iter)
1533{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001534 PyObject *obj = NULL;
1535 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001536 int i, n;
1537
1538 const char mark_op = MARK;
1539 const char setitem_op = SETITEM;
1540 const char setitems_op = SETITEMS;
1541
1542 assert(iter != NULL);
1543
1544 if (self->proto == 0) {
1545 /* SETITEMS isn't available; do one at a time. */
1546 for (;;) {
1547 obj = PyIter_Next(iter);
1548 if (obj == NULL) {
1549 if (PyErr_Occurred())
1550 return -1;
1551 break;
1552 }
1553 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1554 PyErr_SetString(PyExc_TypeError, "dict items "
1555 "iterator must return 2-tuples");
1556 return -1;
1557 }
1558 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1559 if (i >= 0)
1560 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1561 Py_DECREF(obj);
1562 if (i < 0)
1563 return -1;
1564 if (pickler_write(self, &setitem_op, 1) < 0)
1565 return -1;
1566 }
1567 return 0;
1568 }
1569
1570 /* proto > 0: write in batches of BATCHSIZE. */
1571 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001572 /* Get first item */
1573 firstitem = PyIter_Next(iter);
1574 if (firstitem == NULL) {
1575 if (PyErr_Occurred())
1576 goto error;
1577
1578 /* nothing more to add */
1579 break;
1580 }
1581 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1582 PyErr_SetString(PyExc_TypeError, "dict items "
1583 "iterator must return 2-tuples");
1584 goto error;
1585 }
1586
1587 /* Try to get a second item */
1588 obj = PyIter_Next(iter);
1589 if (obj == NULL) {
1590 if (PyErr_Occurred())
1591 goto error;
1592
1593 /* Only one item to write */
1594 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1595 goto error;
1596 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1597 goto error;
1598 if (pickler_write(self, &setitem_op, 1) < 0)
1599 goto error;
1600 Py_CLEAR(firstitem);
1601 break;
1602 }
1603
1604 /* More than one item to write */
1605
1606 /* Pump out MARK, items, SETITEMS. */
1607 if (pickler_write(self, &mark_op, 1) < 0)
1608 goto error;
1609
1610 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1611 goto error;
1612 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1613 goto error;
1614 Py_CLEAR(firstitem);
1615 n = 1;
1616
1617 /* Fetch and save up to BATCHSIZE items */
1618 while (obj) {
1619 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1620 PyErr_SetString(PyExc_TypeError, "dict items "
1621 "iterator must return 2-tuples");
1622 goto error;
1623 }
1624 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1625 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1626 goto error;
1627 Py_CLEAR(obj);
1628 n += 1;
1629
1630 if (n == BATCHSIZE)
1631 break;
1632
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001633 obj = PyIter_Next(iter);
1634 if (obj == NULL) {
1635 if (PyErr_Occurred())
1636 goto error;
1637 break;
1638 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001639 }
1640
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001641 if (pickler_write(self, &setitems_op, 1) < 0)
1642 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001643
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001644 } while (n == BATCHSIZE);
1645 return 0;
1646
1647 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001648 Py_XDECREF(firstitem);
1649 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001650 return -1;
1651}
1652
1653static int
1654save_dict(PicklerObject *self, PyObject *obj)
1655{
1656 PyObject *items, *iter;
1657 char header[3];
1658 int len;
1659 int status = 0;
1660
1661 if (self->fast && !fast_save_enter(self, obj))
1662 goto error;
1663
1664 /* Create an empty dict. */
1665 if (self->bin) {
1666 header[0] = EMPTY_DICT;
1667 len = 1;
1668 }
1669 else {
1670 header[0] = MARK;
1671 header[1] = DICT;
1672 len = 2;
1673 }
1674
1675 if (pickler_write(self, header, len) < 0)
1676 goto error;
1677
1678 /* Get dict size, and bow out early if empty. */
1679 if ((len = PyDict_Size(obj)) < 0)
1680 goto error;
1681
1682 if (memo_put(self, obj) < 0)
1683 goto error;
1684
1685 if (len != 0) {
1686 /* Save the dict items. */
1687 items = PyObject_CallMethod(obj, "items", "()");
1688 if (items == NULL)
1689 goto error;
1690 iter = PyObject_GetIter(items);
1691 Py_DECREF(items);
1692 if (iter == NULL)
1693 goto error;
1694 status = batch_dict(self, iter);
1695 Py_DECREF(iter);
1696 }
1697
1698 if (0) {
1699 error:
1700 status = -1;
1701 }
1702
1703 if (self->fast && !fast_save_leave(self, obj))
1704 status = -1;
1705
1706 return status;
1707}
1708
1709static int
1710save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1711{
1712 static PyObject *name_str = NULL;
1713 PyObject *global_name = NULL;
1714 PyObject *module_name = NULL;
1715 PyObject *module = NULL;
1716 PyObject *cls;
1717 int status = 0;
1718
1719 const char global_op = GLOBAL;
1720
1721 if (name_str == NULL) {
1722 name_str = PyUnicode_InternFromString("__name__");
1723 if (name_str == NULL)
1724 goto error;
1725 }
1726
1727 if (name) {
1728 global_name = name;
1729 Py_INCREF(global_name);
1730 }
1731 else {
1732 global_name = PyObject_GetAttr(obj, name_str);
1733 if (global_name == NULL)
1734 goto error;
1735 }
1736
1737 module_name = whichmodule(obj, global_name);
1738 if (module_name == NULL)
1739 goto error;
1740
1741 /* XXX: Change to use the import C API directly with level=0 to disallow
1742 relative imports.
1743
1744 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1745 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1746 custom import functions (IMHO, this would be a nice security
1747 feature). The import C API would need to be extended to support the
1748 extra parameters of __import__ to fix that. */
1749 module = PyImport_Import(module_name);
1750 if (module == NULL) {
1751 PyErr_Format(PicklingError,
1752 "Can't pickle %R: import of module %R failed",
1753 obj, module_name);
1754 goto error;
1755 }
1756 cls = PyObject_GetAttr(module, global_name);
1757 if (cls == NULL) {
1758 PyErr_Format(PicklingError,
1759 "Can't pickle %R: attribute lookup %S.%S failed",
1760 obj, module_name, global_name);
1761 goto error;
1762 }
1763 if (cls != obj) {
1764 Py_DECREF(cls);
1765 PyErr_Format(PicklingError,
1766 "Can't pickle %R: it's not the same object as %S.%S",
1767 obj, module_name, global_name);
1768 goto error;
1769 }
1770 Py_DECREF(cls);
1771
1772 if (self->proto >= 2) {
1773 /* See whether this is in the extension registry, and if
1774 * so generate an EXT opcode.
1775 */
1776 PyObject *code_obj; /* extension code as Python object */
1777 long code; /* extension code as C value */
1778 char pdata[5];
1779 int n;
1780
1781 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1782 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1783 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1784 /* The object is not registered in the extension registry.
1785 This is the most likely code path. */
1786 if (code_obj == NULL)
1787 goto gen_global;
1788
1789 /* XXX: pickle.py doesn't check neither the type, nor the range
1790 of the value returned by the extension_registry. It should for
1791 consistency. */
1792
1793 /* Verify code_obj has the right type and value. */
1794 if (!PyLong_Check(code_obj)) {
1795 PyErr_Format(PicklingError,
1796 "Can't pickle %R: extension code %R isn't an integer",
1797 obj, code_obj);
1798 goto error;
1799 }
1800 code = PyLong_AS_LONG(code_obj);
1801 if (code <= 0 || code > 0x7fffffffL) {
1802 PyErr_Format(PicklingError,
1803 "Can't pickle %R: extension code %ld is out of range",
1804 obj, code);
1805 goto error;
1806 }
1807
1808 /* Generate an EXT opcode. */
1809 if (code <= 0xff) {
1810 pdata[0] = EXT1;
1811 pdata[1] = (unsigned char)code;
1812 n = 2;
1813 }
1814 else if (code <= 0xffff) {
1815 pdata[0] = EXT2;
1816 pdata[1] = (unsigned char)(code & 0xff);
1817 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1818 n = 3;
1819 }
1820 else {
1821 pdata[0] = EXT4;
1822 pdata[1] = (unsigned char)(code & 0xff);
1823 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1824 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1825 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1826 n = 5;
1827 }
1828
1829 if (pickler_write(self, pdata, n) < 0)
1830 goto error;
1831 }
1832 else {
1833 /* Generate a normal global opcode if we are using a pickle
1834 protocol <= 2, or if the object is not registered in the
1835 extension registry. */
1836 PyObject *encoded;
1837 PyObject *(*unicode_encoder)(PyObject *);
1838
1839 gen_global:
1840 if (pickler_write(self, &global_op, 1) < 0)
1841 goto error;
1842
1843 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1844 the module name and the global name using UTF-8. We do so only when
1845 we are using the pickle protocol newer than version 3. This is to
1846 ensure compatibility with older Unpickler running on Python 2.x. */
1847 if (self->proto >= 3) {
1848 unicode_encoder = PyUnicode_AsUTF8String;
1849 }
1850 else {
1851 unicode_encoder = PyUnicode_AsASCIIString;
1852 }
1853
1854 /* Save the name of the module. */
1855 encoded = unicode_encoder(module_name);
1856 if (encoded == NULL) {
1857 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1858 PyErr_Format(PicklingError,
1859 "can't pickle module identifier '%S' using "
1860 "pickle protocol %i", module_name, self->proto);
1861 goto error;
1862 }
1863 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1864 PyBytes_GET_SIZE(encoded)) < 0) {
1865 Py_DECREF(encoded);
1866 goto error;
1867 }
1868 Py_DECREF(encoded);
1869 if(pickler_write(self, "\n", 1) < 0)
1870 goto error;
1871
1872 /* Save the name of the module. */
1873 encoded = unicode_encoder(global_name);
1874 if (encoded == NULL) {
1875 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1876 PyErr_Format(PicklingError,
1877 "can't pickle global identifier '%S' using "
1878 "pickle protocol %i", global_name, self->proto);
1879 goto error;
1880 }
1881 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1882 PyBytes_GET_SIZE(encoded)) < 0) {
1883 Py_DECREF(encoded);
1884 goto error;
1885 }
1886 Py_DECREF(encoded);
1887 if(pickler_write(self, "\n", 1) < 0)
1888 goto error;
1889
1890 /* Memoize the object. */
1891 if (memo_put(self, obj) < 0)
1892 goto error;
1893 }
1894
1895 if (0) {
1896 error:
1897 status = -1;
1898 }
1899 Py_XDECREF(module_name);
1900 Py_XDECREF(global_name);
1901 Py_XDECREF(module);
1902
1903 return status;
1904}
1905
1906static int
1907save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1908{
1909 PyObject *pid = NULL;
1910 int status = 0;
1911
1912 const char persid_op = PERSID;
1913 const char binpersid_op = BINPERSID;
1914
1915 Py_INCREF(obj);
1916 pid = pickler_call(self, func, obj);
1917 if (pid == NULL)
1918 return -1;
1919
1920 if (pid != Py_None) {
1921 if (self->bin) {
1922 if (save(self, pid, 1) < 0 ||
1923 pickler_write(self, &binpersid_op, 1) < 0)
1924 goto error;
1925 }
1926 else {
1927 PyObject *pid_str = NULL;
1928 char *pid_ascii_bytes;
1929 Py_ssize_t size;
1930
1931 pid_str = PyObject_Str(pid);
1932 if (pid_str == NULL)
1933 goto error;
1934
1935 /* XXX: Should it check whether the persistent id only contains
1936 ASCII characters? And what if the pid contains embedded
1937 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001938 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001939 Py_DECREF(pid_str);
1940 if (pid_ascii_bytes == NULL)
1941 goto error;
1942
1943 if (pickler_write(self, &persid_op, 1) < 0 ||
1944 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1945 pickler_write(self, "\n", 1) < 0)
1946 goto error;
1947 }
1948 status = 1;
1949 }
1950
1951 if (0) {
1952 error:
1953 status = -1;
1954 }
1955 Py_XDECREF(pid);
1956
1957 return status;
1958}
1959
1960/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1961 * appropriate __reduce__ method for obj.
1962 */
1963static int
1964save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1965{
1966 PyObject *callable;
1967 PyObject *argtup;
1968 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001969 PyObject *listitems = Py_None;
1970 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00001971 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972
1973 int use_newobj = self->proto >= 2;
1974
1975 const char reduce_op = REDUCE;
1976 const char build_op = BUILD;
1977 const char newobj_op = NEWOBJ;
1978
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00001979 size = PyTuple_Size(args);
1980 if (size < 2 || size > 5) {
1981 PyErr_SetString(PicklingError, "tuple returned by "
1982 "__reduce__ must contain 2 through 5 elements");
1983 return -1;
1984 }
1985
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001986 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1987 &callable, &argtup, &state, &listitems, &dictitems))
1988 return -1;
1989
1990 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001991 PyErr_SetString(PicklingError, "first item of the tuple "
1992 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001993 return -1;
1994 }
1995 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001996 PyErr_SetString(PicklingError, "second item of the tuple "
1997 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001998 return -1;
1999 }
2000
2001 if (state == Py_None)
2002 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002003
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002004 if (listitems == Py_None)
2005 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002006 else if (!PyIter_Check(listitems)) {
2007 PyErr_Format(PicklingError, "Fourth element of tuple"
2008 "returned by __reduce__ must be an iterator, not %s",
2009 Py_TYPE(listitems)->tp_name);
2010 return -1;
2011 }
2012
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002013 if (dictitems == Py_None)
2014 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002015 else if (!PyIter_Check(dictitems)) {
2016 PyErr_Format(PicklingError, "Fifth element of tuple"
2017 "returned by __reduce__ must be an iterator, not %s",
2018 Py_TYPE(dictitems)->tp_name);
2019 return -1;
2020 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002021
2022 /* Protocol 2 special case: if callable's name is __newobj__, use
2023 NEWOBJ. */
2024 if (use_newobj) {
2025 static PyObject *newobj_str = NULL;
2026 PyObject *name_str;
2027
2028 if (newobj_str == NULL) {
2029 newobj_str = PyUnicode_InternFromString("__newobj__");
2030 }
2031
2032 name_str = PyObject_GetAttrString(callable, "__name__");
2033 if (name_str == NULL) {
2034 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2035 PyErr_Clear();
2036 else
2037 return -1;
2038 use_newobj = 0;
2039 }
2040 else {
2041 use_newobj = PyUnicode_Check(name_str) &&
2042 PyUnicode_Compare(name_str, newobj_str) == 0;
2043 Py_DECREF(name_str);
2044 }
2045 }
2046 if (use_newobj) {
2047 PyObject *cls;
2048 PyObject *newargtup;
2049 PyObject *obj_class;
2050 int p;
2051
2052 /* Sanity checks. */
2053 if (Py_SIZE(argtup) < 1) {
2054 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2055 return -1;
2056 }
2057
2058 cls = PyTuple_GET_ITEM(argtup, 0);
2059 if (!PyObject_HasAttrString(cls, "__new__")) {
2060 PyErr_SetString(PicklingError, "args[0] from "
2061 "__newobj__ args has no __new__");
2062 return -1;
2063 }
2064
2065 if (obj != NULL) {
2066 obj_class = PyObject_GetAttrString(obj, "__class__");
2067 if (obj_class == NULL) {
2068 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2069 PyErr_Clear();
2070 else
2071 return -1;
2072 }
2073 p = obj_class != cls; /* true iff a problem */
2074 Py_DECREF(obj_class);
2075 if (p) {
2076 PyErr_SetString(PicklingError, "args[0] from "
2077 "__newobj__ args has the wrong class");
2078 return -1;
2079 }
2080 }
2081 /* XXX: These calls save() are prone to infinite recursion. Imagine
2082 what happen if the value returned by the __reduce__() method of
2083 some extension type contains another object of the same type. Ouch!
2084
2085 Here is a quick example, that I ran into, to illustrate what I
2086 mean:
2087
2088 >>> import pickle, copyreg
2089 >>> copyreg.dispatch_table.pop(complex)
2090 >>> pickle.dumps(1+2j)
2091 Traceback (most recent call last):
2092 ...
2093 RuntimeError: maximum recursion depth exceeded
2094
2095 Removing the complex class from copyreg.dispatch_table made the
2096 __reduce_ex__() method emit another complex object:
2097
2098 >>> (1+1j).__reduce_ex__(2)
2099 (<function __newobj__ at 0xb7b71c3c>,
2100 (<class 'complex'>, (1+1j)), None, None, None)
2101
2102 Thus when save() was called on newargstup (the 2nd item) recursion
2103 ensued. Of course, the bug was in the complex class which had a
2104 broken __getnewargs__() that emitted another complex object. But,
2105 the point, here, is it is quite easy to end up with a broken reduce
2106 function. */
2107
2108 /* Save the class and its __new__ arguments. */
2109 if (save(self, cls, 0) < 0)
2110 return -1;
2111
2112 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2113 if (newargtup == NULL)
2114 return -1;
2115
2116 p = save(self, newargtup, 0);
2117 Py_DECREF(newargtup);
2118 if (p < 0)
2119 return -1;
2120
2121 /* Add NEWOBJ opcode. */
2122 if (pickler_write(self, &newobj_op, 1) < 0)
2123 return -1;
2124 }
2125 else { /* Not using NEWOBJ. */
2126 if (save(self, callable, 0) < 0 ||
2127 save(self, argtup, 0) < 0 ||
2128 pickler_write(self, &reduce_op, 1) < 0)
2129 return -1;
2130 }
2131
2132 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2133 the caller do not want to memoize the object. Not particularly useful,
2134 but that is to mimic the behavior save_reduce() in pickle.py when
2135 obj is None. */
2136 if (obj && memo_put(self, obj) < 0)
2137 return -1;
2138
2139 if (listitems && batch_list(self, listitems) < 0)
2140 return -1;
2141
2142 if (dictitems && batch_dict(self, dictitems) < 0)
2143 return -1;
2144
2145 if (state) {
2146 if (save(self, state, 0) < 0 ||
2147 pickler_write(self, &build_op, 1) < 0)
2148 return -1;
2149 }
2150
2151 return 0;
2152}
2153
2154static int
2155save(PicklerObject *self, PyObject *obj, int pers_save)
2156{
2157 PyTypeObject *type;
2158 PyObject *reduce_func = NULL;
2159 PyObject *reduce_value = NULL;
2160 PyObject *memo_key = NULL;
2161 int status = 0;
2162
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002163 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2164 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002165
2166 /* The extra pers_save argument is necessary to avoid calling save_pers()
2167 on its returned object. */
2168 if (!pers_save && self->pers_func) {
2169 /* save_pers() returns:
2170 -1 to signal an error;
2171 0 if it did nothing successfully;
2172 1 if a persistent id was saved.
2173 */
2174 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2175 goto done;
2176 }
2177
2178 type = Py_TYPE(obj);
2179
2180 /* XXX: The old cPickle had an optimization that used switch-case
2181 statement dispatching on the first letter of the type name. It was
2182 probably not a bad idea after all. If benchmarks shows that particular
2183 optimization had some real benefits, it would be nice to add it
2184 back. */
2185
2186 /* Atom types; these aren't memoized, so don't check the memo. */
2187
2188 if (obj == Py_None) {
2189 status = save_none(self, obj);
2190 goto done;
2191 }
2192 else if (obj == Py_False || obj == Py_True) {
2193 status = save_bool(self, obj);
2194 goto done;
2195 }
2196 else if (type == &PyLong_Type) {
2197 status = save_long(self, obj);
2198 goto done;
2199 }
2200 else if (type == &PyFloat_Type) {
2201 status = save_float(self, obj);
2202 goto done;
2203 }
2204
2205 /* Check the memo to see if it has the object. If so, generate
2206 a GET (or BINGET) opcode, instead of pickling the object
2207 once again. */
2208 memo_key = PyLong_FromVoidPtr(obj);
2209 if (memo_key == NULL)
2210 goto error;
2211 if (PyDict_GetItem(self->memo, memo_key)) {
2212 if (memo_get(self, memo_key) < 0)
2213 goto error;
2214 goto done;
2215 }
2216
2217 if (type == &PyBytes_Type) {
2218 status = save_bytes(self, obj);
2219 goto done;
2220 }
2221 else if (type == &PyUnicode_Type) {
2222 status = save_unicode(self, obj);
2223 goto done;
2224 }
2225 else if (type == &PyDict_Type) {
2226 status = save_dict(self, obj);
2227 goto done;
2228 }
2229 else if (type == &PyList_Type) {
2230 status = save_list(self, obj);
2231 goto done;
2232 }
2233 else if (type == &PyTuple_Type) {
2234 status = save_tuple(self, obj);
2235 goto done;
2236 }
2237 else if (type == &PyType_Type) {
2238 status = save_global(self, obj, NULL);
2239 goto done;
2240 }
2241 else if (type == &PyFunction_Type) {
2242 status = save_global(self, obj, NULL);
2243 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2244 /* fall back to reduce */
2245 PyErr_Clear();
2246 }
2247 else {
2248 goto done;
2249 }
2250 }
2251 else if (type == &PyCFunction_Type) {
2252 status = save_global(self, obj, NULL);
2253 goto done;
2254 }
2255 else if (PyType_IsSubtype(type, &PyType_Type)) {
2256 status = save_global(self, obj, NULL);
2257 goto done;
2258 }
2259
2260 /* XXX: This part needs some unit tests. */
2261
2262 /* Get a reduction callable, and call it. This may come from
2263 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2264 * or the object's __reduce__ method.
2265 */
2266 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2267 if (reduce_func != NULL) {
2268 /* Here, the reference count of the reduce_func object returned by
2269 PyDict_GetItem needs to be increased to be consistent with the one
2270 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2271 reduce_func at the end of the save() routine.
2272 */
2273 Py_INCREF(reduce_func);
2274 Py_INCREF(obj);
2275 reduce_value = pickler_call(self, reduce_func, obj);
2276 }
2277 else {
2278 static PyObject *reduce_str = NULL;
2279 static PyObject *reduce_ex_str = NULL;
2280
2281 /* Cache the name of the reduce methods. */
2282 if (reduce_str == NULL) {
2283 reduce_str = PyUnicode_InternFromString("__reduce__");
2284 if (reduce_str == NULL)
2285 goto error;
2286 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2287 if (reduce_ex_str == NULL)
2288 goto error;
2289 }
2290
2291 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2292 automatically defined as __reduce__. While this is convenient, this
2293 make it impossible to know which method was actually called. Of
2294 course, this is not a big deal. But still, it would be nice to let
2295 the user know which method was called when something go
2296 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2297 don't actually have to check for a __reduce__ method. */
2298
2299 /* Check for a __reduce_ex__ method. */
2300 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2301 if (reduce_func != NULL) {
2302 PyObject *proto;
2303 proto = PyLong_FromLong(self->proto);
2304 if (proto != NULL) {
2305 reduce_value = pickler_call(self, reduce_func, proto);
2306 }
2307 }
2308 else {
2309 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2310 PyErr_Clear();
2311 else
2312 goto error;
2313 /* Check for a __reduce__ method. */
2314 reduce_func = PyObject_GetAttr(obj, reduce_str);
2315 if (reduce_func != NULL) {
2316 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2317 }
2318 else {
2319 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2320 type->tp_name, obj);
2321 goto error;
2322 }
2323 }
2324 }
2325
2326 if (reduce_value == NULL)
2327 goto error;
2328
2329 if (PyUnicode_Check(reduce_value)) {
2330 status = save_global(self, obj, reduce_value);
2331 goto done;
2332 }
2333
2334 if (!PyTuple_Check(reduce_value)) {
2335 PyErr_SetString(PicklingError,
2336 "__reduce__ must return a string or tuple");
2337 goto error;
2338 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002339
2340 status = save_reduce(self, reduce_value, obj);
2341
2342 if (0) {
2343 error:
2344 status = -1;
2345 }
2346 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002347 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002348 Py_XDECREF(memo_key);
2349 Py_XDECREF(reduce_func);
2350 Py_XDECREF(reduce_value);
2351
2352 return status;
2353}
2354
2355static int
2356dump(PicklerObject *self, PyObject *obj)
2357{
2358 const char stop_op = STOP;
2359
2360 if (self->proto >= 2) {
2361 char header[2];
2362
2363 header[0] = PROTO;
2364 assert(self->proto >= 0 && self->proto < 256);
2365 header[1] = (unsigned char)self->proto;
2366 if (pickler_write(self, header, 2) < 0)
2367 return -1;
2368 }
2369
2370 if (save(self, obj, 0) < 0 ||
2371 pickler_write(self, &stop_op, 1) < 0 ||
2372 pickler_write(self, NULL, 0) < 0)
2373 return -1;
2374
2375 return 0;
2376}
2377
2378PyDoc_STRVAR(Pickler_clear_memo_doc,
2379"clear_memo() -> None. Clears the pickler's \"memo\"."
2380"\n"
2381"The memo is the data structure that remembers which objects the\n"
2382"pickler has already seen, so that shared or recursive objects are\n"
2383"pickled by reference and not by value. This method is useful when\n"
2384"re-using picklers.");
2385
2386static PyObject *
2387Pickler_clear_memo(PicklerObject *self)
2388{
2389 if (self->memo)
2390 PyDict_Clear(self->memo);
2391
2392 Py_RETURN_NONE;
2393}
2394
2395PyDoc_STRVAR(Pickler_dump_doc,
2396"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2397
2398static PyObject *
2399Pickler_dump(PicklerObject *self, PyObject *args)
2400{
2401 PyObject *obj;
2402
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002403 /* Check whether the Pickler was initialized correctly (issue3664).
2404 Developers often forget to call __init__() in their subclasses, which
2405 would trigger a segfault without this check. */
2406 if (self->write == NULL) {
2407 PyErr_Format(PicklingError,
2408 "Pickler.__init__() was not called by %s.__init__()",
2409 Py_TYPE(self)->tp_name);
2410 return NULL;
2411 }
2412
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002413 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2414 return NULL;
2415
2416 if (dump(self, obj) < 0)
2417 return NULL;
2418
2419 Py_RETURN_NONE;
2420}
2421
/* Method table of the Pickler type: dump(obj) takes positional arguments,
   clear_memo() takes none.  The all-NULL entry terminates the table. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2429
/* tp_dealloc for Pickler: untrack the object from the garbage collector,
   drop every object reference it holds, release the write buffer and
   finally free the object through its type's tp_free slot. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->write);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    PyMem_Free(self->write_buf);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
2445
/* tp_traverse for Pickler: report every object reference held by self to
   the garbage collector's visit callback. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->memo);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
2456
/* tp_clear for Pickler (also called by Pickler_init() on re-initialization):
   drop all object references and release the write buffer.  Always
   returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->write);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->fast_memo);

    /* Reset the pointer so a later clear or dealloc does not free the
       buffer a second time. */
    PyMem_Free(self->write_buf);
    self->write_buf = NULL;

    return 0;
}
2471
2472PyDoc_STRVAR(Pickler_doc,
2473"Pickler(file, protocol=None)"
2474"\n"
2475"This takes a binary file for writing a pickle data stream.\n"
2476"\n"
2477"The optional protocol argument tells the pickler to use the\n"
2478"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2479"protocol is 3; a backward-incompatible protocol designed for\n"
2480"Python 3.0.\n"
2481"\n"
2482"Specifying a negative protocol version selects the highest\n"
2483"protocol version supported. The higher the protocol used, the\n"
2484"more recent the version of Python needed to read the pickle\n"
2485"produced.\n"
2486"\n"
2487"The file argument must have a write() method that accepts a single\n"
2488"bytes argument. It can thus be a file object opened for binary\n"
2489"writing, a io.BytesIO instance, or any other custom object that\n"
2490"meets this interface.\n");
2491
2492static int
2493Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2494{
2495 static char *kwlist[] = {"file", "protocol", 0};
2496 PyObject *file;
2497 PyObject *proto_obj = NULL;
2498 long proto = 0;
2499
2500 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2501 kwlist, &file, &proto_obj))
2502 return -1;
2503
2504 /* In case of multiple __init__() calls, clear previous content. */
2505 if (self->write != NULL)
2506 (void)Pickler_clear(self);
2507
2508 if (proto_obj == NULL || proto_obj == Py_None)
2509 proto = DEFAULT_PROTOCOL;
2510 else
2511 proto = PyLong_AsLong(proto_obj);
2512
2513 if (proto < 0)
2514 proto = HIGHEST_PROTOCOL;
2515 if (proto > HIGHEST_PROTOCOL) {
2516 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2517 HIGHEST_PROTOCOL);
2518 return -1;
2519 }
2520
2521 self->proto = proto;
2522 self->bin = proto > 0;
2523 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002524 self->fast = 0;
2525 self->fast_nesting = 0;
2526 self->fast_memo = NULL;
2527
2528 if (!PyObject_HasAttrString(file, "write")) {
2529 PyErr_SetString(PyExc_TypeError,
2530 "file must have a 'write' attribute");
2531 return -1;
2532 }
2533 self->write = PyObject_GetAttrString(file, "write");
2534 if (self->write == NULL)
2535 return -1;
2536 self->buf_size = 0;
2537 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2538 if (self->write_buf == NULL) {
2539 PyErr_NoMemory();
2540 return -1;
2541 }
2542 self->pers_func = NULL;
2543 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2544 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2545 "persistent_id");
2546 if (self->pers_func == NULL)
2547 return -1;
2548 }
2549 self->memo = PyDict_New();
2550 if (self->memo == NULL)
2551 return -1;
2552
2553 return 0;
2554}
2555
2556static PyObject *
2557Pickler_get_memo(PicklerObject *self)
2558{
2559 if (self->memo == NULL)
2560 PyErr_SetString(PyExc_AttributeError, "memo");
2561 else
2562 Py_INCREF(self->memo);
2563 return self->memo;
2564}
2565
2566static int
2567Pickler_set_memo(PicklerObject *self, PyObject *value)
2568{
2569 PyObject *tmp;
2570
2571 if (value == NULL) {
2572 PyErr_SetString(PyExc_TypeError,
2573 "attribute deletion is not supported");
2574 return -1;
2575 }
2576 if (!PyDict_Check(value)) {
2577 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2578 return -1;
2579 }
2580
2581 tmp = self->memo;
2582 Py_INCREF(value);
2583 self->memo = value;
2584 Py_XDECREF(tmp);
2585
2586 return 0;
2587}
2588
2589static PyObject *
2590Pickler_get_persid(PicklerObject *self)
2591{
2592 if (self->pers_func == NULL)
2593 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2594 else
2595 Py_INCREF(self->pers_func);
2596 return self->pers_func;
2597}
2598
2599static int
2600Pickler_set_persid(PicklerObject *self, PyObject *value)
2601{
2602 PyObject *tmp;
2603
2604 if (value == NULL) {
2605 PyErr_SetString(PyExc_TypeError,
2606 "attribute deletion is not supported");
2607 return -1;
2608 }
2609 if (!PyCallable_Check(value)) {
2610 PyErr_SetString(PyExc_TypeError,
2611 "persistent_id must be a callable taking one argument");
2612 return -1;
2613 }
2614
2615 tmp = self->pers_func;
2616 Py_INCREF(value);
2617 self->pers_func = value;
2618 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2619
2620 return 0;
2621}
2622
/* Struct members of PicklerObject exposed as Python attributes: the int
   fields 'bin' and 'fast'.  The {NULL} entry terminates the table. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}
};
2628
/* Computed attributes of Pickler: 'memo' and 'persistent_id', each backed
   by a getter/setter pair.  The {NULL} entry terminates the table. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}
};
2636
/* Type object for _pickle.Pickler.  The flags make it subclassable
   (Py_TPFLAGS_BASETYPE) and a participant in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC), which is why traverse/clear slots are filled in
   and PyObject_GC_Del is used as tp_free. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2679
2680/* Temporary helper for calling self.find_class().
2681
2682 XXX: It would be nice to able to avoid Python function call overhead, by
2683 using directly the C version of find_class(), when find_class() is not
2684 overridden by a subclass. Although, this could become rather hackish. A
2685 simpler optimization would be to call the C function when self is not a
2686 subclass instance. */
2687static PyObject *
2688find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2689{
2690 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2691 module_name, global_name);
2692}
2693
2694static int
2695marker(UnpicklerObject *self)
2696{
2697 if (self->num_marks < 1) {
2698 PyErr_SetString(UnpicklingError, "could not find MARK");
2699 return -1;
2700 }
2701
2702 return self->marks[--self->num_marks];
2703}
2704
/* NONE opcode: append Py_None to the unpickler stack.  Returns 0 on
   success.
   NOTE(review): PDATA_APPEND presumably returns its third argument (-1)
   from this function on failure -- confirm against the macro. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
2711
/* Set UnpicklingError("pickle data was truncated") and return -1, so that
   callers can write `return bad_readline();` when a line read from the
   pickle is too short. */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
2718
2719static int
2720load_int(UnpicklerObject *self)
2721{
2722 PyObject *value;
2723 char *endptr, *s;
2724 Py_ssize_t len;
2725 long x;
2726
2727 if ((len = unpickler_readline(self, &s)) < 0)
2728 return -1;
2729 if (len < 2)
2730 return bad_readline();
2731
2732 errno = 0;
2733 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2734 x = strtol(s, &endptr, 0);
2735
2736 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2737 /* Hm, maybe we've got something long. Let's try reading
2738 * it as a Python long object. */
2739 errno = 0;
2740 /* XXX: Same thing about the base here. */
2741 value = PyLong_FromString(s, NULL, 0);
2742 if (value == NULL) {
2743 PyErr_SetString(PyExc_ValueError,
2744 "could not convert string to int");
2745 return -1;
2746 }
2747 }
2748 else {
2749 if (len == 3 && (x == 0 || x == 1)) {
2750 if ((value = PyBool_FromLong(x)) == NULL)
2751 return -1;
2752 }
2753 else {
2754 if ((value = PyLong_FromLong(x)) == NULL)
2755 return -1;
2756 }
2757 }
2758
2759 PDATA_PUSH(self->stack, value, -1);
2760 return 0;
2761}
2762
/* NEWTRUE / NEWFALSE opcodes: append the given singleton, which must be
   Py_True or Py_False (asserted), to the unpickler stack.  Returns 0 on
   success. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
2770
/* Decode `size` bytes at `bytes` as a little-endian integer and return it
 * as a C long.
 *
 * Obscure: when size is 1 or 2 the value is unsigned, but when size is 4
 * it is a signed 32-bit quantity (BININT is really "BININT4").  This is a
 * historical source of cross-platform bugs.
 *
 * The value is assembled in an unsigned long so that no shift ever moves a
 * bit into the sign position of a signed type (undefined behaviour where
 * long is 32 bits wide).
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, a 4-byte BININT is signed, so on a box
     * with longs wider than 4 bytes the sign bit has to be extended to the
     * full width.  With 32-bit longs the conversion below already yields
     * the negative value.
     */
    if (size == 4 && sizeof(long) > 4 && (x & 0x80000000UL) != 0) {
        x |= ~0xFFFFFFFFUL;
    }

    return (long)x;
}
2797
2798static int
2799load_binintx(UnpicklerObject *self, char *s, int size)
2800{
2801 PyObject *value;
2802 long x;
2803
2804 x = calc_binint(s, size);
2805
2806 if ((value = PyLong_FromLong(x)) == NULL)
2807 return -1;
2808
2809 PDATA_PUSH(self->stack, value, -1);
2810 return 0;
2811}
2812
2813static int
2814load_binint(UnpicklerObject *self)
2815{
2816 char *s;
2817
2818 if (unpickler_read(self, &s, 4) < 0)
2819 return -1;
2820
2821 return load_binintx(self, s, 4);
2822}
2823
2824static int
2825load_binint1(UnpicklerObject *self)
2826{
2827 char *s;
2828
2829 if (unpickler_read(self, &s, 1) < 0)
2830 return -1;
2831
2832 return load_binintx(self, s, 1);
2833}
2834
2835static int
2836load_binint2(UnpicklerObject *self)
2837{
2838 char *s;
2839
2840 if (unpickler_read(self, &s, 2) < 0)
2841 return -1;
2842
2843 return load_binintx(self, s, 2);
2844}
2845
2846static int
2847load_long(UnpicklerObject *self)
2848{
2849 PyObject *value;
2850 char *s;
2851 Py_ssize_t len;
2852
2853 if ((len = unpickler_readline(self, &s)) < 0)
2854 return -1;
2855 if (len < 2)
2856 return bad_readline();
2857
2858 /* XXX: Should the base argument explicitly set to 10? */
2859 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2860 return -1;
2861
2862 PDATA_PUSH(self->stack, value, -1);
2863 return 0;
2864}
2865
2866/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2867 * data following.
2868 */
2869static int
2870load_counted_long(UnpicklerObject *self, int size)
2871{
2872 PyObject *value;
2873 char *nbytes;
2874 char *pdata;
2875
2876 assert(size == 1 || size == 4);
2877 if (unpickler_read(self, &nbytes, size) < 0)
2878 return -1;
2879
2880 size = calc_binint(nbytes, size);
2881 if (size < 0) {
2882 /* Corrupt or hostile pickle -- we never write one like this */
2883 PyErr_SetString(UnpicklingError,
2884 "LONG pickle has negative byte count");
2885 return -1;
2886 }
2887
2888 if (size == 0)
2889 value = PyLong_FromLong(0L);
2890 else {
2891 /* Read the raw little-endian bytes and convert. */
2892 if (unpickler_read(self, &pdata, size) < 0)
2893 return -1;
2894 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2895 1 /* little endian */ , 1 /* signed */ );
2896 }
2897 if (value == NULL)
2898 return -1;
2899 PDATA_PUSH(self->stack, value, -1);
2900 return 0;
2901}
2902
2903static int
2904load_float(UnpicklerObject *self)
2905{
2906 PyObject *value;
2907 char *endptr, *s;
2908 Py_ssize_t len;
2909 double d;
2910
2911 if ((len = unpickler_readline(self, &s)) < 0)
2912 return -1;
2913 if (len < 2)
2914 return bad_readline();
2915
2916 errno = 0;
2917 d = PyOS_ascii_strtod(s, &endptr);
2918
2919 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2920 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2921 return -1;
2922 }
2923
2924 if ((value = PyFloat_FromDouble(d)) == NULL)
2925 return -1;
2926
2927 PDATA_PUSH(self->stack, value, -1);
2928 return 0;
2929}
2930
2931static int
2932load_binfloat(UnpicklerObject *self)
2933{
2934 PyObject *value;
2935 double x;
2936 char *s;
2937
2938 if (unpickler_read(self, &s, 8) < 0)
2939 return -1;
2940
2941 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2942 if (x == -1.0 && PyErr_Occurred())
2943 return -1;
2944
2945 if ((value = PyFloat_FromDouble(x)) == NULL)
2946 return -1;
2947
2948 PDATA_PUSH(self->stack, value, -1);
2949 return 0;
2950}
2951
2952static int
2953load_string(UnpicklerObject *self)
2954{
2955 PyObject *bytes;
2956 PyObject *str = NULL;
2957 Py_ssize_t len;
2958 char *s, *p;
2959
2960 if ((len = unpickler_readline(self, &s)) < 0)
2961 return -1;
2962 if (len < 3)
2963 return bad_readline();
2964 if ((s = strdup(s)) == NULL) {
2965 PyErr_NoMemory();
2966 return -1;
2967 }
2968
2969 /* Strip outermost quotes */
2970 while (s[len - 1] <= ' ')
2971 len--;
2972 if (s[0] == '"' && s[len - 1] == '"') {
2973 s[len - 1] = '\0';
2974 p = s + 1;
2975 len -= 2;
2976 }
2977 else if (s[0] == '\'' && s[len - 1] == '\'') {
2978 s[len - 1] = '\0';
2979 p = s + 1;
2980 len -= 2;
2981 }
2982 else {
2983 free(s);
2984 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2985 return -1;
2986 }
2987
2988 /* Use the PyBytes API to decode the string, since that is what is used
2989 to encode, and then coerce the result to Unicode. */
2990 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2991 free(s);
2992 if (bytes == NULL)
2993 return -1;
2994 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2995 Py_DECREF(bytes);
2996 if (str == NULL)
2997 return -1;
2998
2999 PDATA_PUSH(self->stack, str, -1);
3000 return 0;
3001}
3002
3003static int
3004load_binbytes(UnpicklerObject *self)
3005{
3006 PyObject *bytes;
3007 long x;
3008 char *s;
3009
3010 if (unpickler_read(self, &s, 4) < 0)
3011 return -1;
3012
3013 x = calc_binint(s, 4);
3014 if (x < 0) {
3015 PyErr_SetString(UnpicklingError,
3016 "BINBYTES pickle has negative byte count");
3017 return -1;
3018 }
3019
3020 if (unpickler_read(self, &s, x) < 0)
3021 return -1;
3022 bytes = PyBytes_FromStringAndSize(s, x);
3023 if (bytes == NULL)
3024 return -1;
3025
3026 PDATA_PUSH(self->stack, bytes, -1);
3027 return 0;
3028}
3029
3030static int
3031load_short_binbytes(UnpicklerObject *self)
3032{
3033 PyObject *bytes;
3034 unsigned char x;
3035 char *s;
3036
3037 if (unpickler_read(self, &s, 1) < 0)
3038 return -1;
3039
3040 x = (unsigned char)s[0];
3041
3042 if (unpickler_read(self, &s, x) < 0)
3043 return -1;
3044
3045 bytes = PyBytes_FromStringAndSize(s, x);
3046 if (bytes == NULL)
3047 return -1;
3048
3049 PDATA_PUSH(self->stack, bytes, -1);
3050 return 0;
3051}
3052
3053static int
3054load_binstring(UnpicklerObject *self)
3055{
3056 PyObject *str;
3057 long x;
3058 char *s;
3059
3060 if (unpickler_read(self, &s, 4) < 0)
3061 return -1;
3062
3063 x = calc_binint(s, 4);
3064 if (x < 0) {
3065 PyErr_SetString(UnpicklingError,
3066 "BINSTRING pickle has negative byte count");
3067 return -1;
3068 }
3069
3070 if (unpickler_read(self, &s, x) < 0)
3071 return -1;
3072
3073 /* Convert Python 2.x strings to unicode. */
3074 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3075 if (str == NULL)
3076 return -1;
3077
3078 PDATA_PUSH(self->stack, str, -1);
3079 return 0;
3080}
3081
3082static int
3083load_short_binstring(UnpicklerObject *self)
3084{
3085 PyObject *str;
3086 unsigned char x;
3087 char *s;
3088
3089 if (unpickler_read(self, &s, 1) < 0)
3090 return -1;
3091
3092 x = (unsigned char)s[0];
3093
3094 if (unpickler_read(self, &s, x) < 0)
3095 return -1;
3096
3097 /* Convert Python 2.x strings to unicode. */
3098 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3099 if (str == NULL)
3100 return -1;
3101
3102 PDATA_PUSH(self->stack, str, -1);
3103 return 0;
3104}
3105
3106static int
3107load_unicode(UnpicklerObject *self)
3108{
3109 PyObject *str;
3110 Py_ssize_t len;
3111 char *s;
3112
3113 if ((len = unpickler_readline(self, &s)) < 0)
3114 return -1;
3115 if (len < 1)
3116 return bad_readline();
3117
3118 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3119 if (str == NULL)
3120 return -1;
3121
3122 PDATA_PUSH(self->stack, str, -1);
3123 return 0;
3124}
3125
3126static int
3127load_binunicode(UnpicklerObject *self)
3128{
3129 PyObject *str;
3130 long size;
3131 char *s;
3132
3133 if (unpickler_read(self, &s, 4) < 0)
3134 return -1;
3135
3136 size = calc_binint(s, 4);
3137 if (size < 0) {
3138 PyErr_SetString(UnpicklingError,
3139 "BINUNICODE pickle has negative byte count");
3140 return -1;
3141 }
3142
3143 if (unpickler_read(self, &s, size) < 0)
3144 return -1;
3145
3146 str = PyUnicode_DecodeUTF8(s, size, NULL);
3147 if (str == NULL)
3148 return -1;
3149
3150 PDATA_PUSH(self->stack, str, -1);
3151 return 0;
3152}
3153
3154static int
3155load_tuple(UnpicklerObject *self)
3156{
3157 PyObject *tuple;
3158 int i;
3159
3160 if ((i = marker(self)) < 0)
3161 return -1;
3162
3163 tuple = Pdata_poptuple(self->stack, i);
3164 if (tuple == NULL)
3165 return -1;
3166 PDATA_PUSH(self->stack, tuple, -1);
3167 return 0;
3168}
3169
3170static int
3171load_counted_tuple(UnpicklerObject *self, int len)
3172{
3173 PyObject *tuple;
3174
3175 tuple = PyTuple_New(len);
3176 if (tuple == NULL)
3177 return -1;
3178
3179 while (--len >= 0) {
3180 PyObject *item;
3181
3182 PDATA_POP(self->stack, item);
3183 if (item == NULL)
3184 return -1;
3185 PyTuple_SET_ITEM(tuple, len, item);
3186 }
3187 PDATA_PUSH(self->stack, tuple, -1);
3188 return 0;
3189}
3190
3191static int
3192load_empty_list(UnpicklerObject *self)
3193{
3194 PyObject *list;
3195
3196 if ((list = PyList_New(0)) == NULL)
3197 return -1;
3198 PDATA_PUSH(self->stack, list, -1);
3199 return 0;
3200}
3201
3202static int
3203load_empty_dict(UnpicklerObject *self)
3204{
3205 PyObject *dict;
3206
3207 if ((dict = PyDict_New()) == NULL)
3208 return -1;
3209 PDATA_PUSH(self->stack, dict, -1);
3210 return 0;
3211}
3212
3213static int
3214load_list(UnpicklerObject *self)
3215{
3216 PyObject *list;
3217 int i;
3218
3219 if ((i = marker(self)) < 0)
3220 return -1;
3221
3222 list = Pdata_poplist(self->stack, i);
3223 if (list == NULL)
3224 return -1;
3225 PDATA_PUSH(self->stack, list, -1);
3226 return 0;
3227}
3228
3229static int
3230load_dict(UnpicklerObject *self)
3231{
3232 PyObject *dict, *key, *value;
3233 int i, j, k;
3234
3235 if ((i = marker(self)) < 0)
3236 return -1;
3237 j = self->stack->length;
3238
3239 if ((dict = PyDict_New()) == NULL)
3240 return -1;
3241
3242 for (k = i + 1; k < j; k += 2) {
3243 key = self->stack->data[k - 1];
3244 value = self->stack->data[k];
3245 if (PyDict_SetItem(dict, key, value) < 0) {
3246 Py_DECREF(dict);
3247 return -1;
3248 }
3249 }
3250 Pdata_clear(self->stack, i);
3251 PDATA_PUSH(self->stack, dict, -1);
3252 return 0;
3253}
3254
3255static PyObject *
3256instantiate(PyObject *cls, PyObject *args)
3257{
3258 PyObject *r = NULL;
3259
3260 /* XXX: The pickle.py module does not create instances this way when the
3261 args tuple is empty. See Unpickler._instantiate(). */
3262 if ((r = PyObject_CallObject(cls, args)))
3263 return r;
3264
3265 /* XXX: Is this still nescessary? */
3266 {
3267 PyObject *tp, *v, *tb, *tmp_value;
3268
3269 PyErr_Fetch(&tp, &v, &tb);
3270 tmp_value = v;
3271 /* NULL occurs when there was a KeyboardInterrupt */
3272 if (tmp_value == NULL)
3273 tmp_value = Py_None;
3274 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3275 Py_XDECREF(v);
3276 v = r;
3277 }
3278 PyErr_Restore(tp, v, tb);
3279 }
3280 return NULL;
3281}
3282
3283static int
3284load_obj(UnpicklerObject *self)
3285{
3286 PyObject *cls, *args, *obj = NULL;
3287 int i;
3288
3289 if ((i = marker(self)) < 0)
3290 return -1;
3291
3292 args = Pdata_poptuple(self->stack, i + 1);
3293 if (args == NULL)
3294 return -1;
3295
3296 PDATA_POP(self->stack, cls);
3297 if (cls) {
3298 obj = instantiate(cls, args);
3299 Py_DECREF(cls);
3300 }
3301 Py_DECREF(args);
3302 if (obj == NULL)
3303 return -1;
3304
3305 PDATA_PUSH(self->stack, obj, -1);
3306 return 0;
3307}
3308
3309static int
3310load_inst(UnpicklerObject *self)
3311{
3312 PyObject *cls = NULL;
3313 PyObject *args = NULL;
3314 PyObject *obj = NULL;
3315 PyObject *module_name;
3316 PyObject *class_name;
3317 Py_ssize_t len;
3318 int i;
3319 char *s;
3320
3321 if ((i = marker(self)) < 0)
3322 return -1;
3323 if ((len = unpickler_readline(self, &s)) < 0)
3324 return -1;
3325 if (len < 2)
3326 return bad_readline();
3327
3328 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3329 identifiers are permitted in Python 3.0, since the INST opcode is only
3330 supported by older protocols on Python 2.x. */
3331 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3332 if (module_name == NULL)
3333 return -1;
3334
3335 if ((len = unpickler_readline(self, &s)) >= 0) {
3336 if (len < 2)
3337 return bad_readline();
3338 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3339 if (class_name == NULL) {
3340 cls = find_class(self, module_name, class_name);
3341 Py_DECREF(class_name);
3342 }
3343 }
3344 Py_DECREF(module_name);
3345
3346 if (cls == NULL)
3347 return -1;
3348
3349 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3350 obj = instantiate(cls, args);
3351 Py_DECREF(args);
3352 }
3353 Py_DECREF(cls);
3354
3355 if (obj == NULL)
3356 return -1;
3357
3358 PDATA_PUSH(self->stack, obj, -1);
3359 return 0;
3360}
3361
3362static int
3363load_newobj(UnpicklerObject *self)
3364{
3365 PyObject *args = NULL;
3366 PyObject *clsraw = NULL;
3367 PyTypeObject *cls; /* clsraw cast to its true type */
3368 PyObject *obj;
3369
3370 /* Stack is ... cls argtuple, and we want to call
3371 * cls.__new__(cls, *argtuple).
3372 */
3373 PDATA_POP(self->stack, args);
3374 if (args == NULL)
3375 goto error;
3376 if (!PyTuple_Check(args)) {
3377 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3378 goto error;
3379 }
3380
3381 PDATA_POP(self->stack, clsraw);
3382 cls = (PyTypeObject *)clsraw;
3383 if (cls == NULL)
3384 goto error;
3385 if (!PyType_Check(cls)) {
3386 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3387 "isn't a type object");
3388 goto error;
3389 }
3390 if (cls->tp_new == NULL) {
3391 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3392 "has NULL tp_new");
3393 goto error;
3394 }
3395
3396 /* Call __new__. */
3397 obj = cls->tp_new(cls, args, NULL);
3398 if (obj == NULL)
3399 goto error;
3400
3401 Py_DECREF(args);
3402 Py_DECREF(clsraw);
3403 PDATA_PUSH(self->stack, obj, -1);
3404 return 0;
3405
3406 error:
3407 Py_XDECREF(args);
3408 Py_XDECREF(clsraw);
3409 return -1;
3410}
3411
3412static int
3413load_global(UnpicklerObject *self)
3414{
3415 PyObject *global = NULL;
3416 PyObject *module_name;
3417 PyObject *global_name;
3418 Py_ssize_t len;
3419 char *s;
3420
3421 if ((len = unpickler_readline(self, &s)) < 0)
3422 return -1;
3423 if (len < 2)
3424 return bad_readline();
3425 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3426 if (!module_name)
3427 return -1;
3428
3429 if ((len = unpickler_readline(self, &s)) >= 0) {
3430 if (len < 2) {
3431 Py_DECREF(module_name);
3432 return bad_readline();
3433 }
3434 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3435 if (global_name) {
3436 global = find_class(self, module_name, global_name);
3437 Py_DECREF(global_name);
3438 }
3439 }
3440 Py_DECREF(module_name);
3441
3442 if (global == NULL)
3443 return -1;
3444 PDATA_PUSH(self->stack, global, -1);
3445 return 0;
3446}
3447
3448static int
3449load_persid(UnpicklerObject *self)
3450{
3451 PyObject *pid;
3452 Py_ssize_t len;
3453 char *s;
3454
3455 if (self->pers_func) {
3456 if ((len = unpickler_readline(self, &s)) < 0)
3457 return -1;
3458 if (len < 2)
3459 return bad_readline();
3460
3461 pid = PyBytes_FromStringAndSize(s, len - 1);
3462 if (pid == NULL)
3463 return -1;
3464
3465 /* Ugh... this does not leak since unpickler_call() steals the
3466 reference to pid first. */
3467 pid = unpickler_call(self, self->pers_func, pid);
3468 if (pid == NULL)
3469 return -1;
3470
3471 PDATA_PUSH(self->stack, pid, -1);
3472 return 0;
3473 }
3474 else {
3475 PyErr_SetString(UnpicklingError,
3476 "A load persistent id instruction was encountered,\n"
3477 "but no persistent_load function was specified.");
3478 return -1;
3479 }
3480}
3481
3482static int
3483load_binpersid(UnpicklerObject *self)
3484{
3485 PyObject *pid;
3486
3487 if (self->pers_func) {
3488 PDATA_POP(self->stack, pid);
3489 if (pid == NULL)
3490 return -1;
3491
3492 /* Ugh... this does not leak since unpickler_call() steals the
3493 reference to pid first. */
3494 pid = unpickler_call(self, self->pers_func, pid);
3495 if (pid == NULL)
3496 return -1;
3497
3498 PDATA_PUSH(self->stack, pid, -1);
3499 return 0;
3500 }
3501 else {
3502 PyErr_SetString(UnpicklingError,
3503 "A load persistent id instruction was encountered,\n"
3504 "but no persistent_load function was specified.");
3505 return -1;
3506 }
3507}
3508
3509static int
3510load_pop(UnpicklerObject *self)
3511{
3512 int len;
3513
3514 if ((len = self->stack->length) <= 0)
3515 return stack_underflow();
3516
3517 /* Note that we split the (pickle.py) stack into two stacks,
3518 * an object stack and a mark stack. We have to be clever and
3519 * pop the right one. We do this by looking at the top of the
3520 * mark stack.
3521 */
3522
3523 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3524 self->num_marks--;
3525 else {
3526 len--;
3527 Py_DECREF(self->stack->data[len]);
3528 self->stack->length = len;
3529 }
3530
3531 return 0;
3532}
3533
3534static int
3535load_pop_mark(UnpicklerObject *self)
3536{
3537 int i;
3538
3539 if ((i = marker(self)) < 0)
3540 return -1;
3541
3542 Pdata_clear(self->stack, i);
3543
3544 return 0;
3545}
3546
3547static int
3548load_dup(UnpicklerObject *self)
3549{
3550 PyObject *last;
3551 int len;
3552
3553 if ((len = self->stack->length) <= 0)
3554 return stack_underflow();
3555 last = self->stack->data[len - 1];
3556 PDATA_APPEND(self->stack, last, -1);
3557 return 0;
3558}
3559
3560static int
3561load_get(UnpicklerObject *self)
3562{
3563 PyObject *key, *value;
3564 Py_ssize_t len;
3565 char *s;
3566
3567 if ((len = unpickler_readline(self, &s)) < 0)
3568 return -1;
3569 if (len < 2)
3570 return bad_readline();
3571
3572 key = PyLong_FromString(s, NULL, 10);
3573 if (key == NULL)
3574 return -1;
3575
3576 value = PyDict_GetItemWithError(self->memo, key);
3577 if (value == NULL) {
3578 if (!PyErr_Occurred())
3579 PyErr_SetObject(PyExc_KeyError, key);
3580 Py_DECREF(key);
3581 return -1;
3582 }
3583 Py_DECREF(key);
3584
3585 PDATA_APPEND(self->stack, value, -1);
3586 return 0;
3587}
3588
3589static int
3590load_binget(UnpicklerObject *self)
3591{
3592 PyObject *key, *value;
3593 char *s;
3594
3595 if (unpickler_read(self, &s, 1) < 0)
3596 return -1;
3597
3598 /* Here, the unsigned cast is necessary to avoid negative values. */
3599 key = PyLong_FromLong((long)(unsigned char)s[0]);
3600 if (key == NULL)
3601 return -1;
3602
3603 value = PyDict_GetItemWithError(self->memo, key);
3604 if (value == NULL) {
3605 if (!PyErr_Occurred())
3606 PyErr_SetObject(PyExc_KeyError, key);
3607 Py_DECREF(key);
3608 return -1;
3609 }
3610 Py_DECREF(key);
3611
3612 PDATA_APPEND(self->stack, value, -1);
3613 return 0;
3614}
3615
3616static int
3617load_long_binget(UnpicklerObject *self)
3618{
3619 PyObject *key, *value;
3620 char *s;
3621 long k;
3622
3623 if (unpickler_read(self, &s, 4) < 0)
3624 return -1;
3625
3626 k = (long)(unsigned char)s[0];
3627 k |= (long)(unsigned char)s[1] << 8;
3628 k |= (long)(unsigned char)s[2] << 16;
3629 k |= (long)(unsigned char)s[3] << 24;
3630
3631 key = PyLong_FromLong(k);
3632 if (key == NULL)
3633 return -1;
3634
3635 value = PyDict_GetItemWithError(self->memo, key);
3636 if (value == NULL) {
3637 if (!PyErr_Occurred())
3638 PyErr_SetObject(PyExc_KeyError, key);
3639 Py_DECREF(key);
3640 return -1;
3641 }
3642 Py_DECREF(key);
3643
3644 PDATA_APPEND(self->stack, value, -1);
3645 return 0;
3646}
3647
3648/* Push an object from the extension registry (EXT[124]). nbytes is
3649 * the number of bytes following the opcode, holding the index (code) value.
3650 */
3651static int
3652load_extension(UnpicklerObject *self, int nbytes)
3653{
3654 char *codebytes; /* the nbytes bytes after the opcode */
3655 long code; /* calc_binint returns long */
3656 PyObject *py_code; /* code as a Python int */
3657 PyObject *obj; /* the object to push */
3658 PyObject *pair; /* (module_name, class_name) */
3659 PyObject *module_name, *class_name;
3660
3661 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3662 if (unpickler_read(self, &codebytes, nbytes) < 0)
3663 return -1;
3664 code = calc_binint(codebytes, nbytes);
3665 if (code <= 0) { /* note that 0 is forbidden */
3666 /* Corrupt or hostile pickle. */
3667 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3668 return -1;
3669 }
3670
3671 /* Look for the code in the cache. */
3672 py_code = PyLong_FromLong(code);
3673 if (py_code == NULL)
3674 return -1;
3675 obj = PyDict_GetItem(extension_cache, py_code);
3676 if (obj != NULL) {
3677 /* Bingo. */
3678 Py_DECREF(py_code);
3679 PDATA_APPEND(self->stack, obj, -1);
3680 return 0;
3681 }
3682
3683 /* Look up the (module_name, class_name) pair. */
3684 pair = PyDict_GetItem(inverted_registry, py_code);
3685 if (pair == NULL) {
3686 Py_DECREF(py_code);
3687 PyErr_Format(PyExc_ValueError, "unregistered extension "
3688 "code %ld", code);
3689 return -1;
3690 }
3691 /* Since the extension registry is manipulable via Python code,
3692 * confirm that pair is really a 2-tuple of strings.
3693 */
3694 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3695 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3696 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3697 Py_DECREF(py_code);
3698 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3699 "isn't a 2-tuple of strings", code);
3700 return -1;
3701 }
3702 /* Load the object. */
3703 obj = find_class(self, module_name, class_name);
3704 if (obj == NULL) {
3705 Py_DECREF(py_code);
3706 return -1;
3707 }
3708 /* Cache code -> obj. */
3709 code = PyDict_SetItem(extension_cache, py_code, obj);
3710 Py_DECREF(py_code);
3711 if (code < 0) {
3712 Py_DECREF(obj);
3713 return -1;
3714 }
3715 PDATA_PUSH(self->stack, obj, -1);
3716 return 0;
3717}
3718
3719static int
3720load_put(UnpicklerObject *self)
3721{
3722 PyObject *key, *value;
3723 Py_ssize_t len;
3724 char *s;
3725 int x;
3726
3727 if ((len = unpickler_readline(self, &s)) < 0)
3728 return -1;
3729 if (len < 2)
3730 return bad_readline();
3731 if ((x = self->stack->length) <= 0)
3732 return stack_underflow();
3733
3734 key = PyLong_FromString(s, NULL, 10);
3735 if (key == NULL)
3736 return -1;
3737 value = self->stack->data[x - 1];
3738
3739 x = PyDict_SetItem(self->memo, key, value);
3740 Py_DECREF(key);
3741 return x;
3742}
3743
3744static int
3745load_binput(UnpicklerObject *self)
3746{
3747 PyObject *key, *value;
3748 char *s;
3749 int x;
3750
3751 if (unpickler_read(self, &s, 1) < 0)
3752 return -1;
3753 if ((x = self->stack->length) <= 0)
3754 return stack_underflow();
3755
3756 key = PyLong_FromLong((long)(unsigned char)s[0]);
3757 if (key == NULL)
3758 return -1;
3759 value = self->stack->data[x - 1];
3760
3761 x = PyDict_SetItem(self->memo, key, value);
3762 Py_DECREF(key);
3763 return x;
3764}
3765
3766static int
3767load_long_binput(UnpicklerObject *self)
3768{
3769 PyObject *key, *value;
3770 long k;
3771 char *s;
3772 int x;
3773
3774 if (unpickler_read(self, &s, 4) < 0)
3775 return -1;
3776 if ((x = self->stack->length) <= 0)
3777 return stack_underflow();
3778
3779 k = (long)(unsigned char)s[0];
3780 k |= (long)(unsigned char)s[1] << 8;
3781 k |= (long)(unsigned char)s[2] << 16;
3782 k |= (long)(unsigned char)s[3] << 24;
3783
3784 key = PyLong_FromLong(k);
3785 if (key == NULL)
3786 return -1;
3787 value = self->stack->data[x - 1];
3788
3789 x = PyDict_SetItem(self->memo, key, value);
3790 Py_DECREF(key);
3791 return x;
3792}
3793
3794static int
3795do_append(UnpicklerObject *self, int x)
3796{
3797 PyObject *value;
3798 PyObject *list;
3799 int len, i;
3800
3801 len = self->stack->length;
3802 if (x > len || x <= 0)
3803 return stack_underflow();
3804 if (len == x) /* nothing to do */
3805 return 0;
3806
3807 list = self->stack->data[x - 1];
3808
3809 if (PyList_Check(list)) {
3810 PyObject *slice;
3811 Py_ssize_t list_len;
3812
3813 slice = Pdata_poplist(self->stack, x);
3814 if (!slice)
3815 return -1;
3816 list_len = PyList_GET_SIZE(list);
3817 i = PyList_SetSlice(list, list_len, list_len, slice);
3818 Py_DECREF(slice);
3819 return i;
3820 }
3821 else {
3822 PyObject *append_func;
3823
3824 append_func = PyObject_GetAttrString(list, "append");
3825 if (append_func == NULL)
3826 return -1;
3827 for (i = x; i < len; i++) {
3828 PyObject *result;
3829
3830 value = self->stack->data[i];
3831 result = unpickler_call(self, append_func, value);
3832 if (result == NULL) {
3833 Pdata_clear(self->stack, i + 1);
3834 self->stack->length = x;
3835 return -1;
3836 }
3837 Py_DECREF(result);
3838 }
3839 self->stack->length = x;
3840 }
3841
3842 return 0;
3843}
3844
3845static int
3846load_append(UnpicklerObject *self)
3847{
3848 return do_append(self, self->stack->length - 1);
3849}
3850
3851static int
3852load_appends(UnpicklerObject *self)
3853{
3854 return do_append(self, marker(self));
3855}
3856
3857static int
3858do_setitems(UnpicklerObject *self, int x)
3859{
3860 PyObject *value, *key;
3861 PyObject *dict;
3862 int len, i;
3863 int status = 0;
3864
3865 len = self->stack->length;
3866 if (x > len || x <= 0)
3867 return stack_underflow();
3868 if (len == x) /* nothing to do */
3869 return 0;
3870 if ((len - x) % 2 != 0) {
3871 /* Currupt or hostile pickle -- we never write one like this. */
3872 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3873 return -1;
3874 }
3875
3876 /* Here, dict does not actually need to be a PyDict; it could be anything
3877 that supports the __setitem__ attribute. */
3878 dict = self->stack->data[x - 1];
3879
3880 for (i = x + 1; i < len; i += 2) {
3881 key = self->stack->data[i - 1];
3882 value = self->stack->data[i];
3883 if (PyObject_SetItem(dict, key, value) < 0) {
3884 status = -1;
3885 break;
3886 }
3887 }
3888
3889 Pdata_clear(self->stack, x);
3890 return status;
3891}
3892
3893static int
3894load_setitem(UnpicklerObject *self)
3895{
3896 return do_setitems(self, self->stack->length - 2);
3897}
3898
3899static int
3900load_setitems(UnpicklerObject *self)
3901{
3902 return do_setitems(self, marker(self));
3903}
3904
3905static int
3906load_build(UnpicklerObject *self)
3907{
3908 PyObject *state, *inst, *slotstate;
3909 PyObject *setstate;
3910 int status = 0;
3911
3912 /* Stack is ... instance, state. We want to leave instance at
3913 * the stack top, possibly mutated via instance.__setstate__(state).
3914 */
3915 if (self->stack->length < 2)
3916 return stack_underflow();
3917
3918 PDATA_POP(self->stack, state);
3919 if (state == NULL)
3920 return -1;
3921
3922 inst = self->stack->data[self->stack->length - 1];
3923
3924 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003925 if (setstate == NULL) {
3926 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3927 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003928 else {
3929 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003930 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003931 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003932 }
3933 else {
3934 PyObject *result;
3935
3936 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003937 /* Ugh... this does not leak since unpickler_call() steals the
3938 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003939 result = unpickler_call(self, setstate, state);
3940 Py_DECREF(setstate);
3941 if (result == NULL)
3942 return -1;
3943 Py_DECREF(result);
3944 return 0;
3945 }
3946
3947 /* A default __setstate__. First see whether state embeds a
3948 * slot state dict too (a proto 2 addition).
3949 */
3950 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3951 PyObject *tmp = state;
3952
3953 state = PyTuple_GET_ITEM(tmp, 0);
3954 slotstate = PyTuple_GET_ITEM(tmp, 1);
3955 Py_INCREF(state);
3956 Py_INCREF(slotstate);
3957 Py_DECREF(tmp);
3958 }
3959 else
3960 slotstate = NULL;
3961
3962 /* Set inst.__dict__ from the state dict (if any). */
3963 if (state != Py_None) {
3964 PyObject *dict;
3965
3966 if (!PyDict_Check(state)) {
3967 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3968 goto error;
3969 }
3970 dict = PyObject_GetAttrString(inst, "__dict__");
3971 if (dict == NULL)
3972 goto error;
3973
3974 PyDict_Update(dict, state);
3975 Py_DECREF(dict);
3976 }
3977
3978 /* Also set instance attributes from the slotstate dict (if any). */
3979 if (slotstate != NULL) {
3980 PyObject *d_key, *d_value;
3981 Py_ssize_t i;
3982
3983 if (!PyDict_Check(slotstate)) {
3984 PyErr_SetString(UnpicklingError,
3985 "slot state is not a dictionary");
3986 goto error;
3987 }
3988 i = 0;
3989 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3990 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3991 goto error;
3992 }
3993 }
3994
3995 if (0) {
3996 error:
3997 status = -1;
3998 }
3999
4000 Py_DECREF(state);
4001 Py_XDECREF(slotstate);
4002 return status;
4003}
4004
4005static int
4006load_mark(UnpicklerObject *self)
4007{
4008
4009 /* Note that we split the (pickle.py) stack into two stacks, an
4010 * object stack and a mark stack. Here we push a mark onto the
4011 * mark stack.
4012 */
4013
4014 if ((self->num_marks + 1) >= self->marks_size) {
4015 size_t alloc;
4016 int *marks;
4017
4018 /* Use the size_t type to check for overflow. */
4019 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004020 if (alloc > PY_SSIZE_T_MAX ||
4021 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004022 PyErr_NoMemory();
4023 return -1;
4024 }
4025
4026 if (self->marks == NULL)
4027 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4028 else
4029 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4030 if (marks == NULL) {
4031 PyErr_NoMemory();
4032 return -1;
4033 }
4034 self->marks = marks;
4035 self->marks_size = (Py_ssize_t)alloc;
4036 }
4037
4038 self->marks[self->num_marks++] = self->stack->length;
4039
4040 return 0;
4041}
4042
4043static int
4044load_reduce(UnpicklerObject *self)
4045{
4046 PyObject *callable = NULL;
4047 PyObject *argtup = NULL;
4048 PyObject *obj = NULL;
4049
4050 PDATA_POP(self->stack, argtup);
4051 if (argtup == NULL)
4052 return -1;
4053 PDATA_POP(self->stack, callable);
4054 if (callable) {
4055 obj = instantiate(callable, argtup);
4056 Py_DECREF(callable);
4057 }
4058 Py_DECREF(argtup);
4059
4060 if (obj == NULL)
4061 return -1;
4062
4063 PDATA_PUSH(self->stack, obj, -1);
4064 return 0;
4065}
4066
4067/* Just raises an error if we don't know the protocol specified. PROTO
4068 * is the first opcode for protocols >= 2.
4069 */
4070static int
4071load_proto(UnpicklerObject *self)
4072{
4073 char *s;
4074 int i;
4075
4076 if (unpickler_read(self, &s, 1) < 0)
4077 return -1;
4078
4079 i = (unsigned char)s[0];
4080 if (i <= HIGHEST_PROTOCOL)
4081 return 0;
4082
4083 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4084 return -1;
4085}
4086
4087static PyObject *
4088load(UnpicklerObject *self)
4089{
4090 PyObject *err;
4091 PyObject *value = NULL;
4092 char *s;
4093
4094 self->num_marks = 0;
4095 if (self->stack->length)
4096 Pdata_clear(self->stack, 0);
4097
4098 /* Convenient macros for the dispatch while-switch loop just below. */
4099#define OP(opcode, load_func) \
4100 case opcode: if (load_func(self) < 0) break; continue;
4101
4102#define OP_ARG(opcode, load_func, arg) \
4103 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4104
4105 while (1) {
4106 if (unpickler_read(self, &s, 1) < 0)
4107 break;
4108
4109 switch ((enum opcode)s[0]) {
4110 OP(NONE, load_none)
4111 OP(BININT, load_binint)
4112 OP(BININT1, load_binint1)
4113 OP(BININT2, load_binint2)
4114 OP(INT, load_int)
4115 OP(LONG, load_long)
4116 OP_ARG(LONG1, load_counted_long, 1)
4117 OP_ARG(LONG4, load_counted_long, 4)
4118 OP(FLOAT, load_float)
4119 OP(BINFLOAT, load_binfloat)
4120 OP(BINBYTES, load_binbytes)
4121 OP(SHORT_BINBYTES, load_short_binbytes)
4122 OP(BINSTRING, load_binstring)
4123 OP(SHORT_BINSTRING, load_short_binstring)
4124 OP(STRING, load_string)
4125 OP(UNICODE, load_unicode)
4126 OP(BINUNICODE, load_binunicode)
4127 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4128 OP_ARG(TUPLE1, load_counted_tuple, 1)
4129 OP_ARG(TUPLE2, load_counted_tuple, 2)
4130 OP_ARG(TUPLE3, load_counted_tuple, 3)
4131 OP(TUPLE, load_tuple)
4132 OP(EMPTY_LIST, load_empty_list)
4133 OP(LIST, load_list)
4134 OP(EMPTY_DICT, load_empty_dict)
4135 OP(DICT, load_dict)
4136 OP(OBJ, load_obj)
4137 OP(INST, load_inst)
4138 OP(NEWOBJ, load_newobj)
4139 OP(GLOBAL, load_global)
4140 OP(APPEND, load_append)
4141 OP(APPENDS, load_appends)
4142 OP(BUILD, load_build)
4143 OP(DUP, load_dup)
4144 OP(BINGET, load_binget)
4145 OP(LONG_BINGET, load_long_binget)
4146 OP(GET, load_get)
4147 OP(MARK, load_mark)
4148 OP(BINPUT, load_binput)
4149 OP(LONG_BINPUT, load_long_binput)
4150 OP(PUT, load_put)
4151 OP(POP, load_pop)
4152 OP(POP_MARK, load_pop_mark)
4153 OP(SETITEM, load_setitem)
4154 OP(SETITEMS, load_setitems)
4155 OP(PERSID, load_persid)
4156 OP(BINPERSID, load_binpersid)
4157 OP(REDUCE, load_reduce)
4158 OP(PROTO, load_proto)
4159 OP_ARG(EXT1, load_extension, 1)
4160 OP_ARG(EXT2, load_extension, 2)
4161 OP_ARG(EXT4, load_extension, 4)
4162 OP_ARG(NEWTRUE, load_bool, Py_True)
4163 OP_ARG(NEWFALSE, load_bool, Py_False)
4164
4165 case STOP:
4166 break;
4167
4168 case '\0':
4169 PyErr_SetNone(PyExc_EOFError);
4170 return NULL;
4171
4172 default:
4173 PyErr_Format(UnpicklingError,
4174 "invalid load key, '%c'.", s[0]);
4175 return NULL;
4176 }
4177
4178 break; /* and we are done! */
4179 }
4180
4181 /* XXX: It is not clear what this is actually for. */
4182 if ((err = PyErr_Occurred())) {
4183 if (err == PyExc_EOFError) {
4184 PyErr_SetNone(PyExc_EOFError);
4185 }
4186 return NULL;
4187 }
4188
4189 PDATA_POP(self->stack, value);
4190 return value;
4191}
4192
4193PyDoc_STRVAR(Unpickler_load_doc,
4194"load() -> object. Load a pickle."
4195"\n"
4196"Read a pickled object representation from the open file object given in\n"
4197"the constructor, and return the reconstituted object hierarchy specified\n"
4198"therein.\n");
4199
4200static PyObject *
4201Unpickler_load(UnpicklerObject *self)
4202{
4203 /* Check whether the Unpickler was initialized correctly. This prevents
4204 segfaulting if a subclass overridden __init__ with a function that does
4205 not call Unpickler.__init__(). Here, we simply ensure that self->read
4206 is not NULL. */
4207 if (self->read == NULL) {
4208 PyErr_Format(UnpicklingError,
4209 "Unpickler.__init__() was not called by %s.__init__()",
4210 Py_TYPE(self)->tp_name);
4211 return NULL;
4212 }
4213
4214 return load(self);
4215}
4216
4217/* The name of find_class() is misleading. In newer pickle protocols, this
4218 function is used for loading any global (i.e., functions), not just
4219 classes. The name is kept only for backward compatibility. */
4220
4221PyDoc_STRVAR(Unpickler_find_class_doc,
4222"find_class(module_name, global_name) -> object.\n"
4223"\n"
4224"Return an object from a specified module, importing the module if\n"
4225"necessary. Subclasses may override this method (e.g. to restrict\n"
4226"unpickling of arbitrary classes and functions).\n"
4227"\n"
4228"This method is called whenever a class or a function object is\n"
4229"needed. Both arguments passed are str objects.\n");
4230
4231static PyObject *
4232Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4233{
4234 PyObject *global;
4235 PyObject *modules_dict;
4236 PyObject *module;
4237 PyObject *module_name, *global_name;
4238
4239 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4240 &module_name, &global_name))
4241 return NULL;
4242
4243 modules_dict = PySys_GetObject("modules");
4244 if (modules_dict == NULL)
4245 return NULL;
4246
4247 module = PyDict_GetItem(modules_dict, module_name);
4248 if (module == NULL) {
4249 module = PyImport_Import(module_name);
4250 if (module == NULL)
4251 return NULL;
4252 global = PyObject_GetAttr(module, global_name);
4253 Py_DECREF(module);
4254 }
4255 else {
4256 global = PyObject_GetAttr(module, global_name);
4257 }
4258 return global;
4259}
4260
4261static struct PyMethodDef Unpickler_methods[] = {
4262 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4263 Unpickler_load_doc},
4264 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4265 Unpickler_find_class_doc},
4266 {NULL, NULL} /* sentinel */
4267};
4268
4269static void
4270Unpickler_dealloc(UnpicklerObject *self)
4271{
4272 PyObject_GC_UnTrack((PyObject *)self);
4273 Py_XDECREF(self->readline);
4274 Py_XDECREF(self->read);
4275 Py_XDECREF(self->memo);
4276 Py_XDECREF(self->stack);
4277 Py_XDECREF(self->pers_func);
4278 Py_XDECREF(self->arg);
4279 Py_XDECREF(self->last_string);
4280
4281 PyMem_Free(self->marks);
4282 free(self->encoding);
4283 free(self->errors);
4284
4285 Py_TYPE(self)->tp_free((PyObject *)self);
4286}
4287
4288static int
4289Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4290{
4291 Py_VISIT(self->readline);
4292 Py_VISIT(self->read);
4293 Py_VISIT(self->memo);
4294 Py_VISIT(self->stack);
4295 Py_VISIT(self->pers_func);
4296 Py_VISIT(self->arg);
4297 Py_VISIT(self->last_string);
4298 return 0;
4299}
4300
4301static int
4302Unpickler_clear(UnpicklerObject *self)
4303{
4304 Py_CLEAR(self->readline);
4305 Py_CLEAR(self->read);
4306 Py_CLEAR(self->memo);
4307 Py_CLEAR(self->stack);
4308 Py_CLEAR(self->pers_func);
4309 Py_CLEAR(self->arg);
4310 Py_CLEAR(self->last_string);
4311
4312 PyMem_Free(self->marks);
4313 self->marks = NULL;
4314 free(self->encoding);
4315 self->encoding = NULL;
4316 free(self->errors);
4317 self->errors = NULL;
4318
4319 return 0;
4320}
4321
4322PyDoc_STRVAR(Unpickler_doc,
4323"Unpickler(file, *, encoding='ASCII', errors='strict')"
4324"\n"
4325"This takes a binary file for reading a pickle data stream.\n"
4326"\n"
4327"The protocol version of the pickle is detected automatically, so no\n"
4328"proto argument is needed.\n"
4329"\n"
4330"The file-like object must have two methods, a read() method\n"
4331"that takes an integer argument, and a readline() method that\n"
4332"requires no arguments. Both methods should return bytes.\n"
4333"Thus file-like object can be a binary file object opened for\n"
4334"reading, a BytesIO object, or any other custom object that\n"
4335"meets this interface.\n"
4336"\n"
4337"Optional keyword arguments are encoding and errors, which are\n"
4338"used to decode 8-bit string instances pickled by Python 2.x.\n"
4339"These default to 'ASCII' and 'strict', respectively.\n");
4340
4341static int
4342Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4343{
4344 static char *kwlist[] = {"file", "encoding", "errors", 0};
4345 PyObject *file;
4346 char *encoding = NULL;
4347 char *errors = NULL;
4348
4349 /* XXX: That is an horrible error message. But, I don't know how to do
4350 better... */
4351 if (Py_SIZE(args) != 1) {
4352 PyErr_Format(PyExc_TypeError,
4353 "%s takes exactly one positional argument (%zd given)",
4354 Py_TYPE(self)->tp_name, Py_SIZE(args));
4355 return -1;
4356 }
4357
4358 /* Arguments parsing needs to be done in the __init__() method to allow
4359 subclasses to define their own __init__() method, which may (or may
4360 not) support Unpickler arguments. However, this means we need to be
4361 extra careful in the other Unpickler methods, since a subclass could
4362 forget to call Unpickler.__init__() thus breaking our internal
4363 invariants. */
4364 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4365 &file, &encoding, &errors))
4366 return -1;
4367
4368 /* In case of multiple __init__() calls, clear previous content. */
4369 if (self->read != NULL)
4370 (void)Unpickler_clear(self);
4371
4372 self->read = PyObject_GetAttrString(file, "read");
4373 self->readline = PyObject_GetAttrString(file, "readline");
4374 if (self->readline == NULL || self->read == NULL)
4375 return -1;
4376
4377 if (encoding == NULL)
4378 encoding = "ASCII";
4379 if (errors == NULL)
4380 errors = "strict";
4381
4382 self->encoding = strdup(encoding);
4383 self->errors = strdup(errors);
4384 if (self->encoding == NULL || self->errors == NULL) {
4385 PyErr_NoMemory();
4386 return -1;
4387 }
4388
4389 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4390 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4391 "persistent_load");
4392 if (self->pers_func == NULL)
4393 return -1;
4394 }
4395 else {
4396 self->pers_func = NULL;
4397 }
4398
4399 self->stack = (Pdata *)Pdata_New();
4400 if (self->stack == NULL)
4401 return -1;
4402
4403 self->memo = PyDict_New();
4404 if (self->memo == NULL)
4405 return -1;
4406
4407 return 0;
4408}
4409
4410static PyObject *
4411Unpickler_get_memo(UnpicklerObject *self)
4412{
4413 if (self->memo == NULL)
4414 PyErr_SetString(PyExc_AttributeError, "memo");
4415 else
4416 Py_INCREF(self->memo);
4417 return self->memo;
4418}
4419
4420static int
4421Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4422{
4423 PyObject *tmp;
4424
4425 if (value == NULL) {
4426 PyErr_SetString(PyExc_TypeError,
4427 "attribute deletion is not supported");
4428 return -1;
4429 }
4430 if (!PyDict_Check(value)) {
4431 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4432 return -1;
4433 }
4434
4435 tmp = self->memo;
4436 Py_INCREF(value);
4437 self->memo = value;
4438 Py_XDECREF(tmp);
4439
4440 return 0;
4441}
4442
4443static PyObject *
4444Unpickler_get_persload(UnpicklerObject *self)
4445{
4446 if (self->pers_func == NULL)
4447 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4448 else
4449 Py_INCREF(self->pers_func);
4450 return self->pers_func;
4451}
4452
4453static int
4454Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4455{
4456 PyObject *tmp;
4457
4458 if (value == NULL) {
4459 PyErr_SetString(PyExc_TypeError,
4460 "attribute deletion is not supported");
4461 return -1;
4462 }
4463 if (!PyCallable_Check(value)) {
4464 PyErr_SetString(PyExc_TypeError,
4465 "persistent_load must be a callable taking "
4466 "one argument");
4467 return -1;
4468 }
4469
4470 tmp = self->pers_func;
4471 Py_INCREF(value);
4472 self->pers_func = value;
4473 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4474
4475 return 0;
4476}
4477
4478static PyGetSetDef Unpickler_getsets[] = {
4479 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4480 {"persistent_load", (getter)Unpickler_get_persload,
4481 (setter)Unpickler_set_persload},
4482 {NULL}
4483};
4484
4485static PyTypeObject Unpickler_Type = {
4486 PyVarObject_HEAD_INIT(NULL, 0)
4487 "_pickle.Unpickler", /*tp_name*/
4488 sizeof(UnpicklerObject), /*tp_basicsize*/
4489 0, /*tp_itemsize*/
4490 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4491 0, /*tp_print*/
4492 0, /*tp_getattr*/
4493 0, /*tp_setattr*/
4494 0, /*tp_compare*/
4495 0, /*tp_repr*/
4496 0, /*tp_as_number*/
4497 0, /*tp_as_sequence*/
4498 0, /*tp_as_mapping*/
4499 0, /*tp_hash*/
4500 0, /*tp_call*/
4501 0, /*tp_str*/
4502 0, /*tp_getattro*/
4503 0, /*tp_setattro*/
4504 0, /*tp_as_buffer*/
4505 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4506 Unpickler_doc, /*tp_doc*/
4507 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4508 (inquiry)Unpickler_clear, /*tp_clear*/
4509 0, /*tp_richcompare*/
4510 0, /*tp_weaklistoffset*/
4511 0, /*tp_iter*/
4512 0, /*tp_iternext*/
4513 Unpickler_methods, /*tp_methods*/
4514 0, /*tp_members*/
4515 Unpickler_getsets, /*tp_getset*/
4516 0, /*tp_base*/
4517 0, /*tp_dict*/
4518 0, /*tp_descr_get*/
4519 0, /*tp_descr_set*/
4520 0, /*tp_dictoffset*/
4521 (initproc)Unpickler_init, /*tp_init*/
4522 PyType_GenericAlloc, /*tp_alloc*/
4523 PyType_GenericNew, /*tp_new*/
4524 PyObject_GC_Del, /*tp_free*/
4525 0, /*tp_is_gc*/
4526};
4527
4528static int
4529init_stuff(void)
4530{
4531 PyObject *copyreg;
4532
4533 copyreg = PyImport_ImportModule("copyreg");
4534 if (!copyreg)
4535 return -1;
4536
4537 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4538 if (!dispatch_table)
4539 goto error;
4540
4541 extension_registry = \
4542 PyObject_GetAttrString(copyreg, "_extension_registry");
4543 if (!extension_registry)
4544 goto error;
4545
4546 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4547 if (!inverted_registry)
4548 goto error;
4549
4550 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4551 if (!extension_cache)
4552 goto error;
4553
4554 Py_DECREF(copyreg);
4555
4556 empty_tuple = PyTuple_New(0);
4557 if (empty_tuple == NULL)
4558 return -1;
4559
4560 two_tuple = PyTuple_New(2);
4561 if (two_tuple == NULL)
4562 return -1;
4563 /* We use this temp container with no regard to refcounts, or to
4564 * keeping containees alive. Exempt from GC, because we don't
4565 * want anything looking at two_tuple() by magic.
4566 */
4567 PyObject_GC_UnTrack(two_tuple);
4568
4569 return 0;
4570
4571 error:
4572 Py_DECREF(copyreg);
4573 return -1;
4574}
4575
4576static struct PyModuleDef _picklemodule = {
4577 PyModuleDef_HEAD_INIT,
4578 "_pickle",
4579 pickle_module_doc,
4580 -1,
4581 NULL,
4582 NULL,
4583 NULL,
4584 NULL,
4585 NULL
4586};
4587
4588PyMODINIT_FUNC
4589PyInit__pickle(void)
4590{
4591 PyObject *m;
4592
4593 if (PyType_Ready(&Unpickler_Type) < 0)
4594 return NULL;
4595 if (PyType_Ready(&Pickler_Type) < 0)
4596 return NULL;
4597 if (PyType_Ready(&Pdata_Type) < 0)
4598 return NULL;
4599
4600 /* Create the module and add the functions. */
4601 m = PyModule_Create(&_picklemodule);
4602 if (m == NULL)
4603 return NULL;
4604
4605 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4606 return NULL;
4607 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4608 return NULL;
4609
4610 /* Initialize the exceptions. */
4611 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4612 if (PickleError == NULL)
4613 return NULL;
4614 PicklingError = \
4615 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4616 if (PicklingError == NULL)
4617 return NULL;
4618 UnpicklingError = \
4619 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4620 if (UnpicklingError == NULL)
4621 return NULL;
4622
4623 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4624 return NULL;
4625 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4626 return NULL;
4627 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4628 return NULL;
4629
4630 if (init_stuff() < 0)
4631 return NULL;
4632
4633 return m;
4634}