blob: b0686afe9d5d50a472dd745196391d63c26a0f45 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions handled by this module.
   Bump HIGHEST_PROTOCOL when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,   /* newest protocol version defined here */
    DEFAULT_PROTOCOL = 3    /* protocol version used by default */
};
12
13
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py. */
enum opcode {
    /* Protocols 0 and 1. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
78/* These aren't opcodes -- they're ways to pickle bools before protocol 2
79 * so that unpicklers written before bools were introduced unpickle them
80 * as ints, but unpicklers after can recognize that bools were intended.
81 * Note that protocol 2 added direct ways to pickle bools.
82 */
/* Text-mode spellings of the two bools: an INT opcode ('I') followed by
   "00" or "01" and a newline. */
#undef FALSE
#define FALSE "I00\n"
#undef TRUE
#define TRUE "I01\n"
87
/* Tuning constants for the Pickler. */
enum {
    /* Number of elements batch_list/dict() pumps out before doing
       APPENDS/SETITEMS.  Keep in synch with pickle.Pickler._BATCHSIZE.
       Nothing will break if this gets out of synch with pickle.py, but it's
       unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size, in bytes, of the Pickler's write buffer.  Higher values reduce
       the number of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the enclosing function returns ER.  The reference is
   released here in that case: the caller has already given it up and gets
   no chance to clean up after the `return`.  O is evaluated exactly once. */
#define PDATA_PUSH(D, O, ER) do {                               \
        PyObject *pdata_push_obj_ = (O);                        \
        if (Pdata_push((D), pdata_push_obj_) < 0) {             \
            Py_XDECREF(pdata_push_obj_);                        \
            return (ER);                                        \
        }                                                       \
    } while (0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the added reference is dropped again before the
   enclosing function returns ER, so the caller's own reference is the only
   one left.  O is evaluated exactly once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        PyObject *pdata_append_obj_ = (O);                      \
        Py_INCREF(pdata_append_obj_);                           \
        if (Pdata_push((D), pdata_append_obj_) < 0) {           \
            Py_DECREF(pdata_append_obj_);                       \
            return (ER);                                        \
        }                                                       \
    } while (0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
352/* Helpers for creating the argument tuple passed to functions. This has the
353 performance advantage of calling PyTuple_New() only once. */
354
/* Put obj in slot 0 of the cached 1-tuple self->arg, creating the tuple on
   first use.  The reference to obj is always consumed: it is stored in the
   tuple (replacing and releasing any previous item), or released if the
   tuple could not be allocated, in which case self->arg stays NULL. */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Drop the cached tuple if somebody else holds a reference to it, so that
   the next ARG_TUP allocates a fresh one.  self->arg must not be NULL. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1) {                       \
            Py_CLEAR((self)->arg);                              \
        }                                                       \
    } while (0)
369
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefit?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e., call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e., call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But, that is probably more related to the function call
   overhead than to the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, they seem to steal the reference to 'arg' and borrow the reference
   to 'func'.
 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
489 return -1;
490 }
491
492 Py_XDECREF(self->last_string);
493 self->last_string = data;
494
495 if (!(*s = PyBytes_AS_STRING(data)))
496 return -1;
497
498 return n;
499}
500
501static Py_ssize_t
502unpickler_readline(UnpicklerObject *self, char **s)
503{
504 PyObject *data;
505
506 data = PyObject_CallObject(self->readline, empty_tuple);
507 if (data == NULL)
508 return -1;
509
510 /* XXX: Should bytearray be supported too? */
511 if (!PyBytes_Check(data)) {
512 PyErr_SetString(PyExc_ValueError,
513 "readline() from the underlying stream did not"
514 "return bytes");
515 return -1;
516 }
517
518 Py_XDECREF(self->last_string);
519 self->last_string = data;
520
521 if (!(*s = PyBytes_AS_STRING(data)))
522 return -1;
523
524 return PyBytes_GET_SIZE(data);
525}
526
527/* Generate a GET opcode for an object stored in the memo. The 'key' argument
528 should be the address of the object as returned by PyLong_FromVoidPtr(). */
529static int
530memo_get(PicklerObject *self, PyObject *key)
531{
532 PyObject *value;
533 PyObject *memo_id;
534 long x;
535 char pdata[30];
536 int len;
537
538 value = PyDict_GetItemWithError(self->memo, key);
539 if (value == NULL) {
540 if (!PyErr_Occurred())
541 PyErr_SetObject(PyExc_KeyError, key);
542 return -1;
543 }
544
545 memo_id = PyTuple_GetItem(value, 0);
546 if (memo_id == NULL)
547 return -1;
548
549 if (!PyLong_Check(memo_id)) {
550 PyErr_SetString(PicklingError, "memo id must be an integer");
551 return -1;
552 }
553 x = PyLong_AsLong(memo_id);
554 if (x == -1 && PyErr_Occurred())
555 return -1;
556
557 if (!self->bin) {
558 pdata[0] = GET;
559 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
560 len = (int)strlen(pdata);
561 }
562 else {
563 if (x < 256) {
564 pdata[0] = BINGET;
565 pdata[1] = (unsigned char)(x & 0xff);
566 len = 2;
567 }
568 else if (x <= 0xffffffffL) {
569 pdata[0] = LONG_BINGET;
570 pdata[1] = (unsigned char)(x & 0xff);
571 pdata[2] = (unsigned char)((x >> 8) & 0xff);
572 pdata[3] = (unsigned char)((x >> 16) & 0xff);
573 pdata[4] = (unsigned char)((x >> 24) & 0xff);
574 len = 5;
575 }
576 else { /* unlikely */
577 PyErr_SetString(PicklingError,
578 "memo id too large for LONG_BINGET");
579 return -1;
580 }
581 }
582
583 if (pickler_write(self, pdata, len) < 0)
584 return -1;
585
586 return 0;
587}
588
589/* Store an object in the memo, assign it a new unique ID based on the number
590 of objects currently stored in the memo and generate a PUT opcode. */
591static int
592memo_put(PicklerObject *self, PyObject *obj)
593{
594 PyObject *key = NULL;
595 PyObject *memo_id = NULL;
596 PyObject *tuple = NULL;
597 long x;
598 char pdata[30];
599 int len;
600 int status = 0;
601
602 if (self->fast)
603 return 0;
604
605 key = PyLong_FromVoidPtr(obj);
606 if (key == NULL)
607 goto error;
608 if ((x = PyDict_Size(self->memo)) < 0)
609 goto error;
610 memo_id = PyLong_FromLong(x);
611 if (memo_id == NULL)
612 goto error;
613 tuple = PyTuple_New(2);
614 if (tuple == NULL)
615 goto error;
616
617 Py_INCREF(memo_id);
618 PyTuple_SET_ITEM(tuple, 0, memo_id);
619 Py_INCREF(obj);
620 PyTuple_SET_ITEM(tuple, 1, obj);
621 if (PyDict_SetItem(self->memo, key, tuple) < 0)
622 goto error;
623
624 if (!self->bin) {
625 pdata[0] = PUT;
626 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
627 len = strlen(pdata);
628 }
629 else {
630 if (x < 256) {
631 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000632 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000633 len = 2;
634 }
635 else if (x <= 0xffffffffL) {
636 pdata[0] = LONG_BINPUT;
637 pdata[1] = (unsigned char)(x & 0xff);
638 pdata[2] = (unsigned char)((x >> 8) & 0xff);
639 pdata[3] = (unsigned char)((x >> 16) & 0xff);
640 pdata[4] = (unsigned char)((x >> 24) & 0xff);
641 len = 5;
642 }
643 else { /* unlikely */
644 PyErr_SetString(PicklingError,
645 "memo id too large for LONG_BINPUT");
646 return -1;
647 }
648 }
649
650 if (pickler_write(self, pdata, len) < 0)
651 goto error;
652
653 if (0) {
654 error:
655 status = -1;
656 }
657
658 Py_XDECREF(key);
659 Py_XDECREF(memo_id);
660 Py_XDECREF(tuple);
661
662 return status;
663}
664
665static PyObject *
666whichmodule(PyObject *global, PyObject *global_name)
667{
668 Py_ssize_t i, j;
669 static PyObject *module_str = NULL;
670 static PyObject *main_str = NULL;
671 PyObject *module_name;
672 PyObject *modules_dict;
673 PyObject *module;
674 PyObject *obj;
675
676 if (module_str == NULL) {
677 module_str = PyUnicode_InternFromString("__module__");
678 if (module_str == NULL)
679 return NULL;
680 main_str = PyUnicode_InternFromString("__main__");
681 if (main_str == NULL)
682 return NULL;
683 }
684
685 module_name = PyObject_GetAttr(global, module_str);
686
687 /* In some rare cases (e.g., random.getrandbits), __module__ can be
688 None. If it is so, then search sys.modules for the module of
689 global. */
690 if (module_name == Py_None) {
691 Py_DECREF(module_name);
692 goto search;
693 }
694
695 if (module_name) {
696 return module_name;
697 }
698 if (PyErr_ExceptionMatches(PyExc_AttributeError))
699 PyErr_Clear();
700 else
701 return NULL;
702
703 search:
704 modules_dict = PySys_GetObject("modules");
705 if (modules_dict == NULL)
706 return NULL;
707
708 i = 0;
709 module_name = NULL;
710 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
711 if (PyObject_Compare(module_name, main_str) == 0)
712 continue;
713
714 obj = PyObject_GetAttr(module, global_name);
715 if (obj == NULL) {
716 if (PyErr_ExceptionMatches(PyExc_AttributeError))
717 PyErr_Clear();
718 else
719 return NULL;
720 continue;
721 }
722
723 if (obj != global) {
724 Py_DECREF(obj);
725 continue;
726 }
727
728 Py_DECREF(obj);
729 break;
730 }
731
732 /* If no module is found, use __main__. */
733 if (!j) {
734 module_name = main_str;
735 }
736
737 Py_INCREF(module_name);
738 return module_name;
739}
740
/* fast_save_enter() and fast_save_leave() are guards against recursive
   objects when Pickler is used with the "fast mode" (i.e., with object
   memoization disabled). If the nesting of a list or dict object exceeds
   FAST_NESTING_LIMIT, these guards will start keeping an internal
   reference to the seen list or dict objects and check whether these objects
   are recursive. These are not strictly necessary, since save() has a
   hard-coded recursion limit, but they give a nicer error message than the
   typical RuntimeError. */
749static int
750fast_save_enter(PicklerObject *self, PyObject *obj)
751{
752 /* if fast_nesting < 0, we're doing an error exit. */
753 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
754 PyObject *key = NULL;
755 if (self->fast_memo == NULL) {
756 self->fast_memo = PyDict_New();
757 if (self->fast_memo == NULL) {
758 self->fast_nesting = -1;
759 return 0;
760 }
761 }
762 key = PyLong_FromVoidPtr(obj);
763 if (key == NULL)
764 return 0;
765 if (PyDict_GetItem(self->fast_memo, key)) {
766 Py_DECREF(key);
767 PyErr_Format(PyExc_ValueError,
768 "fast mode: can't pickle cyclic objects "
769 "including object type %.200s at %p",
770 obj->ob_type->tp_name, obj);
771 self->fast_nesting = -1;
772 return 0;
773 }
774 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
775 Py_DECREF(key);
776 self->fast_nesting = -1;
777 return 0;
778 }
779 Py_DECREF(key);
780 }
781 return 1;
782}
783
784static int
785fast_save_leave(PicklerObject *self, PyObject *obj)
786{
787 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
788 PyObject *key = PyLong_FromVoidPtr(obj);
789 if (key == NULL)
790 return 0;
791 if (PyDict_DelItem(self->fast_memo, key) < 0) {
792 Py_DECREF(key);
793 return 0;
794 }
795 Py_DECREF(key);
796 }
797 return 1;
798}
799
800static int
801save_none(PicklerObject *self, PyObject *obj)
802{
803 const char none_op = NONE;
804 if (pickler_write(self, &none_op, 1) < 0)
805 return -1;
806
807 return 0;
808}
809
810static int
811save_bool(PicklerObject *self, PyObject *obj)
812{
813 static const char *buf[2] = { FALSE, TRUE };
814 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
815 int p = (obj == Py_True);
816
817 if (self->proto >= 2) {
818 const char bool_op = p ? NEWTRUE : NEWFALSE;
819 if (pickler_write(self, &bool_op, 1) < 0)
820 return -1;
821 }
822 else if (pickler_write(self, buf[p], len[p]) < 0)
823 return -1;
824
825 return 0;
826}
827
828static int
829save_int(PicklerObject *self, long x)
830{
831 char pdata[32];
832 int len = 0;
833
834 if (!self->bin
835#if SIZEOF_LONG > 4
836 || x > 0x7fffffffL || x < -0x80000000L
837#endif
838 ) {
839 /* Text-mode pickle, or long too big to fit in the 4-byte
840 * signed BININT format: store as a string.
841 */
842 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
843 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
844 if (pickler_write(self, pdata, strlen(pdata)) < 0)
845 return -1;
846 }
847 else {
848 /* Binary pickle and x fits in a signed 4-byte int. */
849 pdata[1] = (unsigned char)(x & 0xff);
850 pdata[2] = (unsigned char)((x >> 8) & 0xff);
851 pdata[3] = (unsigned char)((x >> 16) & 0xff);
852 pdata[4] = (unsigned char)((x >> 24) & 0xff);
853
854 if ((pdata[4] == 0) && (pdata[3] == 0)) {
855 if (pdata[2] == 0) {
856 pdata[0] = BININT1;
857 len = 2;
858 }
859 else {
860 pdata[0] = BININT2;
861 len = 3;
862 }
863 }
864 else {
865 pdata[0] = BININT;
866 len = 5;
867 }
868
869 if (pickler_write(self, pdata, len) < 0)
870 return -1;
871 }
872
873 return 0;
874}
875
876static int
877save_long(PicklerObject *self, PyObject *obj)
878{
879 PyObject *repr = NULL;
880 Py_ssize_t size;
881 long val = PyLong_AsLong(obj);
882 int status = 0;
883
884 const char long_op = LONG;
885
886 if (val == -1 && PyErr_Occurred()) {
887 /* out of range for int pickling */
888 PyErr_Clear();
889 }
890 else
891 return save_int(self, val);
892
893 if (self->proto >= 2) {
894 /* Linear-time pickling. */
895 size_t nbits;
896 size_t nbytes;
897 unsigned char *pdata;
898 char header[5];
899 int i;
900 int sign = _PyLong_Sign(obj);
901
902 if (sign == 0) {
903 header[0] = LONG1;
904 header[1] = 0; /* It's 0 -- an empty bytestring. */
905 if (pickler_write(self, header, 2) < 0)
906 goto error;
907 return 0;
908 }
909 nbits = _PyLong_NumBits(obj);
910 if (nbits == (size_t)-1 && PyErr_Occurred())
911 goto error;
912 /* How many bytes do we need? There are nbits >> 3 full
913 * bytes of data, and nbits & 7 leftover bits. If there
914 * are any leftover bits, then we clearly need another
915 * byte. Wnat's not so obvious is that we *probably*
916 * need another byte even if there aren't any leftovers:
917 * the most-significant bit of the most-significant byte
918 * acts like a sign bit, and it's usually got a sense
919 * opposite of the one we need. The exception is longs
920 * of the form -(2**(8*j-1)) for j > 0. Such a long is
921 * its own 256's-complement, so has the right sign bit
922 * even without the extra byte. That's a pain to check
923 * for in advance, though, so we always grab an extra
924 * byte at the start, and cut it back later if possible.
925 */
926 nbytes = (nbits >> 3) + 1;
927 if (nbytes > INT_MAX) {
928 PyErr_SetString(PyExc_OverflowError,
929 "long too large to pickle");
930 goto error;
931 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000932 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000933 if (repr == NULL)
934 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000935 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000936 i = _PyLong_AsByteArray((PyLongObject *)obj,
937 pdata, nbytes,
938 1 /* little endian */ , 1 /* signed */ );
939 if (i < 0)
940 goto error;
941 /* If the long is negative, this may be a byte more than
942 * needed. This is so iff the MSB is all redundant sign
943 * bits.
944 */
945 if (sign < 0 &&
946 nbytes > 1 &&
947 pdata[nbytes - 1] == 0xff &&
948 (pdata[nbytes - 2] & 0x80) != 0) {
949 nbytes--;
950 }
951
952 if (nbytes < 256) {
953 header[0] = LONG1;
954 header[1] = (unsigned char)nbytes;
955 size = 2;
956 }
957 else {
958 header[0] = LONG4;
959 size = (int)nbytes;
960 for (i = 1; i < 5; i++) {
961 header[i] = (unsigned char)(size & 0xff);
962 size >>= 8;
963 }
964 size = 5;
965 }
966 if (pickler_write(self, header, size) < 0 ||
967 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
968 goto error;
969 }
970 else {
971 char *string;
972
973 /* proto < 2: write the repr and newline. This is quadratic-time
974 (in the number of digits), in both directions. */
975
976 repr = PyObject_Repr(obj);
977 if (repr == NULL)
978 goto error;
979
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000980 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000981 if (string == NULL)
982 goto error;
983
984 if (pickler_write(self, &long_op, 1) < 0 ||
985 pickler_write(self, string, size) < 0 ||
986 pickler_write(self, "\n", 1) < 0)
987 goto error;
988 }
989
990 if (0) {
991 error:
992 status = -1;
993 }
994 Py_XDECREF(repr);
995
996 return status;
997}
998
999static int
1000save_float(PicklerObject *self, PyObject *obj)
1001{
1002 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1003
1004 if (self->bin) {
1005 char pdata[9];
1006 pdata[0] = BINFLOAT;
1007 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1008 return -1;
1009 if (pickler_write(self, pdata, 9) < 0)
1010 return -1;
1011 }
1012 else {
1013 char pdata[250];
1014 pdata[0] = FLOAT;
1015 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1016 /* Extend the formatted string with a newline character */
1017 strcat(pdata, "\n");
1018
1019 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1020 return -1;
1021 }
1022
1023 return 0;
1024}
1025
1026static int
1027save_bytes(PicklerObject *self, PyObject *obj)
1028{
1029 if (self->proto < 3) {
1030 /* Older pickle protocols do not have an opcode for pickling bytes
1031 objects. Therefore, we need to fake the copy protocol (i.e.,
1032 the __reduce__ method) to permit bytes object unpickling. */
1033 PyObject *reduce_value = NULL;
1034 PyObject *bytelist = NULL;
1035 int status;
1036
1037 bytelist = PySequence_List(obj);
1038 if (bytelist == NULL)
1039 return -1;
1040
1041 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1042 bytelist);
1043 if (reduce_value == NULL) {
1044 Py_DECREF(bytelist);
1045 return -1;
1046 }
1047
1048 /* save_reduce() will memoize the object automatically. */
1049 status = save_reduce(self, reduce_value, obj);
1050 Py_DECREF(reduce_value);
1051 Py_DECREF(bytelist);
1052 return status;
1053 }
1054 else {
1055 Py_ssize_t size;
1056 char header[5];
1057 int len;
1058
1059 size = PyBytes_Size(obj);
1060 if (size < 0)
1061 return -1;
1062
1063 if (size < 256) {
1064 header[0] = SHORT_BINBYTES;
1065 header[1] = (unsigned char)size;
1066 len = 2;
1067 }
1068 else if (size <= 0xffffffffL) {
1069 header[0] = BINBYTES;
1070 header[1] = (unsigned char)(size & 0xff);
1071 header[2] = (unsigned char)((size >> 8) & 0xff);
1072 header[3] = (unsigned char)((size >> 16) & 0xff);
1073 header[4] = (unsigned char)((size >> 24) & 0xff);
1074 len = 5;
1075 }
1076 else {
1077 return -1; /* string too large */
1078 }
1079
1080 if (pickler_write(self, header, len) < 0)
1081 return -1;
1082
1083 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1084 return -1;
1085
1086 if (memo_put(self, obj) < 0)
1087 return -1;
1088
1089 return 0;
1090 }
1091}
1092
1093/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1094 backslash and newline characters to \uXXXX escapes. */
1095static PyObject *
1096raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1097{
1098 PyObject *repr, *result;
1099 char *p;
1100 char *q;
1101
1102 static const char *hexdigits = "0123456789abcdef";
1103
1104#ifdef Py_UNICODE_WIDE
1105 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1106#else
1107 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1108#endif
1109 if (repr == NULL)
1110 return NULL;
1111 if (size == 0)
1112 goto done;
1113
1114 p = q = PyBytes_AS_STRING(repr);
1115 while (size-- > 0) {
1116 Py_UNICODE ch = *s++;
1117#ifdef Py_UNICODE_WIDE
1118 /* Map 32-bit characters to '\Uxxxxxxxx' */
1119 if (ch >= 0x10000) {
1120 *p++ = '\\';
1121 *p++ = 'U';
1122 *p++ = hexdigits[(ch >> 28) & 0xf];
1123 *p++ = hexdigits[(ch >> 24) & 0xf];
1124 *p++ = hexdigits[(ch >> 20) & 0xf];
1125 *p++ = hexdigits[(ch >> 16) & 0xf];
1126 *p++ = hexdigits[(ch >> 12) & 0xf];
1127 *p++ = hexdigits[(ch >> 8) & 0xf];
1128 *p++ = hexdigits[(ch >> 4) & 0xf];
1129 *p++ = hexdigits[ch & 15];
1130 }
1131 else
1132#endif
1133 /* Map 16-bit characters to '\uxxxx' */
1134 if (ch >= 256 || ch == '\\' || ch == '\n') {
1135 *p++ = '\\';
1136 *p++ = 'u';
1137 *p++ = hexdigits[(ch >> 12) & 0xf];
1138 *p++ = hexdigits[(ch >> 8) & 0xf];
1139 *p++ = hexdigits[(ch >> 4) & 0xf];
1140 *p++ = hexdigits[ch & 15];
1141 }
1142 /* Copy everything else as-is */
1143 else
1144 *p++ = (char) ch;
1145 }
1146 size = p - q;
1147
1148 done:
1149 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1150 Py_DECREF(repr);
1151 return result;
1152}
1153
1154static int
1155save_unicode(PicklerObject *self, PyObject *obj)
1156{
1157 Py_ssize_t size;
1158 PyObject *encoded = NULL;
1159
1160 if (self->bin) {
1161 char pdata[5];
1162
1163 encoded = PyUnicode_AsUTF8String(obj);
1164 if (encoded == NULL)
1165 goto error;
1166
1167 size = PyBytes_GET_SIZE(encoded);
1168 if (size < 0 || size > 0xffffffffL)
1169 goto error; /* string too large */
1170
1171 pdata[0] = BINUNICODE;
1172 pdata[1] = (unsigned char)(size & 0xff);
1173 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1174 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1175 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1176
1177 if (pickler_write(self, pdata, 5) < 0)
1178 goto error;
1179
1180 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1181 goto error;
1182 }
1183 else {
1184 const char unicode_op = UNICODE;
1185
1186 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1187 PyUnicode_GET_SIZE(obj));
1188 if (encoded == NULL)
1189 goto error;
1190
1191 if (pickler_write(self, &unicode_op, 1) < 0)
1192 goto error;
1193
1194 size = PyBytes_GET_SIZE(encoded);
1195 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1196 goto error;
1197
1198 if (pickler_write(self, "\n", 1) < 0)
1199 goto error;
1200 }
1201 if (memo_put(self, obj) < 0)
1202 goto error;
1203
1204 Py_DECREF(encoded);
1205 return 0;
1206
1207 error:
1208 Py_XDECREF(encoded);
1209 return -1;
1210}
1211
1212/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1213static int
1214store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1215{
1216 int i;
1217
1218 assert(PyTuple_Size(t) == len);
1219
1220 for (i = 0; i < len; i++) {
1221 PyObject *element = PyTuple_GET_ITEM(t, i);
1222
1223 if (element == NULL)
1224 return -1;
1225 if (save(self, element, 0) < 0)
1226 return -1;
1227 }
1228
1229 return 0;
1230}
1231
1232/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1233 * used across protocols to minimize the space needed to pickle them.
1234 * Tuples are also the only builtin immutable type that can be recursive
1235 * (a tuple can be reached from itself), and that requires some subtle
1236 * magic so that it works in all cases. IOW, this is a long routine.
1237 */
1238static int
1239save_tuple(PicklerObject *self, PyObject *obj)
1240{
1241 PyObject *memo_key = NULL;
1242 int len, i;
1243 int status = 0;
1244
1245 const char mark_op = MARK;
1246 const char tuple_op = TUPLE;
1247 const char pop_op = POP;
1248 const char pop_mark_op = POP_MARK;
1249 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1250
1251 if ((len = PyTuple_Size(obj)) < 0)
1252 return -1;
1253
1254 if (len == 0) {
1255 char pdata[2];
1256
1257 if (self->proto) {
1258 pdata[0] = EMPTY_TUPLE;
1259 len = 1;
1260 }
1261 else {
1262 pdata[0] = MARK;
1263 pdata[1] = TUPLE;
1264 len = 2;
1265 }
1266 if (pickler_write(self, pdata, len) < 0)
1267 return -1;
1268 return 0;
1269 }
1270
1271 /* id(tuple) isn't in the memo now. If it shows up there after
1272 * saving the tuple elements, the tuple must be recursive, in
1273 * which case we'll pop everything we put on the stack, and fetch
1274 * its value from the memo.
1275 */
1276 memo_key = PyLong_FromVoidPtr(obj);
1277 if (memo_key == NULL)
1278 return -1;
1279
1280 if (len <= 3 && self->proto >= 2) {
1281 /* Use TUPLE{1,2,3} opcodes. */
1282 if (store_tuple_elements(self, obj, len) < 0)
1283 goto error;
1284
1285 if (PyDict_GetItem(self->memo, memo_key)) {
1286 /* pop the len elements */
1287 for (i = 0; i < len; i++)
1288 if (pickler_write(self, &pop_op, 1) < 0)
1289 goto error;
1290 /* fetch from memo */
1291 if (memo_get(self, memo_key) < 0)
1292 goto error;
1293
1294 Py_DECREF(memo_key);
1295 return 0;
1296 }
1297 else { /* Not recursive. */
1298 if (pickler_write(self, len2opcode + len, 1) < 0)
1299 goto error;
1300 }
1301 goto memoize;
1302 }
1303
1304 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1305 * Generate MARK e1 e2 ... TUPLE
1306 */
1307 if (pickler_write(self, &mark_op, 1) < 0)
1308 goto error;
1309
1310 if (store_tuple_elements(self, obj, len) < 0)
1311 goto error;
1312
1313 if (PyDict_GetItem(self->memo, memo_key)) {
1314 /* pop the stack stuff we pushed */
1315 if (self->bin) {
1316 if (pickler_write(self, &pop_mark_op, 1) < 0)
1317 goto error;
1318 }
1319 else {
1320 /* Note that we pop one more than len, to remove
1321 * the MARK too.
1322 */
1323 for (i = 0; i <= len; i++)
1324 if (pickler_write(self, &pop_op, 1) < 0)
1325 goto error;
1326 }
1327 /* fetch from memo */
1328 if (memo_get(self, memo_key) < 0)
1329 goto error;
1330
1331 Py_DECREF(memo_key);
1332 return 0;
1333 }
1334 else { /* Not recursive. */
1335 if (pickler_write(self, &tuple_op, 1) < 0)
1336 goto error;
1337 }
1338
1339 memoize:
1340 if (memo_put(self, obj) < 0)
1341 goto error;
1342
1343 if (0) {
1344 error:
1345 status = -1;
1346 }
1347
1348 Py_DECREF(memo_key);
1349 return status;
1350}
1351
1352/* iter is an iterator giving items, and we batch up chunks of
1353 * MARK item item ... item APPENDS
1354 * opcode sequences. Calling code should have arranged to first create an
1355 * empty list, or list-like object, for the APPENDS to operate on.
1356 * Returns 0 on success, <0 on error.
1357 */
1358static int
1359batch_list(PicklerObject *self, PyObject *iter)
1360{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001361 PyObject *obj = NULL;
1362 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001363 int i, n;
1364
1365 const char mark_op = MARK;
1366 const char append_op = APPEND;
1367 const char appends_op = APPENDS;
1368
1369 assert(iter != NULL);
1370
1371 /* XXX: I think this function could be made faster by avoiding the
1372 iterator interface and fetching objects directly from list using
1373 PyList_GET_ITEM.
1374 */
1375
1376 if (self->proto == 0) {
1377 /* APPENDS isn't available; do one at a time. */
1378 for (;;) {
1379 obj = PyIter_Next(iter);
1380 if (obj == NULL) {
1381 if (PyErr_Occurred())
1382 return -1;
1383 break;
1384 }
1385 i = save(self, obj, 0);
1386 Py_DECREF(obj);
1387 if (i < 0)
1388 return -1;
1389 if (pickler_write(self, &append_op, 1) < 0)
1390 return -1;
1391 }
1392 return 0;
1393 }
1394
1395 /* proto > 0: write in batches of BATCHSIZE. */
1396 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001397 /* Get first item */
1398 firstitem = PyIter_Next(iter);
1399 if (firstitem == NULL) {
1400 if (PyErr_Occurred())
1401 goto error;
1402
1403 /* nothing more to add */
1404 break;
1405 }
1406
1407 /* Try to get a second item */
1408 obj = PyIter_Next(iter);
1409 if (obj == NULL) {
1410 if (PyErr_Occurred())
1411 goto error;
1412
1413 /* Only one item to write */
1414 if (save(self, firstitem, 0) < 0)
1415 goto error;
1416 if (pickler_write(self, &append_op, 1) < 0)
1417 goto error;
1418 Py_CLEAR(firstitem);
1419 break;
1420 }
1421
1422 /* More than one item to write */
1423
1424 /* Pump out MARK, items, APPENDS. */
1425 if (pickler_write(self, &mark_op, 1) < 0)
1426 goto error;
1427
1428 if (save(self, firstitem, 0) < 0)
1429 goto error;
1430 Py_CLEAR(firstitem);
1431 n = 1;
1432
1433 /* Fetch and save up to BATCHSIZE items */
1434 while (obj) {
1435 if (save(self, obj, 0) < 0)
1436 goto error;
1437 Py_CLEAR(obj);
1438 n += 1;
1439
1440 if (n == BATCHSIZE)
1441 break;
1442
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001443 obj = PyIter_Next(iter);
1444 if (obj == NULL) {
1445 if (PyErr_Occurred())
1446 goto error;
1447 break;
1448 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001449 }
1450
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001451 if (pickler_write(self, &appends_op, 1) < 0)
1452 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001453
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001454 } while (n == BATCHSIZE);
1455 return 0;
1456
1457 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001458 Py_XDECREF(firstitem);
1459 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461}
1462
1463static int
1464save_list(PicklerObject *self, PyObject *obj)
1465{
1466 PyObject *iter;
1467 char header[3];
1468 int len;
1469 int status = 0;
1470
1471 if (self->fast && !fast_save_enter(self, obj))
1472 goto error;
1473
1474 /* Create an empty list. */
1475 if (self->bin) {
1476 header[0] = EMPTY_LIST;
1477 len = 1;
1478 }
1479 else {
1480 header[0] = MARK;
1481 header[1] = LIST;
1482 len = 2;
1483 }
1484
1485 if (pickler_write(self, header, len) < 0)
1486 goto error;
1487
1488 /* Get list length, and bow out early if empty. */
1489 if ((len = PyList_Size(obj)) < 0)
1490 goto error;
1491
1492 if (memo_put(self, obj) < 0)
1493 goto error;
1494
1495 if (len != 0) {
1496 /* Save the list elements. */
1497 iter = PyObject_GetIter(obj);
1498 if (iter == NULL)
1499 goto error;
1500 status = batch_list(self, iter);
1501 Py_DECREF(iter);
1502 }
1503
1504 if (0) {
1505 error:
1506 status = -1;
1507 }
1508
1509 if (self->fast && !fast_save_leave(self, obj))
1510 status = -1;
1511
1512 return status;
1513}
1514
1515/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1516 * MARK key value ... key value SETITEMS
1517 * opcode sequences. Calling code should have arranged to first create an
1518 * empty dict, or dict-like object, for the SETITEMS to operate on.
1519 * Returns 0 on success, <0 on error.
1520 *
1521 * This is very much like batch_list(). The difference between saving
1522 * elements directly, and picking apart two-tuples, is so long-winded at
1523 * the C level, though, that attempts to combine these routines were too
1524 * ugly to bear.
1525 */
1526static int
1527batch_dict(PicklerObject *self, PyObject *iter)
1528{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001529 PyObject *obj = NULL;
1530 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001531 int i, n;
1532
1533 const char mark_op = MARK;
1534 const char setitem_op = SETITEM;
1535 const char setitems_op = SETITEMS;
1536
1537 assert(iter != NULL);
1538
1539 if (self->proto == 0) {
1540 /* SETITEMS isn't available; do one at a time. */
1541 for (;;) {
1542 obj = PyIter_Next(iter);
1543 if (obj == NULL) {
1544 if (PyErr_Occurred())
1545 return -1;
1546 break;
1547 }
1548 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1549 PyErr_SetString(PyExc_TypeError, "dict items "
1550 "iterator must return 2-tuples");
1551 return -1;
1552 }
1553 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1554 if (i >= 0)
1555 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1556 Py_DECREF(obj);
1557 if (i < 0)
1558 return -1;
1559 if (pickler_write(self, &setitem_op, 1) < 0)
1560 return -1;
1561 }
1562 return 0;
1563 }
1564
1565 /* proto > 0: write in batches of BATCHSIZE. */
1566 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001567 /* Get first item */
1568 firstitem = PyIter_Next(iter);
1569 if (firstitem == NULL) {
1570 if (PyErr_Occurred())
1571 goto error;
1572
1573 /* nothing more to add */
1574 break;
1575 }
1576 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1577 PyErr_SetString(PyExc_TypeError, "dict items "
1578 "iterator must return 2-tuples");
1579 goto error;
1580 }
1581
1582 /* Try to get a second item */
1583 obj = PyIter_Next(iter);
1584 if (obj == NULL) {
1585 if (PyErr_Occurred())
1586 goto error;
1587
1588 /* Only one item to write */
1589 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1590 goto error;
1591 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1592 goto error;
1593 if (pickler_write(self, &setitem_op, 1) < 0)
1594 goto error;
1595 Py_CLEAR(firstitem);
1596 break;
1597 }
1598
1599 /* More than one item to write */
1600
1601 /* Pump out MARK, items, SETITEMS. */
1602 if (pickler_write(self, &mark_op, 1) < 0)
1603 goto error;
1604
1605 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1606 goto error;
1607 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1608 goto error;
1609 Py_CLEAR(firstitem);
1610 n = 1;
1611
1612 /* Fetch and save up to BATCHSIZE items */
1613 while (obj) {
1614 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1615 PyErr_SetString(PyExc_TypeError, "dict items "
1616 "iterator must return 2-tuples");
1617 goto error;
1618 }
1619 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1620 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1621 goto error;
1622 Py_CLEAR(obj);
1623 n += 1;
1624
1625 if (n == BATCHSIZE)
1626 break;
1627
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001628 obj = PyIter_Next(iter);
1629 if (obj == NULL) {
1630 if (PyErr_Occurred())
1631 goto error;
1632 break;
1633 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001634 }
1635
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001636 if (pickler_write(self, &setitems_op, 1) < 0)
1637 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001638
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001639 } while (n == BATCHSIZE);
1640 return 0;
1641
1642 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001643 Py_XDECREF(firstitem);
1644 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645 return -1;
1646}
1647
1648static int
1649save_dict(PicklerObject *self, PyObject *obj)
1650{
1651 PyObject *items, *iter;
1652 char header[3];
1653 int len;
1654 int status = 0;
1655
1656 if (self->fast && !fast_save_enter(self, obj))
1657 goto error;
1658
1659 /* Create an empty dict. */
1660 if (self->bin) {
1661 header[0] = EMPTY_DICT;
1662 len = 1;
1663 }
1664 else {
1665 header[0] = MARK;
1666 header[1] = DICT;
1667 len = 2;
1668 }
1669
1670 if (pickler_write(self, header, len) < 0)
1671 goto error;
1672
1673 /* Get dict size, and bow out early if empty. */
1674 if ((len = PyDict_Size(obj)) < 0)
1675 goto error;
1676
1677 if (memo_put(self, obj) < 0)
1678 goto error;
1679
1680 if (len != 0) {
1681 /* Save the dict items. */
1682 items = PyObject_CallMethod(obj, "items", "()");
1683 if (items == NULL)
1684 goto error;
1685 iter = PyObject_GetIter(items);
1686 Py_DECREF(items);
1687 if (iter == NULL)
1688 goto error;
1689 status = batch_dict(self, iter);
1690 Py_DECREF(iter);
1691 }
1692
1693 if (0) {
1694 error:
1695 status = -1;
1696 }
1697
1698 if (self->fast && !fast_save_leave(self, obj))
1699 status = -1;
1700
1701 return status;
1702}
1703
1704static int
1705save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1706{
1707 static PyObject *name_str = NULL;
1708 PyObject *global_name = NULL;
1709 PyObject *module_name = NULL;
1710 PyObject *module = NULL;
1711 PyObject *cls;
1712 int status = 0;
1713
1714 const char global_op = GLOBAL;
1715
1716 if (name_str == NULL) {
1717 name_str = PyUnicode_InternFromString("__name__");
1718 if (name_str == NULL)
1719 goto error;
1720 }
1721
1722 if (name) {
1723 global_name = name;
1724 Py_INCREF(global_name);
1725 }
1726 else {
1727 global_name = PyObject_GetAttr(obj, name_str);
1728 if (global_name == NULL)
1729 goto error;
1730 }
1731
1732 module_name = whichmodule(obj, global_name);
1733 if (module_name == NULL)
1734 goto error;
1735
1736 /* XXX: Change to use the import C API directly with level=0 to disallow
1737 relative imports.
1738
1739 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1740 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1741 custom import functions (IMHO, this would be a nice security
1742 feature). The import C API would need to be extended to support the
1743 extra parameters of __import__ to fix that. */
1744 module = PyImport_Import(module_name);
1745 if (module == NULL) {
1746 PyErr_Format(PicklingError,
1747 "Can't pickle %R: import of module %R failed",
1748 obj, module_name);
1749 goto error;
1750 }
1751 cls = PyObject_GetAttr(module, global_name);
1752 if (cls == NULL) {
1753 PyErr_Format(PicklingError,
1754 "Can't pickle %R: attribute lookup %S.%S failed",
1755 obj, module_name, global_name);
1756 goto error;
1757 }
1758 if (cls != obj) {
1759 Py_DECREF(cls);
1760 PyErr_Format(PicklingError,
1761 "Can't pickle %R: it's not the same object as %S.%S",
1762 obj, module_name, global_name);
1763 goto error;
1764 }
1765 Py_DECREF(cls);
1766
1767 if (self->proto >= 2) {
1768 /* See whether this is in the extension registry, and if
1769 * so generate an EXT opcode.
1770 */
1771 PyObject *code_obj; /* extension code as Python object */
1772 long code; /* extension code as C value */
1773 char pdata[5];
1774 int n;
1775
1776 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1777 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1778 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1779 /* The object is not registered in the extension registry.
1780 This is the most likely code path. */
1781 if (code_obj == NULL)
1782 goto gen_global;
1783
1784 /* XXX: pickle.py doesn't check neither the type, nor the range
1785 of the value returned by the extension_registry. It should for
1786 consistency. */
1787
1788 /* Verify code_obj has the right type and value. */
1789 if (!PyLong_Check(code_obj)) {
1790 PyErr_Format(PicklingError,
1791 "Can't pickle %R: extension code %R isn't an integer",
1792 obj, code_obj);
1793 goto error;
1794 }
1795 code = PyLong_AS_LONG(code_obj);
1796 if (code <= 0 || code > 0x7fffffffL) {
1797 PyErr_Format(PicklingError,
1798 "Can't pickle %R: extension code %ld is out of range",
1799 obj, code);
1800 goto error;
1801 }
1802
1803 /* Generate an EXT opcode. */
1804 if (code <= 0xff) {
1805 pdata[0] = EXT1;
1806 pdata[1] = (unsigned char)code;
1807 n = 2;
1808 }
1809 else if (code <= 0xffff) {
1810 pdata[0] = EXT2;
1811 pdata[1] = (unsigned char)(code & 0xff);
1812 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1813 n = 3;
1814 }
1815 else {
1816 pdata[0] = EXT4;
1817 pdata[1] = (unsigned char)(code & 0xff);
1818 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1819 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1820 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1821 n = 5;
1822 }
1823
1824 if (pickler_write(self, pdata, n) < 0)
1825 goto error;
1826 }
1827 else {
1828 /* Generate a normal global opcode if we are using a pickle
1829 protocol <= 2, or if the object is not registered in the
1830 extension registry. */
1831 PyObject *encoded;
1832 PyObject *(*unicode_encoder)(PyObject *);
1833
1834 gen_global:
1835 if (pickler_write(self, &global_op, 1) < 0)
1836 goto error;
1837
1838 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1839 the module name and the global name using UTF-8. We do so only when
1840 we are using the pickle protocol newer than version 3. This is to
1841 ensure compatibility with older Unpickler running on Python 2.x. */
1842 if (self->proto >= 3) {
1843 unicode_encoder = PyUnicode_AsUTF8String;
1844 }
1845 else {
1846 unicode_encoder = PyUnicode_AsASCIIString;
1847 }
1848
1849 /* Save the name of the module. */
1850 encoded = unicode_encoder(module_name);
1851 if (encoded == NULL) {
1852 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1853 PyErr_Format(PicklingError,
1854 "can't pickle module identifier '%S' using "
1855 "pickle protocol %i", module_name, self->proto);
1856 goto error;
1857 }
1858 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1859 PyBytes_GET_SIZE(encoded)) < 0) {
1860 Py_DECREF(encoded);
1861 goto error;
1862 }
1863 Py_DECREF(encoded);
1864 if(pickler_write(self, "\n", 1) < 0)
1865 goto error;
1866
1867 /* Save the name of the module. */
1868 encoded = unicode_encoder(global_name);
1869 if (encoded == NULL) {
1870 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1871 PyErr_Format(PicklingError,
1872 "can't pickle global identifier '%S' using "
1873 "pickle protocol %i", global_name, self->proto);
1874 goto error;
1875 }
1876 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1877 PyBytes_GET_SIZE(encoded)) < 0) {
1878 Py_DECREF(encoded);
1879 goto error;
1880 }
1881 Py_DECREF(encoded);
1882 if(pickler_write(self, "\n", 1) < 0)
1883 goto error;
1884
1885 /* Memoize the object. */
1886 if (memo_put(self, obj) < 0)
1887 goto error;
1888 }
1889
1890 if (0) {
1891 error:
1892 status = -1;
1893 }
1894 Py_XDECREF(module_name);
1895 Py_XDECREF(global_name);
1896 Py_XDECREF(module);
1897
1898 return status;
1899}
1900
1901static int
1902save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1903{
1904 PyObject *pid = NULL;
1905 int status = 0;
1906
1907 const char persid_op = PERSID;
1908 const char binpersid_op = BINPERSID;
1909
1910 Py_INCREF(obj);
1911 pid = pickler_call(self, func, obj);
1912 if (pid == NULL)
1913 return -1;
1914
1915 if (pid != Py_None) {
1916 if (self->bin) {
1917 if (save(self, pid, 1) < 0 ||
1918 pickler_write(self, &binpersid_op, 1) < 0)
1919 goto error;
1920 }
1921 else {
1922 PyObject *pid_str = NULL;
1923 char *pid_ascii_bytes;
1924 Py_ssize_t size;
1925
1926 pid_str = PyObject_Str(pid);
1927 if (pid_str == NULL)
1928 goto error;
1929
1930 /* XXX: Should it check whether the persistent id only contains
1931 ASCII characters? And what if the pid contains embedded
1932 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001933 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001934 Py_DECREF(pid_str);
1935 if (pid_ascii_bytes == NULL)
1936 goto error;
1937
1938 if (pickler_write(self, &persid_op, 1) < 0 ||
1939 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1940 pickler_write(self, "\n", 1) < 0)
1941 goto error;
1942 }
1943 status = 1;
1944 }
1945
1946 if (0) {
1947 error:
1948 status = -1;
1949 }
1950 Py_XDECREF(pid);
1951
1952 return status;
1953}
1954
1955/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1956 * appropriate __reduce__ method for obj.
1957 */
1958static int
1959save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1960{
1961 PyObject *callable;
1962 PyObject *argtup;
1963 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001964 PyObject *listitems = Py_None;
1965 PyObject *dictitems = Py_None;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001966
1967 int use_newobj = self->proto >= 2;
1968
1969 const char reduce_op = REDUCE;
1970 const char build_op = BUILD;
1971 const char newobj_op = NEWOBJ;
1972
1973 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1974 &callable, &argtup, &state, &listitems, &dictitems))
1975 return -1;
1976
1977 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001978 PyErr_SetString(PicklingError, "first item of the tuple "
1979 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001980 return -1;
1981 }
1982 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001983 PyErr_SetString(PicklingError, "second item of the tuple "
1984 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001985 return -1;
1986 }
1987
1988 if (state == Py_None)
1989 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001990
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001991 if (listitems == Py_None)
1992 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001993 else if (!PyIter_Check(listitems)) {
1994 PyErr_Format(PicklingError, "Fourth element of tuple"
1995 "returned by __reduce__ must be an iterator, not %s",
1996 Py_TYPE(listitems)->tp_name);
1997 return -1;
1998 }
1999
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002000 if (dictitems == Py_None)
2001 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002002 else if (!PyIter_Check(dictitems)) {
2003 PyErr_Format(PicklingError, "Fifth element of tuple"
2004 "returned by __reduce__ must be an iterator, not %s",
2005 Py_TYPE(dictitems)->tp_name);
2006 return -1;
2007 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002008
2009 /* Protocol 2 special case: if callable's name is __newobj__, use
2010 NEWOBJ. */
2011 if (use_newobj) {
2012 static PyObject *newobj_str = NULL;
2013 PyObject *name_str;
2014
2015 if (newobj_str == NULL) {
2016 newobj_str = PyUnicode_InternFromString("__newobj__");
2017 }
2018
2019 name_str = PyObject_GetAttrString(callable, "__name__");
2020 if (name_str == NULL) {
2021 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2022 PyErr_Clear();
2023 else
2024 return -1;
2025 use_newobj = 0;
2026 }
2027 else {
2028 use_newobj = PyUnicode_Check(name_str) &&
2029 PyUnicode_Compare(name_str, newobj_str) == 0;
2030 Py_DECREF(name_str);
2031 }
2032 }
2033 if (use_newobj) {
2034 PyObject *cls;
2035 PyObject *newargtup;
2036 PyObject *obj_class;
2037 int p;
2038
2039 /* Sanity checks. */
2040 if (Py_SIZE(argtup) < 1) {
2041 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2042 return -1;
2043 }
2044
2045 cls = PyTuple_GET_ITEM(argtup, 0);
2046 if (!PyObject_HasAttrString(cls, "__new__")) {
2047 PyErr_SetString(PicklingError, "args[0] from "
2048 "__newobj__ args has no __new__");
2049 return -1;
2050 }
2051
2052 if (obj != NULL) {
2053 obj_class = PyObject_GetAttrString(obj, "__class__");
2054 if (obj_class == NULL) {
2055 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2056 PyErr_Clear();
2057 else
2058 return -1;
2059 }
2060 p = obj_class != cls; /* true iff a problem */
2061 Py_DECREF(obj_class);
2062 if (p) {
2063 PyErr_SetString(PicklingError, "args[0] from "
2064 "__newobj__ args has the wrong class");
2065 return -1;
2066 }
2067 }
2068 /* XXX: These calls save() are prone to infinite recursion. Imagine
2069 what happen if the value returned by the __reduce__() method of
2070 some extension type contains another object of the same type. Ouch!
2071
2072 Here is a quick example, that I ran into, to illustrate what I
2073 mean:
2074
2075 >>> import pickle, copyreg
2076 >>> copyreg.dispatch_table.pop(complex)
2077 >>> pickle.dumps(1+2j)
2078 Traceback (most recent call last):
2079 ...
2080 RuntimeError: maximum recursion depth exceeded
2081
2082 Removing the complex class from copyreg.dispatch_table made the
2083 __reduce_ex__() method emit another complex object:
2084
2085 >>> (1+1j).__reduce_ex__(2)
2086 (<function __newobj__ at 0xb7b71c3c>,
2087 (<class 'complex'>, (1+1j)), None, None, None)
2088
2089 Thus when save() was called on newargstup (the 2nd item) recursion
2090 ensued. Of course, the bug was in the complex class which had a
2091 broken __getnewargs__() that emitted another complex object. But,
2092 the point, here, is it is quite easy to end up with a broken reduce
2093 function. */
2094
2095 /* Save the class and its __new__ arguments. */
2096 if (save(self, cls, 0) < 0)
2097 return -1;
2098
2099 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2100 if (newargtup == NULL)
2101 return -1;
2102
2103 p = save(self, newargtup, 0);
2104 Py_DECREF(newargtup);
2105 if (p < 0)
2106 return -1;
2107
2108 /* Add NEWOBJ opcode. */
2109 if (pickler_write(self, &newobj_op, 1) < 0)
2110 return -1;
2111 }
2112 else { /* Not using NEWOBJ. */
2113 if (save(self, callable, 0) < 0 ||
2114 save(self, argtup, 0) < 0 ||
2115 pickler_write(self, &reduce_op, 1) < 0)
2116 return -1;
2117 }
2118
2119 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2120 the caller do not want to memoize the object. Not particularly useful,
2121 but that is to mimic the behavior save_reduce() in pickle.py when
2122 obj is None. */
2123 if (obj && memo_put(self, obj) < 0)
2124 return -1;
2125
2126 if (listitems && batch_list(self, listitems) < 0)
2127 return -1;
2128
2129 if (dictitems && batch_dict(self, dictitems) < 0)
2130 return -1;
2131
2132 if (state) {
2133 if (save(self, state, 0) < 0 ||
2134 pickler_write(self, &build_op, 1) < 0)
2135 return -1;
2136 }
2137
2138 return 0;
2139}
2140
2141static int
2142save(PicklerObject *self, PyObject *obj, int pers_save)
2143{
2144 PyTypeObject *type;
2145 PyObject *reduce_func = NULL;
2146 PyObject *reduce_value = NULL;
2147 PyObject *memo_key = NULL;
2148 int status = 0;
Hirokazu Yamamoto1543a222008-11-04 00:31:31 +00002149 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002150
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002151 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2152 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002153
2154 /* The extra pers_save argument is necessary to avoid calling save_pers()
2155 on its returned object. */
2156 if (!pers_save && self->pers_func) {
2157 /* save_pers() returns:
2158 -1 to signal an error;
2159 0 if it did nothing successfully;
2160 1 if a persistent id was saved.
2161 */
2162 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2163 goto done;
2164 }
2165
2166 type = Py_TYPE(obj);
2167
2168 /* XXX: The old cPickle had an optimization that used switch-case
2169 statement dispatching on the first letter of the type name. It was
2170 probably not a bad idea after all. If benchmarks shows that particular
2171 optimization had some real benefits, it would be nice to add it
2172 back. */
2173
2174 /* Atom types; these aren't memoized, so don't check the memo. */
2175
2176 if (obj == Py_None) {
2177 status = save_none(self, obj);
2178 goto done;
2179 }
2180 else if (obj == Py_False || obj == Py_True) {
2181 status = save_bool(self, obj);
2182 goto done;
2183 }
2184 else if (type == &PyLong_Type) {
2185 status = save_long(self, obj);
2186 goto done;
2187 }
2188 else if (type == &PyFloat_Type) {
2189 status = save_float(self, obj);
2190 goto done;
2191 }
2192
2193 /* Check the memo to see if it has the object. If so, generate
2194 a GET (or BINGET) opcode, instead of pickling the object
2195 once again. */
2196 memo_key = PyLong_FromVoidPtr(obj);
2197 if (memo_key == NULL)
2198 goto error;
2199 if (PyDict_GetItem(self->memo, memo_key)) {
2200 if (memo_get(self, memo_key) < 0)
2201 goto error;
2202 goto done;
2203 }
2204
2205 if (type == &PyBytes_Type) {
2206 status = save_bytes(self, obj);
2207 goto done;
2208 }
2209 else if (type == &PyUnicode_Type) {
2210 status = save_unicode(self, obj);
2211 goto done;
2212 }
2213 else if (type == &PyDict_Type) {
2214 status = save_dict(self, obj);
2215 goto done;
2216 }
2217 else if (type == &PyList_Type) {
2218 status = save_list(self, obj);
2219 goto done;
2220 }
2221 else if (type == &PyTuple_Type) {
2222 status = save_tuple(self, obj);
2223 goto done;
2224 }
2225 else if (type == &PyType_Type) {
2226 status = save_global(self, obj, NULL);
2227 goto done;
2228 }
2229 else if (type == &PyFunction_Type) {
2230 status = save_global(self, obj, NULL);
2231 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2232 /* fall back to reduce */
2233 PyErr_Clear();
2234 }
2235 else {
2236 goto done;
2237 }
2238 }
2239 else if (type == &PyCFunction_Type) {
2240 status = save_global(self, obj, NULL);
2241 goto done;
2242 }
2243 else if (PyType_IsSubtype(type, &PyType_Type)) {
2244 status = save_global(self, obj, NULL);
2245 goto done;
2246 }
2247
2248 /* XXX: This part needs some unit tests. */
2249
2250 /* Get a reduction callable, and call it. This may come from
2251 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2252 * or the object's __reduce__ method.
2253 */
2254 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2255 if (reduce_func != NULL) {
2256 /* Here, the reference count of the reduce_func object returned by
2257 PyDict_GetItem needs to be increased to be consistent with the one
2258 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2259 reduce_func at the end of the save() routine.
2260 */
2261 Py_INCREF(reduce_func);
2262 Py_INCREF(obj);
2263 reduce_value = pickler_call(self, reduce_func, obj);
2264 }
2265 else {
2266 static PyObject *reduce_str = NULL;
2267 static PyObject *reduce_ex_str = NULL;
2268
2269 /* Cache the name of the reduce methods. */
2270 if (reduce_str == NULL) {
2271 reduce_str = PyUnicode_InternFromString("__reduce__");
2272 if (reduce_str == NULL)
2273 goto error;
2274 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2275 if (reduce_ex_str == NULL)
2276 goto error;
2277 }
2278
2279 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2280 automatically defined as __reduce__. While this is convenient, this
2281 make it impossible to know which method was actually called. Of
2282 course, this is not a big deal. But still, it would be nice to let
2283 the user know which method was called when something go
2284 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2285 don't actually have to check for a __reduce__ method. */
2286
2287 /* Check for a __reduce_ex__ method. */
2288 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2289 if (reduce_func != NULL) {
2290 PyObject *proto;
2291 proto = PyLong_FromLong(self->proto);
2292 if (proto != NULL) {
2293 reduce_value = pickler_call(self, reduce_func, proto);
2294 }
2295 }
2296 else {
2297 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2298 PyErr_Clear();
2299 else
2300 goto error;
2301 /* Check for a __reduce__ method. */
2302 reduce_func = PyObject_GetAttr(obj, reduce_str);
2303 if (reduce_func != NULL) {
2304 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2305 }
2306 else {
2307 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2308 type->tp_name, obj);
2309 goto error;
2310 }
2311 }
2312 }
2313
2314 if (reduce_value == NULL)
2315 goto error;
2316
2317 if (PyUnicode_Check(reduce_value)) {
2318 status = save_global(self, obj, reduce_value);
2319 goto done;
2320 }
2321
2322 if (!PyTuple_Check(reduce_value)) {
2323 PyErr_SetString(PicklingError,
2324 "__reduce__ must return a string or tuple");
2325 goto error;
2326 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002327
Hirokazu Yamamoto1543a222008-11-04 00:31:31 +00002328 size = PyTuple_Size(reduce_value);
2329 if (size < 2 || size > 5) {
2330 PyErr_SetString(PicklingError, "tuple returned by "
2331 "__reduce__ must contain 2 through 5 elements");
2332 goto error;
2333 }
2334
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002335 status = save_reduce(self, reduce_value, obj);
2336
2337 if (0) {
2338 error:
2339 status = -1;
2340 }
2341 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002342 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002343 Py_XDECREF(memo_key);
2344 Py_XDECREF(reduce_func);
2345 Py_XDECREF(reduce_value);
2346
2347 return status;
2348}
2349
2350static int
2351dump(PicklerObject *self, PyObject *obj)
2352{
2353 const char stop_op = STOP;
2354
2355 if (self->proto >= 2) {
2356 char header[2];
2357
2358 header[0] = PROTO;
2359 assert(self->proto >= 0 && self->proto < 256);
2360 header[1] = (unsigned char)self->proto;
2361 if (pickler_write(self, header, 2) < 0)
2362 return -1;
2363 }
2364
2365 if (save(self, obj, 0) < 0 ||
2366 pickler_write(self, &stop_op, 1) < 0 ||
2367 pickler_write(self, NULL, 0) < 0)
2368 return -1;
2369
2370 return 0;
2371}
2372
2373PyDoc_STRVAR(Pickler_clear_memo_doc,
2374"clear_memo() -> None. Clears the pickler's \"memo\"."
2375"\n"
2376"The memo is the data structure that remembers which objects the\n"
2377"pickler has already seen, so that shared or recursive objects are\n"
2378"pickled by reference and not by value. This method is useful when\n"
2379"re-using picklers.");
2380
2381static PyObject *
2382Pickler_clear_memo(PicklerObject *self)
2383{
2384 if (self->memo)
2385 PyDict_Clear(self->memo);
2386
2387 Py_RETURN_NONE;
2388}
2389
2390PyDoc_STRVAR(Pickler_dump_doc,
2391"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2392
2393static PyObject *
2394Pickler_dump(PicklerObject *self, PyObject *args)
2395{
2396 PyObject *obj;
2397
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002398 /* Check whether the Pickler was initialized correctly (issue3664).
2399 Developers often forget to call __init__() in their subclasses, which
2400 would trigger a segfault without this check. */
2401 if (self->write == NULL) {
2402 PyErr_Format(PicklingError,
2403 "Pickler.__init__() was not called by %s.__init__()",
2404 Py_TYPE(self)->tp_name);
2405 return NULL;
2406 }
2407
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002408 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2409 return NULL;
2410
2411 if (dump(self, obj) < 0)
2412 return NULL;
2413
2414 Py_RETURN_NONE;
2415}
2416
/* Method table of the Pickler type: maps the Python-visible method names to
 * their C implementations, calling conventions and docstrings. */
static struct PyMethodDef Pickler_methods[] = {
    /* dump(obj): takes its argument through an args tuple. */
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    /* clear_memo(): takes no arguments. */
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2424
/* tp_dealloc slot of Pickler: untrack the object from the garbage collector,
 * drop every owned reference, free the write buffer and finally release the
 * object's own memory through the type's tp_free. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector never sees a half-destroyed object. */
    PyObject_GC_UnTrack(self);

    /* Any of these may be NULL, hence Py_XDECREF. */
    Py_XDECREF(self->write);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    PyMem_Free(self->write_buf);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
2440
/* tp_traverse slot of Pickler: report every object reference held by the
 * pickler to the garbage collector's visit callback.  Returns 0 unless a
 * Py_VISIT call returns early with the callback's non-zero result. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->memo);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
2451
/* tp_clear slot of Pickler: drop every owned reference and free the write
 * buffer, leaving all of those fields NULL.  Also called by Pickler_init()
 * to reset an already-initialized pickler.  Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->write);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->fast_memo);

    /* Reset the pointer so a later free of the same field is harmless. */
    PyMem_Free(self->write_buf);
    self->write_buf = NULL;

    return 0;
}
2466
/* Docstring of the Pickler type (installed as tp_doc of Pickler_Type).
 * Keep the protocol numbers quoted here in sync with HIGHEST_PROTOCOL and
 * DEFAULT_PROTOCOL. */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported. The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
2486
2487static int
2488Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2489{
2490 static char *kwlist[] = {"file", "protocol", 0};
2491 PyObject *file;
2492 PyObject *proto_obj = NULL;
2493 long proto = 0;
2494
2495 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2496 kwlist, &file, &proto_obj))
2497 return -1;
2498
2499 /* In case of multiple __init__() calls, clear previous content. */
2500 if (self->write != NULL)
2501 (void)Pickler_clear(self);
2502
2503 if (proto_obj == NULL || proto_obj == Py_None)
2504 proto = DEFAULT_PROTOCOL;
2505 else
2506 proto = PyLong_AsLong(proto_obj);
2507
2508 if (proto < 0)
2509 proto = HIGHEST_PROTOCOL;
2510 if (proto > HIGHEST_PROTOCOL) {
2511 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2512 HIGHEST_PROTOCOL);
2513 return -1;
2514 }
2515
2516 self->proto = proto;
2517 self->bin = proto > 0;
2518 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002519 self->fast = 0;
2520 self->fast_nesting = 0;
2521 self->fast_memo = NULL;
2522
2523 if (!PyObject_HasAttrString(file, "write")) {
2524 PyErr_SetString(PyExc_TypeError,
2525 "file must have a 'write' attribute");
2526 return -1;
2527 }
2528 self->write = PyObject_GetAttrString(file, "write");
2529 if (self->write == NULL)
2530 return -1;
2531 self->buf_size = 0;
2532 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2533 if (self->write_buf == NULL) {
2534 PyErr_NoMemory();
2535 return -1;
2536 }
2537 self->pers_func = NULL;
2538 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2539 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2540 "persistent_id");
2541 if (self->pers_func == NULL)
2542 return -1;
2543 }
2544 self->memo = PyDict_New();
2545 if (self->memo == NULL)
2546 return -1;
2547
2548 return 0;
2549}
2550
2551static PyObject *
2552Pickler_get_memo(PicklerObject *self)
2553{
2554 if (self->memo == NULL)
2555 PyErr_SetString(PyExc_AttributeError, "memo");
2556 else
2557 Py_INCREF(self->memo);
2558 return self->memo;
2559}
2560
2561static int
2562Pickler_set_memo(PicklerObject *self, PyObject *value)
2563{
2564 PyObject *tmp;
2565
2566 if (value == NULL) {
2567 PyErr_SetString(PyExc_TypeError,
2568 "attribute deletion is not supported");
2569 return -1;
2570 }
2571 if (!PyDict_Check(value)) {
2572 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2573 return -1;
2574 }
2575
2576 tmp = self->memo;
2577 Py_INCREF(value);
2578 self->memo = value;
2579 Py_XDECREF(tmp);
2580
2581 return 0;
2582}
2583
2584static PyObject *
2585Pickler_get_persid(PicklerObject *self)
2586{
2587 if (self->pers_func == NULL)
2588 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2589 else
2590 Py_INCREF(self->pers_func);
2591 return self->pers_func;
2592}
2593
2594static int
2595Pickler_set_persid(PicklerObject *self, PyObject *value)
2596{
2597 PyObject *tmp;
2598
2599 if (value == NULL) {
2600 PyErr_SetString(PyExc_TypeError,
2601 "attribute deletion is not supported");
2602 return -1;
2603 }
2604 if (!PyCallable_Check(value)) {
2605 PyErr_SetString(PyExc_TypeError,
2606 "persistent_id must be a callable taking one argument");
2607 return -1;
2608 }
2609
2610 tmp = self->pers_func;
2611 Py_INCREF(value);
2612 self->pers_func = value;
2613 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2614
2615 return 0;
2616}
2617
/* Plain C int fields of PicklerObject exposed directly as Python attributes
 * ("bin" and "fast"). */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}                      /* sentinel */
};
2623
/* Computed attributes of Pickler: "memo" and "persistent_id", each backed by
 * a getter/setter pair that validates the value and manages references. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}                      /* sentinel */
};
2631
/* Type object for _pickle.Pickler.  The initializer is positional, so the
 * slot order below must match the PyTypeObject layout exactly.  The type is
 * subclassable (Py_TPFLAGS_BASETYPE) and takes part in cyclic garbage
 * collection (Py_TPFLAGS_HAVE_GC, with traverse/clear slots filled in). */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2674
2675/* Temporary helper for calling self.find_class().
2676
2677 XXX: It would be nice to able to avoid Python function call overhead, by
2678 using directly the C version of find_class(), when find_class() is not
2679 overridden by a subclass. Although, this could become rather hackish. A
2680 simpler optimization would be to call the C function when self is not a
2681 subclass instance. */
2682static PyObject *
2683find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2684{
2685 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2686 module_name, global_name);
2687}
2688
2689static int
2690marker(UnpicklerObject *self)
2691{
2692 if (self->num_marks < 1) {
2693 PyErr_SetString(UnpicklingError, "could not find MARK");
2694 return -1;
2695 }
2696
2697 return self->marks[--self->num_marks];
2698}
2699
2700static int
2701load_none(UnpicklerObject *self)
2702{
2703 PDATA_APPEND(self->stack, Py_None, -1);
2704 return 0;
2705}
2706
2707static int
2708bad_readline(void)
2709{
2710 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2711 return -1;
2712}
2713
2714static int
2715load_int(UnpicklerObject *self)
2716{
2717 PyObject *value;
2718 char *endptr, *s;
2719 Py_ssize_t len;
2720 long x;
2721
2722 if ((len = unpickler_readline(self, &s)) < 0)
2723 return -1;
2724 if (len < 2)
2725 return bad_readline();
2726
2727 errno = 0;
2728 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2729 x = strtol(s, &endptr, 0);
2730
2731 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2732 /* Hm, maybe we've got something long. Let's try reading
2733 * it as a Python long object. */
2734 errno = 0;
2735 /* XXX: Same thing about the base here. */
2736 value = PyLong_FromString(s, NULL, 0);
2737 if (value == NULL) {
2738 PyErr_SetString(PyExc_ValueError,
2739 "could not convert string to int");
2740 return -1;
2741 }
2742 }
2743 else {
2744 if (len == 3 && (x == 0 || x == 1)) {
2745 if ((value = PyBool_FromLong(x)) == NULL)
2746 return -1;
2747 }
2748 else {
2749 if ((value = PyLong_FromLong(x)) == NULL)
2750 return -1;
2751 }
2752 }
2753
2754 PDATA_PUSH(self->stack, value, -1);
2755 return 0;
2756}
2757
2758static int
2759load_bool(UnpicklerObject *self, PyObject *boolean)
2760{
2761 assert(boolean == Py_True || boolean == Py_False);
2762 PDATA_APPEND(self->stack, boolean, -1);
2763 return 0;
2764}
2765
/* Decode the `size` bytes at `bytes` as a little-endian integer and return
 * its value as a C long.
 *
 * Obscure: when size is 1 or 2 this is an unsigned little-endian int, but
 * when size is 4 (BININT, more accurately BININT4) it is a signed 32-bit
 * one.  This is an historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that a set bit is never
 * shifted into the sign position of a signed type, which is undefined
 * behaviour when long is 32 bits wide.  The 4-byte sign is then decoded
 * arithmetically, giving the same result whatever the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long bits = 0;
    int i;

    for (i = 0; i < size; i++) {
        bits |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, a 4-byte value is signed: when bit 31 is
     * set the result is (value - 2**32), computed here from the low 31 bits
     * so that no intermediate overflows a 32-bit long.
     */
    if (size == 4 && (bits & 0x80000000UL) != 0) {
        return (long)(bits & 0x7fffffffUL) - 0x7fffffffL - 1;
    }

    return (long)bits;
}
2792
2793static int
2794load_binintx(UnpicklerObject *self, char *s, int size)
2795{
2796 PyObject *value;
2797 long x;
2798
2799 x = calc_binint(s, size);
2800
2801 if ((value = PyLong_FromLong(x)) == NULL)
2802 return -1;
2803
2804 PDATA_PUSH(self->stack, value, -1);
2805 return 0;
2806}
2807
2808static int
2809load_binint(UnpicklerObject *self)
2810{
2811 char *s;
2812
2813 if (unpickler_read(self, &s, 4) < 0)
2814 return -1;
2815
2816 return load_binintx(self, s, 4);
2817}
2818
2819static int
2820load_binint1(UnpicklerObject *self)
2821{
2822 char *s;
2823
2824 if (unpickler_read(self, &s, 1) < 0)
2825 return -1;
2826
2827 return load_binintx(self, s, 1);
2828}
2829
2830static int
2831load_binint2(UnpicklerObject *self)
2832{
2833 char *s;
2834
2835 if (unpickler_read(self, &s, 2) < 0)
2836 return -1;
2837
2838 return load_binintx(self, s, 2);
2839}
2840
2841static int
2842load_long(UnpicklerObject *self)
2843{
2844 PyObject *value;
2845 char *s;
2846 Py_ssize_t len;
2847
2848 if ((len = unpickler_readline(self, &s)) < 0)
2849 return -1;
2850 if (len < 2)
2851 return bad_readline();
2852
2853 /* XXX: Should the base argument explicitly set to 10? */
2854 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2855 return -1;
2856
2857 PDATA_PUSH(self->stack, value, -1);
2858 return 0;
2859}
2860
2861/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2862 * data following.
2863 */
2864static int
2865load_counted_long(UnpicklerObject *self, int size)
2866{
2867 PyObject *value;
2868 char *nbytes;
2869 char *pdata;
2870
2871 assert(size == 1 || size == 4);
2872 if (unpickler_read(self, &nbytes, size) < 0)
2873 return -1;
2874
2875 size = calc_binint(nbytes, size);
2876 if (size < 0) {
2877 /* Corrupt or hostile pickle -- we never write one like this */
2878 PyErr_SetString(UnpicklingError,
2879 "LONG pickle has negative byte count");
2880 return -1;
2881 }
2882
2883 if (size == 0)
2884 value = PyLong_FromLong(0L);
2885 else {
2886 /* Read the raw little-endian bytes and convert. */
2887 if (unpickler_read(self, &pdata, size) < 0)
2888 return -1;
2889 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2890 1 /* little endian */ , 1 /* signed */ );
2891 }
2892 if (value == NULL)
2893 return -1;
2894 PDATA_PUSH(self->stack, value, -1);
2895 return 0;
2896}
2897
2898static int
2899load_float(UnpicklerObject *self)
2900{
2901 PyObject *value;
2902 char *endptr, *s;
2903 Py_ssize_t len;
2904 double d;
2905
2906 if ((len = unpickler_readline(self, &s)) < 0)
2907 return -1;
2908 if (len < 2)
2909 return bad_readline();
2910
2911 errno = 0;
2912 d = PyOS_ascii_strtod(s, &endptr);
2913
2914 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2915 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2916 return -1;
2917 }
2918
2919 if ((value = PyFloat_FromDouble(d)) == NULL)
2920 return -1;
2921
2922 PDATA_PUSH(self->stack, value, -1);
2923 return 0;
2924}
2925
2926static int
2927load_binfloat(UnpicklerObject *self)
2928{
2929 PyObject *value;
2930 double x;
2931 char *s;
2932
2933 if (unpickler_read(self, &s, 8) < 0)
2934 return -1;
2935
2936 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2937 if (x == -1.0 && PyErr_Occurred())
2938 return -1;
2939
2940 if ((value = PyFloat_FromDouble(x)) == NULL)
2941 return -1;
2942
2943 PDATA_PUSH(self->stack, value, -1);
2944 return 0;
2945}
2946
2947static int
2948load_string(UnpicklerObject *self)
2949{
2950 PyObject *bytes;
2951 PyObject *str = NULL;
2952 Py_ssize_t len;
2953 char *s, *p;
2954
2955 if ((len = unpickler_readline(self, &s)) < 0)
2956 return -1;
2957 if (len < 3)
2958 return bad_readline();
2959 if ((s = strdup(s)) == NULL) {
2960 PyErr_NoMemory();
2961 return -1;
2962 }
2963
2964 /* Strip outermost quotes */
2965 while (s[len - 1] <= ' ')
2966 len--;
2967 if (s[0] == '"' && s[len - 1] == '"') {
2968 s[len - 1] = '\0';
2969 p = s + 1;
2970 len -= 2;
2971 }
2972 else if (s[0] == '\'' && s[len - 1] == '\'') {
2973 s[len - 1] = '\0';
2974 p = s + 1;
2975 len -= 2;
2976 }
2977 else {
2978 free(s);
2979 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2980 return -1;
2981 }
2982
2983 /* Use the PyBytes API to decode the string, since that is what is used
2984 to encode, and then coerce the result to Unicode. */
2985 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2986 free(s);
2987 if (bytes == NULL)
2988 return -1;
2989 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2990 Py_DECREF(bytes);
2991 if (str == NULL)
2992 return -1;
2993
2994 PDATA_PUSH(self->stack, str, -1);
2995 return 0;
2996}
2997
2998static int
2999load_binbytes(UnpicklerObject *self)
3000{
3001 PyObject *bytes;
3002 long x;
3003 char *s;
3004
3005 if (unpickler_read(self, &s, 4) < 0)
3006 return -1;
3007
3008 x = calc_binint(s, 4);
3009 if (x < 0) {
3010 PyErr_SetString(UnpicklingError,
3011 "BINBYTES pickle has negative byte count");
3012 return -1;
3013 }
3014
3015 if (unpickler_read(self, &s, x) < 0)
3016 return -1;
3017 bytes = PyBytes_FromStringAndSize(s, x);
3018 if (bytes == NULL)
3019 return -1;
3020
3021 PDATA_PUSH(self->stack, bytes, -1);
3022 return 0;
3023}
3024
3025static int
3026load_short_binbytes(UnpicklerObject *self)
3027{
3028 PyObject *bytes;
3029 unsigned char x;
3030 char *s;
3031
3032 if (unpickler_read(self, &s, 1) < 0)
3033 return -1;
3034
3035 x = (unsigned char)s[0];
3036
3037 if (unpickler_read(self, &s, x) < 0)
3038 return -1;
3039
3040 bytes = PyBytes_FromStringAndSize(s, x);
3041 if (bytes == NULL)
3042 return -1;
3043
3044 PDATA_PUSH(self->stack, bytes, -1);
3045 return 0;
3046}
3047
3048static int
3049load_binstring(UnpicklerObject *self)
3050{
3051 PyObject *str;
3052 long x;
3053 char *s;
3054
3055 if (unpickler_read(self, &s, 4) < 0)
3056 return -1;
3057
3058 x = calc_binint(s, 4);
3059 if (x < 0) {
3060 PyErr_SetString(UnpicklingError,
3061 "BINSTRING pickle has negative byte count");
3062 return -1;
3063 }
3064
3065 if (unpickler_read(self, &s, x) < 0)
3066 return -1;
3067
3068 /* Convert Python 2.x strings to unicode. */
3069 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3070 if (str == NULL)
3071 return -1;
3072
3073 PDATA_PUSH(self->stack, str, -1);
3074 return 0;
3075}
3076
3077static int
3078load_short_binstring(UnpicklerObject *self)
3079{
3080 PyObject *str;
3081 unsigned char x;
3082 char *s;
3083
3084 if (unpickler_read(self, &s, 1) < 0)
3085 return -1;
3086
3087 x = (unsigned char)s[0];
3088
3089 if (unpickler_read(self, &s, x) < 0)
3090 return -1;
3091
3092 /* Convert Python 2.x strings to unicode. */
3093 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3094 if (str == NULL)
3095 return -1;
3096
3097 PDATA_PUSH(self->stack, str, -1);
3098 return 0;
3099}
3100
3101static int
3102load_unicode(UnpicklerObject *self)
3103{
3104 PyObject *str;
3105 Py_ssize_t len;
3106 char *s;
3107
3108 if ((len = unpickler_readline(self, &s)) < 0)
3109 return -1;
3110 if (len < 1)
3111 return bad_readline();
3112
3113 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3114 if (str == NULL)
3115 return -1;
3116
3117 PDATA_PUSH(self->stack, str, -1);
3118 return 0;
3119}
3120
3121static int
3122load_binunicode(UnpicklerObject *self)
3123{
3124 PyObject *str;
3125 long size;
3126 char *s;
3127
3128 if (unpickler_read(self, &s, 4) < 0)
3129 return -1;
3130
3131 size = calc_binint(s, 4);
3132 if (size < 0) {
3133 PyErr_SetString(UnpicklingError,
3134 "BINUNICODE pickle has negative byte count");
3135 return -1;
3136 }
3137
3138 if (unpickler_read(self, &s, size) < 0)
3139 return -1;
3140
3141 str = PyUnicode_DecodeUTF8(s, size, NULL);
3142 if (str == NULL)
3143 return -1;
3144
3145 PDATA_PUSH(self->stack, str, -1);
3146 return 0;
3147}
3148
3149static int
3150load_tuple(UnpicklerObject *self)
3151{
3152 PyObject *tuple;
3153 int i;
3154
3155 if ((i = marker(self)) < 0)
3156 return -1;
3157
3158 tuple = Pdata_poptuple(self->stack, i);
3159 if (tuple == NULL)
3160 return -1;
3161 PDATA_PUSH(self->stack, tuple, -1);
3162 return 0;
3163}
3164
3165static int
3166load_counted_tuple(UnpicklerObject *self, int len)
3167{
3168 PyObject *tuple;
3169
3170 tuple = PyTuple_New(len);
3171 if (tuple == NULL)
3172 return -1;
3173
3174 while (--len >= 0) {
3175 PyObject *item;
3176
3177 PDATA_POP(self->stack, item);
3178 if (item == NULL)
3179 return -1;
3180 PyTuple_SET_ITEM(tuple, len, item);
3181 }
3182 PDATA_PUSH(self->stack, tuple, -1);
3183 return 0;
3184}
3185
3186static int
3187load_empty_list(UnpicklerObject *self)
3188{
3189 PyObject *list;
3190
3191 if ((list = PyList_New(0)) == NULL)
3192 return -1;
3193 PDATA_PUSH(self->stack, list, -1);
3194 return 0;
3195}
3196
3197static int
3198load_empty_dict(UnpicklerObject *self)
3199{
3200 PyObject *dict;
3201
3202 if ((dict = PyDict_New()) == NULL)
3203 return -1;
3204 PDATA_PUSH(self->stack, dict, -1);
3205 return 0;
3206}
3207
3208static int
3209load_list(UnpicklerObject *self)
3210{
3211 PyObject *list;
3212 int i;
3213
3214 if ((i = marker(self)) < 0)
3215 return -1;
3216
3217 list = Pdata_poplist(self->stack, i);
3218 if (list == NULL)
3219 return -1;
3220 PDATA_PUSH(self->stack, list, -1);
3221 return 0;
3222}
3223
3224static int
3225load_dict(UnpicklerObject *self)
3226{
3227 PyObject *dict, *key, *value;
3228 int i, j, k;
3229
3230 if ((i = marker(self)) < 0)
3231 return -1;
3232 j = self->stack->length;
3233
3234 if ((dict = PyDict_New()) == NULL)
3235 return -1;
3236
3237 for (k = i + 1; k < j; k += 2) {
3238 key = self->stack->data[k - 1];
3239 value = self->stack->data[k];
3240 if (PyDict_SetItem(dict, key, value) < 0) {
3241 Py_DECREF(dict);
3242 return -1;
3243 }
3244 }
3245 Pdata_clear(self->stack, i);
3246 PDATA_PUSH(self->stack, dict, -1);
3247 return 0;
3248}
3249
3250static PyObject *
3251instantiate(PyObject *cls, PyObject *args)
3252{
3253 PyObject *r = NULL;
3254
3255 /* XXX: The pickle.py module does not create instances this way when the
3256 args tuple is empty. See Unpickler._instantiate(). */
3257 if ((r = PyObject_CallObject(cls, args)))
3258 return r;
3259
3260 /* XXX: Is this still nescessary? */
3261 {
3262 PyObject *tp, *v, *tb, *tmp_value;
3263
3264 PyErr_Fetch(&tp, &v, &tb);
3265 tmp_value = v;
3266 /* NULL occurs when there was a KeyboardInterrupt */
3267 if (tmp_value == NULL)
3268 tmp_value = Py_None;
3269 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3270 Py_XDECREF(v);
3271 v = r;
3272 }
3273 PyErr_Restore(tp, v, tb);
3274 }
3275 return NULL;
3276}
3277
3278static int
3279load_obj(UnpicklerObject *self)
3280{
3281 PyObject *cls, *args, *obj = NULL;
3282 int i;
3283
3284 if ((i = marker(self)) < 0)
3285 return -1;
3286
3287 args = Pdata_poptuple(self->stack, i + 1);
3288 if (args == NULL)
3289 return -1;
3290
3291 PDATA_POP(self->stack, cls);
3292 if (cls) {
3293 obj = instantiate(cls, args);
3294 Py_DECREF(cls);
3295 }
3296 Py_DECREF(args);
3297 if (obj == NULL)
3298 return -1;
3299
3300 PDATA_PUSH(self->stack, obj, -1);
3301 return 0;
3302}
3303
3304static int
3305load_inst(UnpicklerObject *self)
3306{
3307 PyObject *cls = NULL;
3308 PyObject *args = NULL;
3309 PyObject *obj = NULL;
3310 PyObject *module_name;
3311 PyObject *class_name;
3312 Py_ssize_t len;
3313 int i;
3314 char *s;
3315
3316 if ((i = marker(self)) < 0)
3317 return -1;
3318 if ((len = unpickler_readline(self, &s)) < 0)
3319 return -1;
3320 if (len < 2)
3321 return bad_readline();
3322
3323 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3324 identifiers are permitted in Python 3.0, since the INST opcode is only
3325 supported by older protocols on Python 2.x. */
3326 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3327 if (module_name == NULL)
3328 return -1;
3329
3330 if ((len = unpickler_readline(self, &s)) >= 0) {
3331 if (len < 2)
3332 return bad_readline();
3333 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3334 if (class_name == NULL) {
3335 cls = find_class(self, module_name, class_name);
3336 Py_DECREF(class_name);
3337 }
3338 }
3339 Py_DECREF(module_name);
3340
3341 if (cls == NULL)
3342 return -1;
3343
3344 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3345 obj = instantiate(cls, args);
3346 Py_DECREF(args);
3347 }
3348 Py_DECREF(cls);
3349
3350 if (obj == NULL)
3351 return -1;
3352
3353 PDATA_PUSH(self->stack, obj, -1);
3354 return 0;
3355}
3356
3357static int
3358load_newobj(UnpicklerObject *self)
3359{
3360 PyObject *args = NULL;
3361 PyObject *clsraw = NULL;
3362 PyTypeObject *cls; /* clsraw cast to its true type */
3363 PyObject *obj;
3364
3365 /* Stack is ... cls argtuple, and we want to call
3366 * cls.__new__(cls, *argtuple).
3367 */
3368 PDATA_POP(self->stack, args);
3369 if (args == NULL)
3370 goto error;
3371 if (!PyTuple_Check(args)) {
3372 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3373 goto error;
3374 }
3375
3376 PDATA_POP(self->stack, clsraw);
3377 cls = (PyTypeObject *)clsraw;
3378 if (cls == NULL)
3379 goto error;
3380 if (!PyType_Check(cls)) {
3381 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3382 "isn't a type object");
3383 goto error;
3384 }
3385 if (cls->tp_new == NULL) {
3386 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3387 "has NULL tp_new");
3388 goto error;
3389 }
3390
3391 /* Call __new__. */
3392 obj = cls->tp_new(cls, args, NULL);
3393 if (obj == NULL)
3394 goto error;
3395
3396 Py_DECREF(args);
3397 Py_DECREF(clsraw);
3398 PDATA_PUSH(self->stack, obj, -1);
3399 return 0;
3400
3401 error:
3402 Py_XDECREF(args);
3403 Py_XDECREF(clsraw);
3404 return -1;
3405}
3406
3407static int
3408load_global(UnpicklerObject *self)
3409{
3410 PyObject *global = NULL;
3411 PyObject *module_name;
3412 PyObject *global_name;
3413 Py_ssize_t len;
3414 char *s;
3415
3416 if ((len = unpickler_readline(self, &s)) < 0)
3417 return -1;
3418 if (len < 2)
3419 return bad_readline();
3420 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3421 if (!module_name)
3422 return -1;
3423
3424 if ((len = unpickler_readline(self, &s)) >= 0) {
3425 if (len < 2) {
3426 Py_DECREF(module_name);
3427 return bad_readline();
3428 }
3429 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3430 if (global_name) {
3431 global = find_class(self, module_name, global_name);
3432 Py_DECREF(global_name);
3433 }
3434 }
3435 Py_DECREF(module_name);
3436
3437 if (global == NULL)
3438 return -1;
3439 PDATA_PUSH(self->stack, global, -1);
3440 return 0;
3441}
3442
3443static int
3444load_persid(UnpicklerObject *self)
3445{
3446 PyObject *pid;
3447 Py_ssize_t len;
3448 char *s;
3449
3450 if (self->pers_func) {
3451 if ((len = unpickler_readline(self, &s)) < 0)
3452 return -1;
3453 if (len < 2)
3454 return bad_readline();
3455
3456 pid = PyBytes_FromStringAndSize(s, len - 1);
3457 if (pid == NULL)
3458 return -1;
3459
3460 /* Ugh... this does not leak since unpickler_call() steals the
3461 reference to pid first. */
3462 pid = unpickler_call(self, self->pers_func, pid);
3463 if (pid == NULL)
3464 return -1;
3465
3466 PDATA_PUSH(self->stack, pid, -1);
3467 return 0;
3468 }
3469 else {
3470 PyErr_SetString(UnpicklingError,
3471 "A load persistent id instruction was encountered,\n"
3472 "but no persistent_load function was specified.");
3473 return -1;
3474 }
3475}
3476
3477static int
3478load_binpersid(UnpicklerObject *self)
3479{
3480 PyObject *pid;
3481
3482 if (self->pers_func) {
3483 PDATA_POP(self->stack, pid);
3484 if (pid == NULL)
3485 return -1;
3486
3487 /* Ugh... this does not leak since unpickler_call() steals the
3488 reference to pid first. */
3489 pid = unpickler_call(self, self->pers_func, pid);
3490 if (pid == NULL)
3491 return -1;
3492
3493 PDATA_PUSH(self->stack, pid, -1);
3494 return 0;
3495 }
3496 else {
3497 PyErr_SetString(UnpicklingError,
3498 "A load persistent id instruction was encountered,\n"
3499 "but no persistent_load function was specified.");
3500 return -1;
3501 }
3502}
3503
3504static int
3505load_pop(UnpicklerObject *self)
3506{
3507 int len;
3508
3509 if ((len = self->stack->length) <= 0)
3510 return stack_underflow();
3511
3512 /* Note that we split the (pickle.py) stack into two stacks,
3513 * an object stack and a mark stack. We have to be clever and
3514 * pop the right one. We do this by looking at the top of the
3515 * mark stack.
3516 */
3517
3518 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3519 self->num_marks--;
3520 else {
3521 len--;
3522 Py_DECREF(self->stack->data[len]);
3523 self->stack->length = len;
3524 }
3525
3526 return 0;
3527}
3528
3529static int
3530load_pop_mark(UnpicklerObject *self)
3531{
3532 int i;
3533
3534 if ((i = marker(self)) < 0)
3535 return -1;
3536
3537 Pdata_clear(self->stack, i);
3538
3539 return 0;
3540}
3541
3542static int
3543load_dup(UnpicklerObject *self)
3544{
3545 PyObject *last;
3546 int len;
3547
3548 if ((len = self->stack->length) <= 0)
3549 return stack_underflow();
3550 last = self->stack->data[len - 1];
3551 PDATA_APPEND(self->stack, last, -1);
3552 return 0;
3553}
3554
3555static int
3556load_get(UnpicklerObject *self)
3557{
3558 PyObject *key, *value;
3559 Py_ssize_t len;
3560 char *s;
3561
3562 if ((len = unpickler_readline(self, &s)) < 0)
3563 return -1;
3564 if (len < 2)
3565 return bad_readline();
3566
3567 key = PyLong_FromString(s, NULL, 10);
3568 if (key == NULL)
3569 return -1;
3570
3571 value = PyDict_GetItemWithError(self->memo, key);
3572 if (value == NULL) {
3573 if (!PyErr_Occurred())
3574 PyErr_SetObject(PyExc_KeyError, key);
3575 Py_DECREF(key);
3576 return -1;
3577 }
3578 Py_DECREF(key);
3579
3580 PDATA_APPEND(self->stack, value, -1);
3581 return 0;
3582}
3583
3584static int
3585load_binget(UnpicklerObject *self)
3586{
3587 PyObject *key, *value;
3588 char *s;
3589
3590 if (unpickler_read(self, &s, 1) < 0)
3591 return -1;
3592
3593 /* Here, the unsigned cast is necessary to avoid negative values. */
3594 key = PyLong_FromLong((long)(unsigned char)s[0]);
3595 if (key == NULL)
3596 return -1;
3597
3598 value = PyDict_GetItemWithError(self->memo, key);
3599 if (value == NULL) {
3600 if (!PyErr_Occurred())
3601 PyErr_SetObject(PyExc_KeyError, key);
3602 Py_DECREF(key);
3603 return -1;
3604 }
3605 Py_DECREF(key);
3606
3607 PDATA_APPEND(self->stack, value, -1);
3608 return 0;
3609}
3610
3611static int
3612load_long_binget(UnpicklerObject *self)
3613{
3614 PyObject *key, *value;
3615 char *s;
3616 long k;
3617
3618 if (unpickler_read(self, &s, 4) < 0)
3619 return -1;
3620
3621 k = (long)(unsigned char)s[0];
3622 k |= (long)(unsigned char)s[1] << 8;
3623 k |= (long)(unsigned char)s[2] << 16;
3624 k |= (long)(unsigned char)s[3] << 24;
3625
3626 key = PyLong_FromLong(k);
3627 if (key == NULL)
3628 return -1;
3629
3630 value = PyDict_GetItemWithError(self->memo, key);
3631 if (value == NULL) {
3632 if (!PyErr_Occurred())
3633 PyErr_SetObject(PyExc_KeyError, key);
3634 Py_DECREF(key);
3635 return -1;
3636 }
3637 Py_DECREF(key);
3638
3639 PDATA_APPEND(self->stack, value, -1);
3640 return 0;
3641}
3642
3643/* Push an object from the extension registry (EXT[124]). nbytes is
3644 * the number of bytes following the opcode, holding the index (code) value.
3645 */
3646static int
3647load_extension(UnpicklerObject *self, int nbytes)
3648{
3649 char *codebytes; /* the nbytes bytes after the opcode */
3650 long code; /* calc_binint returns long */
3651 PyObject *py_code; /* code as a Python int */
3652 PyObject *obj; /* the object to push */
3653 PyObject *pair; /* (module_name, class_name) */
3654 PyObject *module_name, *class_name;
3655
3656 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3657 if (unpickler_read(self, &codebytes, nbytes) < 0)
3658 return -1;
3659 code = calc_binint(codebytes, nbytes);
3660 if (code <= 0) { /* note that 0 is forbidden */
3661 /* Corrupt or hostile pickle. */
3662 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3663 return -1;
3664 }
3665
3666 /* Look for the code in the cache. */
3667 py_code = PyLong_FromLong(code);
3668 if (py_code == NULL)
3669 return -1;
3670 obj = PyDict_GetItem(extension_cache, py_code);
3671 if (obj != NULL) {
3672 /* Bingo. */
3673 Py_DECREF(py_code);
3674 PDATA_APPEND(self->stack, obj, -1);
3675 return 0;
3676 }
3677
3678 /* Look up the (module_name, class_name) pair. */
3679 pair = PyDict_GetItem(inverted_registry, py_code);
3680 if (pair == NULL) {
3681 Py_DECREF(py_code);
3682 PyErr_Format(PyExc_ValueError, "unregistered extension "
3683 "code %ld", code);
3684 return -1;
3685 }
3686 /* Since the extension registry is manipulable via Python code,
3687 * confirm that pair is really a 2-tuple of strings.
3688 */
3689 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3690 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3691 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3692 Py_DECREF(py_code);
3693 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3694 "isn't a 2-tuple of strings", code);
3695 return -1;
3696 }
3697 /* Load the object. */
3698 obj = find_class(self, module_name, class_name);
3699 if (obj == NULL) {
3700 Py_DECREF(py_code);
3701 return -1;
3702 }
3703 /* Cache code -> obj. */
3704 code = PyDict_SetItem(extension_cache, py_code, obj);
3705 Py_DECREF(py_code);
3706 if (code < 0) {
3707 Py_DECREF(obj);
3708 return -1;
3709 }
3710 PDATA_PUSH(self->stack, obj, -1);
3711 return 0;
3712}
3713
3714static int
3715load_put(UnpicklerObject *self)
3716{
3717 PyObject *key, *value;
3718 Py_ssize_t len;
3719 char *s;
3720 int x;
3721
3722 if ((len = unpickler_readline(self, &s)) < 0)
3723 return -1;
3724 if (len < 2)
3725 return bad_readline();
3726 if ((x = self->stack->length) <= 0)
3727 return stack_underflow();
3728
3729 key = PyLong_FromString(s, NULL, 10);
3730 if (key == NULL)
3731 return -1;
3732 value = self->stack->data[x - 1];
3733
3734 x = PyDict_SetItem(self->memo, key, value);
3735 Py_DECREF(key);
3736 return x;
3737}
3738
3739static int
3740load_binput(UnpicklerObject *self)
3741{
3742 PyObject *key, *value;
3743 char *s;
3744 int x;
3745
3746 if (unpickler_read(self, &s, 1) < 0)
3747 return -1;
3748 if ((x = self->stack->length) <= 0)
3749 return stack_underflow();
3750
3751 key = PyLong_FromLong((long)(unsigned char)s[0]);
3752 if (key == NULL)
3753 return -1;
3754 value = self->stack->data[x - 1];
3755
3756 x = PyDict_SetItem(self->memo, key, value);
3757 Py_DECREF(key);
3758 return x;
3759}
3760
3761static int
3762load_long_binput(UnpicklerObject *self)
3763{
3764 PyObject *key, *value;
3765 long k;
3766 char *s;
3767 int x;
3768
3769 if (unpickler_read(self, &s, 4) < 0)
3770 return -1;
3771 if ((x = self->stack->length) <= 0)
3772 return stack_underflow();
3773
3774 k = (long)(unsigned char)s[0];
3775 k |= (long)(unsigned char)s[1] << 8;
3776 k |= (long)(unsigned char)s[2] << 16;
3777 k |= (long)(unsigned char)s[3] << 24;
3778
3779 key = PyLong_FromLong(k);
3780 if (key == NULL)
3781 return -1;
3782 value = self->stack->data[x - 1];
3783
3784 x = PyDict_SetItem(self->memo, key, value);
3785 Py_DECREF(key);
3786 return x;
3787}
3788
3789static int
3790do_append(UnpicklerObject *self, int x)
3791{
3792 PyObject *value;
3793 PyObject *list;
3794 int len, i;
3795
3796 len = self->stack->length;
3797 if (x > len || x <= 0)
3798 return stack_underflow();
3799 if (len == x) /* nothing to do */
3800 return 0;
3801
3802 list = self->stack->data[x - 1];
3803
3804 if (PyList_Check(list)) {
3805 PyObject *slice;
3806 Py_ssize_t list_len;
3807
3808 slice = Pdata_poplist(self->stack, x);
3809 if (!slice)
3810 return -1;
3811 list_len = PyList_GET_SIZE(list);
3812 i = PyList_SetSlice(list, list_len, list_len, slice);
3813 Py_DECREF(slice);
3814 return i;
3815 }
3816 else {
3817 PyObject *append_func;
3818
3819 append_func = PyObject_GetAttrString(list, "append");
3820 if (append_func == NULL)
3821 return -1;
3822 for (i = x; i < len; i++) {
3823 PyObject *result;
3824
3825 value = self->stack->data[i];
3826 result = unpickler_call(self, append_func, value);
3827 if (result == NULL) {
3828 Pdata_clear(self->stack, i + 1);
3829 self->stack->length = x;
3830 return -1;
3831 }
3832 Py_DECREF(result);
3833 }
3834 self->stack->length = x;
3835 }
3836
3837 return 0;
3838}
3839
3840static int
3841load_append(UnpicklerObject *self)
3842{
3843 return do_append(self, self->stack->length - 1);
3844}
3845
3846static int
3847load_appends(UnpicklerObject *self)
3848{
3849 return do_append(self, marker(self));
3850}
3851
3852static int
3853do_setitems(UnpicklerObject *self, int x)
3854{
3855 PyObject *value, *key;
3856 PyObject *dict;
3857 int len, i;
3858 int status = 0;
3859
3860 len = self->stack->length;
3861 if (x > len || x <= 0)
3862 return stack_underflow();
3863 if (len == x) /* nothing to do */
3864 return 0;
3865 if ((len - x) % 2 != 0) {
3866 /* Currupt or hostile pickle -- we never write one like this. */
3867 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3868 return -1;
3869 }
3870
3871 /* Here, dict does not actually need to be a PyDict; it could be anything
3872 that supports the __setitem__ attribute. */
3873 dict = self->stack->data[x - 1];
3874
3875 for (i = x + 1; i < len; i += 2) {
3876 key = self->stack->data[i - 1];
3877 value = self->stack->data[i];
3878 if (PyObject_SetItem(dict, key, value) < 0) {
3879 status = -1;
3880 break;
3881 }
3882 }
3883
3884 Pdata_clear(self->stack, x);
3885 return status;
3886}
3887
3888static int
3889load_setitem(UnpicklerObject *self)
3890{
3891 return do_setitems(self, self->stack->length - 2);
3892}
3893
3894static int
3895load_setitems(UnpicklerObject *self)
3896{
3897 return do_setitems(self, marker(self));
3898}
3899
3900static int
3901load_build(UnpicklerObject *self)
3902{
3903 PyObject *state, *inst, *slotstate;
3904 PyObject *setstate;
3905 int status = 0;
3906
3907 /* Stack is ... instance, state. We want to leave instance at
3908 * the stack top, possibly mutated via instance.__setstate__(state).
3909 */
3910 if (self->stack->length < 2)
3911 return stack_underflow();
3912
3913 PDATA_POP(self->stack, state);
3914 if (state == NULL)
3915 return -1;
3916
3917 inst = self->stack->data[self->stack->length - 1];
3918
3919 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003920 if (setstate == NULL) {
3921 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3922 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003923 else {
3924 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003925 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003926 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003927 }
3928 else {
3929 PyObject *result;
3930
3931 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003932 /* Ugh... this does not leak since unpickler_call() steals the
3933 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003934 result = unpickler_call(self, setstate, state);
3935 Py_DECREF(setstate);
3936 if (result == NULL)
3937 return -1;
3938 Py_DECREF(result);
3939 return 0;
3940 }
3941
3942 /* A default __setstate__. First see whether state embeds a
3943 * slot state dict too (a proto 2 addition).
3944 */
3945 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3946 PyObject *tmp = state;
3947
3948 state = PyTuple_GET_ITEM(tmp, 0);
3949 slotstate = PyTuple_GET_ITEM(tmp, 1);
3950 Py_INCREF(state);
3951 Py_INCREF(slotstate);
3952 Py_DECREF(tmp);
3953 }
3954 else
3955 slotstate = NULL;
3956
3957 /* Set inst.__dict__ from the state dict (if any). */
3958 if (state != Py_None) {
3959 PyObject *dict;
3960
3961 if (!PyDict_Check(state)) {
3962 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3963 goto error;
3964 }
3965 dict = PyObject_GetAttrString(inst, "__dict__");
3966 if (dict == NULL)
3967 goto error;
3968
3969 PyDict_Update(dict, state);
3970 Py_DECREF(dict);
3971 }
3972
3973 /* Also set instance attributes from the slotstate dict (if any). */
3974 if (slotstate != NULL) {
3975 PyObject *d_key, *d_value;
3976 Py_ssize_t i;
3977
3978 if (!PyDict_Check(slotstate)) {
3979 PyErr_SetString(UnpicklingError,
3980 "slot state is not a dictionary");
3981 goto error;
3982 }
3983 i = 0;
3984 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3985 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3986 goto error;
3987 }
3988 }
3989
3990 if (0) {
3991 error:
3992 status = -1;
3993 }
3994
3995 Py_DECREF(state);
3996 Py_XDECREF(slotstate);
3997 return status;
3998}
3999
4000static int
4001load_mark(UnpicklerObject *self)
4002{
4003
4004 /* Note that we split the (pickle.py) stack into two stacks, an
4005 * object stack and a mark stack. Here we push a mark onto the
4006 * mark stack.
4007 */
4008
4009 if ((self->num_marks + 1) >= self->marks_size) {
4010 size_t alloc;
4011 int *marks;
4012
4013 /* Use the size_t type to check for overflow. */
4014 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004015 if (alloc > PY_SSIZE_T_MAX ||
4016 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004017 PyErr_NoMemory();
4018 return -1;
4019 }
4020
4021 if (self->marks == NULL)
4022 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4023 else
4024 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4025 if (marks == NULL) {
4026 PyErr_NoMemory();
4027 return -1;
4028 }
4029 self->marks = marks;
4030 self->marks_size = (Py_ssize_t)alloc;
4031 }
4032
4033 self->marks[self->num_marks++] = self->stack->length;
4034
4035 return 0;
4036}
4037
4038static int
4039load_reduce(UnpicklerObject *self)
4040{
4041 PyObject *callable = NULL;
4042 PyObject *argtup = NULL;
4043 PyObject *obj = NULL;
4044
4045 PDATA_POP(self->stack, argtup);
4046 if (argtup == NULL)
4047 return -1;
4048 PDATA_POP(self->stack, callable);
4049 if (callable) {
4050 obj = instantiate(callable, argtup);
4051 Py_DECREF(callable);
4052 }
4053 Py_DECREF(argtup);
4054
4055 if (obj == NULL)
4056 return -1;
4057
4058 PDATA_PUSH(self->stack, obj, -1);
4059 return 0;
4060}
4061
4062/* Just raises an error if we don't know the protocol specified. PROTO
4063 * is the first opcode for protocols >= 2.
4064 */
4065static int
4066load_proto(UnpicklerObject *self)
4067{
4068 char *s;
4069 int i;
4070
4071 if (unpickler_read(self, &s, 1) < 0)
4072 return -1;
4073
4074 i = (unsigned char)s[0];
4075 if (i <= HIGHEST_PROTOCOL)
4076 return 0;
4077
4078 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4079 return -1;
4080}
4081
4082static PyObject *
4083load(UnpicklerObject *self)
4084{
4085 PyObject *err;
4086 PyObject *value = NULL;
4087 char *s;
4088
4089 self->num_marks = 0;
4090 if (self->stack->length)
4091 Pdata_clear(self->stack, 0);
4092
4093 /* Convenient macros for the dispatch while-switch loop just below. */
4094#define OP(opcode, load_func) \
4095 case opcode: if (load_func(self) < 0) break; continue;
4096
4097#define OP_ARG(opcode, load_func, arg) \
4098 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4099
4100 while (1) {
4101 if (unpickler_read(self, &s, 1) < 0)
4102 break;
4103
4104 switch ((enum opcode)s[0]) {
4105 OP(NONE, load_none)
4106 OP(BININT, load_binint)
4107 OP(BININT1, load_binint1)
4108 OP(BININT2, load_binint2)
4109 OP(INT, load_int)
4110 OP(LONG, load_long)
4111 OP_ARG(LONG1, load_counted_long, 1)
4112 OP_ARG(LONG4, load_counted_long, 4)
4113 OP(FLOAT, load_float)
4114 OP(BINFLOAT, load_binfloat)
4115 OP(BINBYTES, load_binbytes)
4116 OP(SHORT_BINBYTES, load_short_binbytes)
4117 OP(BINSTRING, load_binstring)
4118 OP(SHORT_BINSTRING, load_short_binstring)
4119 OP(STRING, load_string)
4120 OP(UNICODE, load_unicode)
4121 OP(BINUNICODE, load_binunicode)
4122 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4123 OP_ARG(TUPLE1, load_counted_tuple, 1)
4124 OP_ARG(TUPLE2, load_counted_tuple, 2)
4125 OP_ARG(TUPLE3, load_counted_tuple, 3)
4126 OP(TUPLE, load_tuple)
4127 OP(EMPTY_LIST, load_empty_list)
4128 OP(LIST, load_list)
4129 OP(EMPTY_DICT, load_empty_dict)
4130 OP(DICT, load_dict)
4131 OP(OBJ, load_obj)
4132 OP(INST, load_inst)
4133 OP(NEWOBJ, load_newobj)
4134 OP(GLOBAL, load_global)
4135 OP(APPEND, load_append)
4136 OP(APPENDS, load_appends)
4137 OP(BUILD, load_build)
4138 OP(DUP, load_dup)
4139 OP(BINGET, load_binget)
4140 OP(LONG_BINGET, load_long_binget)
4141 OP(GET, load_get)
4142 OP(MARK, load_mark)
4143 OP(BINPUT, load_binput)
4144 OP(LONG_BINPUT, load_long_binput)
4145 OP(PUT, load_put)
4146 OP(POP, load_pop)
4147 OP(POP_MARK, load_pop_mark)
4148 OP(SETITEM, load_setitem)
4149 OP(SETITEMS, load_setitems)
4150 OP(PERSID, load_persid)
4151 OP(BINPERSID, load_binpersid)
4152 OP(REDUCE, load_reduce)
4153 OP(PROTO, load_proto)
4154 OP_ARG(EXT1, load_extension, 1)
4155 OP_ARG(EXT2, load_extension, 2)
4156 OP_ARG(EXT4, load_extension, 4)
4157 OP_ARG(NEWTRUE, load_bool, Py_True)
4158 OP_ARG(NEWFALSE, load_bool, Py_False)
4159
4160 case STOP:
4161 break;
4162
4163 case '\0':
4164 PyErr_SetNone(PyExc_EOFError);
4165 return NULL;
4166
4167 default:
4168 PyErr_Format(UnpicklingError,
4169 "invalid load key, '%c'.", s[0]);
4170 return NULL;
4171 }
4172
4173 break; /* and we are done! */
4174 }
4175
4176 /* XXX: It is not clear what this is actually for. */
4177 if ((err = PyErr_Occurred())) {
4178 if (err == PyExc_EOFError) {
4179 PyErr_SetNone(PyExc_EOFError);
4180 }
4181 return NULL;
4182 }
4183
4184 PDATA_POP(self->stack, value);
4185 return value;
4186}
4187
4188PyDoc_STRVAR(Unpickler_load_doc,
4189"load() -> object. Load a pickle."
4190"\n"
4191"Read a pickled object representation from the open file object given in\n"
4192"the constructor, and return the reconstituted object hierarchy specified\n"
4193"therein.\n");
4194
4195static PyObject *
4196Unpickler_load(UnpicklerObject *self)
4197{
4198 /* Check whether the Unpickler was initialized correctly. This prevents
4199 segfaulting if a subclass overridden __init__ with a function that does
4200 not call Unpickler.__init__(). Here, we simply ensure that self->read
4201 is not NULL. */
4202 if (self->read == NULL) {
4203 PyErr_Format(UnpicklingError,
4204 "Unpickler.__init__() was not called by %s.__init__()",
4205 Py_TYPE(self)->tp_name);
4206 return NULL;
4207 }
4208
4209 return load(self);
4210}
4211
/* The name of find_class() is misleading. In newer pickle protocols, this
   function is used for loading any global (e.g., functions), not just
   classes. The name is kept only for backward compatibility. */
4215
4216PyDoc_STRVAR(Unpickler_find_class_doc,
4217"find_class(module_name, global_name) -> object.\n"
4218"\n"
4219"Return an object from a specified module, importing the module if\n"
4220"necessary. Subclasses may override this method (e.g. to restrict\n"
4221"unpickling of arbitrary classes and functions).\n"
4222"\n"
4223"This method is called whenever a class or a function object is\n"
4224"needed. Both arguments passed are str objects.\n");
4225
4226static PyObject *
4227Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4228{
4229 PyObject *global;
4230 PyObject *modules_dict;
4231 PyObject *module;
4232 PyObject *module_name, *global_name;
4233
4234 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4235 &module_name, &global_name))
4236 return NULL;
4237
4238 modules_dict = PySys_GetObject("modules");
4239 if (modules_dict == NULL)
4240 return NULL;
4241
4242 module = PyDict_GetItem(modules_dict, module_name);
4243 if (module == NULL) {
4244 module = PyImport_Import(module_name);
4245 if (module == NULL)
4246 return NULL;
4247 global = PyObject_GetAttr(module, global_name);
4248 Py_DECREF(module);
4249 }
4250 else {
4251 global = PyObject_GetAttr(module, global_name);
4252 }
4253 return global;
4254}
4255
4256static struct PyMethodDef Unpickler_methods[] = {
4257 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4258 Unpickler_load_doc},
4259 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4260 Unpickler_find_class_doc},
4261 {NULL, NULL} /* sentinel */
4262};
4263
4264static void
4265Unpickler_dealloc(UnpicklerObject *self)
4266{
4267 PyObject_GC_UnTrack((PyObject *)self);
4268 Py_XDECREF(self->readline);
4269 Py_XDECREF(self->read);
4270 Py_XDECREF(self->memo);
4271 Py_XDECREF(self->stack);
4272 Py_XDECREF(self->pers_func);
4273 Py_XDECREF(self->arg);
4274 Py_XDECREF(self->last_string);
4275
4276 PyMem_Free(self->marks);
4277 free(self->encoding);
4278 free(self->errors);
4279
4280 Py_TYPE(self)->tp_free((PyObject *)self);
4281}
4282
4283static int
4284Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4285{
4286 Py_VISIT(self->readline);
4287 Py_VISIT(self->read);
4288 Py_VISIT(self->memo);
4289 Py_VISIT(self->stack);
4290 Py_VISIT(self->pers_func);
4291 Py_VISIT(self->arg);
4292 Py_VISIT(self->last_string);
4293 return 0;
4294}
4295
4296static int
4297Unpickler_clear(UnpicklerObject *self)
4298{
4299 Py_CLEAR(self->readline);
4300 Py_CLEAR(self->read);
4301 Py_CLEAR(self->memo);
4302 Py_CLEAR(self->stack);
4303 Py_CLEAR(self->pers_func);
4304 Py_CLEAR(self->arg);
4305 Py_CLEAR(self->last_string);
4306
4307 PyMem_Free(self->marks);
4308 self->marks = NULL;
4309 free(self->encoding);
4310 self->encoding = NULL;
4311 free(self->errors);
4312 self->errors = NULL;
4313
4314 return 0;
4315}
4316
4317PyDoc_STRVAR(Unpickler_doc,
4318"Unpickler(file, *, encoding='ASCII', errors='strict')"
4319"\n"
4320"This takes a binary file for reading a pickle data stream.\n"
4321"\n"
4322"The protocol version of the pickle is detected automatically, so no\n"
4323"proto argument is needed.\n"
4324"\n"
4325"The file-like object must have two methods, a read() method\n"
4326"that takes an integer argument, and a readline() method that\n"
4327"requires no arguments. Both methods should return bytes.\n"
4328"Thus file-like object can be a binary file object opened for\n"
4329"reading, a BytesIO object, or any other custom object that\n"
4330"meets this interface.\n"
4331"\n"
4332"Optional keyword arguments are encoding and errors, which are\n"
4333"used to decode 8-bit string instances pickled by Python 2.x.\n"
4334"These default to 'ASCII' and 'strict', respectively.\n");
4335
4336static int
4337Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4338{
4339 static char *kwlist[] = {"file", "encoding", "errors", 0};
4340 PyObject *file;
4341 char *encoding = NULL;
4342 char *errors = NULL;
4343
4344 /* XXX: That is an horrible error message. But, I don't know how to do
4345 better... */
4346 if (Py_SIZE(args) != 1) {
4347 PyErr_Format(PyExc_TypeError,
4348 "%s takes exactly one positional argument (%zd given)",
4349 Py_TYPE(self)->tp_name, Py_SIZE(args));
4350 return -1;
4351 }
4352
4353 /* Arguments parsing needs to be done in the __init__() method to allow
4354 subclasses to define their own __init__() method, which may (or may
4355 not) support Unpickler arguments. However, this means we need to be
4356 extra careful in the other Unpickler methods, since a subclass could
4357 forget to call Unpickler.__init__() thus breaking our internal
4358 invariants. */
4359 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4360 &file, &encoding, &errors))
4361 return -1;
4362
4363 /* In case of multiple __init__() calls, clear previous content. */
4364 if (self->read != NULL)
4365 (void)Unpickler_clear(self);
4366
4367 self->read = PyObject_GetAttrString(file, "read");
4368 self->readline = PyObject_GetAttrString(file, "readline");
4369 if (self->readline == NULL || self->read == NULL)
4370 return -1;
4371
4372 if (encoding == NULL)
4373 encoding = "ASCII";
4374 if (errors == NULL)
4375 errors = "strict";
4376
4377 self->encoding = strdup(encoding);
4378 self->errors = strdup(errors);
4379 if (self->encoding == NULL || self->errors == NULL) {
4380 PyErr_NoMemory();
4381 return -1;
4382 }
4383
4384 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4385 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4386 "persistent_load");
4387 if (self->pers_func == NULL)
4388 return -1;
4389 }
4390 else {
4391 self->pers_func = NULL;
4392 }
4393
4394 self->stack = (Pdata *)Pdata_New();
4395 if (self->stack == NULL)
4396 return -1;
4397
4398 self->memo = PyDict_New();
4399 if (self->memo == NULL)
4400 return -1;
4401
4402 return 0;
4403}
4404
4405static PyObject *
4406Unpickler_get_memo(UnpicklerObject *self)
4407{
4408 if (self->memo == NULL)
4409 PyErr_SetString(PyExc_AttributeError, "memo");
4410 else
4411 Py_INCREF(self->memo);
4412 return self->memo;
4413}
4414
4415static int
4416Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4417{
4418 PyObject *tmp;
4419
4420 if (value == NULL) {
4421 PyErr_SetString(PyExc_TypeError,
4422 "attribute deletion is not supported");
4423 return -1;
4424 }
4425 if (!PyDict_Check(value)) {
4426 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4427 return -1;
4428 }
4429
4430 tmp = self->memo;
4431 Py_INCREF(value);
4432 self->memo = value;
4433 Py_XDECREF(tmp);
4434
4435 return 0;
4436}
4437
4438static PyObject *
4439Unpickler_get_persload(UnpicklerObject *self)
4440{
4441 if (self->pers_func == NULL)
4442 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4443 else
4444 Py_INCREF(self->pers_func);
4445 return self->pers_func;
4446}
4447
4448static int
4449Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4450{
4451 PyObject *tmp;
4452
4453 if (value == NULL) {
4454 PyErr_SetString(PyExc_TypeError,
4455 "attribute deletion is not supported");
4456 return -1;
4457 }
4458 if (!PyCallable_Check(value)) {
4459 PyErr_SetString(PyExc_TypeError,
4460 "persistent_load must be a callable taking "
4461 "one argument");
4462 return -1;
4463 }
4464
4465 tmp = self->pers_func;
4466 Py_INCREF(value);
4467 self->pers_func = value;
4468 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4469
4470 return 0;
4471}
4472
4473static PyGetSetDef Unpickler_getsets[] = {
4474 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4475 {"persistent_load", (getter)Unpickler_get_persload,
4476 (setter)Unpickler_set_persload},
4477 {NULL}
4478};
4479
4480static PyTypeObject Unpickler_Type = {
4481 PyVarObject_HEAD_INIT(NULL, 0)
4482 "_pickle.Unpickler", /*tp_name*/
4483 sizeof(UnpicklerObject), /*tp_basicsize*/
4484 0, /*tp_itemsize*/
4485 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4486 0, /*tp_print*/
4487 0, /*tp_getattr*/
4488 0, /*tp_setattr*/
4489 0, /*tp_compare*/
4490 0, /*tp_repr*/
4491 0, /*tp_as_number*/
4492 0, /*tp_as_sequence*/
4493 0, /*tp_as_mapping*/
4494 0, /*tp_hash*/
4495 0, /*tp_call*/
4496 0, /*tp_str*/
4497 0, /*tp_getattro*/
4498 0, /*tp_setattro*/
4499 0, /*tp_as_buffer*/
4500 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4501 Unpickler_doc, /*tp_doc*/
4502 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4503 (inquiry)Unpickler_clear, /*tp_clear*/
4504 0, /*tp_richcompare*/
4505 0, /*tp_weaklistoffset*/
4506 0, /*tp_iter*/
4507 0, /*tp_iternext*/
4508 Unpickler_methods, /*tp_methods*/
4509 0, /*tp_members*/
4510 Unpickler_getsets, /*tp_getset*/
4511 0, /*tp_base*/
4512 0, /*tp_dict*/
4513 0, /*tp_descr_get*/
4514 0, /*tp_descr_set*/
4515 0, /*tp_dictoffset*/
4516 (initproc)Unpickler_init, /*tp_init*/
4517 PyType_GenericAlloc, /*tp_alloc*/
4518 PyType_GenericNew, /*tp_new*/
4519 PyObject_GC_Del, /*tp_free*/
4520 0, /*tp_is_gc*/
4521};
4522
4523static int
4524init_stuff(void)
4525{
4526 PyObject *copyreg;
4527
4528 copyreg = PyImport_ImportModule("copyreg");
4529 if (!copyreg)
4530 return -1;
4531
4532 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4533 if (!dispatch_table)
4534 goto error;
4535
4536 extension_registry = \
4537 PyObject_GetAttrString(copyreg, "_extension_registry");
4538 if (!extension_registry)
4539 goto error;
4540
4541 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4542 if (!inverted_registry)
4543 goto error;
4544
4545 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4546 if (!extension_cache)
4547 goto error;
4548
4549 Py_DECREF(copyreg);
4550
4551 empty_tuple = PyTuple_New(0);
4552 if (empty_tuple == NULL)
4553 return -1;
4554
4555 two_tuple = PyTuple_New(2);
4556 if (two_tuple == NULL)
4557 return -1;
4558 /* We use this temp container with no regard to refcounts, or to
4559 * keeping containees alive. Exempt from GC, because we don't
4560 * want anything looking at two_tuple() by magic.
4561 */
4562 PyObject_GC_UnTrack(two_tuple);
4563
4564 return 0;
4565
4566 error:
4567 Py_DECREF(copyreg);
4568 return -1;
4569}
4570
4571static struct PyModuleDef _picklemodule = {
4572 PyModuleDef_HEAD_INIT,
4573 "_pickle",
4574 pickle_module_doc,
4575 -1,
4576 NULL,
4577 NULL,
4578 NULL,
4579 NULL,
4580 NULL
4581};
4582
4583PyMODINIT_FUNC
4584PyInit__pickle(void)
4585{
4586 PyObject *m;
4587
4588 if (PyType_Ready(&Unpickler_Type) < 0)
4589 return NULL;
4590 if (PyType_Ready(&Pickler_Type) < 0)
4591 return NULL;
4592 if (PyType_Ready(&Pdata_Type) < 0)
4593 return NULL;
4594
4595 /* Create the module and add the functions. */
4596 m = PyModule_Create(&_picklemodule);
4597 if (m == NULL)
4598 return NULL;
4599
4600 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4601 return NULL;
4602 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4603 return NULL;
4604
4605 /* Initialize the exceptions. */
4606 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4607 if (PickleError == NULL)
4608 return NULL;
4609 PicklingError = \
4610 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4611 if (PicklingError == NULL)
4612 return NULL;
4613 UnpicklingError = \
4614 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4615 if (UnpicklingError == NULL)
4616 return NULL;
4617
4618 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4619 return NULL;
4620 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4621 return NULL;
4622 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4623 return NULL;
4624
4625 if (init_stuff() < 0)
4626 return NULL;
4627
4628 return m;
4629}