blob: 2b672a73e0b507c7b2752a5a347f0411eb691615 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
7/* Bump this when new opcodes are added to the pickle protocol. */
enum {
    /* Newest pickle protocol this module can emit and read. */
    HIGHEST_PROTOCOL = 3,
    /* Protocol used when the caller does not request one. */
    DEFAULT_PROTOCOL = 3
};
12
13
14/* Pickle opcodes. These must be kept updated with pickle.py.
15 Extensive docs are in pickletools.py. */
enum opcode {
    /* Protocols 0 and 1: one printable character per opcode. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2: opcodes in the 0x80..0x8b range. */
    PROTO    = '\x80',
    NEWOBJ   = '\x81',
    EXT1     = '\x82',
    EXT2     = '\x83',
    EXT4     = '\x84',
    TUPLE1   = '\x85',
    TUPLE2   = '\x86',
    TUPLE3   = '\x87',
    NEWTRUE  = '\x88',
    NEWFALSE = '\x89',
    LONG1    = '\x8a',
    LONG4    = '\x8b',

    /* Protocol 3 (Python 3.x): bytes objects. */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
78/* These aren't opcodes -- they're ways to pickle bools before protocol 2
79 * so that unpicklers written before bools were introduced unpickle them
80 * as ints, but unpicklers after can recognize that bools were intended.
81 * Note that protocol 2 added direct ways to pickle bools.
82 */
83#undef TRUE
84#define TRUE "I01\n"
85#undef FALSE
86#define FALSE "I00\n"
87
enum {
    /* Number of elements batch_list/dict() emits before issuing an
       APPENDS/SETITEMS opcode.  Mirrors pickle.Pickler._BATCHSIZE; nothing
       breaks if the two drift apart, but there is no known benefit to
       letting them differ. */
    BATCHSIZE = 1000,

    /* In "fast mode", nesting depth at which the Pickler begins checking
       for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Capacity of the Pickler's write buffer.  A larger value means fewer
       calls to the output stream's write() method. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
/* XXX: Are these really necessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the reference taken here is released again before the
   enclosing function returns ER, so nothing leaks. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
352/* Helpers for creating the argument tuple passed to functions. This has the
353 performance advantage of calling PyTuple_New() only once. */
354
/* Put obj into slot 0 of the cached 1-tuple (self)->arg, creating the tuple
   on first use and releasing whatever was in the slot before.  The
   reference to obj is consumed in every case: it is either stored in the
   tuple or, if the tuple cannot be created, dropped. */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg != NULL) {                              \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

/* Give up the cached tuple when something else holds a reference to it as
   well; a fresh one is then created by the next ARG_TUP(). */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1) {                       \
            Py_CLEAR((self)->arg);                              \
        }                                                       \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
372 XXX: Does caching the argument tuple provides any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
379 one second (wall clock time). Either ways, the loading time a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
463/* XXX: These read/readline functions ought to be optimized. Buffered I/O
464 might help a lot, especially with the new (but much slower) io library.
465 On the other hand, the added complexity might not worth it.
466 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
489 return -1;
490 }
491
492 Py_XDECREF(self->last_string);
493 self->last_string = data;
494
495 if (!(*s = PyBytes_AS_STRING(data)))
496 return -1;
497
498 return n;
499}
500
501static Py_ssize_t
502unpickler_readline(UnpicklerObject *self, char **s)
503{
504 PyObject *data;
505
506 data = PyObject_CallObject(self->readline, empty_tuple);
507 if (data == NULL)
508 return -1;
509
510 /* XXX: Should bytearray be supported too? */
511 if (!PyBytes_Check(data)) {
512 PyErr_SetString(PyExc_ValueError,
513 "readline() from the underlying stream did not"
514 "return bytes");
515 return -1;
516 }
517
518 Py_XDECREF(self->last_string);
519 self->last_string = data;
520
521 if (!(*s = PyBytes_AS_STRING(data)))
522 return -1;
523
524 return PyBytes_GET_SIZE(data);
525}
526
527/* Generate a GET opcode for an object stored in the memo. The 'key' argument
528 should be the address of the object as returned by PyLong_FromVoidPtr(). */
529static int
530memo_get(PicklerObject *self, PyObject *key)
531{
532 PyObject *value;
533 PyObject *memo_id;
534 long x;
535 char pdata[30];
536 int len;
537
538 value = PyDict_GetItemWithError(self->memo, key);
539 if (value == NULL) {
540 if (!PyErr_Occurred())
541 PyErr_SetObject(PyExc_KeyError, key);
542 return -1;
543 }
544
545 memo_id = PyTuple_GetItem(value, 0);
546 if (memo_id == NULL)
547 return -1;
548
549 if (!PyLong_Check(memo_id)) {
550 PyErr_SetString(PicklingError, "memo id must be an integer");
551 return -1;
552 }
553 x = PyLong_AsLong(memo_id);
554 if (x == -1 && PyErr_Occurred())
555 return -1;
556
557 if (!self->bin) {
558 pdata[0] = GET;
559 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
560 len = (int)strlen(pdata);
561 }
562 else {
563 if (x < 256) {
564 pdata[0] = BINGET;
565 pdata[1] = (unsigned char)(x & 0xff);
566 len = 2;
567 }
568 else if (x <= 0xffffffffL) {
569 pdata[0] = LONG_BINGET;
570 pdata[1] = (unsigned char)(x & 0xff);
571 pdata[2] = (unsigned char)((x >> 8) & 0xff);
572 pdata[3] = (unsigned char)((x >> 16) & 0xff);
573 pdata[4] = (unsigned char)((x >> 24) & 0xff);
574 len = 5;
575 }
576 else { /* unlikely */
577 PyErr_SetString(PicklingError,
578 "memo id too large for LONG_BINGET");
579 return -1;
580 }
581 }
582
583 if (pickler_write(self, pdata, len) < 0)
584 return -1;
585
586 return 0;
587}
588
589/* Store an object in the memo, assign it a new unique ID based on the number
590 of objects currently stored in the memo and generate a PUT opcode. */
591static int
592memo_put(PicklerObject *self, PyObject *obj)
593{
594 PyObject *key = NULL;
595 PyObject *memo_id = NULL;
596 PyObject *tuple = NULL;
597 long x;
598 char pdata[30];
599 int len;
600 int status = 0;
601
602 if (self->fast)
603 return 0;
604
605 key = PyLong_FromVoidPtr(obj);
606 if (key == NULL)
607 goto error;
608 if ((x = PyDict_Size(self->memo)) < 0)
609 goto error;
610 memo_id = PyLong_FromLong(x);
611 if (memo_id == NULL)
612 goto error;
613 tuple = PyTuple_New(2);
614 if (tuple == NULL)
615 goto error;
616
617 Py_INCREF(memo_id);
618 PyTuple_SET_ITEM(tuple, 0, memo_id);
619 Py_INCREF(obj);
620 PyTuple_SET_ITEM(tuple, 1, obj);
621 if (PyDict_SetItem(self->memo, key, tuple) < 0)
622 goto error;
623
624 if (!self->bin) {
625 pdata[0] = PUT;
626 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
627 len = strlen(pdata);
628 }
629 else {
630 if (x < 256) {
631 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000632 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000633 len = 2;
634 }
635 else if (x <= 0xffffffffL) {
636 pdata[0] = LONG_BINPUT;
637 pdata[1] = (unsigned char)(x & 0xff);
638 pdata[2] = (unsigned char)((x >> 8) & 0xff);
639 pdata[3] = (unsigned char)((x >> 16) & 0xff);
640 pdata[4] = (unsigned char)((x >> 24) & 0xff);
641 len = 5;
642 }
643 else { /* unlikely */
644 PyErr_SetString(PicklingError,
645 "memo id too large for LONG_BINPUT");
646 return -1;
647 }
648 }
649
650 if (pickler_write(self, pdata, len) < 0)
651 goto error;
652
653 if (0) {
654 error:
655 status = -1;
656 }
657
658 Py_XDECREF(key);
659 Py_XDECREF(memo_id);
660 Py_XDECREF(tuple);
661
662 return status;
663}
664
665static PyObject *
666whichmodule(PyObject *global, PyObject *global_name)
667{
668 Py_ssize_t i, j;
669 static PyObject *module_str = NULL;
670 static PyObject *main_str = NULL;
671 PyObject *module_name;
672 PyObject *modules_dict;
673 PyObject *module;
674 PyObject *obj;
675
676 if (module_str == NULL) {
677 module_str = PyUnicode_InternFromString("__module__");
678 if (module_str == NULL)
679 return NULL;
680 main_str = PyUnicode_InternFromString("__main__");
681 if (main_str == NULL)
682 return NULL;
683 }
684
685 module_name = PyObject_GetAttr(global, module_str);
686
687 /* In some rare cases (e.g., random.getrandbits), __module__ can be
688 None. If it is so, then search sys.modules for the module of
689 global. */
690 if (module_name == Py_None) {
691 Py_DECREF(module_name);
692 goto search;
693 }
694
695 if (module_name) {
696 return module_name;
697 }
698 if (PyErr_ExceptionMatches(PyExc_AttributeError))
699 PyErr_Clear();
700 else
701 return NULL;
702
703 search:
704 modules_dict = PySys_GetObject("modules");
705 if (modules_dict == NULL)
706 return NULL;
707
708 i = 0;
709 module_name = NULL;
710 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
711 if (PyObject_Compare(module_name, main_str) == 0)
712 continue;
713
714 obj = PyObject_GetAttr(module, global_name);
715 if (obj == NULL) {
716 if (PyErr_ExceptionMatches(PyExc_AttributeError))
717 PyErr_Clear();
718 else
719 return NULL;
720 continue;
721 }
722
723 if (obj != global) {
724 Py_DECREF(obj);
725 continue;
726 }
727
728 Py_DECREF(obj);
729 break;
730 }
731
732 /* If no module is found, use __main__. */
733 if (!j) {
734 module_name = main_str;
735 }
736
737 Py_INCREF(module_name);
738 return module_name;
739}
740
741/* fast_save_enter() and fast_save_leave() are guards against recursive
742 objects when Pickler is used with the "fast mode" (i.e., with object
743 memoization disabled). If the nesting of a list or dict object exceed
744 FAST_NESTING_LIMIT, these guards will start keeping an internal
745 reference to the seen list or dict objects and check whether these objects
746 are recursive. These are not strictly necessary, since save() has a
747 hard-coded recursion limit, but they give a nicer error message than the
748 typical RuntimeError. */
749static int
750fast_save_enter(PicklerObject *self, PyObject *obj)
751{
752 /* if fast_nesting < 0, we're doing an error exit. */
753 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
754 PyObject *key = NULL;
755 if (self->fast_memo == NULL) {
756 self->fast_memo = PyDict_New();
757 if (self->fast_memo == NULL) {
758 self->fast_nesting = -1;
759 return 0;
760 }
761 }
762 key = PyLong_FromVoidPtr(obj);
763 if (key == NULL)
764 return 0;
765 if (PyDict_GetItem(self->fast_memo, key)) {
766 Py_DECREF(key);
767 PyErr_Format(PyExc_ValueError,
768 "fast mode: can't pickle cyclic objects "
769 "including object type %.200s at %p",
770 obj->ob_type->tp_name, obj);
771 self->fast_nesting = -1;
772 return 0;
773 }
774 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
775 Py_DECREF(key);
776 self->fast_nesting = -1;
777 return 0;
778 }
779 Py_DECREF(key);
780 }
781 return 1;
782}
783
784static int
785fast_save_leave(PicklerObject *self, PyObject *obj)
786{
787 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
788 PyObject *key = PyLong_FromVoidPtr(obj);
789 if (key == NULL)
790 return 0;
791 if (PyDict_DelItem(self->fast_memo, key) < 0) {
792 Py_DECREF(key);
793 return 0;
794 }
795 Py_DECREF(key);
796 }
797 return 1;
798}
799
800static int
801save_none(PicklerObject *self, PyObject *obj)
802{
803 const char none_op = NONE;
804 if (pickler_write(self, &none_op, 1) < 0)
805 return -1;
806
807 return 0;
808}
809
810static int
811save_bool(PicklerObject *self, PyObject *obj)
812{
813 static const char *buf[2] = { FALSE, TRUE };
814 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
815 int p = (obj == Py_True);
816
817 if (self->proto >= 2) {
818 const char bool_op = p ? NEWTRUE : NEWFALSE;
819 if (pickler_write(self, &bool_op, 1) < 0)
820 return -1;
821 }
822 else if (pickler_write(self, buf[p], len[p]) < 0)
823 return -1;
824
825 return 0;
826}
827
828static int
829save_int(PicklerObject *self, long x)
830{
831 char pdata[32];
832 int len = 0;
833
834 if (!self->bin
835#if SIZEOF_LONG > 4
836 || x > 0x7fffffffL || x < -0x80000000L
837#endif
838 ) {
839 /* Text-mode pickle, or long too big to fit in the 4-byte
840 * signed BININT format: store as a string.
841 */
842 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
843 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
844 if (pickler_write(self, pdata, strlen(pdata)) < 0)
845 return -1;
846 }
847 else {
848 /* Binary pickle and x fits in a signed 4-byte int. */
849 pdata[1] = (unsigned char)(x & 0xff);
850 pdata[2] = (unsigned char)((x >> 8) & 0xff);
851 pdata[3] = (unsigned char)((x >> 16) & 0xff);
852 pdata[4] = (unsigned char)((x >> 24) & 0xff);
853
854 if ((pdata[4] == 0) && (pdata[3] == 0)) {
855 if (pdata[2] == 0) {
856 pdata[0] = BININT1;
857 len = 2;
858 }
859 else {
860 pdata[0] = BININT2;
861 len = 3;
862 }
863 }
864 else {
865 pdata[0] = BININT;
866 len = 5;
867 }
868
869 if (pickler_write(self, pdata, len) < 0)
870 return -1;
871 }
872
873 return 0;
874}
875
876static int
877save_long(PicklerObject *self, PyObject *obj)
878{
879 PyObject *repr = NULL;
880 Py_ssize_t size;
881 long val = PyLong_AsLong(obj);
882 int status = 0;
883
884 const char long_op = LONG;
885
886 if (val == -1 && PyErr_Occurred()) {
887 /* out of range for int pickling */
888 PyErr_Clear();
889 }
890 else
891 return save_int(self, val);
892
893 if (self->proto >= 2) {
894 /* Linear-time pickling. */
895 size_t nbits;
896 size_t nbytes;
897 unsigned char *pdata;
898 char header[5];
899 int i;
900 int sign = _PyLong_Sign(obj);
901
902 if (sign == 0) {
903 header[0] = LONG1;
904 header[1] = 0; /* It's 0 -- an empty bytestring. */
905 if (pickler_write(self, header, 2) < 0)
906 goto error;
907 return 0;
908 }
909 nbits = _PyLong_NumBits(obj);
910 if (nbits == (size_t)-1 && PyErr_Occurred())
911 goto error;
912 /* How many bytes do we need? There are nbits >> 3 full
913 * bytes of data, and nbits & 7 leftover bits. If there
914 * are any leftover bits, then we clearly need another
915 * byte. Wnat's not so obvious is that we *probably*
916 * need another byte even if there aren't any leftovers:
917 * the most-significant bit of the most-significant byte
918 * acts like a sign bit, and it's usually got a sense
919 * opposite of the one we need. The exception is longs
920 * of the form -(2**(8*j-1)) for j > 0. Such a long is
921 * its own 256's-complement, so has the right sign bit
922 * even without the extra byte. That's a pain to check
923 * for in advance, though, so we always grab an extra
924 * byte at the start, and cut it back later if possible.
925 */
926 nbytes = (nbits >> 3) + 1;
927 if (nbytes > INT_MAX) {
928 PyErr_SetString(PyExc_OverflowError,
929 "long too large to pickle");
930 goto error;
931 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000932 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000933 if (repr == NULL)
934 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000935 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000936 i = _PyLong_AsByteArray((PyLongObject *)obj,
937 pdata, nbytes,
938 1 /* little endian */ , 1 /* signed */ );
939 if (i < 0)
940 goto error;
941 /* If the long is negative, this may be a byte more than
942 * needed. This is so iff the MSB is all redundant sign
943 * bits.
944 */
945 if (sign < 0 &&
946 nbytes > 1 &&
947 pdata[nbytes - 1] == 0xff &&
948 (pdata[nbytes - 2] & 0x80) != 0) {
949 nbytes--;
950 }
951
952 if (nbytes < 256) {
953 header[0] = LONG1;
954 header[1] = (unsigned char)nbytes;
955 size = 2;
956 }
957 else {
958 header[0] = LONG4;
959 size = (int)nbytes;
960 for (i = 1; i < 5; i++) {
961 header[i] = (unsigned char)(size & 0xff);
962 size >>= 8;
963 }
964 size = 5;
965 }
966 if (pickler_write(self, header, size) < 0 ||
967 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
968 goto error;
969 }
970 else {
971 char *string;
972
973 /* proto < 2: write the repr and newline. This is quadratic-time
974 (in the number of digits), in both directions. */
975
976 repr = PyObject_Repr(obj);
977 if (repr == NULL)
978 goto error;
979
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000980 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000981 if (string == NULL)
982 goto error;
983
984 if (pickler_write(self, &long_op, 1) < 0 ||
985 pickler_write(self, string, size) < 0 ||
986 pickler_write(self, "\n", 1) < 0)
987 goto error;
988 }
989
990 if (0) {
991 error:
992 status = -1;
993 }
994 Py_XDECREF(repr);
995
996 return status;
997}
998
999static int
1000save_float(PicklerObject *self, PyObject *obj)
1001{
1002 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1003
1004 if (self->bin) {
1005 char pdata[9];
1006 pdata[0] = BINFLOAT;
1007 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1008 return -1;
1009 if (pickler_write(self, pdata, 9) < 0)
1010 return -1;
1011 }
1012 else {
1013 char pdata[250];
1014 pdata[0] = FLOAT;
1015 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1016 /* Extend the formatted string with a newline character */
1017 strcat(pdata, "\n");
1018
1019 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1020 return -1;
1021 }
1022
1023 return 0;
1024}
1025
1026static int
1027save_bytes(PicklerObject *self, PyObject *obj)
1028{
1029 if (self->proto < 3) {
1030 /* Older pickle protocols do not have an opcode for pickling bytes
1031 objects. Therefore, we need to fake the copy protocol (i.e.,
1032 the __reduce__ method) to permit bytes object unpickling. */
1033 PyObject *reduce_value = NULL;
1034 PyObject *bytelist = NULL;
1035 int status;
1036
1037 bytelist = PySequence_List(obj);
1038 if (bytelist == NULL)
1039 return -1;
1040
1041 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1042 bytelist);
1043 if (reduce_value == NULL) {
1044 Py_DECREF(bytelist);
1045 return -1;
1046 }
1047
1048 /* save_reduce() will memoize the object automatically. */
1049 status = save_reduce(self, reduce_value, obj);
1050 Py_DECREF(reduce_value);
1051 Py_DECREF(bytelist);
1052 return status;
1053 }
1054 else {
1055 Py_ssize_t size;
1056 char header[5];
1057 int len;
1058
1059 size = PyBytes_Size(obj);
1060 if (size < 0)
1061 return -1;
1062
1063 if (size < 256) {
1064 header[0] = SHORT_BINBYTES;
1065 header[1] = (unsigned char)size;
1066 len = 2;
1067 }
1068 else if (size <= 0xffffffffL) {
1069 header[0] = BINBYTES;
1070 header[1] = (unsigned char)(size & 0xff);
1071 header[2] = (unsigned char)((size >> 8) & 0xff);
1072 header[3] = (unsigned char)((size >> 16) & 0xff);
1073 header[4] = (unsigned char)((size >> 24) & 0xff);
1074 len = 5;
1075 }
1076 else {
1077 return -1; /* string too large */
1078 }
1079
1080 if (pickler_write(self, header, len) < 0)
1081 return -1;
1082
1083 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1084 return -1;
1085
1086 if (memo_put(self, obj) < 0)
1087 return -1;
1088
1089 return 0;
1090 }
1091}
1092
1093/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1094 backslash and newline characters to \uXXXX escapes. */
1095static PyObject *
1096raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1097{
1098 PyObject *repr, *result;
1099 char *p;
1100 char *q;
1101
1102 static const char *hexdigits = "0123456789abcdef";
1103
1104#ifdef Py_UNICODE_WIDE
1105 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1106#else
1107 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1108#endif
1109 if (repr == NULL)
1110 return NULL;
1111 if (size == 0)
1112 goto done;
1113
1114 p = q = PyBytes_AS_STRING(repr);
1115 while (size-- > 0) {
1116 Py_UNICODE ch = *s++;
1117#ifdef Py_UNICODE_WIDE
1118 /* Map 32-bit characters to '\Uxxxxxxxx' */
1119 if (ch >= 0x10000) {
1120 *p++ = '\\';
1121 *p++ = 'U';
1122 *p++ = hexdigits[(ch >> 28) & 0xf];
1123 *p++ = hexdigits[(ch >> 24) & 0xf];
1124 *p++ = hexdigits[(ch >> 20) & 0xf];
1125 *p++ = hexdigits[(ch >> 16) & 0xf];
1126 *p++ = hexdigits[(ch >> 12) & 0xf];
1127 *p++ = hexdigits[(ch >> 8) & 0xf];
1128 *p++ = hexdigits[(ch >> 4) & 0xf];
1129 *p++ = hexdigits[ch & 15];
1130 }
1131 else
1132#endif
1133 /* Map 16-bit characters to '\uxxxx' */
1134 if (ch >= 256 || ch == '\\' || ch == '\n') {
1135 *p++ = '\\';
1136 *p++ = 'u';
1137 *p++ = hexdigits[(ch >> 12) & 0xf];
1138 *p++ = hexdigits[(ch >> 8) & 0xf];
1139 *p++ = hexdigits[(ch >> 4) & 0xf];
1140 *p++ = hexdigits[ch & 15];
1141 }
1142 /* Copy everything else as-is */
1143 else
1144 *p++ = (char) ch;
1145 }
1146 size = p - q;
1147
1148 done:
1149 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1150 Py_DECREF(repr);
1151 return result;
1152}
1153
1154static int
1155save_unicode(PicklerObject *self, PyObject *obj)
1156{
1157 Py_ssize_t size;
1158 PyObject *encoded = NULL;
1159
1160 if (self->bin) {
1161 char pdata[5];
1162
1163 encoded = PyUnicode_AsUTF8String(obj);
1164 if (encoded == NULL)
1165 goto error;
1166
1167 size = PyBytes_GET_SIZE(encoded);
1168 if (size < 0 || size > 0xffffffffL)
1169 goto error; /* string too large */
1170
1171 pdata[0] = BINUNICODE;
1172 pdata[1] = (unsigned char)(size & 0xff);
1173 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1174 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1175 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1176
1177 if (pickler_write(self, pdata, 5) < 0)
1178 goto error;
1179
1180 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1181 goto error;
1182 }
1183 else {
1184 const char unicode_op = UNICODE;
1185
1186 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1187 PyUnicode_GET_SIZE(obj));
1188 if (encoded == NULL)
1189 goto error;
1190
1191 if (pickler_write(self, &unicode_op, 1) < 0)
1192 goto error;
1193
1194 size = PyBytes_GET_SIZE(encoded);
1195 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1196 goto error;
1197
1198 if (pickler_write(self, "\n", 1) < 0)
1199 goto error;
1200 }
1201 if (memo_put(self, obj) < 0)
1202 goto error;
1203
1204 Py_DECREF(encoded);
1205 return 0;
1206
1207 error:
1208 Py_XDECREF(encoded);
1209 return -1;
1210}
1211
1212/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1213static int
1214store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1215{
1216 int i;
1217
1218 assert(PyTuple_Size(t) == len);
1219
1220 for (i = 0; i < len; i++) {
1221 PyObject *element = PyTuple_GET_ITEM(t, i);
1222
1223 if (element == NULL)
1224 return -1;
1225 if (save(self, element, 0) < 0)
1226 return -1;
1227 }
1228
1229 return 0;
1230}
1231
1232/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1233 * used across protocols to minimize the space needed to pickle them.
1234 * Tuples are also the only builtin immutable type that can be recursive
1235 * (a tuple can be reached from itself), and that requires some subtle
1236 * magic so that it works in all cases. IOW, this is a long routine.
1237 */
1238static int
1239save_tuple(PicklerObject *self, PyObject *obj)
1240{
1241 PyObject *memo_key = NULL;
1242 int len, i;
1243 int status = 0;
1244
1245 const char mark_op = MARK;
1246 const char tuple_op = TUPLE;
1247 const char pop_op = POP;
1248 const char pop_mark_op = POP_MARK;
1249 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1250
1251 if ((len = PyTuple_Size(obj)) < 0)
1252 return -1;
1253
1254 if (len == 0) {
1255 char pdata[2];
1256
1257 if (self->proto) {
1258 pdata[0] = EMPTY_TUPLE;
1259 len = 1;
1260 }
1261 else {
1262 pdata[0] = MARK;
1263 pdata[1] = TUPLE;
1264 len = 2;
1265 }
1266 if (pickler_write(self, pdata, len) < 0)
1267 return -1;
1268 return 0;
1269 }
1270
1271 /* id(tuple) isn't in the memo now. If it shows up there after
1272 * saving the tuple elements, the tuple must be recursive, in
1273 * which case we'll pop everything we put on the stack, and fetch
1274 * its value from the memo.
1275 */
1276 memo_key = PyLong_FromVoidPtr(obj);
1277 if (memo_key == NULL)
1278 return -1;
1279
1280 if (len <= 3 && self->proto >= 2) {
1281 /* Use TUPLE{1,2,3} opcodes. */
1282 if (store_tuple_elements(self, obj, len) < 0)
1283 goto error;
1284
1285 if (PyDict_GetItem(self->memo, memo_key)) {
1286 /* pop the len elements */
1287 for (i = 0; i < len; i++)
1288 if (pickler_write(self, &pop_op, 1) < 0)
1289 goto error;
1290 /* fetch from memo */
1291 if (memo_get(self, memo_key) < 0)
1292 goto error;
1293
1294 Py_DECREF(memo_key);
1295 return 0;
1296 }
1297 else { /* Not recursive. */
1298 if (pickler_write(self, len2opcode + len, 1) < 0)
1299 goto error;
1300 }
1301 goto memoize;
1302 }
1303
1304 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1305 * Generate MARK e1 e2 ... TUPLE
1306 */
1307 if (pickler_write(self, &mark_op, 1) < 0)
1308 goto error;
1309
1310 if (store_tuple_elements(self, obj, len) < 0)
1311 goto error;
1312
1313 if (PyDict_GetItem(self->memo, memo_key)) {
1314 /* pop the stack stuff we pushed */
1315 if (self->bin) {
1316 if (pickler_write(self, &pop_mark_op, 1) < 0)
1317 goto error;
1318 }
1319 else {
1320 /* Note that we pop one more than len, to remove
1321 * the MARK too.
1322 */
1323 for (i = 0; i <= len; i++)
1324 if (pickler_write(self, &pop_op, 1) < 0)
1325 goto error;
1326 }
1327 /* fetch from memo */
1328 if (memo_get(self, memo_key) < 0)
1329 goto error;
1330
1331 Py_DECREF(memo_key);
1332 return 0;
1333 }
1334 else { /* Not recursive. */
1335 if (pickler_write(self, &tuple_op, 1) < 0)
1336 goto error;
1337 }
1338
1339 memoize:
1340 if (memo_put(self, obj) < 0)
1341 goto error;
1342
1343 if (0) {
1344 error:
1345 status = -1;
1346 }
1347
1348 Py_DECREF(memo_key);
1349 return status;
1350}
1351
1352/* iter is an iterator giving items, and we batch up chunks of
1353 * MARK item item ... item APPENDS
1354 * opcode sequences. Calling code should have arranged to first create an
1355 * empty list, or list-like object, for the APPENDS to operate on.
1356 * Returns 0 on success, <0 on error.
1357 */
1358static int
1359batch_list(PicklerObject *self, PyObject *iter)
1360{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001361 PyObject *obj = NULL;
1362 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001363 int i, n;
1364
1365 const char mark_op = MARK;
1366 const char append_op = APPEND;
1367 const char appends_op = APPENDS;
1368
1369 assert(iter != NULL);
1370
1371 /* XXX: I think this function could be made faster by avoiding the
1372 iterator interface and fetching objects directly from list using
1373 PyList_GET_ITEM.
1374 */
1375
1376 if (self->proto == 0) {
1377 /* APPENDS isn't available; do one at a time. */
1378 for (;;) {
1379 obj = PyIter_Next(iter);
1380 if (obj == NULL) {
1381 if (PyErr_Occurred())
1382 return -1;
1383 break;
1384 }
1385 i = save(self, obj, 0);
1386 Py_DECREF(obj);
1387 if (i < 0)
1388 return -1;
1389 if (pickler_write(self, &append_op, 1) < 0)
1390 return -1;
1391 }
1392 return 0;
1393 }
1394
1395 /* proto > 0: write in batches of BATCHSIZE. */
1396 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001397 /* Get first item */
1398 firstitem = PyIter_Next(iter);
1399 if (firstitem == NULL) {
1400 if (PyErr_Occurred())
1401 goto error;
1402
1403 /* nothing more to add */
1404 break;
1405 }
1406
1407 /* Try to get a second item */
1408 obj = PyIter_Next(iter);
1409 if (obj == NULL) {
1410 if (PyErr_Occurred())
1411 goto error;
1412
1413 /* Only one item to write */
1414 if (save(self, firstitem, 0) < 0)
1415 goto error;
1416 if (pickler_write(self, &append_op, 1) < 0)
1417 goto error;
1418 Py_CLEAR(firstitem);
1419 break;
1420 }
1421
1422 /* More than one item to write */
1423
1424 /* Pump out MARK, items, APPENDS. */
1425 if (pickler_write(self, &mark_op, 1) < 0)
1426 goto error;
1427
1428 if (save(self, firstitem, 0) < 0)
1429 goto error;
1430 Py_CLEAR(firstitem);
1431 n = 1;
1432
1433 /* Fetch and save up to BATCHSIZE items */
1434 while (obj) {
1435 if (save(self, obj, 0) < 0)
1436 goto error;
1437 Py_CLEAR(obj);
1438 n += 1;
1439
1440 if (n == BATCHSIZE)
1441 break;
1442
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001443 obj = PyIter_Next(iter);
1444 if (obj == NULL) {
1445 if (PyErr_Occurred())
1446 goto error;
1447 break;
1448 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001449 }
1450
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001451 if (pickler_write(self, &appends_op, 1) < 0)
1452 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001453
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001454 } while (n == BATCHSIZE);
1455 return 0;
1456
1457 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001458 Py_XDECREF(firstitem);
1459 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460 return -1;
1461}
1462
1463static int
1464save_list(PicklerObject *self, PyObject *obj)
1465{
1466 PyObject *iter;
1467 char header[3];
1468 int len;
1469 int status = 0;
1470
1471 if (self->fast && !fast_save_enter(self, obj))
1472 goto error;
1473
1474 /* Create an empty list. */
1475 if (self->bin) {
1476 header[0] = EMPTY_LIST;
1477 len = 1;
1478 }
1479 else {
1480 header[0] = MARK;
1481 header[1] = LIST;
1482 len = 2;
1483 }
1484
1485 if (pickler_write(self, header, len) < 0)
1486 goto error;
1487
1488 /* Get list length, and bow out early if empty. */
1489 if ((len = PyList_Size(obj)) < 0)
1490 goto error;
1491
1492 if (memo_put(self, obj) < 0)
1493 goto error;
1494
1495 if (len != 0) {
1496 /* Save the list elements. */
1497 iter = PyObject_GetIter(obj);
1498 if (iter == NULL)
1499 goto error;
1500 status = batch_list(self, iter);
1501 Py_DECREF(iter);
1502 }
1503
1504 if (0) {
1505 error:
1506 status = -1;
1507 }
1508
1509 if (self->fast && !fast_save_leave(self, obj))
1510 status = -1;
1511
1512 return status;
1513}
1514
1515/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1516 * MARK key value ... key value SETITEMS
1517 * opcode sequences. Calling code should have arranged to first create an
1518 * empty dict, or dict-like object, for the SETITEMS to operate on.
1519 * Returns 0 on success, <0 on error.
1520 *
1521 * This is very much like batch_list(). The difference between saving
1522 * elements directly, and picking apart two-tuples, is so long-winded at
1523 * the C level, though, that attempts to combine these routines were too
1524 * ugly to bear.
1525 */
1526static int
1527batch_dict(PicklerObject *self, PyObject *iter)
1528{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001529 PyObject *obj = NULL;
1530 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001531 int i, n;
1532
1533 const char mark_op = MARK;
1534 const char setitem_op = SETITEM;
1535 const char setitems_op = SETITEMS;
1536
1537 assert(iter != NULL);
1538
1539 if (self->proto == 0) {
1540 /* SETITEMS isn't available; do one at a time. */
1541 for (;;) {
1542 obj = PyIter_Next(iter);
1543 if (obj == NULL) {
1544 if (PyErr_Occurred())
1545 return -1;
1546 break;
1547 }
1548 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1549 PyErr_SetString(PyExc_TypeError, "dict items "
1550 "iterator must return 2-tuples");
1551 return -1;
1552 }
1553 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1554 if (i >= 0)
1555 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1556 Py_DECREF(obj);
1557 if (i < 0)
1558 return -1;
1559 if (pickler_write(self, &setitem_op, 1) < 0)
1560 return -1;
1561 }
1562 return 0;
1563 }
1564
1565 /* proto > 0: write in batches of BATCHSIZE. */
1566 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001567 /* Get first item */
1568 firstitem = PyIter_Next(iter);
1569 if (firstitem == NULL) {
1570 if (PyErr_Occurred())
1571 goto error;
1572
1573 /* nothing more to add */
1574 break;
1575 }
1576 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1577 PyErr_SetString(PyExc_TypeError, "dict items "
1578 "iterator must return 2-tuples");
1579 goto error;
1580 }
1581
1582 /* Try to get a second item */
1583 obj = PyIter_Next(iter);
1584 if (obj == NULL) {
1585 if (PyErr_Occurred())
1586 goto error;
1587
1588 /* Only one item to write */
1589 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1590 goto error;
1591 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1592 goto error;
1593 if (pickler_write(self, &setitem_op, 1) < 0)
1594 goto error;
1595 Py_CLEAR(firstitem);
1596 break;
1597 }
1598
1599 /* More than one item to write */
1600
1601 /* Pump out MARK, items, SETITEMS. */
1602 if (pickler_write(self, &mark_op, 1) < 0)
1603 goto error;
1604
1605 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1606 goto error;
1607 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1608 goto error;
1609 Py_CLEAR(firstitem);
1610 n = 1;
1611
1612 /* Fetch and save up to BATCHSIZE items */
1613 while (obj) {
1614 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1615 PyErr_SetString(PyExc_TypeError, "dict items "
1616 "iterator must return 2-tuples");
1617 goto error;
1618 }
1619 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1620 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1621 goto error;
1622 Py_CLEAR(obj);
1623 n += 1;
1624
1625 if (n == BATCHSIZE)
1626 break;
1627
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001628 obj = PyIter_Next(iter);
1629 if (obj == NULL) {
1630 if (PyErr_Occurred())
1631 goto error;
1632 break;
1633 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001634 }
1635
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001636 if (pickler_write(self, &setitems_op, 1) < 0)
1637 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001638
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001639 } while (n == BATCHSIZE);
1640 return 0;
1641
1642 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001643 Py_XDECREF(firstitem);
1644 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645 return -1;
1646}
1647
1648static int
1649save_dict(PicklerObject *self, PyObject *obj)
1650{
1651 PyObject *items, *iter;
1652 char header[3];
1653 int len;
1654 int status = 0;
1655
1656 if (self->fast && !fast_save_enter(self, obj))
1657 goto error;
1658
1659 /* Create an empty dict. */
1660 if (self->bin) {
1661 header[0] = EMPTY_DICT;
1662 len = 1;
1663 }
1664 else {
1665 header[0] = MARK;
1666 header[1] = DICT;
1667 len = 2;
1668 }
1669
1670 if (pickler_write(self, header, len) < 0)
1671 goto error;
1672
1673 /* Get dict size, and bow out early if empty. */
1674 if ((len = PyDict_Size(obj)) < 0)
1675 goto error;
1676
1677 if (memo_put(self, obj) < 0)
1678 goto error;
1679
1680 if (len != 0) {
1681 /* Save the dict items. */
1682 items = PyObject_CallMethod(obj, "items", "()");
1683 if (items == NULL)
1684 goto error;
1685 iter = PyObject_GetIter(items);
1686 Py_DECREF(items);
1687 if (iter == NULL)
1688 goto error;
1689 status = batch_dict(self, iter);
1690 Py_DECREF(iter);
1691 }
1692
1693 if (0) {
1694 error:
1695 status = -1;
1696 }
1697
1698 if (self->fast && !fast_save_leave(self, obj))
1699 status = -1;
1700
1701 return status;
1702}
1703
1704static int
1705save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1706{
1707 static PyObject *name_str = NULL;
1708 PyObject *global_name = NULL;
1709 PyObject *module_name = NULL;
1710 PyObject *module = NULL;
1711 PyObject *cls;
1712 int status = 0;
1713
1714 const char global_op = GLOBAL;
1715
1716 if (name_str == NULL) {
1717 name_str = PyUnicode_InternFromString("__name__");
1718 if (name_str == NULL)
1719 goto error;
1720 }
1721
1722 if (name) {
1723 global_name = name;
1724 Py_INCREF(global_name);
1725 }
1726 else {
1727 global_name = PyObject_GetAttr(obj, name_str);
1728 if (global_name == NULL)
1729 goto error;
1730 }
1731
1732 module_name = whichmodule(obj, global_name);
1733 if (module_name == NULL)
1734 goto error;
1735
1736 /* XXX: Change to use the import C API directly with level=0 to disallow
1737 relative imports.
1738
1739 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1740 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1741 custom import functions (IMHO, this would be a nice security
1742 feature). The import C API would need to be extended to support the
1743 extra parameters of __import__ to fix that. */
1744 module = PyImport_Import(module_name);
1745 if (module == NULL) {
1746 PyErr_Format(PicklingError,
1747 "Can't pickle %R: import of module %R failed",
1748 obj, module_name);
1749 goto error;
1750 }
1751 cls = PyObject_GetAttr(module, global_name);
1752 if (cls == NULL) {
1753 PyErr_Format(PicklingError,
1754 "Can't pickle %R: attribute lookup %S.%S failed",
1755 obj, module_name, global_name);
1756 goto error;
1757 }
1758 if (cls != obj) {
1759 Py_DECREF(cls);
1760 PyErr_Format(PicklingError,
1761 "Can't pickle %R: it's not the same object as %S.%S",
1762 obj, module_name, global_name);
1763 goto error;
1764 }
1765 Py_DECREF(cls);
1766
1767 if (self->proto >= 2) {
1768 /* See whether this is in the extension registry, and if
1769 * so generate an EXT opcode.
1770 */
1771 PyObject *code_obj; /* extension code as Python object */
1772 long code; /* extension code as C value */
1773 char pdata[5];
1774 int n;
1775
1776 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1777 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1778 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1779 /* The object is not registered in the extension registry.
1780 This is the most likely code path. */
1781 if (code_obj == NULL)
1782 goto gen_global;
1783
1784 /* XXX: pickle.py doesn't check neither the type, nor the range
1785 of the value returned by the extension_registry. It should for
1786 consistency. */
1787
1788 /* Verify code_obj has the right type and value. */
1789 if (!PyLong_Check(code_obj)) {
1790 PyErr_Format(PicklingError,
1791 "Can't pickle %R: extension code %R isn't an integer",
1792 obj, code_obj);
1793 goto error;
1794 }
1795 code = PyLong_AS_LONG(code_obj);
1796 if (code <= 0 || code > 0x7fffffffL) {
1797 PyErr_Format(PicklingError,
1798 "Can't pickle %R: extension code %ld is out of range",
1799 obj, code);
1800 goto error;
1801 }
1802
1803 /* Generate an EXT opcode. */
1804 if (code <= 0xff) {
1805 pdata[0] = EXT1;
1806 pdata[1] = (unsigned char)code;
1807 n = 2;
1808 }
1809 else if (code <= 0xffff) {
1810 pdata[0] = EXT2;
1811 pdata[1] = (unsigned char)(code & 0xff);
1812 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1813 n = 3;
1814 }
1815 else {
1816 pdata[0] = EXT4;
1817 pdata[1] = (unsigned char)(code & 0xff);
1818 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1819 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1820 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1821 n = 5;
1822 }
1823
1824 if (pickler_write(self, pdata, n) < 0)
1825 goto error;
1826 }
1827 else {
1828 /* Generate a normal global opcode if we are using a pickle
1829 protocol <= 2, or if the object is not registered in the
1830 extension registry. */
1831 PyObject *encoded;
1832 PyObject *(*unicode_encoder)(PyObject *);
1833
1834 gen_global:
1835 if (pickler_write(self, &global_op, 1) < 0)
1836 goto error;
1837
1838 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1839 the module name and the global name using UTF-8. We do so only when
1840 we are using the pickle protocol newer than version 3. This is to
1841 ensure compatibility with older Unpickler running on Python 2.x. */
1842 if (self->proto >= 3) {
1843 unicode_encoder = PyUnicode_AsUTF8String;
1844 }
1845 else {
1846 unicode_encoder = PyUnicode_AsASCIIString;
1847 }
1848
1849 /* Save the name of the module. */
1850 encoded = unicode_encoder(module_name);
1851 if (encoded == NULL) {
1852 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1853 PyErr_Format(PicklingError,
1854 "can't pickle module identifier '%S' using "
1855 "pickle protocol %i", module_name, self->proto);
1856 goto error;
1857 }
1858 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1859 PyBytes_GET_SIZE(encoded)) < 0) {
1860 Py_DECREF(encoded);
1861 goto error;
1862 }
1863 Py_DECREF(encoded);
1864 if(pickler_write(self, "\n", 1) < 0)
1865 goto error;
1866
1867 /* Save the name of the module. */
1868 encoded = unicode_encoder(global_name);
1869 if (encoded == NULL) {
1870 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1871 PyErr_Format(PicklingError,
1872 "can't pickle global identifier '%S' using "
1873 "pickle protocol %i", global_name, self->proto);
1874 goto error;
1875 }
1876 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1877 PyBytes_GET_SIZE(encoded)) < 0) {
1878 Py_DECREF(encoded);
1879 goto error;
1880 }
1881 Py_DECREF(encoded);
1882 if(pickler_write(self, "\n", 1) < 0)
1883 goto error;
1884
1885 /* Memoize the object. */
1886 if (memo_put(self, obj) < 0)
1887 goto error;
1888 }
1889
1890 if (0) {
1891 error:
1892 status = -1;
1893 }
1894 Py_XDECREF(module_name);
1895 Py_XDECREF(global_name);
1896 Py_XDECREF(module);
1897
1898 return status;
1899}
1900
1901static int
1902save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1903{
1904 PyObject *pid = NULL;
1905 int status = 0;
1906
1907 const char persid_op = PERSID;
1908 const char binpersid_op = BINPERSID;
1909
1910 Py_INCREF(obj);
1911 pid = pickler_call(self, func, obj);
1912 if (pid == NULL)
1913 return -1;
1914
1915 if (pid != Py_None) {
1916 if (self->bin) {
1917 if (save(self, pid, 1) < 0 ||
1918 pickler_write(self, &binpersid_op, 1) < 0)
1919 goto error;
1920 }
1921 else {
1922 PyObject *pid_str = NULL;
1923 char *pid_ascii_bytes;
1924 Py_ssize_t size;
1925
1926 pid_str = PyObject_Str(pid);
1927 if (pid_str == NULL)
1928 goto error;
1929
1930 /* XXX: Should it check whether the persistent id only contains
1931 ASCII characters? And what if the pid contains embedded
1932 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001933 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001934 Py_DECREF(pid_str);
1935 if (pid_ascii_bytes == NULL)
1936 goto error;
1937
1938 if (pickler_write(self, &persid_op, 1) < 0 ||
1939 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1940 pickler_write(self, "\n", 1) < 0)
1941 goto error;
1942 }
1943 status = 1;
1944 }
1945
1946 if (0) {
1947 error:
1948 status = -1;
1949 }
1950 Py_XDECREF(pid);
1951
1952 return status;
1953}
1954
1955/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1956 * appropriate __reduce__ method for obj.
1957 */
1958static int
1959save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1960{
1961 PyObject *callable;
1962 PyObject *argtup;
1963 PyObject *state = NULL;
1964 PyObject *listitems = NULL;
1965 PyObject *dictitems = NULL;
1966
1967 int use_newobj = self->proto >= 2;
1968
1969 const char reduce_op = REDUCE;
1970 const char build_op = BUILD;
1971 const char newobj_op = NEWOBJ;
1972
1973 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1974 &callable, &argtup, &state, &listitems, &dictitems))
1975 return -1;
1976
1977 if (!PyCallable_Check(callable)) {
1978 PyErr_SetString(PicklingError,
1979 "first argument of save_reduce() must be callable");
1980 return -1;
1981 }
1982 if (!PyTuple_Check(argtup)) {
1983 PyErr_SetString(PicklingError,
1984 "second argument of save_reduce() must be a tuple");
1985 return -1;
1986 }
1987
1988 if (state == Py_None)
1989 state = NULL;
1990 if (listitems == Py_None)
1991 listitems = NULL;
1992 if (dictitems == Py_None)
1993 dictitems = NULL;
1994
1995 /* Protocol 2 special case: if callable's name is __newobj__, use
1996 NEWOBJ. */
1997 if (use_newobj) {
1998 static PyObject *newobj_str = NULL;
1999 PyObject *name_str;
2000
2001 if (newobj_str == NULL) {
2002 newobj_str = PyUnicode_InternFromString("__newobj__");
2003 }
2004
2005 name_str = PyObject_GetAttrString(callable, "__name__");
2006 if (name_str == NULL) {
2007 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2008 PyErr_Clear();
2009 else
2010 return -1;
2011 use_newobj = 0;
2012 }
2013 else {
2014 use_newobj = PyUnicode_Check(name_str) &&
2015 PyUnicode_Compare(name_str, newobj_str) == 0;
2016 Py_DECREF(name_str);
2017 }
2018 }
2019 if (use_newobj) {
2020 PyObject *cls;
2021 PyObject *newargtup;
2022 PyObject *obj_class;
2023 int p;
2024
2025 /* Sanity checks. */
2026 if (Py_SIZE(argtup) < 1) {
2027 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2028 return -1;
2029 }
2030
2031 cls = PyTuple_GET_ITEM(argtup, 0);
2032 if (!PyObject_HasAttrString(cls, "__new__")) {
2033 PyErr_SetString(PicklingError, "args[0] from "
2034 "__newobj__ args has no __new__");
2035 return -1;
2036 }
2037
2038 if (obj != NULL) {
2039 obj_class = PyObject_GetAttrString(obj, "__class__");
2040 if (obj_class == NULL) {
2041 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2042 PyErr_Clear();
2043 else
2044 return -1;
2045 }
2046 p = obj_class != cls; /* true iff a problem */
2047 Py_DECREF(obj_class);
2048 if (p) {
2049 PyErr_SetString(PicklingError, "args[0] from "
2050 "__newobj__ args has the wrong class");
2051 return -1;
2052 }
2053 }
2054 /* XXX: These calls save() are prone to infinite recursion. Imagine
2055 what happen if the value returned by the __reduce__() method of
2056 some extension type contains another object of the same type. Ouch!
2057
2058 Here is a quick example, that I ran into, to illustrate what I
2059 mean:
2060
2061 >>> import pickle, copyreg
2062 >>> copyreg.dispatch_table.pop(complex)
2063 >>> pickle.dumps(1+2j)
2064 Traceback (most recent call last):
2065 ...
2066 RuntimeError: maximum recursion depth exceeded
2067
2068 Removing the complex class from copyreg.dispatch_table made the
2069 __reduce_ex__() method emit another complex object:
2070
2071 >>> (1+1j).__reduce_ex__(2)
2072 (<function __newobj__ at 0xb7b71c3c>,
2073 (<class 'complex'>, (1+1j)), None, None, None)
2074
2075 Thus when save() was called on newargstup (the 2nd item) recursion
2076 ensued. Of course, the bug was in the complex class which had a
2077 broken __getnewargs__() that emitted another complex object. But,
2078 the point, here, is it is quite easy to end up with a broken reduce
2079 function. */
2080
2081 /* Save the class and its __new__ arguments. */
2082 if (save(self, cls, 0) < 0)
2083 return -1;
2084
2085 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2086 if (newargtup == NULL)
2087 return -1;
2088
2089 p = save(self, newargtup, 0);
2090 Py_DECREF(newargtup);
2091 if (p < 0)
2092 return -1;
2093
2094 /* Add NEWOBJ opcode. */
2095 if (pickler_write(self, &newobj_op, 1) < 0)
2096 return -1;
2097 }
2098 else { /* Not using NEWOBJ. */
2099 if (save(self, callable, 0) < 0 ||
2100 save(self, argtup, 0) < 0 ||
2101 pickler_write(self, &reduce_op, 1) < 0)
2102 return -1;
2103 }
2104
2105 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2106 the caller do not want to memoize the object. Not particularly useful,
2107 but that is to mimic the behavior save_reduce() in pickle.py when
2108 obj is None. */
2109 if (obj && memo_put(self, obj) < 0)
2110 return -1;
2111
2112 if (listitems && batch_list(self, listitems) < 0)
2113 return -1;
2114
2115 if (dictitems && batch_dict(self, dictitems) < 0)
2116 return -1;
2117
2118 if (state) {
2119 if (save(self, state, 0) < 0 ||
2120 pickler_write(self, &build_op, 1) < 0)
2121 return -1;
2122 }
2123
2124 return 0;
2125}
2126
2127static int
2128save(PicklerObject *self, PyObject *obj, int pers_save)
2129{
2130 PyTypeObject *type;
2131 PyObject *reduce_func = NULL;
2132 PyObject *reduce_value = NULL;
2133 PyObject *memo_key = NULL;
2134 int status = 0;
2135
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002136 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2137 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002138
2139 /* The extra pers_save argument is necessary to avoid calling save_pers()
2140 on its returned object. */
2141 if (!pers_save && self->pers_func) {
2142 /* save_pers() returns:
2143 -1 to signal an error;
2144 0 if it did nothing successfully;
2145 1 if a persistent id was saved.
2146 */
2147 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2148 goto done;
2149 }
2150
2151 type = Py_TYPE(obj);
2152
2153 /* XXX: The old cPickle had an optimization that used switch-case
2154 statement dispatching on the first letter of the type name. It was
2155 probably not a bad idea after all. If benchmarks shows that particular
2156 optimization had some real benefits, it would be nice to add it
2157 back. */
2158
2159 /* Atom types; these aren't memoized, so don't check the memo. */
2160
2161 if (obj == Py_None) {
2162 status = save_none(self, obj);
2163 goto done;
2164 }
2165 else if (obj == Py_False || obj == Py_True) {
2166 status = save_bool(self, obj);
2167 goto done;
2168 }
2169 else if (type == &PyLong_Type) {
2170 status = save_long(self, obj);
2171 goto done;
2172 }
2173 else if (type == &PyFloat_Type) {
2174 status = save_float(self, obj);
2175 goto done;
2176 }
2177
2178 /* Check the memo to see if it has the object. If so, generate
2179 a GET (or BINGET) opcode, instead of pickling the object
2180 once again. */
2181 memo_key = PyLong_FromVoidPtr(obj);
2182 if (memo_key == NULL)
2183 goto error;
2184 if (PyDict_GetItem(self->memo, memo_key)) {
2185 if (memo_get(self, memo_key) < 0)
2186 goto error;
2187 goto done;
2188 }
2189
2190 if (type == &PyBytes_Type) {
2191 status = save_bytes(self, obj);
2192 goto done;
2193 }
2194 else if (type == &PyUnicode_Type) {
2195 status = save_unicode(self, obj);
2196 goto done;
2197 }
2198 else if (type == &PyDict_Type) {
2199 status = save_dict(self, obj);
2200 goto done;
2201 }
2202 else if (type == &PyList_Type) {
2203 status = save_list(self, obj);
2204 goto done;
2205 }
2206 else if (type == &PyTuple_Type) {
2207 status = save_tuple(self, obj);
2208 goto done;
2209 }
2210 else if (type == &PyType_Type) {
2211 status = save_global(self, obj, NULL);
2212 goto done;
2213 }
2214 else if (type == &PyFunction_Type) {
2215 status = save_global(self, obj, NULL);
2216 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2217 /* fall back to reduce */
2218 PyErr_Clear();
2219 }
2220 else {
2221 goto done;
2222 }
2223 }
2224 else if (type == &PyCFunction_Type) {
2225 status = save_global(self, obj, NULL);
2226 goto done;
2227 }
2228 else if (PyType_IsSubtype(type, &PyType_Type)) {
2229 status = save_global(self, obj, NULL);
2230 goto done;
2231 }
2232
2233 /* XXX: This part needs some unit tests. */
2234
2235 /* Get a reduction callable, and call it. This may come from
2236 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2237 * or the object's __reduce__ method.
2238 */
2239 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2240 if (reduce_func != NULL) {
2241 /* Here, the reference count of the reduce_func object returned by
2242 PyDict_GetItem needs to be increased to be consistent with the one
2243 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2244 reduce_func at the end of the save() routine.
2245 */
2246 Py_INCREF(reduce_func);
2247 Py_INCREF(obj);
2248 reduce_value = pickler_call(self, reduce_func, obj);
2249 }
2250 else {
2251 static PyObject *reduce_str = NULL;
2252 static PyObject *reduce_ex_str = NULL;
2253
2254 /* Cache the name of the reduce methods. */
2255 if (reduce_str == NULL) {
2256 reduce_str = PyUnicode_InternFromString("__reduce__");
2257 if (reduce_str == NULL)
2258 goto error;
2259 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2260 if (reduce_ex_str == NULL)
2261 goto error;
2262 }
2263
2264 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2265 automatically defined as __reduce__. While this is convenient, this
2266 make it impossible to know which method was actually called. Of
2267 course, this is not a big deal. But still, it would be nice to let
2268 the user know which method was called when something go
2269 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2270 don't actually have to check for a __reduce__ method. */
2271
2272 /* Check for a __reduce_ex__ method. */
2273 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2274 if (reduce_func != NULL) {
2275 PyObject *proto;
2276 proto = PyLong_FromLong(self->proto);
2277 if (proto != NULL) {
2278 reduce_value = pickler_call(self, reduce_func, proto);
2279 }
2280 }
2281 else {
2282 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2283 PyErr_Clear();
2284 else
2285 goto error;
2286 /* Check for a __reduce__ method. */
2287 reduce_func = PyObject_GetAttr(obj, reduce_str);
2288 if (reduce_func != NULL) {
2289 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2290 }
2291 else {
2292 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2293 type->tp_name, obj);
2294 goto error;
2295 }
2296 }
2297 }
2298
2299 if (reduce_value == NULL)
2300 goto error;
2301
2302 if (PyUnicode_Check(reduce_value)) {
2303 status = save_global(self, obj, reduce_value);
2304 goto done;
2305 }
2306
2307 if (!PyTuple_Check(reduce_value)) {
2308 PyErr_SetString(PicklingError,
2309 "__reduce__ must return a string or tuple");
2310 goto error;
2311 }
2312 if (Py_SIZE(reduce_value) < 2 || Py_SIZE(reduce_value) > 5) {
2313 PyErr_SetString(PicklingError, "tuple returned by __reduce__ "
2314 "must contain 2 through 5 elements");
2315 goto error;
2316 }
2317 if (!PyTuple_Check(PyTuple_GET_ITEM(reduce_value, 1))) {
2318 PyErr_SetString(PicklingError, "second item of the tuple "
2319 "returned by __reduce__ must be a tuple");
2320 goto error;
2321 }
2322
2323 status = save_reduce(self, reduce_value, obj);
2324
2325 if (0) {
2326 error:
2327 status = -1;
2328 }
2329 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002330 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002331 Py_XDECREF(memo_key);
2332 Py_XDECREF(reduce_func);
2333 Py_XDECREF(reduce_value);
2334
2335 return status;
2336}
2337
2338static int
2339dump(PicklerObject *self, PyObject *obj)
2340{
2341 const char stop_op = STOP;
2342
2343 if (self->proto >= 2) {
2344 char header[2];
2345
2346 header[0] = PROTO;
2347 assert(self->proto >= 0 && self->proto < 256);
2348 header[1] = (unsigned char)self->proto;
2349 if (pickler_write(self, header, 2) < 0)
2350 return -1;
2351 }
2352
2353 if (save(self, obj, 0) < 0 ||
2354 pickler_write(self, &stop_op, 1) < 0 ||
2355 pickler_write(self, NULL, 0) < 0)
2356 return -1;
2357
2358 return 0;
2359}
2360
2361PyDoc_STRVAR(Pickler_clear_memo_doc,
2362"clear_memo() -> None. Clears the pickler's \"memo\"."
2363"\n"
2364"The memo is the data structure that remembers which objects the\n"
2365"pickler has already seen, so that shared or recursive objects are\n"
2366"pickled by reference and not by value. This method is useful when\n"
2367"re-using picklers.");
2368
2369static PyObject *
2370Pickler_clear_memo(PicklerObject *self)
2371{
2372 if (self->memo)
2373 PyDict_Clear(self->memo);
2374
2375 Py_RETURN_NONE;
2376}
2377
2378PyDoc_STRVAR(Pickler_dump_doc,
2379"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2380
2381static PyObject *
2382Pickler_dump(PicklerObject *self, PyObject *args)
2383{
2384 PyObject *obj;
2385
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002386 /* Check whether the Pickler was initialized correctly (issue3664).
2387 Developers often forget to call __init__() in their subclasses, which
2388 would trigger a segfault without this check. */
2389 if (self->write == NULL) {
2390 PyErr_Format(PicklingError,
2391 "Pickler.__init__() was not called by %s.__init__()",
2392 Py_TYPE(self)->tp_name);
2393 return NULL;
2394 }
2395
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002396 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2397 return NULL;
2398
2399 if (dump(self, obj) < 0)
2400 return NULL;
2401
2402 Py_RETURN_NONE;
2403}
2404
2405static struct PyMethodDef Pickler_methods[] = {
2406 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2407 Pickler_dump_doc},
2408 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2409 Pickler_clear_memo_doc},
2410 {NULL, NULL} /* sentinel */
2411};
2412
2413static void
2414Pickler_dealloc(PicklerObject *self)
2415{
2416 PyObject_GC_UnTrack(self);
2417
2418 Py_XDECREF(self->write);
2419 Py_XDECREF(self->memo);
2420 Py_XDECREF(self->pers_func);
2421 Py_XDECREF(self->arg);
2422 Py_XDECREF(self->fast_memo);
2423
2424 PyMem_Free(self->write_buf);
2425
2426 Py_TYPE(self)->tp_free((PyObject *)self);
2427}
2428
2429static int
2430Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2431{
2432 Py_VISIT(self->write);
2433 Py_VISIT(self->memo);
2434 Py_VISIT(self->pers_func);
2435 Py_VISIT(self->arg);
2436 Py_VISIT(self->fast_memo);
2437 return 0;
2438}
2439
2440static int
2441Pickler_clear(PicklerObject *self)
2442{
2443 Py_CLEAR(self->write);
2444 Py_CLEAR(self->memo);
2445 Py_CLEAR(self->pers_func);
2446 Py_CLEAR(self->arg);
2447 Py_CLEAR(self->fast_memo);
2448
2449 PyMem_Free(self->write_buf);
2450 self->write_buf = NULL;
2451
2452 return 0;
2453}
2454
2455PyDoc_STRVAR(Pickler_doc,
2456"Pickler(file, protocol=None)"
2457"\n"
2458"This takes a binary file for writing a pickle data stream.\n"
2459"\n"
2460"The optional protocol argument tells the pickler to use the\n"
2461"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2462"protocol is 3; a backward-incompatible protocol designed for\n"
2463"Python 3.0.\n"
2464"\n"
2465"Specifying a negative protocol version selects the highest\n"
2466"protocol version supported. The higher the protocol used, the\n"
2467"more recent the version of Python needed to read the pickle\n"
2468"produced.\n"
2469"\n"
2470"The file argument must have a write() method that accepts a single\n"
2471"bytes argument. It can thus be a file object opened for binary\n"
2472"writing, a io.BytesIO instance, or any other custom object that\n"
2473"meets this interface.\n");
2474
2475static int
2476Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2477{
2478 static char *kwlist[] = {"file", "protocol", 0};
2479 PyObject *file;
2480 PyObject *proto_obj = NULL;
2481 long proto = 0;
2482
2483 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2484 kwlist, &file, &proto_obj))
2485 return -1;
2486
2487 /* In case of multiple __init__() calls, clear previous content. */
2488 if (self->write != NULL)
2489 (void)Pickler_clear(self);
2490
2491 if (proto_obj == NULL || proto_obj == Py_None)
2492 proto = DEFAULT_PROTOCOL;
2493 else
2494 proto = PyLong_AsLong(proto_obj);
2495
2496 if (proto < 0)
2497 proto = HIGHEST_PROTOCOL;
2498 if (proto > HIGHEST_PROTOCOL) {
2499 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2500 HIGHEST_PROTOCOL);
2501 return -1;
2502 }
2503
2504 self->proto = proto;
2505 self->bin = proto > 0;
2506 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002507 self->fast = 0;
2508 self->fast_nesting = 0;
2509 self->fast_memo = NULL;
2510
2511 if (!PyObject_HasAttrString(file, "write")) {
2512 PyErr_SetString(PyExc_TypeError,
2513 "file must have a 'write' attribute");
2514 return -1;
2515 }
2516 self->write = PyObject_GetAttrString(file, "write");
2517 if (self->write == NULL)
2518 return -1;
2519 self->buf_size = 0;
2520 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2521 if (self->write_buf == NULL) {
2522 PyErr_NoMemory();
2523 return -1;
2524 }
2525 self->pers_func = NULL;
2526 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2527 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2528 "persistent_id");
2529 if (self->pers_func == NULL)
2530 return -1;
2531 }
2532 self->memo = PyDict_New();
2533 if (self->memo == NULL)
2534 return -1;
2535
2536 return 0;
2537}
2538
2539static PyObject *
2540Pickler_get_memo(PicklerObject *self)
2541{
2542 if (self->memo == NULL)
2543 PyErr_SetString(PyExc_AttributeError, "memo");
2544 else
2545 Py_INCREF(self->memo);
2546 return self->memo;
2547}
2548
2549static int
2550Pickler_set_memo(PicklerObject *self, PyObject *value)
2551{
2552 PyObject *tmp;
2553
2554 if (value == NULL) {
2555 PyErr_SetString(PyExc_TypeError,
2556 "attribute deletion is not supported");
2557 return -1;
2558 }
2559 if (!PyDict_Check(value)) {
2560 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2561 return -1;
2562 }
2563
2564 tmp = self->memo;
2565 Py_INCREF(value);
2566 self->memo = value;
2567 Py_XDECREF(tmp);
2568
2569 return 0;
2570}
2571
2572static PyObject *
2573Pickler_get_persid(PicklerObject *self)
2574{
2575 if (self->pers_func == NULL)
2576 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2577 else
2578 Py_INCREF(self->pers_func);
2579 return self->pers_func;
2580}
2581
2582static int
2583Pickler_set_persid(PicklerObject *self, PyObject *value)
2584{
2585 PyObject *tmp;
2586
2587 if (value == NULL) {
2588 PyErr_SetString(PyExc_TypeError,
2589 "attribute deletion is not supported");
2590 return -1;
2591 }
2592 if (!PyCallable_Check(value)) {
2593 PyErr_SetString(PyExc_TypeError,
2594 "persistent_id must be a callable taking one argument");
2595 return -1;
2596 }
2597
2598 tmp = self->pers_func;
2599 Py_INCREF(value);
2600 self->pers_func = value;
2601 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2602
2603 return 0;
2604}
2605
2606static PyMemberDef Pickler_members[] = {
2607 {"bin", T_INT, offsetof(PicklerObject, bin)},
2608 {"fast", T_INT, offsetof(PicklerObject, fast)},
2609 {NULL}
2610};
2611
2612static PyGetSetDef Pickler_getsets[] = {
2613 {"memo", (getter)Pickler_get_memo,
2614 (setter)Pickler_set_memo},
2615 {"persistent_id", (getter)Pickler_get_persid,
2616 (setter)Pickler_set_persid},
2617 {NULL}
2618};
2619
2620static PyTypeObject Pickler_Type = {
2621 PyVarObject_HEAD_INIT(NULL, 0)
2622 "_pickle.Pickler" , /*tp_name*/
2623 sizeof(PicklerObject), /*tp_basicsize*/
2624 0, /*tp_itemsize*/
2625 (destructor)Pickler_dealloc, /*tp_dealloc*/
2626 0, /*tp_print*/
2627 0, /*tp_getattr*/
2628 0, /*tp_setattr*/
2629 0, /*tp_compare*/
2630 0, /*tp_repr*/
2631 0, /*tp_as_number*/
2632 0, /*tp_as_sequence*/
2633 0, /*tp_as_mapping*/
2634 0, /*tp_hash*/
2635 0, /*tp_call*/
2636 0, /*tp_str*/
2637 0, /*tp_getattro*/
2638 0, /*tp_setattro*/
2639 0, /*tp_as_buffer*/
2640 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2641 Pickler_doc, /*tp_doc*/
2642 (traverseproc)Pickler_traverse, /*tp_traverse*/
2643 (inquiry)Pickler_clear, /*tp_clear*/
2644 0, /*tp_richcompare*/
2645 0, /*tp_weaklistoffset*/
2646 0, /*tp_iter*/
2647 0, /*tp_iternext*/
2648 Pickler_methods, /*tp_methods*/
2649 Pickler_members, /*tp_members*/
2650 Pickler_getsets, /*tp_getset*/
2651 0, /*tp_base*/
2652 0, /*tp_dict*/
2653 0, /*tp_descr_get*/
2654 0, /*tp_descr_set*/
2655 0, /*tp_dictoffset*/
2656 (initproc)Pickler_init, /*tp_init*/
2657 PyType_GenericAlloc, /*tp_alloc*/
2658 PyType_GenericNew, /*tp_new*/
2659 PyObject_GC_Del, /*tp_free*/
2660 0, /*tp_is_gc*/
2661};
2662
2663/* Temporary helper for calling self.find_class().
2664
2665 XXX: It would be nice to able to avoid Python function call overhead, by
2666 using directly the C version of find_class(), when find_class() is not
2667 overridden by a subclass. Although, this could become rather hackish. A
2668 simpler optimization would be to call the C function when self is not a
2669 subclass instance. */
2670static PyObject *
2671find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2672{
2673 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2674 module_name, global_name);
2675}
2676
2677static int
2678marker(UnpicklerObject *self)
2679{
2680 if (self->num_marks < 1) {
2681 PyErr_SetString(UnpicklingError, "could not find MARK");
2682 return -1;
2683 }
2684
2685 return self->marks[--self->num_marks];
2686}
2687
2688static int
2689load_none(UnpicklerObject *self)
2690{
2691 PDATA_APPEND(self->stack, Py_None, -1);
2692 return 0;
2693}
2694
2695static int
2696bad_readline(void)
2697{
2698 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2699 return -1;
2700}
2701
2702static int
2703load_int(UnpicklerObject *self)
2704{
2705 PyObject *value;
2706 char *endptr, *s;
2707 Py_ssize_t len;
2708 long x;
2709
2710 if ((len = unpickler_readline(self, &s)) < 0)
2711 return -1;
2712 if (len < 2)
2713 return bad_readline();
2714
2715 errno = 0;
2716 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2717 x = strtol(s, &endptr, 0);
2718
2719 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2720 /* Hm, maybe we've got something long. Let's try reading
2721 * it as a Python long object. */
2722 errno = 0;
2723 /* XXX: Same thing about the base here. */
2724 value = PyLong_FromString(s, NULL, 0);
2725 if (value == NULL) {
2726 PyErr_SetString(PyExc_ValueError,
2727 "could not convert string to int");
2728 return -1;
2729 }
2730 }
2731 else {
2732 if (len == 3 && (x == 0 || x == 1)) {
2733 if ((value = PyBool_FromLong(x)) == NULL)
2734 return -1;
2735 }
2736 else {
2737 if ((value = PyLong_FromLong(x)) == NULL)
2738 return -1;
2739 }
2740 }
2741
2742 PDATA_PUSH(self->stack, value, -1);
2743 return 0;
2744}
2745
2746static int
2747load_bool(UnpicklerObject *self, PyObject *boolean)
2748{
2749 assert(boolean == Py_True || boolean == Py_False);
2750 PDATA_APPEND(self->stack, boolean, -1);
2751 return 0;
2752}
2753
/* bytes holds size bytes of a little-endian integer.  Return its value as
 * a C long.  Obscure: when size is 1 or 2 the value is unsigned, but when
 * size is 4 it is a signed (two's complement) 32-bit value.  This is an
 * historical source of cross-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (undefined behavior when long
 * is 32 bits wide).  The sign of a 4-byte value is then applied explicitly,
 * which gives the same result whatever the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed.  For a set sign bit the value is x - 2**32, computed here as
     * -(2**32 - 1 - x) - 1 so that every intermediate fits in a long.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(~x & 0xffffffffUL) - 1;
    }

    return (long)x;
}
2780
2781static int
2782load_binintx(UnpicklerObject *self, char *s, int size)
2783{
2784 PyObject *value;
2785 long x;
2786
2787 x = calc_binint(s, size);
2788
2789 if ((value = PyLong_FromLong(x)) == NULL)
2790 return -1;
2791
2792 PDATA_PUSH(self->stack, value, -1);
2793 return 0;
2794}
2795
2796static int
2797load_binint(UnpicklerObject *self)
2798{
2799 char *s;
2800
2801 if (unpickler_read(self, &s, 4) < 0)
2802 return -1;
2803
2804 return load_binintx(self, s, 4);
2805}
2806
2807static int
2808load_binint1(UnpicklerObject *self)
2809{
2810 char *s;
2811
2812 if (unpickler_read(self, &s, 1) < 0)
2813 return -1;
2814
2815 return load_binintx(self, s, 1);
2816}
2817
2818static int
2819load_binint2(UnpicklerObject *self)
2820{
2821 char *s;
2822
2823 if (unpickler_read(self, &s, 2) < 0)
2824 return -1;
2825
2826 return load_binintx(self, s, 2);
2827}
2828
2829static int
2830load_long(UnpicklerObject *self)
2831{
2832 PyObject *value;
2833 char *s;
2834 Py_ssize_t len;
2835
2836 if ((len = unpickler_readline(self, &s)) < 0)
2837 return -1;
2838 if (len < 2)
2839 return bad_readline();
2840
2841 /* XXX: Should the base argument explicitly set to 10? */
2842 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2843 return -1;
2844
2845 PDATA_PUSH(self->stack, value, -1);
2846 return 0;
2847}
2848
2849/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2850 * data following.
2851 */
2852static int
2853load_counted_long(UnpicklerObject *self, int size)
2854{
2855 PyObject *value;
2856 char *nbytes;
2857 char *pdata;
2858
2859 assert(size == 1 || size == 4);
2860 if (unpickler_read(self, &nbytes, size) < 0)
2861 return -1;
2862
2863 size = calc_binint(nbytes, size);
2864 if (size < 0) {
2865 /* Corrupt or hostile pickle -- we never write one like this */
2866 PyErr_SetString(UnpicklingError,
2867 "LONG pickle has negative byte count");
2868 return -1;
2869 }
2870
2871 if (size == 0)
2872 value = PyLong_FromLong(0L);
2873 else {
2874 /* Read the raw little-endian bytes and convert. */
2875 if (unpickler_read(self, &pdata, size) < 0)
2876 return -1;
2877 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2878 1 /* little endian */ , 1 /* signed */ );
2879 }
2880 if (value == NULL)
2881 return -1;
2882 PDATA_PUSH(self->stack, value, -1);
2883 return 0;
2884}
2885
2886static int
2887load_float(UnpicklerObject *self)
2888{
2889 PyObject *value;
2890 char *endptr, *s;
2891 Py_ssize_t len;
2892 double d;
2893
2894 if ((len = unpickler_readline(self, &s)) < 0)
2895 return -1;
2896 if (len < 2)
2897 return bad_readline();
2898
2899 errno = 0;
2900 d = PyOS_ascii_strtod(s, &endptr);
2901
2902 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2903 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2904 return -1;
2905 }
2906
2907 if ((value = PyFloat_FromDouble(d)) == NULL)
2908 return -1;
2909
2910 PDATA_PUSH(self->stack, value, -1);
2911 return 0;
2912}
2913
2914static int
2915load_binfloat(UnpicklerObject *self)
2916{
2917 PyObject *value;
2918 double x;
2919 char *s;
2920
2921 if (unpickler_read(self, &s, 8) < 0)
2922 return -1;
2923
2924 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2925 if (x == -1.0 && PyErr_Occurred())
2926 return -1;
2927
2928 if ((value = PyFloat_FromDouble(x)) == NULL)
2929 return -1;
2930
2931 PDATA_PUSH(self->stack, value, -1);
2932 return 0;
2933}
2934
2935static int
2936load_string(UnpicklerObject *self)
2937{
2938 PyObject *bytes;
2939 PyObject *str = NULL;
2940 Py_ssize_t len;
2941 char *s, *p;
2942
2943 if ((len = unpickler_readline(self, &s)) < 0)
2944 return -1;
2945 if (len < 3)
2946 return bad_readline();
2947 if ((s = strdup(s)) == NULL) {
2948 PyErr_NoMemory();
2949 return -1;
2950 }
2951
2952 /* Strip outermost quotes */
2953 while (s[len - 1] <= ' ')
2954 len--;
2955 if (s[0] == '"' && s[len - 1] == '"') {
2956 s[len - 1] = '\0';
2957 p = s + 1;
2958 len -= 2;
2959 }
2960 else if (s[0] == '\'' && s[len - 1] == '\'') {
2961 s[len - 1] = '\0';
2962 p = s + 1;
2963 len -= 2;
2964 }
2965 else {
2966 free(s);
2967 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2968 return -1;
2969 }
2970
2971 /* Use the PyBytes API to decode the string, since that is what is used
2972 to encode, and then coerce the result to Unicode. */
2973 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2974 free(s);
2975 if (bytes == NULL)
2976 return -1;
2977 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2978 Py_DECREF(bytes);
2979 if (str == NULL)
2980 return -1;
2981
2982 PDATA_PUSH(self->stack, str, -1);
2983 return 0;
2984}
2985
2986static int
2987load_binbytes(UnpicklerObject *self)
2988{
2989 PyObject *bytes;
2990 long x;
2991 char *s;
2992
2993 if (unpickler_read(self, &s, 4) < 0)
2994 return -1;
2995
2996 x = calc_binint(s, 4);
2997 if (x < 0) {
2998 PyErr_SetString(UnpicklingError,
2999 "BINBYTES pickle has negative byte count");
3000 return -1;
3001 }
3002
3003 if (unpickler_read(self, &s, x) < 0)
3004 return -1;
3005 bytes = PyBytes_FromStringAndSize(s, x);
3006 if (bytes == NULL)
3007 return -1;
3008
3009 PDATA_PUSH(self->stack, bytes, -1);
3010 return 0;
3011}
3012
3013static int
3014load_short_binbytes(UnpicklerObject *self)
3015{
3016 PyObject *bytes;
3017 unsigned char x;
3018 char *s;
3019
3020 if (unpickler_read(self, &s, 1) < 0)
3021 return -1;
3022
3023 x = (unsigned char)s[0];
3024
3025 if (unpickler_read(self, &s, x) < 0)
3026 return -1;
3027
3028 bytes = PyBytes_FromStringAndSize(s, x);
3029 if (bytes == NULL)
3030 return -1;
3031
3032 PDATA_PUSH(self->stack, bytes, -1);
3033 return 0;
3034}
3035
3036static int
3037load_binstring(UnpicklerObject *self)
3038{
3039 PyObject *str;
3040 long x;
3041 char *s;
3042
3043 if (unpickler_read(self, &s, 4) < 0)
3044 return -1;
3045
3046 x = calc_binint(s, 4);
3047 if (x < 0) {
3048 PyErr_SetString(UnpicklingError,
3049 "BINSTRING pickle has negative byte count");
3050 return -1;
3051 }
3052
3053 if (unpickler_read(self, &s, x) < 0)
3054 return -1;
3055
3056 /* Convert Python 2.x strings to unicode. */
3057 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3058 if (str == NULL)
3059 return -1;
3060
3061 PDATA_PUSH(self->stack, str, -1);
3062 return 0;
3063}
3064
3065static int
3066load_short_binstring(UnpicklerObject *self)
3067{
3068 PyObject *str;
3069 unsigned char x;
3070 char *s;
3071
3072 if (unpickler_read(self, &s, 1) < 0)
3073 return -1;
3074
3075 x = (unsigned char)s[0];
3076
3077 if (unpickler_read(self, &s, x) < 0)
3078 return -1;
3079
3080 /* Convert Python 2.x strings to unicode. */
3081 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3082 if (str == NULL)
3083 return -1;
3084
3085 PDATA_PUSH(self->stack, str, -1);
3086 return 0;
3087}
3088
3089static int
3090load_unicode(UnpicklerObject *self)
3091{
3092 PyObject *str;
3093 Py_ssize_t len;
3094 char *s;
3095
3096 if ((len = unpickler_readline(self, &s)) < 0)
3097 return -1;
3098 if (len < 1)
3099 return bad_readline();
3100
3101 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3102 if (str == NULL)
3103 return -1;
3104
3105 PDATA_PUSH(self->stack, str, -1);
3106 return 0;
3107}
3108
3109static int
3110load_binunicode(UnpicklerObject *self)
3111{
3112 PyObject *str;
3113 long size;
3114 char *s;
3115
3116 if (unpickler_read(self, &s, 4) < 0)
3117 return -1;
3118
3119 size = calc_binint(s, 4);
3120 if (size < 0) {
3121 PyErr_SetString(UnpicklingError,
3122 "BINUNICODE pickle has negative byte count");
3123 return -1;
3124 }
3125
3126 if (unpickler_read(self, &s, size) < 0)
3127 return -1;
3128
3129 str = PyUnicode_DecodeUTF8(s, size, NULL);
3130 if (str == NULL)
3131 return -1;
3132
3133 PDATA_PUSH(self->stack, str, -1);
3134 return 0;
3135}
3136
3137static int
3138load_tuple(UnpicklerObject *self)
3139{
3140 PyObject *tuple;
3141 int i;
3142
3143 if ((i = marker(self)) < 0)
3144 return -1;
3145
3146 tuple = Pdata_poptuple(self->stack, i);
3147 if (tuple == NULL)
3148 return -1;
3149 PDATA_PUSH(self->stack, tuple, -1);
3150 return 0;
3151}
3152
3153static int
3154load_counted_tuple(UnpicklerObject *self, int len)
3155{
3156 PyObject *tuple;
3157
3158 tuple = PyTuple_New(len);
3159 if (tuple == NULL)
3160 return -1;
3161
3162 while (--len >= 0) {
3163 PyObject *item;
3164
3165 PDATA_POP(self->stack, item);
3166 if (item == NULL)
3167 return -1;
3168 PyTuple_SET_ITEM(tuple, len, item);
3169 }
3170 PDATA_PUSH(self->stack, tuple, -1);
3171 return 0;
3172}
3173
3174static int
3175load_empty_list(UnpicklerObject *self)
3176{
3177 PyObject *list;
3178
3179 if ((list = PyList_New(0)) == NULL)
3180 return -1;
3181 PDATA_PUSH(self->stack, list, -1);
3182 return 0;
3183}
3184
3185static int
3186load_empty_dict(UnpicklerObject *self)
3187{
3188 PyObject *dict;
3189
3190 if ((dict = PyDict_New()) == NULL)
3191 return -1;
3192 PDATA_PUSH(self->stack, dict, -1);
3193 return 0;
3194}
3195
3196static int
3197load_list(UnpicklerObject *self)
3198{
3199 PyObject *list;
3200 int i;
3201
3202 if ((i = marker(self)) < 0)
3203 return -1;
3204
3205 list = Pdata_poplist(self->stack, i);
3206 if (list == NULL)
3207 return -1;
3208 PDATA_PUSH(self->stack, list, -1);
3209 return 0;
3210}
3211
3212static int
3213load_dict(UnpicklerObject *self)
3214{
3215 PyObject *dict, *key, *value;
3216 int i, j, k;
3217
3218 if ((i = marker(self)) < 0)
3219 return -1;
3220 j = self->stack->length;
3221
3222 if ((dict = PyDict_New()) == NULL)
3223 return -1;
3224
3225 for (k = i + 1; k < j; k += 2) {
3226 key = self->stack->data[k - 1];
3227 value = self->stack->data[k];
3228 if (PyDict_SetItem(dict, key, value) < 0) {
3229 Py_DECREF(dict);
3230 return -1;
3231 }
3232 }
3233 Pdata_clear(self->stack, i);
3234 PDATA_PUSH(self->stack, dict, -1);
3235 return 0;
3236}
3237
3238static PyObject *
3239instantiate(PyObject *cls, PyObject *args)
3240{
3241 PyObject *r = NULL;
3242
3243 /* XXX: The pickle.py module does not create instances this way when the
3244 args tuple is empty. See Unpickler._instantiate(). */
3245 if ((r = PyObject_CallObject(cls, args)))
3246 return r;
3247
3248 /* XXX: Is this still nescessary? */
3249 {
3250 PyObject *tp, *v, *tb, *tmp_value;
3251
3252 PyErr_Fetch(&tp, &v, &tb);
3253 tmp_value = v;
3254 /* NULL occurs when there was a KeyboardInterrupt */
3255 if (tmp_value == NULL)
3256 tmp_value = Py_None;
3257 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3258 Py_XDECREF(v);
3259 v = r;
3260 }
3261 PyErr_Restore(tp, v, tb);
3262 }
3263 return NULL;
3264}
3265
3266static int
3267load_obj(UnpicklerObject *self)
3268{
3269 PyObject *cls, *args, *obj = NULL;
3270 int i;
3271
3272 if ((i = marker(self)) < 0)
3273 return -1;
3274
3275 args = Pdata_poptuple(self->stack, i + 1);
3276 if (args == NULL)
3277 return -1;
3278
3279 PDATA_POP(self->stack, cls);
3280 if (cls) {
3281 obj = instantiate(cls, args);
3282 Py_DECREF(cls);
3283 }
3284 Py_DECREF(args);
3285 if (obj == NULL)
3286 return -1;
3287
3288 PDATA_PUSH(self->stack, obj, -1);
3289 return 0;
3290}
3291
3292static int
3293load_inst(UnpicklerObject *self)
3294{
3295 PyObject *cls = NULL;
3296 PyObject *args = NULL;
3297 PyObject *obj = NULL;
3298 PyObject *module_name;
3299 PyObject *class_name;
3300 Py_ssize_t len;
3301 int i;
3302 char *s;
3303
3304 if ((i = marker(self)) < 0)
3305 return -1;
3306 if ((len = unpickler_readline(self, &s)) < 0)
3307 return -1;
3308 if (len < 2)
3309 return bad_readline();
3310
3311 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3312 identifiers are permitted in Python 3.0, since the INST opcode is only
3313 supported by older protocols on Python 2.x. */
3314 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3315 if (module_name == NULL)
3316 return -1;
3317
3318 if ((len = unpickler_readline(self, &s)) >= 0) {
3319 if (len < 2)
3320 return bad_readline();
3321 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3322 if (class_name == NULL) {
3323 cls = find_class(self, module_name, class_name);
3324 Py_DECREF(class_name);
3325 }
3326 }
3327 Py_DECREF(module_name);
3328
3329 if (cls == NULL)
3330 return -1;
3331
3332 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3333 obj = instantiate(cls, args);
3334 Py_DECREF(args);
3335 }
3336 Py_DECREF(cls);
3337
3338 if (obj == NULL)
3339 return -1;
3340
3341 PDATA_PUSH(self->stack, obj, -1);
3342 return 0;
3343}
3344
3345static int
3346load_newobj(UnpicklerObject *self)
3347{
3348 PyObject *args = NULL;
3349 PyObject *clsraw = NULL;
3350 PyTypeObject *cls; /* clsraw cast to its true type */
3351 PyObject *obj;
3352
3353 /* Stack is ... cls argtuple, and we want to call
3354 * cls.__new__(cls, *argtuple).
3355 */
3356 PDATA_POP(self->stack, args);
3357 if (args == NULL)
3358 goto error;
3359 if (!PyTuple_Check(args)) {
3360 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3361 goto error;
3362 }
3363
3364 PDATA_POP(self->stack, clsraw);
3365 cls = (PyTypeObject *)clsraw;
3366 if (cls == NULL)
3367 goto error;
3368 if (!PyType_Check(cls)) {
3369 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3370 "isn't a type object");
3371 goto error;
3372 }
3373 if (cls->tp_new == NULL) {
3374 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3375 "has NULL tp_new");
3376 goto error;
3377 }
3378
3379 /* Call __new__. */
3380 obj = cls->tp_new(cls, args, NULL);
3381 if (obj == NULL)
3382 goto error;
3383
3384 Py_DECREF(args);
3385 Py_DECREF(clsraw);
3386 PDATA_PUSH(self->stack, obj, -1);
3387 return 0;
3388
3389 error:
3390 Py_XDECREF(args);
3391 Py_XDECREF(clsraw);
3392 return -1;
3393}
3394
3395static int
3396load_global(UnpicklerObject *self)
3397{
3398 PyObject *global = NULL;
3399 PyObject *module_name;
3400 PyObject *global_name;
3401 Py_ssize_t len;
3402 char *s;
3403
3404 if ((len = unpickler_readline(self, &s)) < 0)
3405 return -1;
3406 if (len < 2)
3407 return bad_readline();
3408 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3409 if (!module_name)
3410 return -1;
3411
3412 if ((len = unpickler_readline(self, &s)) >= 0) {
3413 if (len < 2) {
3414 Py_DECREF(module_name);
3415 return bad_readline();
3416 }
3417 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3418 if (global_name) {
3419 global = find_class(self, module_name, global_name);
3420 Py_DECREF(global_name);
3421 }
3422 }
3423 Py_DECREF(module_name);
3424
3425 if (global == NULL)
3426 return -1;
3427 PDATA_PUSH(self->stack, global, -1);
3428 return 0;
3429}
3430
3431static int
3432load_persid(UnpicklerObject *self)
3433{
3434 PyObject *pid;
3435 Py_ssize_t len;
3436 char *s;
3437
3438 if (self->pers_func) {
3439 if ((len = unpickler_readline(self, &s)) < 0)
3440 return -1;
3441 if (len < 2)
3442 return bad_readline();
3443
3444 pid = PyBytes_FromStringAndSize(s, len - 1);
3445 if (pid == NULL)
3446 return -1;
3447
3448 /* Ugh... this does not leak since unpickler_call() steals the
3449 reference to pid first. */
3450 pid = unpickler_call(self, self->pers_func, pid);
3451 if (pid == NULL)
3452 return -1;
3453
3454 PDATA_PUSH(self->stack, pid, -1);
3455 return 0;
3456 }
3457 else {
3458 PyErr_SetString(UnpicklingError,
3459 "A load persistent id instruction was encountered,\n"
3460 "but no persistent_load function was specified.");
3461 return -1;
3462 }
3463}
3464
3465static int
3466load_binpersid(UnpicklerObject *self)
3467{
3468 PyObject *pid;
3469
3470 if (self->pers_func) {
3471 PDATA_POP(self->stack, pid);
3472 if (pid == NULL)
3473 return -1;
3474
3475 /* Ugh... this does not leak since unpickler_call() steals the
3476 reference to pid first. */
3477 pid = unpickler_call(self, self->pers_func, pid);
3478 if (pid == NULL)
3479 return -1;
3480
3481 PDATA_PUSH(self->stack, pid, -1);
3482 return 0;
3483 }
3484 else {
3485 PyErr_SetString(UnpicklingError,
3486 "A load persistent id instruction was encountered,\n"
3487 "but no persistent_load function was specified.");
3488 return -1;
3489 }
3490}
3491
3492static int
3493load_pop(UnpicklerObject *self)
3494{
3495 int len;
3496
3497 if ((len = self->stack->length) <= 0)
3498 return stack_underflow();
3499
3500 /* Note that we split the (pickle.py) stack into two stacks,
3501 * an object stack and a mark stack. We have to be clever and
3502 * pop the right one. We do this by looking at the top of the
3503 * mark stack.
3504 */
3505
3506 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3507 self->num_marks--;
3508 else {
3509 len--;
3510 Py_DECREF(self->stack->data[len]);
3511 self->stack->length = len;
3512 }
3513
3514 return 0;
3515}
3516
3517static int
3518load_pop_mark(UnpicklerObject *self)
3519{
3520 int i;
3521
3522 if ((i = marker(self)) < 0)
3523 return -1;
3524
3525 Pdata_clear(self->stack, i);
3526
3527 return 0;
3528}
3529
3530static int
3531load_dup(UnpicklerObject *self)
3532{
3533 PyObject *last;
3534 int len;
3535
3536 if ((len = self->stack->length) <= 0)
3537 return stack_underflow();
3538 last = self->stack->data[len - 1];
3539 PDATA_APPEND(self->stack, last, -1);
3540 return 0;
3541}
3542
3543static int
3544load_get(UnpicklerObject *self)
3545{
3546 PyObject *key, *value;
3547 Py_ssize_t len;
3548 char *s;
3549
3550 if ((len = unpickler_readline(self, &s)) < 0)
3551 return -1;
3552 if (len < 2)
3553 return bad_readline();
3554
3555 key = PyLong_FromString(s, NULL, 10);
3556 if (key == NULL)
3557 return -1;
3558
3559 value = PyDict_GetItemWithError(self->memo, key);
3560 if (value == NULL) {
3561 if (!PyErr_Occurred())
3562 PyErr_SetObject(PyExc_KeyError, key);
3563 Py_DECREF(key);
3564 return -1;
3565 }
3566 Py_DECREF(key);
3567
3568 PDATA_APPEND(self->stack, value, -1);
3569 return 0;
3570}
3571
3572static int
3573load_binget(UnpicklerObject *self)
3574{
3575 PyObject *key, *value;
3576 char *s;
3577
3578 if (unpickler_read(self, &s, 1) < 0)
3579 return -1;
3580
3581 /* Here, the unsigned cast is necessary to avoid negative values. */
3582 key = PyLong_FromLong((long)(unsigned char)s[0]);
3583 if (key == NULL)
3584 return -1;
3585
3586 value = PyDict_GetItemWithError(self->memo, key);
3587 if (value == NULL) {
3588 if (!PyErr_Occurred())
3589 PyErr_SetObject(PyExc_KeyError, key);
3590 Py_DECREF(key);
3591 return -1;
3592 }
3593 Py_DECREF(key);
3594
3595 PDATA_APPEND(self->stack, value, -1);
3596 return 0;
3597}
3598
3599static int
3600load_long_binget(UnpicklerObject *self)
3601{
3602 PyObject *key, *value;
3603 char *s;
3604 long k;
3605
3606 if (unpickler_read(self, &s, 4) < 0)
3607 return -1;
3608
3609 k = (long)(unsigned char)s[0];
3610 k |= (long)(unsigned char)s[1] << 8;
3611 k |= (long)(unsigned char)s[2] << 16;
3612 k |= (long)(unsigned char)s[3] << 24;
3613
3614 key = PyLong_FromLong(k);
3615 if (key == NULL)
3616 return -1;
3617
3618 value = PyDict_GetItemWithError(self->memo, key);
3619 if (value == NULL) {
3620 if (!PyErr_Occurred())
3621 PyErr_SetObject(PyExc_KeyError, key);
3622 Py_DECREF(key);
3623 return -1;
3624 }
3625 Py_DECREF(key);
3626
3627 PDATA_APPEND(self->stack, value, -1);
3628 return 0;
3629}
3630
3631/* Push an object from the extension registry (EXT[124]). nbytes is
3632 * the number of bytes following the opcode, holding the index (code) value.
3633 */
3634static int
3635load_extension(UnpicklerObject *self, int nbytes)
3636{
3637 char *codebytes; /* the nbytes bytes after the opcode */
3638 long code; /* calc_binint returns long */
3639 PyObject *py_code; /* code as a Python int */
3640 PyObject *obj; /* the object to push */
3641 PyObject *pair; /* (module_name, class_name) */
3642 PyObject *module_name, *class_name;
3643
3644 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3645 if (unpickler_read(self, &codebytes, nbytes) < 0)
3646 return -1;
3647 code = calc_binint(codebytes, nbytes);
3648 if (code <= 0) { /* note that 0 is forbidden */
3649 /* Corrupt or hostile pickle. */
3650 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3651 return -1;
3652 }
3653
3654 /* Look for the code in the cache. */
3655 py_code = PyLong_FromLong(code);
3656 if (py_code == NULL)
3657 return -1;
3658 obj = PyDict_GetItem(extension_cache, py_code);
3659 if (obj != NULL) {
3660 /* Bingo. */
3661 Py_DECREF(py_code);
3662 PDATA_APPEND(self->stack, obj, -1);
3663 return 0;
3664 }
3665
3666 /* Look up the (module_name, class_name) pair. */
3667 pair = PyDict_GetItem(inverted_registry, py_code);
3668 if (pair == NULL) {
3669 Py_DECREF(py_code);
3670 PyErr_Format(PyExc_ValueError, "unregistered extension "
3671 "code %ld", code);
3672 return -1;
3673 }
3674 /* Since the extension registry is manipulable via Python code,
3675 * confirm that pair is really a 2-tuple of strings.
3676 */
3677 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3678 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3679 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3680 Py_DECREF(py_code);
3681 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3682 "isn't a 2-tuple of strings", code);
3683 return -1;
3684 }
3685 /* Load the object. */
3686 obj = find_class(self, module_name, class_name);
3687 if (obj == NULL) {
3688 Py_DECREF(py_code);
3689 return -1;
3690 }
3691 /* Cache code -> obj. */
3692 code = PyDict_SetItem(extension_cache, py_code, obj);
3693 Py_DECREF(py_code);
3694 if (code < 0) {
3695 Py_DECREF(obj);
3696 return -1;
3697 }
3698 PDATA_PUSH(self->stack, obj, -1);
3699 return 0;
3700}
3701
3702static int
3703load_put(UnpicklerObject *self)
3704{
3705 PyObject *key, *value;
3706 Py_ssize_t len;
3707 char *s;
3708 int x;
3709
3710 if ((len = unpickler_readline(self, &s)) < 0)
3711 return -1;
3712 if (len < 2)
3713 return bad_readline();
3714 if ((x = self->stack->length) <= 0)
3715 return stack_underflow();
3716
3717 key = PyLong_FromString(s, NULL, 10);
3718 if (key == NULL)
3719 return -1;
3720 value = self->stack->data[x - 1];
3721
3722 x = PyDict_SetItem(self->memo, key, value);
3723 Py_DECREF(key);
3724 return x;
3725}
3726
3727static int
3728load_binput(UnpicklerObject *self)
3729{
3730 PyObject *key, *value;
3731 char *s;
3732 int x;
3733
3734 if (unpickler_read(self, &s, 1) < 0)
3735 return -1;
3736 if ((x = self->stack->length) <= 0)
3737 return stack_underflow();
3738
3739 key = PyLong_FromLong((long)(unsigned char)s[0]);
3740 if (key == NULL)
3741 return -1;
3742 value = self->stack->data[x - 1];
3743
3744 x = PyDict_SetItem(self->memo, key, value);
3745 Py_DECREF(key);
3746 return x;
3747}
3748
3749static int
3750load_long_binput(UnpicklerObject *self)
3751{
3752 PyObject *key, *value;
3753 long k;
3754 char *s;
3755 int x;
3756
3757 if (unpickler_read(self, &s, 4) < 0)
3758 return -1;
3759 if ((x = self->stack->length) <= 0)
3760 return stack_underflow();
3761
3762 k = (long)(unsigned char)s[0];
3763 k |= (long)(unsigned char)s[1] << 8;
3764 k |= (long)(unsigned char)s[2] << 16;
3765 k |= (long)(unsigned char)s[3] << 24;
3766
3767 key = PyLong_FromLong(k);
3768 if (key == NULL)
3769 return -1;
3770 value = self->stack->data[x - 1];
3771
3772 x = PyDict_SetItem(self->memo, key, value);
3773 Py_DECREF(key);
3774 return x;
3775}
3776
3777static int
3778do_append(UnpicklerObject *self, int x)
3779{
3780 PyObject *value;
3781 PyObject *list;
3782 int len, i;
3783
3784 len = self->stack->length;
3785 if (x > len || x <= 0)
3786 return stack_underflow();
3787 if (len == x) /* nothing to do */
3788 return 0;
3789
3790 list = self->stack->data[x - 1];
3791
3792 if (PyList_Check(list)) {
3793 PyObject *slice;
3794 Py_ssize_t list_len;
3795
3796 slice = Pdata_poplist(self->stack, x);
3797 if (!slice)
3798 return -1;
3799 list_len = PyList_GET_SIZE(list);
3800 i = PyList_SetSlice(list, list_len, list_len, slice);
3801 Py_DECREF(slice);
3802 return i;
3803 }
3804 else {
3805 PyObject *append_func;
3806
3807 append_func = PyObject_GetAttrString(list, "append");
3808 if (append_func == NULL)
3809 return -1;
3810 for (i = x; i < len; i++) {
3811 PyObject *result;
3812
3813 value = self->stack->data[i];
3814 result = unpickler_call(self, append_func, value);
3815 if (result == NULL) {
3816 Pdata_clear(self->stack, i + 1);
3817 self->stack->length = x;
3818 return -1;
3819 }
3820 Py_DECREF(result);
3821 }
3822 self->stack->length = x;
3823 }
3824
3825 return 0;
3826}
3827
3828static int
3829load_append(UnpicklerObject *self)
3830{
3831 return do_append(self, self->stack->length - 1);
3832}
3833
3834static int
3835load_appends(UnpicklerObject *self)
3836{
3837 return do_append(self, marker(self));
3838}
3839
3840static int
3841do_setitems(UnpicklerObject *self, int x)
3842{
3843 PyObject *value, *key;
3844 PyObject *dict;
3845 int len, i;
3846 int status = 0;
3847
3848 len = self->stack->length;
3849 if (x > len || x <= 0)
3850 return stack_underflow();
3851 if (len == x) /* nothing to do */
3852 return 0;
3853 if ((len - x) % 2 != 0) {
3854 /* Currupt or hostile pickle -- we never write one like this. */
3855 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3856 return -1;
3857 }
3858
3859 /* Here, dict does not actually need to be a PyDict; it could be anything
3860 that supports the __setitem__ attribute. */
3861 dict = self->stack->data[x - 1];
3862
3863 for (i = x + 1; i < len; i += 2) {
3864 key = self->stack->data[i - 1];
3865 value = self->stack->data[i];
3866 if (PyObject_SetItem(dict, key, value) < 0) {
3867 status = -1;
3868 break;
3869 }
3870 }
3871
3872 Pdata_clear(self->stack, x);
3873 return status;
3874}
3875
3876static int
3877load_setitem(UnpicklerObject *self)
3878{
3879 return do_setitems(self, self->stack->length - 2);
3880}
3881
3882static int
3883load_setitems(UnpicklerObject *self)
3884{
3885 return do_setitems(self, marker(self));
3886}
3887
3888static int
3889load_build(UnpicklerObject *self)
3890{
3891 PyObject *state, *inst, *slotstate;
3892 PyObject *setstate;
3893 int status = 0;
3894
3895 /* Stack is ... instance, state. We want to leave instance at
3896 * the stack top, possibly mutated via instance.__setstate__(state).
3897 */
3898 if (self->stack->length < 2)
3899 return stack_underflow();
3900
3901 PDATA_POP(self->stack, state);
3902 if (state == NULL)
3903 return -1;
3904
3905 inst = self->stack->data[self->stack->length - 1];
3906
3907 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003908 if (setstate == NULL) {
3909 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3910 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003911 else {
3912 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003913 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003914 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003915 }
3916 else {
3917 PyObject *result;
3918
3919 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003920 /* Ugh... this does not leak since unpickler_call() steals the
3921 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003922 result = unpickler_call(self, setstate, state);
3923 Py_DECREF(setstate);
3924 if (result == NULL)
3925 return -1;
3926 Py_DECREF(result);
3927 return 0;
3928 }
3929
3930 /* A default __setstate__. First see whether state embeds a
3931 * slot state dict too (a proto 2 addition).
3932 */
3933 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3934 PyObject *tmp = state;
3935
3936 state = PyTuple_GET_ITEM(tmp, 0);
3937 slotstate = PyTuple_GET_ITEM(tmp, 1);
3938 Py_INCREF(state);
3939 Py_INCREF(slotstate);
3940 Py_DECREF(tmp);
3941 }
3942 else
3943 slotstate = NULL;
3944
3945 /* Set inst.__dict__ from the state dict (if any). */
3946 if (state != Py_None) {
3947 PyObject *dict;
3948
3949 if (!PyDict_Check(state)) {
3950 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3951 goto error;
3952 }
3953 dict = PyObject_GetAttrString(inst, "__dict__");
3954 if (dict == NULL)
3955 goto error;
3956
3957 PyDict_Update(dict, state);
3958 Py_DECREF(dict);
3959 }
3960
3961 /* Also set instance attributes from the slotstate dict (if any). */
3962 if (slotstate != NULL) {
3963 PyObject *d_key, *d_value;
3964 Py_ssize_t i;
3965
3966 if (!PyDict_Check(slotstate)) {
3967 PyErr_SetString(UnpicklingError,
3968 "slot state is not a dictionary");
3969 goto error;
3970 }
3971 i = 0;
3972 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3973 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3974 goto error;
3975 }
3976 }
3977
3978 if (0) {
3979 error:
3980 status = -1;
3981 }
3982
3983 Py_DECREF(state);
3984 Py_XDECREF(slotstate);
3985 return status;
3986}
3987
3988static int
3989load_mark(UnpicklerObject *self)
3990{
3991
3992 /* Note that we split the (pickle.py) stack into two stacks, an
3993 * object stack and a mark stack. Here we push a mark onto the
3994 * mark stack.
3995 */
3996
3997 if ((self->num_marks + 1) >= self->marks_size) {
3998 size_t alloc;
3999 int *marks;
4000
4001 /* Use the size_t type to check for overflow. */
4002 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004003 if (alloc > PY_SSIZE_T_MAX ||
4004 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004005 PyErr_NoMemory();
4006 return -1;
4007 }
4008
4009 if (self->marks == NULL)
4010 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4011 else
4012 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4013 if (marks == NULL) {
4014 PyErr_NoMemory();
4015 return -1;
4016 }
4017 self->marks = marks;
4018 self->marks_size = (Py_ssize_t)alloc;
4019 }
4020
4021 self->marks[self->num_marks++] = self->stack->length;
4022
4023 return 0;
4024}
4025
4026static int
4027load_reduce(UnpicklerObject *self)
4028{
4029 PyObject *callable = NULL;
4030 PyObject *argtup = NULL;
4031 PyObject *obj = NULL;
4032
4033 PDATA_POP(self->stack, argtup);
4034 if (argtup == NULL)
4035 return -1;
4036 PDATA_POP(self->stack, callable);
4037 if (callable) {
4038 obj = instantiate(callable, argtup);
4039 Py_DECREF(callable);
4040 }
4041 Py_DECREF(argtup);
4042
4043 if (obj == NULL)
4044 return -1;
4045
4046 PDATA_PUSH(self->stack, obj, -1);
4047 return 0;
4048}
4049
4050/* Just raises an error if we don't know the protocol specified. PROTO
4051 * is the first opcode for protocols >= 2.
4052 */
4053static int
4054load_proto(UnpicklerObject *self)
4055{
4056 char *s;
4057 int i;
4058
4059 if (unpickler_read(self, &s, 1) < 0)
4060 return -1;
4061
4062 i = (unsigned char)s[0];
4063 if (i <= HIGHEST_PROTOCOL)
4064 return 0;
4065
4066 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4067 return -1;
4068}
4069
4070static PyObject *
4071load(UnpicklerObject *self)
4072{
4073 PyObject *err;
4074 PyObject *value = NULL;
4075 char *s;
4076
4077 self->num_marks = 0;
4078 if (self->stack->length)
4079 Pdata_clear(self->stack, 0);
4080
4081 /* Convenient macros for the dispatch while-switch loop just below. */
4082#define OP(opcode, load_func) \
4083 case opcode: if (load_func(self) < 0) break; continue;
4084
4085#define OP_ARG(opcode, load_func, arg) \
4086 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4087
4088 while (1) {
4089 if (unpickler_read(self, &s, 1) < 0)
4090 break;
4091
4092 switch ((enum opcode)s[0]) {
4093 OP(NONE, load_none)
4094 OP(BININT, load_binint)
4095 OP(BININT1, load_binint1)
4096 OP(BININT2, load_binint2)
4097 OP(INT, load_int)
4098 OP(LONG, load_long)
4099 OP_ARG(LONG1, load_counted_long, 1)
4100 OP_ARG(LONG4, load_counted_long, 4)
4101 OP(FLOAT, load_float)
4102 OP(BINFLOAT, load_binfloat)
4103 OP(BINBYTES, load_binbytes)
4104 OP(SHORT_BINBYTES, load_short_binbytes)
4105 OP(BINSTRING, load_binstring)
4106 OP(SHORT_BINSTRING, load_short_binstring)
4107 OP(STRING, load_string)
4108 OP(UNICODE, load_unicode)
4109 OP(BINUNICODE, load_binunicode)
4110 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4111 OP_ARG(TUPLE1, load_counted_tuple, 1)
4112 OP_ARG(TUPLE2, load_counted_tuple, 2)
4113 OP_ARG(TUPLE3, load_counted_tuple, 3)
4114 OP(TUPLE, load_tuple)
4115 OP(EMPTY_LIST, load_empty_list)
4116 OP(LIST, load_list)
4117 OP(EMPTY_DICT, load_empty_dict)
4118 OP(DICT, load_dict)
4119 OP(OBJ, load_obj)
4120 OP(INST, load_inst)
4121 OP(NEWOBJ, load_newobj)
4122 OP(GLOBAL, load_global)
4123 OP(APPEND, load_append)
4124 OP(APPENDS, load_appends)
4125 OP(BUILD, load_build)
4126 OP(DUP, load_dup)
4127 OP(BINGET, load_binget)
4128 OP(LONG_BINGET, load_long_binget)
4129 OP(GET, load_get)
4130 OP(MARK, load_mark)
4131 OP(BINPUT, load_binput)
4132 OP(LONG_BINPUT, load_long_binput)
4133 OP(PUT, load_put)
4134 OP(POP, load_pop)
4135 OP(POP_MARK, load_pop_mark)
4136 OP(SETITEM, load_setitem)
4137 OP(SETITEMS, load_setitems)
4138 OP(PERSID, load_persid)
4139 OP(BINPERSID, load_binpersid)
4140 OP(REDUCE, load_reduce)
4141 OP(PROTO, load_proto)
4142 OP_ARG(EXT1, load_extension, 1)
4143 OP_ARG(EXT2, load_extension, 2)
4144 OP_ARG(EXT4, load_extension, 4)
4145 OP_ARG(NEWTRUE, load_bool, Py_True)
4146 OP_ARG(NEWFALSE, load_bool, Py_False)
4147
4148 case STOP:
4149 break;
4150
4151 case '\0':
4152 PyErr_SetNone(PyExc_EOFError);
4153 return NULL;
4154
4155 default:
4156 PyErr_Format(UnpicklingError,
4157 "invalid load key, '%c'.", s[0]);
4158 return NULL;
4159 }
4160
4161 break; /* and we are done! */
4162 }
4163
4164 /* XXX: It is not clear what this is actually for. */
4165 if ((err = PyErr_Occurred())) {
4166 if (err == PyExc_EOFError) {
4167 PyErr_SetNone(PyExc_EOFError);
4168 }
4169 return NULL;
4170 }
4171
4172 PDATA_POP(self->stack, value);
4173 return value;
4174}
4175
4176PyDoc_STRVAR(Unpickler_load_doc,
4177"load() -> object. Load a pickle."
4178"\n"
4179"Read a pickled object representation from the open file object given in\n"
4180"the constructor, and return the reconstituted object hierarchy specified\n"
4181"therein.\n");
4182
4183static PyObject *
4184Unpickler_load(UnpicklerObject *self)
4185{
4186 /* Check whether the Unpickler was initialized correctly. This prevents
4187 segfaulting if a subclass overridden __init__ with a function that does
4188 not call Unpickler.__init__(). Here, we simply ensure that self->read
4189 is not NULL. */
4190 if (self->read == NULL) {
4191 PyErr_Format(UnpicklingError,
4192 "Unpickler.__init__() was not called by %s.__init__()",
4193 Py_TYPE(self)->tp_name);
4194 return NULL;
4195 }
4196
4197 return load(self);
4198}
4199
4200/* The name of find_class() is misleading. In newer pickle protocols, this
4201 function is used for loading any global (i.e., functions), not just
4202 classes. The name is kept only for backward compatibility. */
4203
4204PyDoc_STRVAR(Unpickler_find_class_doc,
4205"find_class(module_name, global_name) -> object.\n"
4206"\n"
4207"Return an object from a specified module, importing the module if\n"
4208"necessary. Subclasses may override this method (e.g. to restrict\n"
4209"unpickling of arbitrary classes and functions).\n"
4210"\n"
4211"This method is called whenever a class or a function object is\n"
4212"needed. Both arguments passed are str objects.\n");
4213
4214static PyObject *
4215Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4216{
4217 PyObject *global;
4218 PyObject *modules_dict;
4219 PyObject *module;
4220 PyObject *module_name, *global_name;
4221
4222 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4223 &module_name, &global_name))
4224 return NULL;
4225
4226 modules_dict = PySys_GetObject("modules");
4227 if (modules_dict == NULL)
4228 return NULL;
4229
4230 module = PyDict_GetItem(modules_dict, module_name);
4231 if (module == NULL) {
4232 module = PyImport_Import(module_name);
4233 if (module == NULL)
4234 return NULL;
4235 global = PyObject_GetAttr(module, global_name);
4236 Py_DECREF(module);
4237 }
4238 else {
4239 global = PyObject_GetAttr(module, global_name);
4240 }
4241 return global;
4242}
4243
4244static struct PyMethodDef Unpickler_methods[] = {
4245 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4246 Unpickler_load_doc},
4247 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4248 Unpickler_find_class_doc},
4249 {NULL, NULL} /* sentinel */
4250};
4251
4252static void
4253Unpickler_dealloc(UnpicklerObject *self)
4254{
4255 PyObject_GC_UnTrack((PyObject *)self);
4256 Py_XDECREF(self->readline);
4257 Py_XDECREF(self->read);
4258 Py_XDECREF(self->memo);
4259 Py_XDECREF(self->stack);
4260 Py_XDECREF(self->pers_func);
4261 Py_XDECREF(self->arg);
4262 Py_XDECREF(self->last_string);
4263
4264 PyMem_Free(self->marks);
4265 free(self->encoding);
4266 free(self->errors);
4267
4268 Py_TYPE(self)->tp_free((PyObject *)self);
4269}
4270
4271static int
4272Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4273{
4274 Py_VISIT(self->readline);
4275 Py_VISIT(self->read);
4276 Py_VISIT(self->memo);
4277 Py_VISIT(self->stack);
4278 Py_VISIT(self->pers_func);
4279 Py_VISIT(self->arg);
4280 Py_VISIT(self->last_string);
4281 return 0;
4282}
4283
4284static int
4285Unpickler_clear(UnpicklerObject *self)
4286{
4287 Py_CLEAR(self->readline);
4288 Py_CLEAR(self->read);
4289 Py_CLEAR(self->memo);
4290 Py_CLEAR(self->stack);
4291 Py_CLEAR(self->pers_func);
4292 Py_CLEAR(self->arg);
4293 Py_CLEAR(self->last_string);
4294
4295 PyMem_Free(self->marks);
4296 self->marks = NULL;
4297 free(self->encoding);
4298 self->encoding = NULL;
4299 free(self->errors);
4300 self->errors = NULL;
4301
4302 return 0;
4303}
4304
4305PyDoc_STRVAR(Unpickler_doc,
4306"Unpickler(file, *, encoding='ASCII', errors='strict')"
4307"\n"
4308"This takes a binary file for reading a pickle data stream.\n"
4309"\n"
4310"The protocol version of the pickle is detected automatically, so no\n"
4311"proto argument is needed.\n"
4312"\n"
4313"The file-like object must have two methods, a read() method\n"
4314"that takes an integer argument, and a readline() method that\n"
4315"requires no arguments. Both methods should return bytes.\n"
4316"Thus file-like object can be a binary file object opened for\n"
4317"reading, a BytesIO object, or any other custom object that\n"
4318"meets this interface.\n"
4319"\n"
4320"Optional keyword arguments are encoding and errors, which are\n"
4321"used to decode 8-bit string instances pickled by Python 2.x.\n"
4322"These default to 'ASCII' and 'strict', respectively.\n");
4323
4324static int
4325Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4326{
4327 static char *kwlist[] = {"file", "encoding", "errors", 0};
4328 PyObject *file;
4329 char *encoding = NULL;
4330 char *errors = NULL;
4331
4332 /* XXX: That is an horrible error message. But, I don't know how to do
4333 better... */
4334 if (Py_SIZE(args) != 1) {
4335 PyErr_Format(PyExc_TypeError,
4336 "%s takes exactly one positional argument (%zd given)",
4337 Py_TYPE(self)->tp_name, Py_SIZE(args));
4338 return -1;
4339 }
4340
4341 /* Arguments parsing needs to be done in the __init__() method to allow
4342 subclasses to define their own __init__() method, which may (or may
4343 not) support Unpickler arguments. However, this means we need to be
4344 extra careful in the other Unpickler methods, since a subclass could
4345 forget to call Unpickler.__init__() thus breaking our internal
4346 invariants. */
4347 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4348 &file, &encoding, &errors))
4349 return -1;
4350
4351 /* In case of multiple __init__() calls, clear previous content. */
4352 if (self->read != NULL)
4353 (void)Unpickler_clear(self);
4354
4355 self->read = PyObject_GetAttrString(file, "read");
4356 self->readline = PyObject_GetAttrString(file, "readline");
4357 if (self->readline == NULL || self->read == NULL)
4358 return -1;
4359
4360 if (encoding == NULL)
4361 encoding = "ASCII";
4362 if (errors == NULL)
4363 errors = "strict";
4364
4365 self->encoding = strdup(encoding);
4366 self->errors = strdup(errors);
4367 if (self->encoding == NULL || self->errors == NULL) {
4368 PyErr_NoMemory();
4369 return -1;
4370 }
4371
4372 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4373 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4374 "persistent_load");
4375 if (self->pers_func == NULL)
4376 return -1;
4377 }
4378 else {
4379 self->pers_func = NULL;
4380 }
4381
4382 self->stack = (Pdata *)Pdata_New();
4383 if (self->stack == NULL)
4384 return -1;
4385
4386 self->memo = PyDict_New();
4387 if (self->memo == NULL)
4388 return -1;
4389
4390 return 0;
4391}
4392
4393static PyObject *
4394Unpickler_get_memo(UnpicklerObject *self)
4395{
4396 if (self->memo == NULL)
4397 PyErr_SetString(PyExc_AttributeError, "memo");
4398 else
4399 Py_INCREF(self->memo);
4400 return self->memo;
4401}
4402
4403static int
4404Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4405{
4406 PyObject *tmp;
4407
4408 if (value == NULL) {
4409 PyErr_SetString(PyExc_TypeError,
4410 "attribute deletion is not supported");
4411 return -1;
4412 }
4413 if (!PyDict_Check(value)) {
4414 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4415 return -1;
4416 }
4417
4418 tmp = self->memo;
4419 Py_INCREF(value);
4420 self->memo = value;
4421 Py_XDECREF(tmp);
4422
4423 return 0;
4424}
4425
4426static PyObject *
4427Unpickler_get_persload(UnpicklerObject *self)
4428{
4429 if (self->pers_func == NULL)
4430 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4431 else
4432 Py_INCREF(self->pers_func);
4433 return self->pers_func;
4434}
4435
4436static int
4437Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4438{
4439 PyObject *tmp;
4440
4441 if (value == NULL) {
4442 PyErr_SetString(PyExc_TypeError,
4443 "attribute deletion is not supported");
4444 return -1;
4445 }
4446 if (!PyCallable_Check(value)) {
4447 PyErr_SetString(PyExc_TypeError,
4448 "persistent_load must be a callable taking "
4449 "one argument");
4450 return -1;
4451 }
4452
4453 tmp = self->pers_func;
4454 Py_INCREF(value);
4455 self->pers_func = value;
4456 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4457
4458 return 0;
4459}
4460
4461static PyGetSetDef Unpickler_getsets[] = {
4462 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4463 {"persistent_load", (getter)Unpickler_get_persload,
4464 (setter)Unpickler_set_persload},
4465 {NULL}
4466};
4467
4468static PyTypeObject Unpickler_Type = {
4469 PyVarObject_HEAD_INIT(NULL, 0)
4470 "_pickle.Unpickler", /*tp_name*/
4471 sizeof(UnpicklerObject), /*tp_basicsize*/
4472 0, /*tp_itemsize*/
4473 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4474 0, /*tp_print*/
4475 0, /*tp_getattr*/
4476 0, /*tp_setattr*/
4477 0, /*tp_compare*/
4478 0, /*tp_repr*/
4479 0, /*tp_as_number*/
4480 0, /*tp_as_sequence*/
4481 0, /*tp_as_mapping*/
4482 0, /*tp_hash*/
4483 0, /*tp_call*/
4484 0, /*tp_str*/
4485 0, /*tp_getattro*/
4486 0, /*tp_setattro*/
4487 0, /*tp_as_buffer*/
4488 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4489 Unpickler_doc, /*tp_doc*/
4490 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4491 (inquiry)Unpickler_clear, /*tp_clear*/
4492 0, /*tp_richcompare*/
4493 0, /*tp_weaklistoffset*/
4494 0, /*tp_iter*/
4495 0, /*tp_iternext*/
4496 Unpickler_methods, /*tp_methods*/
4497 0, /*tp_members*/
4498 Unpickler_getsets, /*tp_getset*/
4499 0, /*tp_base*/
4500 0, /*tp_dict*/
4501 0, /*tp_descr_get*/
4502 0, /*tp_descr_set*/
4503 0, /*tp_dictoffset*/
4504 (initproc)Unpickler_init, /*tp_init*/
4505 PyType_GenericAlloc, /*tp_alloc*/
4506 PyType_GenericNew, /*tp_new*/
4507 PyObject_GC_Del, /*tp_free*/
4508 0, /*tp_is_gc*/
4509};
4510
4511static int
4512init_stuff(void)
4513{
4514 PyObject *copyreg;
4515
4516 copyreg = PyImport_ImportModule("copyreg");
4517 if (!copyreg)
4518 return -1;
4519
4520 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4521 if (!dispatch_table)
4522 goto error;
4523
4524 extension_registry = \
4525 PyObject_GetAttrString(copyreg, "_extension_registry");
4526 if (!extension_registry)
4527 goto error;
4528
4529 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4530 if (!inverted_registry)
4531 goto error;
4532
4533 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4534 if (!extension_cache)
4535 goto error;
4536
4537 Py_DECREF(copyreg);
4538
4539 empty_tuple = PyTuple_New(0);
4540 if (empty_tuple == NULL)
4541 return -1;
4542
4543 two_tuple = PyTuple_New(2);
4544 if (two_tuple == NULL)
4545 return -1;
4546 /* We use this temp container with no regard to refcounts, or to
4547 * keeping containees alive. Exempt from GC, because we don't
4548 * want anything looking at two_tuple() by magic.
4549 */
4550 PyObject_GC_UnTrack(two_tuple);
4551
4552 return 0;
4553
4554 error:
4555 Py_DECREF(copyreg);
4556 return -1;
4557}
4558
4559static struct PyModuleDef _picklemodule = {
4560 PyModuleDef_HEAD_INIT,
4561 "_pickle",
4562 pickle_module_doc,
4563 -1,
4564 NULL,
4565 NULL,
4566 NULL,
4567 NULL,
4568 NULL
4569};
4570
4571PyMODINIT_FUNC
4572PyInit__pickle(void)
4573{
4574 PyObject *m;
4575
4576 if (PyType_Ready(&Unpickler_Type) < 0)
4577 return NULL;
4578 if (PyType_Ready(&Pickler_Type) < 0)
4579 return NULL;
4580 if (PyType_Ready(&Pdata_Type) < 0)
4581 return NULL;
4582
4583 /* Create the module and add the functions. */
4584 m = PyModule_Create(&_picklemodule);
4585 if (m == NULL)
4586 return NULL;
4587
4588 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4589 return NULL;
4590 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4591 return NULL;
4592
4593 /* Initialize the exceptions. */
4594 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4595 if (PickleError == NULL)
4596 return NULL;
4597 PicklingError = \
4598 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4599 if (PicklingError == NULL)
4600 return NULL;
4601 UnpicklingError = \
4602 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4603 if (UnpicklingError == NULL)
4604 return NULL;
4605
4606 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4607 return NULL;
4608 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4609 return NULL;
4610 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4611 return NULL;
4612
4613 if (init_stuff() < 0)
4614 return NULL;
4615
4616 return m;
4617}