blob: 6cc90b3f1e8b00dd7eb57fcb93f2434f930d78a3 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version limits.  Raise HIGHEST_PROTOCOL whenever new
   opcodes are added to the pickle protocol. */
enum {
    DEFAULT_PROTOCOL = 3,
    HIGHEST_PROTOCOL = 3
};
12
13
/* Pickle opcodes, grouped by purpose.  The values must be kept in sync with
   pickle.py; pickletools.py documents each of them extensively. */
enum opcode {
    /* Stack and stream control. */
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',

    /* None and numbers. */
    NONE            = 'N',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    BININT2         = 'M',
    LONG            = 'L',
    FLOAT           = 'F',
    BINFLOAT        = 'G',

    /* Strings. */
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',

    /* Tuples, lists and dicts. */
    EMPTY_TUPLE     = ')',
    TUPLE           = 't',
    EMPTY_LIST      = ']',
    LIST            = 'l',
    APPEND          = 'a',
    APPENDS         = 'e',
    EMPTY_DICT      = '}',
    DICT            = 'd',
    SETITEM         = 's',
    SETITEMS        = 'u',

    /* Memo access. */
    GET             = 'g',
    BINGET          = 'h',
    LONG_BINGET     = 'j',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',

    /* Globals, instances and persistent ids. */
    GLOBAL          = 'c',
    INST            = 'i',
    OBJ             = 'o',
    REDUCE          = 'R',
    BUILD           = 'b',
    PERSID          = 'P',
    BINPERSID       = 'Q',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* Not opcodes: these are the byte sequences used to pickle bools before
 * protocol 2.  Unpicklers that predate bools read them back as ints, while
 * newer unpicklers can recognize that a bool was intended.  Protocol 2 added
 * dedicated opcodes for pickling bools.
 */
#undef FALSE
#undef TRUE
#define FALSE "I00\n"
#define TRUE "I01\n"
87
/* Tuning constants for the Pickler. */
enum {
    /* Number of elements batch_list/dict() emits before issuing an
       APPENDS/SETITEMS opcode.  Keep in sync with pickle.Pickler._BATCHSIZE;
       nothing breaks if the two drift apart, but there is no known benefit
       to that either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" starts
       checking for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the Pickler write buffer.  Larger values mean fewer calls to
       the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   On failure the new reference is released again (the object was not
   stored, so nobody else would ever drop it) and the enclosing function
   returns ER.  Note that O is evaluated more than once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers for building the argument tuple passed to single-argument calls.
   The tuple is cached in self->arg, so PyTuple_New() normally runs once. */

/* Put `obj` into slot 0 of the cached 1-tuple, creating the tuple on first
   use and releasing whatever the slot held before.  The reference to `obj`
   is consumed either way: it is stored in the tuple, or dropped when the
   tuple cannot be created (self->arg is then left NULL). */
#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg == NULL)                                \
            (self)->arg = PyTuple_New(1);                       \
        if ((self)->arg == NULL) {                              \
            Py_DECREF((obj));                                   \
        }                                                       \
        else {                                                  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
    } while (0)

/* Give up the cached tuple when something else still holds a reference to
   it; it must not be modified in place by the next ARG_TUP() in that case. */
#define FREE_ARG_TUP(self) do {                                 \
        if ((self)->arg->ob_refcnt > 1)                         \
            Py_CLEAR((self)->arg);                              \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
372 XXX: Does caching the argument tuple provides any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
379 one second (wall clock time). Either ways, the loading time a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
694 /* In some rare cases (e.g., random.getrandbits), __module__ can be
695 None. If it is so, then search sys.modules for the module of
696 global. */
697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
718 if (PyObject_Compare(module_name, main_str) == 0)
719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
/* fast_save_enter() and fast_save_leave() are guards against recursive
   objects when Pickler is used in "fast mode" (i.e., with object
   memoization disabled). If the nesting of a list or dict object exceeds
   FAST_NESTING_LIMIT, these guards will start keeping an internal
   reference to the seen list or dict objects and check whether these objects
   are recursive. These are not strictly necessary, since save() has a
   hard-coded recursion limit, but they give a nicer error message than the
   typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
849 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
883static int
884save_long(PicklerObject *self, PyObject *obj)
885{
886 PyObject *repr = NULL;
887 Py_ssize_t size;
888 long val = PyLong_AsLong(obj);
889 int status = 0;
890
891 const char long_op = LONG;
892
893 if (val == -1 && PyErr_Occurred()) {
894 /* out of range for int pickling */
895 PyErr_Clear();
896 }
897 else
898 return save_int(self, val);
899
900 if (self->proto >= 2) {
901 /* Linear-time pickling. */
902 size_t nbits;
903 size_t nbytes;
904 unsigned char *pdata;
905 char header[5];
906 int i;
907 int sign = _PyLong_Sign(obj);
908
909 if (sign == 0) {
910 header[0] = LONG1;
911 header[1] = 0; /* It's 0 -- an empty bytestring. */
912 if (pickler_write(self, header, 2) < 0)
913 goto error;
914 return 0;
915 }
916 nbits = _PyLong_NumBits(obj);
917 if (nbits == (size_t)-1 && PyErr_Occurred())
918 goto error;
919 /* How many bytes do we need? There are nbits >> 3 full
920 * bytes of data, and nbits & 7 leftover bits. If there
921 * are any leftover bits, then we clearly need another
922 * byte. Wnat's not so obvious is that we *probably*
923 * need another byte even if there aren't any leftovers:
924 * the most-significant bit of the most-significant byte
925 * acts like a sign bit, and it's usually got a sense
926 * opposite of the one we need. The exception is longs
927 * of the form -(2**(8*j-1)) for j > 0. Such a long is
928 * its own 256's-complement, so has the right sign bit
929 * even without the extra byte. That's a pain to check
930 * for in advance, though, so we always grab an extra
931 * byte at the start, and cut it back later if possible.
932 */
933 nbytes = (nbits >> 3) + 1;
934 if (nbytes > INT_MAX) {
935 PyErr_SetString(PyExc_OverflowError,
936 "long too large to pickle");
937 goto error;
938 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000939 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000940 if (repr == NULL)
941 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000942 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000943 i = _PyLong_AsByteArray((PyLongObject *)obj,
944 pdata, nbytes,
945 1 /* little endian */ , 1 /* signed */ );
946 if (i < 0)
947 goto error;
948 /* If the long is negative, this may be a byte more than
949 * needed. This is so iff the MSB is all redundant sign
950 * bits.
951 */
952 if (sign < 0 &&
953 nbytes > 1 &&
954 pdata[nbytes - 1] == 0xff &&
955 (pdata[nbytes - 2] & 0x80) != 0) {
956 nbytes--;
957 }
958
959 if (nbytes < 256) {
960 header[0] = LONG1;
961 header[1] = (unsigned char)nbytes;
962 size = 2;
963 }
964 else {
965 header[0] = LONG4;
966 size = (int)nbytes;
967 for (i = 1; i < 5; i++) {
968 header[i] = (unsigned char)(size & 0xff);
969 size >>= 8;
970 }
971 size = 5;
972 }
973 if (pickler_write(self, header, size) < 0 ||
974 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
975 goto error;
976 }
977 else {
978 char *string;
979
980 /* proto < 2: write the repr and newline. This is quadratic-time
981 (in the number of digits), in both directions. */
982
983 repr = PyObject_Repr(obj);
984 if (repr == NULL)
985 goto error;
986
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000987 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000988 if (string == NULL)
989 goto error;
990
991 if (pickler_write(self, &long_op, 1) < 0 ||
992 pickler_write(self, string, size) < 0 ||
993 pickler_write(self, "\n", 1) < 0)
994 goto error;
995 }
996
997 if (0) {
998 error:
999 status = -1;
1000 }
1001 Py_XDECREF(repr);
1002
1003 return status;
1004}
1005
1006static int
1007save_float(PicklerObject *self, PyObject *obj)
1008{
1009 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1010
1011 if (self->bin) {
1012 char pdata[9];
1013 pdata[0] = BINFLOAT;
1014 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1015 return -1;
1016 if (pickler_write(self, pdata, 9) < 0)
1017 return -1;
1018 }
1019 else {
1020 char pdata[250];
1021 pdata[0] = FLOAT;
1022 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1023 /* Extend the formatted string with a newline character */
1024 strcat(pdata, "\n");
1025
1026 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1027 return -1;
1028 }
1029
1030 return 0;
1031}
1032
1033static int
1034save_bytes(PicklerObject *self, PyObject *obj)
1035{
1036 if (self->proto < 3) {
1037 /* Older pickle protocols do not have an opcode for pickling bytes
1038 objects. Therefore, we need to fake the copy protocol (i.e.,
1039 the __reduce__ method) to permit bytes object unpickling. */
1040 PyObject *reduce_value = NULL;
1041 PyObject *bytelist = NULL;
1042 int status;
1043
1044 bytelist = PySequence_List(obj);
1045 if (bytelist == NULL)
1046 return -1;
1047
1048 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1049 bytelist);
1050 if (reduce_value == NULL) {
1051 Py_DECREF(bytelist);
1052 return -1;
1053 }
1054
1055 /* save_reduce() will memoize the object automatically. */
1056 status = save_reduce(self, reduce_value, obj);
1057 Py_DECREF(reduce_value);
1058 Py_DECREF(bytelist);
1059 return status;
1060 }
1061 else {
1062 Py_ssize_t size;
1063 char header[5];
1064 int len;
1065
1066 size = PyBytes_Size(obj);
1067 if (size < 0)
1068 return -1;
1069
1070 if (size < 256) {
1071 header[0] = SHORT_BINBYTES;
1072 header[1] = (unsigned char)size;
1073 len = 2;
1074 }
1075 else if (size <= 0xffffffffL) {
1076 header[0] = BINBYTES;
1077 header[1] = (unsigned char)(size & 0xff);
1078 header[2] = (unsigned char)((size >> 8) & 0xff);
1079 header[3] = (unsigned char)((size >> 16) & 0xff);
1080 header[4] = (unsigned char)((size >> 24) & 0xff);
1081 len = 5;
1082 }
1083 else {
1084 return -1; /* string too large */
1085 }
1086
1087 if (pickler_write(self, header, len) < 0)
1088 return -1;
1089
1090 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1091 return -1;
1092
1093 if (memo_put(self, obj) < 0)
1094 return -1;
1095
1096 return 0;
1097 }
1098}
1099
1100/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1101 backslash and newline characters to \uXXXX escapes. */
1102static PyObject *
1103raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1104{
1105 PyObject *repr, *result;
1106 char *p;
1107 char *q;
1108
1109 static const char *hexdigits = "0123456789abcdef";
1110
1111#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001112 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001113#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001114 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001115#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001116
1117 if (size > PY_SSIZE_T_MAX / expandsize)
1118 return PyErr_NoMemory();
1119
1120 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001121 if (repr == NULL)
1122 return NULL;
1123 if (size == 0)
1124 goto done;
1125
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001126 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001127 while (size-- > 0) {
1128 Py_UNICODE ch = *s++;
1129#ifdef Py_UNICODE_WIDE
1130 /* Map 32-bit characters to '\Uxxxxxxxx' */
1131 if (ch >= 0x10000) {
1132 *p++ = '\\';
1133 *p++ = 'U';
1134 *p++ = hexdigits[(ch >> 28) & 0xf];
1135 *p++ = hexdigits[(ch >> 24) & 0xf];
1136 *p++ = hexdigits[(ch >> 20) & 0xf];
1137 *p++ = hexdigits[(ch >> 16) & 0xf];
1138 *p++ = hexdigits[(ch >> 12) & 0xf];
1139 *p++ = hexdigits[(ch >> 8) & 0xf];
1140 *p++ = hexdigits[(ch >> 4) & 0xf];
1141 *p++ = hexdigits[ch & 15];
1142 }
1143 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001144#else
1145 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1146 if (ch >= 0xD800 && ch < 0xDC00) {
1147 Py_UNICODE ch2;
1148 Py_UCS4 ucs;
1149
1150 ch2 = *s++;
1151 size--;
1152 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1153 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1154 *p++ = '\\';
1155 *p++ = 'U';
1156 *p++ = hexdigits[(ucs >> 28) & 0xf];
1157 *p++ = hexdigits[(ucs >> 24) & 0xf];
1158 *p++ = hexdigits[(ucs >> 20) & 0xf];
1159 *p++ = hexdigits[(ucs >> 16) & 0xf];
1160 *p++ = hexdigits[(ucs >> 12) & 0xf];
1161 *p++ = hexdigits[(ucs >> 8) & 0xf];
1162 *p++ = hexdigits[(ucs >> 4) & 0xf];
1163 *p++ = hexdigits[ucs & 0xf];
1164 continue;
1165 }
1166 /* Fall through: isolated surrogates are copied as-is */
1167 s--;
1168 size++;
1169 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001170#endif
1171 /* Map 16-bit characters to '\uxxxx' */
1172 if (ch >= 256 || ch == '\\' || ch == '\n') {
1173 *p++ = '\\';
1174 *p++ = 'u';
1175 *p++ = hexdigits[(ch >> 12) & 0xf];
1176 *p++ = hexdigits[(ch >> 8) & 0xf];
1177 *p++ = hexdigits[(ch >> 4) & 0xf];
1178 *p++ = hexdigits[ch & 15];
1179 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001180 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001181 else
1182 *p++ = (char) ch;
1183 }
1184 size = p - q;
1185
1186 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001187 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001188 Py_DECREF(repr);
1189 return result;
1190}
1191
1192static int
1193save_unicode(PicklerObject *self, PyObject *obj)
1194{
1195 Py_ssize_t size;
1196 PyObject *encoded = NULL;
1197
1198 if (self->bin) {
1199 char pdata[5];
1200
1201 encoded = PyUnicode_AsUTF8String(obj);
1202 if (encoded == NULL)
1203 goto error;
1204
1205 size = PyBytes_GET_SIZE(encoded);
1206 if (size < 0 || size > 0xffffffffL)
1207 goto error; /* string too large */
1208
1209 pdata[0] = BINUNICODE;
1210 pdata[1] = (unsigned char)(size & 0xff);
1211 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1212 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1213 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1214
1215 if (pickler_write(self, pdata, 5) < 0)
1216 goto error;
1217
1218 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1219 goto error;
1220 }
1221 else {
1222 const char unicode_op = UNICODE;
1223
1224 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1225 PyUnicode_GET_SIZE(obj));
1226 if (encoded == NULL)
1227 goto error;
1228
1229 if (pickler_write(self, &unicode_op, 1) < 0)
1230 goto error;
1231
1232 size = PyBytes_GET_SIZE(encoded);
1233 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1234 goto error;
1235
1236 if (pickler_write(self, "\n", 1) < 0)
1237 goto error;
1238 }
1239 if (memo_put(self, obj) < 0)
1240 goto error;
1241
1242 Py_DECREF(encoded);
1243 return 0;
1244
1245 error:
1246 Py_XDECREF(encoded);
1247 return -1;
1248}
1249
1250/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1251static int
1252store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1253{
1254 int i;
1255
1256 assert(PyTuple_Size(t) == len);
1257
1258 for (i = 0; i < len; i++) {
1259 PyObject *element = PyTuple_GET_ITEM(t, i);
1260
1261 if (element == NULL)
1262 return -1;
1263 if (save(self, element, 0) < 0)
1264 return -1;
1265 }
1266
1267 return 0;
1268}
1269
1270/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1271 * used across protocols to minimize the space needed to pickle them.
1272 * Tuples are also the only builtin immutable type that can be recursive
1273 * (a tuple can be reached from itself), and that requires some subtle
1274 * magic so that it works in all cases. IOW, this is a long routine.
1275 */
1276static int
1277save_tuple(PicklerObject *self, PyObject *obj)
1278{
1279 PyObject *memo_key = NULL;
1280 int len, i;
1281 int status = 0;
1282
1283 const char mark_op = MARK;
1284 const char tuple_op = TUPLE;
1285 const char pop_op = POP;
1286 const char pop_mark_op = POP_MARK;
1287 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1288
1289 if ((len = PyTuple_Size(obj)) < 0)
1290 return -1;
1291
1292 if (len == 0) {
1293 char pdata[2];
1294
1295 if (self->proto) {
1296 pdata[0] = EMPTY_TUPLE;
1297 len = 1;
1298 }
1299 else {
1300 pdata[0] = MARK;
1301 pdata[1] = TUPLE;
1302 len = 2;
1303 }
1304 if (pickler_write(self, pdata, len) < 0)
1305 return -1;
1306 return 0;
1307 }
1308
1309 /* id(tuple) isn't in the memo now. If it shows up there after
1310 * saving the tuple elements, the tuple must be recursive, in
1311 * which case we'll pop everything we put on the stack, and fetch
1312 * its value from the memo.
1313 */
1314 memo_key = PyLong_FromVoidPtr(obj);
1315 if (memo_key == NULL)
1316 return -1;
1317
1318 if (len <= 3 && self->proto >= 2) {
1319 /* Use TUPLE{1,2,3} opcodes. */
1320 if (store_tuple_elements(self, obj, len) < 0)
1321 goto error;
1322
1323 if (PyDict_GetItem(self->memo, memo_key)) {
1324 /* pop the len elements */
1325 for (i = 0; i < len; i++)
1326 if (pickler_write(self, &pop_op, 1) < 0)
1327 goto error;
1328 /* fetch from memo */
1329 if (memo_get(self, memo_key) < 0)
1330 goto error;
1331
1332 Py_DECREF(memo_key);
1333 return 0;
1334 }
1335 else { /* Not recursive. */
1336 if (pickler_write(self, len2opcode + len, 1) < 0)
1337 goto error;
1338 }
1339 goto memoize;
1340 }
1341
1342 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1343 * Generate MARK e1 e2 ... TUPLE
1344 */
1345 if (pickler_write(self, &mark_op, 1) < 0)
1346 goto error;
1347
1348 if (store_tuple_elements(self, obj, len) < 0)
1349 goto error;
1350
1351 if (PyDict_GetItem(self->memo, memo_key)) {
1352 /* pop the stack stuff we pushed */
1353 if (self->bin) {
1354 if (pickler_write(self, &pop_mark_op, 1) < 0)
1355 goto error;
1356 }
1357 else {
1358 /* Note that we pop one more than len, to remove
1359 * the MARK too.
1360 */
1361 for (i = 0; i <= len; i++)
1362 if (pickler_write(self, &pop_op, 1) < 0)
1363 goto error;
1364 }
1365 /* fetch from memo */
1366 if (memo_get(self, memo_key) < 0)
1367 goto error;
1368
1369 Py_DECREF(memo_key);
1370 return 0;
1371 }
1372 else { /* Not recursive. */
1373 if (pickler_write(self, &tuple_op, 1) < 0)
1374 goto error;
1375 }
1376
1377 memoize:
1378 if (memo_put(self, obj) < 0)
1379 goto error;
1380
1381 if (0) {
1382 error:
1383 status = -1;
1384 }
1385
1386 Py_DECREF(memo_key);
1387 return status;
1388}
1389
1390/* iter is an iterator giving items, and we batch up chunks of
1391 * MARK item item ... item APPENDS
1392 * opcode sequences. Calling code should have arranged to first create an
1393 * empty list, or list-like object, for the APPENDS to operate on.
1394 * Returns 0 on success, <0 on error.
1395 */
1396static int
1397batch_list(PicklerObject *self, PyObject *iter)
1398{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001399 PyObject *obj = NULL;
1400 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001401 int i, n;
1402
1403 const char mark_op = MARK;
1404 const char append_op = APPEND;
1405 const char appends_op = APPENDS;
1406
1407 assert(iter != NULL);
1408
1409 /* XXX: I think this function could be made faster by avoiding the
1410 iterator interface and fetching objects directly from list using
1411 PyList_GET_ITEM.
1412 */
1413
1414 if (self->proto == 0) {
1415 /* APPENDS isn't available; do one at a time. */
1416 for (;;) {
1417 obj = PyIter_Next(iter);
1418 if (obj == NULL) {
1419 if (PyErr_Occurred())
1420 return -1;
1421 break;
1422 }
1423 i = save(self, obj, 0);
1424 Py_DECREF(obj);
1425 if (i < 0)
1426 return -1;
1427 if (pickler_write(self, &append_op, 1) < 0)
1428 return -1;
1429 }
1430 return 0;
1431 }
1432
1433 /* proto > 0: write in batches of BATCHSIZE. */
1434 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001435 /* Get first item */
1436 firstitem = PyIter_Next(iter);
1437 if (firstitem == NULL) {
1438 if (PyErr_Occurred())
1439 goto error;
1440
1441 /* nothing more to add */
1442 break;
1443 }
1444
1445 /* Try to get a second item */
1446 obj = PyIter_Next(iter);
1447 if (obj == NULL) {
1448 if (PyErr_Occurred())
1449 goto error;
1450
1451 /* Only one item to write */
1452 if (save(self, firstitem, 0) < 0)
1453 goto error;
1454 if (pickler_write(self, &append_op, 1) < 0)
1455 goto error;
1456 Py_CLEAR(firstitem);
1457 break;
1458 }
1459
1460 /* More than one item to write */
1461
1462 /* Pump out MARK, items, APPENDS. */
1463 if (pickler_write(self, &mark_op, 1) < 0)
1464 goto error;
1465
1466 if (save(self, firstitem, 0) < 0)
1467 goto error;
1468 Py_CLEAR(firstitem);
1469 n = 1;
1470
1471 /* Fetch and save up to BATCHSIZE items */
1472 while (obj) {
1473 if (save(self, obj, 0) < 0)
1474 goto error;
1475 Py_CLEAR(obj);
1476 n += 1;
1477
1478 if (n == BATCHSIZE)
1479 break;
1480
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001481 obj = PyIter_Next(iter);
1482 if (obj == NULL) {
1483 if (PyErr_Occurred())
1484 goto error;
1485 break;
1486 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001487 }
1488
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001489 if (pickler_write(self, &appends_op, 1) < 0)
1490 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001491
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001492 } while (n == BATCHSIZE);
1493 return 0;
1494
1495 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001496 Py_XDECREF(firstitem);
1497 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001498 return -1;
1499}
1500
1501static int
1502save_list(PicklerObject *self, PyObject *obj)
1503{
1504 PyObject *iter;
1505 char header[3];
1506 int len;
1507 int status = 0;
1508
1509 if (self->fast && !fast_save_enter(self, obj))
1510 goto error;
1511
1512 /* Create an empty list. */
1513 if (self->bin) {
1514 header[0] = EMPTY_LIST;
1515 len = 1;
1516 }
1517 else {
1518 header[0] = MARK;
1519 header[1] = LIST;
1520 len = 2;
1521 }
1522
1523 if (pickler_write(self, header, len) < 0)
1524 goto error;
1525
1526 /* Get list length, and bow out early if empty. */
1527 if ((len = PyList_Size(obj)) < 0)
1528 goto error;
1529
1530 if (memo_put(self, obj) < 0)
1531 goto error;
1532
1533 if (len != 0) {
1534 /* Save the list elements. */
1535 iter = PyObject_GetIter(obj);
1536 if (iter == NULL)
1537 goto error;
1538 status = batch_list(self, iter);
1539 Py_DECREF(iter);
1540 }
1541
1542 if (0) {
1543 error:
1544 status = -1;
1545 }
1546
1547 if (self->fast && !fast_save_leave(self, obj))
1548 status = -1;
1549
1550 return status;
1551}
1552
1553/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1554 * MARK key value ... key value SETITEMS
1555 * opcode sequences. Calling code should have arranged to first create an
1556 * empty dict, or dict-like object, for the SETITEMS to operate on.
1557 * Returns 0 on success, <0 on error.
1558 *
1559 * This is very much like batch_list(). The difference between saving
1560 * elements directly, and picking apart two-tuples, is so long-winded at
1561 * the C level, though, that attempts to combine these routines were too
1562 * ugly to bear.
1563 */
1564static int
1565batch_dict(PicklerObject *self, PyObject *iter)
1566{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001567 PyObject *obj = NULL;
1568 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001569 int i, n;
1570
1571 const char mark_op = MARK;
1572 const char setitem_op = SETITEM;
1573 const char setitems_op = SETITEMS;
1574
1575 assert(iter != NULL);
1576
1577 if (self->proto == 0) {
1578 /* SETITEMS isn't available; do one at a time. */
1579 for (;;) {
1580 obj = PyIter_Next(iter);
1581 if (obj == NULL) {
1582 if (PyErr_Occurred())
1583 return -1;
1584 break;
1585 }
1586 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1587 PyErr_SetString(PyExc_TypeError, "dict items "
1588 "iterator must return 2-tuples");
1589 return -1;
1590 }
1591 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1592 if (i >= 0)
1593 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1594 Py_DECREF(obj);
1595 if (i < 0)
1596 return -1;
1597 if (pickler_write(self, &setitem_op, 1) < 0)
1598 return -1;
1599 }
1600 return 0;
1601 }
1602
1603 /* proto > 0: write in batches of BATCHSIZE. */
1604 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001605 /* Get first item */
1606 firstitem = PyIter_Next(iter);
1607 if (firstitem == NULL) {
1608 if (PyErr_Occurred())
1609 goto error;
1610
1611 /* nothing more to add */
1612 break;
1613 }
1614 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1615 PyErr_SetString(PyExc_TypeError, "dict items "
1616 "iterator must return 2-tuples");
1617 goto error;
1618 }
1619
1620 /* Try to get a second item */
1621 obj = PyIter_Next(iter);
1622 if (obj == NULL) {
1623 if (PyErr_Occurred())
1624 goto error;
1625
1626 /* Only one item to write */
1627 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1628 goto error;
1629 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1630 goto error;
1631 if (pickler_write(self, &setitem_op, 1) < 0)
1632 goto error;
1633 Py_CLEAR(firstitem);
1634 break;
1635 }
1636
1637 /* More than one item to write */
1638
1639 /* Pump out MARK, items, SETITEMS. */
1640 if (pickler_write(self, &mark_op, 1) < 0)
1641 goto error;
1642
1643 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1644 goto error;
1645 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1646 goto error;
1647 Py_CLEAR(firstitem);
1648 n = 1;
1649
1650 /* Fetch and save up to BATCHSIZE items */
1651 while (obj) {
1652 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1653 PyErr_SetString(PyExc_TypeError, "dict items "
1654 "iterator must return 2-tuples");
1655 goto error;
1656 }
1657 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1658 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1659 goto error;
1660 Py_CLEAR(obj);
1661 n += 1;
1662
1663 if (n == BATCHSIZE)
1664 break;
1665
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001666 obj = PyIter_Next(iter);
1667 if (obj == NULL) {
1668 if (PyErr_Occurred())
1669 goto error;
1670 break;
1671 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001672 }
1673
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001674 if (pickler_write(self, &setitems_op, 1) < 0)
1675 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001676
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001677 } while (n == BATCHSIZE);
1678 return 0;
1679
1680 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001681 Py_XDECREF(firstitem);
1682 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001683 return -1;
1684}
1685
1686static int
1687save_dict(PicklerObject *self, PyObject *obj)
1688{
1689 PyObject *items, *iter;
1690 char header[3];
1691 int len;
1692 int status = 0;
1693
1694 if (self->fast && !fast_save_enter(self, obj))
1695 goto error;
1696
1697 /* Create an empty dict. */
1698 if (self->bin) {
1699 header[0] = EMPTY_DICT;
1700 len = 1;
1701 }
1702 else {
1703 header[0] = MARK;
1704 header[1] = DICT;
1705 len = 2;
1706 }
1707
1708 if (pickler_write(self, header, len) < 0)
1709 goto error;
1710
1711 /* Get dict size, and bow out early if empty. */
1712 if ((len = PyDict_Size(obj)) < 0)
1713 goto error;
1714
1715 if (memo_put(self, obj) < 0)
1716 goto error;
1717
1718 if (len != 0) {
1719 /* Save the dict items. */
1720 items = PyObject_CallMethod(obj, "items", "()");
1721 if (items == NULL)
1722 goto error;
1723 iter = PyObject_GetIter(items);
1724 Py_DECREF(items);
1725 if (iter == NULL)
1726 goto error;
1727 status = batch_dict(self, iter);
1728 Py_DECREF(iter);
1729 }
1730
1731 if (0) {
1732 error:
1733 status = -1;
1734 }
1735
1736 if (self->fast && !fast_save_leave(self, obj))
1737 status = -1;
1738
1739 return status;
1740}
1741
1742static int
1743save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1744{
1745 static PyObject *name_str = NULL;
1746 PyObject *global_name = NULL;
1747 PyObject *module_name = NULL;
1748 PyObject *module = NULL;
1749 PyObject *cls;
1750 int status = 0;
1751
1752 const char global_op = GLOBAL;
1753
1754 if (name_str == NULL) {
1755 name_str = PyUnicode_InternFromString("__name__");
1756 if (name_str == NULL)
1757 goto error;
1758 }
1759
1760 if (name) {
1761 global_name = name;
1762 Py_INCREF(global_name);
1763 }
1764 else {
1765 global_name = PyObject_GetAttr(obj, name_str);
1766 if (global_name == NULL)
1767 goto error;
1768 }
1769
1770 module_name = whichmodule(obj, global_name);
1771 if (module_name == NULL)
1772 goto error;
1773
1774 /* XXX: Change to use the import C API directly with level=0 to disallow
1775 relative imports.
1776
1777 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1778 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1779 custom import functions (IMHO, this would be a nice security
1780 feature). The import C API would need to be extended to support the
1781 extra parameters of __import__ to fix that. */
1782 module = PyImport_Import(module_name);
1783 if (module == NULL) {
1784 PyErr_Format(PicklingError,
1785 "Can't pickle %R: import of module %R failed",
1786 obj, module_name);
1787 goto error;
1788 }
1789 cls = PyObject_GetAttr(module, global_name);
1790 if (cls == NULL) {
1791 PyErr_Format(PicklingError,
1792 "Can't pickle %R: attribute lookup %S.%S failed",
1793 obj, module_name, global_name);
1794 goto error;
1795 }
1796 if (cls != obj) {
1797 Py_DECREF(cls);
1798 PyErr_Format(PicklingError,
1799 "Can't pickle %R: it's not the same object as %S.%S",
1800 obj, module_name, global_name);
1801 goto error;
1802 }
1803 Py_DECREF(cls);
1804
1805 if (self->proto >= 2) {
1806 /* See whether this is in the extension registry, and if
1807 * so generate an EXT opcode.
1808 */
1809 PyObject *code_obj; /* extension code as Python object */
1810 long code; /* extension code as C value */
1811 char pdata[5];
1812 int n;
1813
1814 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1815 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1816 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1817 /* The object is not registered in the extension registry.
1818 This is the most likely code path. */
1819 if (code_obj == NULL)
1820 goto gen_global;
1821
1822 /* XXX: pickle.py doesn't check neither the type, nor the range
1823 of the value returned by the extension_registry. It should for
1824 consistency. */
1825
1826 /* Verify code_obj has the right type and value. */
1827 if (!PyLong_Check(code_obj)) {
1828 PyErr_Format(PicklingError,
1829 "Can't pickle %R: extension code %R isn't an integer",
1830 obj, code_obj);
1831 goto error;
1832 }
1833 code = PyLong_AS_LONG(code_obj);
1834 if (code <= 0 || code > 0x7fffffffL) {
1835 PyErr_Format(PicklingError,
1836 "Can't pickle %R: extension code %ld is out of range",
1837 obj, code);
1838 goto error;
1839 }
1840
1841 /* Generate an EXT opcode. */
1842 if (code <= 0xff) {
1843 pdata[0] = EXT1;
1844 pdata[1] = (unsigned char)code;
1845 n = 2;
1846 }
1847 else if (code <= 0xffff) {
1848 pdata[0] = EXT2;
1849 pdata[1] = (unsigned char)(code & 0xff);
1850 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1851 n = 3;
1852 }
1853 else {
1854 pdata[0] = EXT4;
1855 pdata[1] = (unsigned char)(code & 0xff);
1856 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1857 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1858 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1859 n = 5;
1860 }
1861
1862 if (pickler_write(self, pdata, n) < 0)
1863 goto error;
1864 }
1865 else {
1866 /* Generate a normal global opcode if we are using a pickle
1867 protocol <= 2, or if the object is not registered in the
1868 extension registry. */
1869 PyObject *encoded;
1870 PyObject *(*unicode_encoder)(PyObject *);
1871
1872 gen_global:
1873 if (pickler_write(self, &global_op, 1) < 0)
1874 goto error;
1875
1876 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1877 the module name and the global name using UTF-8. We do so only when
1878 we are using the pickle protocol newer than version 3. This is to
1879 ensure compatibility with older Unpickler running on Python 2.x. */
1880 if (self->proto >= 3) {
1881 unicode_encoder = PyUnicode_AsUTF8String;
1882 }
1883 else {
1884 unicode_encoder = PyUnicode_AsASCIIString;
1885 }
1886
1887 /* Save the name of the module. */
1888 encoded = unicode_encoder(module_name);
1889 if (encoded == NULL) {
1890 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1891 PyErr_Format(PicklingError,
1892 "can't pickle module identifier '%S' using "
1893 "pickle protocol %i", module_name, self->proto);
1894 goto error;
1895 }
1896 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1897 PyBytes_GET_SIZE(encoded)) < 0) {
1898 Py_DECREF(encoded);
1899 goto error;
1900 }
1901 Py_DECREF(encoded);
1902 if(pickler_write(self, "\n", 1) < 0)
1903 goto error;
1904
1905 /* Save the name of the module. */
1906 encoded = unicode_encoder(global_name);
1907 if (encoded == NULL) {
1908 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1909 PyErr_Format(PicklingError,
1910 "can't pickle global identifier '%S' using "
1911 "pickle protocol %i", global_name, self->proto);
1912 goto error;
1913 }
1914 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1915 PyBytes_GET_SIZE(encoded)) < 0) {
1916 Py_DECREF(encoded);
1917 goto error;
1918 }
1919 Py_DECREF(encoded);
1920 if(pickler_write(self, "\n", 1) < 0)
1921 goto error;
1922
1923 /* Memoize the object. */
1924 if (memo_put(self, obj) < 0)
1925 goto error;
1926 }
1927
1928 if (0) {
1929 error:
1930 status = -1;
1931 }
1932 Py_XDECREF(module_name);
1933 Py_XDECREF(global_name);
1934 Py_XDECREF(module);
1935
1936 return status;
1937}
1938
1939static int
1940save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1941{
1942 PyObject *pid = NULL;
1943 int status = 0;
1944
1945 const char persid_op = PERSID;
1946 const char binpersid_op = BINPERSID;
1947
1948 Py_INCREF(obj);
1949 pid = pickler_call(self, func, obj);
1950 if (pid == NULL)
1951 return -1;
1952
1953 if (pid != Py_None) {
1954 if (self->bin) {
1955 if (save(self, pid, 1) < 0 ||
1956 pickler_write(self, &binpersid_op, 1) < 0)
1957 goto error;
1958 }
1959 else {
1960 PyObject *pid_str = NULL;
1961 char *pid_ascii_bytes;
1962 Py_ssize_t size;
1963
1964 pid_str = PyObject_Str(pid);
1965 if (pid_str == NULL)
1966 goto error;
1967
1968 /* XXX: Should it check whether the persistent id only contains
1969 ASCII characters? And what if the pid contains embedded
1970 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001971 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001972 Py_DECREF(pid_str);
1973 if (pid_ascii_bytes == NULL)
1974 goto error;
1975
1976 if (pickler_write(self, &persid_op, 1) < 0 ||
1977 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1978 pickler_write(self, "\n", 1) < 0)
1979 goto error;
1980 }
1981 status = 1;
1982 }
1983
1984 if (0) {
1985 error:
1986 status = -1;
1987 }
1988 Py_XDECREF(pid);
1989
1990 return status;
1991}
1992
1993/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1994 * appropriate __reduce__ method for obj.
1995 */
1996static int
1997save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1998{
1999 PyObject *callable;
2000 PyObject *argtup;
2001 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002002 PyObject *listitems = Py_None;
2003 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002004 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002005
2006 int use_newobj = self->proto >= 2;
2007
2008 const char reduce_op = REDUCE;
2009 const char build_op = BUILD;
2010 const char newobj_op = NEWOBJ;
2011
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002012 size = PyTuple_Size(args);
2013 if (size < 2 || size > 5) {
2014 PyErr_SetString(PicklingError, "tuple returned by "
2015 "__reduce__ must contain 2 through 5 elements");
2016 return -1;
2017 }
2018
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002019 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2020 &callable, &argtup, &state, &listitems, &dictitems))
2021 return -1;
2022
2023 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002024 PyErr_SetString(PicklingError, "first item of the tuple "
2025 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002026 return -1;
2027 }
2028 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002029 PyErr_SetString(PicklingError, "second item of the tuple "
2030 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002031 return -1;
2032 }
2033
2034 if (state == Py_None)
2035 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002036
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002037 if (listitems == Py_None)
2038 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002039 else if (!PyIter_Check(listitems)) {
2040 PyErr_Format(PicklingError, "Fourth element of tuple"
2041 "returned by __reduce__ must be an iterator, not %s",
2042 Py_TYPE(listitems)->tp_name);
2043 return -1;
2044 }
2045
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002046 if (dictitems == Py_None)
2047 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002048 else if (!PyIter_Check(dictitems)) {
2049 PyErr_Format(PicklingError, "Fifth element of tuple"
2050 "returned by __reduce__ must be an iterator, not %s",
2051 Py_TYPE(dictitems)->tp_name);
2052 return -1;
2053 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002054
2055 /* Protocol 2 special case: if callable's name is __newobj__, use
2056 NEWOBJ. */
2057 if (use_newobj) {
2058 static PyObject *newobj_str = NULL;
2059 PyObject *name_str;
2060
2061 if (newobj_str == NULL) {
2062 newobj_str = PyUnicode_InternFromString("__newobj__");
2063 }
2064
2065 name_str = PyObject_GetAttrString(callable, "__name__");
2066 if (name_str == NULL) {
2067 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2068 PyErr_Clear();
2069 else
2070 return -1;
2071 use_newobj = 0;
2072 }
2073 else {
2074 use_newobj = PyUnicode_Check(name_str) &&
2075 PyUnicode_Compare(name_str, newobj_str) == 0;
2076 Py_DECREF(name_str);
2077 }
2078 }
2079 if (use_newobj) {
2080 PyObject *cls;
2081 PyObject *newargtup;
2082 PyObject *obj_class;
2083 int p;
2084
2085 /* Sanity checks. */
2086 if (Py_SIZE(argtup) < 1) {
2087 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2088 return -1;
2089 }
2090
2091 cls = PyTuple_GET_ITEM(argtup, 0);
2092 if (!PyObject_HasAttrString(cls, "__new__")) {
2093 PyErr_SetString(PicklingError, "args[0] from "
2094 "__newobj__ args has no __new__");
2095 return -1;
2096 }
2097
2098 if (obj != NULL) {
2099 obj_class = PyObject_GetAttrString(obj, "__class__");
2100 if (obj_class == NULL) {
2101 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2102 PyErr_Clear();
2103 else
2104 return -1;
2105 }
2106 p = obj_class != cls; /* true iff a problem */
2107 Py_DECREF(obj_class);
2108 if (p) {
2109 PyErr_SetString(PicklingError, "args[0] from "
2110 "__newobj__ args has the wrong class");
2111 return -1;
2112 }
2113 }
2114 /* XXX: These calls save() are prone to infinite recursion. Imagine
2115 what happen if the value returned by the __reduce__() method of
2116 some extension type contains another object of the same type. Ouch!
2117
2118 Here is a quick example, that I ran into, to illustrate what I
2119 mean:
2120
2121 >>> import pickle, copyreg
2122 >>> copyreg.dispatch_table.pop(complex)
2123 >>> pickle.dumps(1+2j)
2124 Traceback (most recent call last):
2125 ...
2126 RuntimeError: maximum recursion depth exceeded
2127
2128 Removing the complex class from copyreg.dispatch_table made the
2129 __reduce_ex__() method emit another complex object:
2130
2131 >>> (1+1j).__reduce_ex__(2)
2132 (<function __newobj__ at 0xb7b71c3c>,
2133 (<class 'complex'>, (1+1j)), None, None, None)
2134
2135 Thus when save() was called on newargstup (the 2nd item) recursion
2136 ensued. Of course, the bug was in the complex class which had a
2137 broken __getnewargs__() that emitted another complex object. But,
2138 the point, here, is it is quite easy to end up with a broken reduce
2139 function. */
2140
2141 /* Save the class and its __new__ arguments. */
2142 if (save(self, cls, 0) < 0)
2143 return -1;
2144
2145 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2146 if (newargtup == NULL)
2147 return -1;
2148
2149 p = save(self, newargtup, 0);
2150 Py_DECREF(newargtup);
2151 if (p < 0)
2152 return -1;
2153
2154 /* Add NEWOBJ opcode. */
2155 if (pickler_write(self, &newobj_op, 1) < 0)
2156 return -1;
2157 }
2158 else { /* Not using NEWOBJ. */
2159 if (save(self, callable, 0) < 0 ||
2160 save(self, argtup, 0) < 0 ||
2161 pickler_write(self, &reduce_op, 1) < 0)
2162 return -1;
2163 }
2164
2165 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2166 the caller do not want to memoize the object. Not particularly useful,
2167 but that is to mimic the behavior save_reduce() in pickle.py when
2168 obj is None. */
2169 if (obj && memo_put(self, obj) < 0)
2170 return -1;
2171
2172 if (listitems && batch_list(self, listitems) < 0)
2173 return -1;
2174
2175 if (dictitems && batch_dict(self, dictitems) < 0)
2176 return -1;
2177
2178 if (state) {
2179 if (save(self, state, 0) < 0 ||
2180 pickler_write(self, &build_op, 1) < 0)
2181 return -1;
2182 }
2183
2184 return 0;
2185}
2186
2187static int
2188save(PicklerObject *self, PyObject *obj, int pers_save)
2189{
2190 PyTypeObject *type;
2191 PyObject *reduce_func = NULL;
2192 PyObject *reduce_value = NULL;
2193 PyObject *memo_key = NULL;
2194 int status = 0;
2195
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002196 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2197 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002198
2199 /* The extra pers_save argument is necessary to avoid calling save_pers()
2200 on its returned object. */
2201 if (!pers_save && self->pers_func) {
2202 /* save_pers() returns:
2203 -1 to signal an error;
2204 0 if it did nothing successfully;
2205 1 if a persistent id was saved.
2206 */
2207 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2208 goto done;
2209 }
2210
2211 type = Py_TYPE(obj);
2212
2213 /* XXX: The old cPickle had an optimization that used switch-case
2214 statement dispatching on the first letter of the type name. It was
2215 probably not a bad idea after all. If benchmarks shows that particular
2216 optimization had some real benefits, it would be nice to add it
2217 back. */
2218
2219 /* Atom types; these aren't memoized, so don't check the memo. */
2220
2221 if (obj == Py_None) {
2222 status = save_none(self, obj);
2223 goto done;
2224 }
2225 else if (obj == Py_False || obj == Py_True) {
2226 status = save_bool(self, obj);
2227 goto done;
2228 }
2229 else if (type == &PyLong_Type) {
2230 status = save_long(self, obj);
2231 goto done;
2232 }
2233 else if (type == &PyFloat_Type) {
2234 status = save_float(self, obj);
2235 goto done;
2236 }
2237
2238 /* Check the memo to see if it has the object. If so, generate
2239 a GET (or BINGET) opcode, instead of pickling the object
2240 once again. */
2241 memo_key = PyLong_FromVoidPtr(obj);
2242 if (memo_key == NULL)
2243 goto error;
2244 if (PyDict_GetItem(self->memo, memo_key)) {
2245 if (memo_get(self, memo_key) < 0)
2246 goto error;
2247 goto done;
2248 }
2249
2250 if (type == &PyBytes_Type) {
2251 status = save_bytes(self, obj);
2252 goto done;
2253 }
2254 else if (type == &PyUnicode_Type) {
2255 status = save_unicode(self, obj);
2256 goto done;
2257 }
2258 else if (type == &PyDict_Type) {
2259 status = save_dict(self, obj);
2260 goto done;
2261 }
2262 else if (type == &PyList_Type) {
2263 status = save_list(self, obj);
2264 goto done;
2265 }
2266 else if (type == &PyTuple_Type) {
2267 status = save_tuple(self, obj);
2268 goto done;
2269 }
2270 else if (type == &PyType_Type) {
2271 status = save_global(self, obj, NULL);
2272 goto done;
2273 }
2274 else if (type == &PyFunction_Type) {
2275 status = save_global(self, obj, NULL);
2276 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2277 /* fall back to reduce */
2278 PyErr_Clear();
2279 }
2280 else {
2281 goto done;
2282 }
2283 }
2284 else if (type == &PyCFunction_Type) {
2285 status = save_global(self, obj, NULL);
2286 goto done;
2287 }
2288 else if (PyType_IsSubtype(type, &PyType_Type)) {
2289 status = save_global(self, obj, NULL);
2290 goto done;
2291 }
2292
2293 /* XXX: This part needs some unit tests. */
2294
2295 /* Get a reduction callable, and call it. This may come from
2296 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2297 * or the object's __reduce__ method.
2298 */
2299 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2300 if (reduce_func != NULL) {
2301 /* Here, the reference count of the reduce_func object returned by
2302 PyDict_GetItem needs to be increased to be consistent with the one
2303 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2304 reduce_func at the end of the save() routine.
2305 */
2306 Py_INCREF(reduce_func);
2307 Py_INCREF(obj);
2308 reduce_value = pickler_call(self, reduce_func, obj);
2309 }
2310 else {
2311 static PyObject *reduce_str = NULL;
2312 static PyObject *reduce_ex_str = NULL;
2313
2314 /* Cache the name of the reduce methods. */
2315 if (reduce_str == NULL) {
2316 reduce_str = PyUnicode_InternFromString("__reduce__");
2317 if (reduce_str == NULL)
2318 goto error;
2319 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2320 if (reduce_ex_str == NULL)
2321 goto error;
2322 }
2323
2324 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2325 automatically defined as __reduce__. While this is convenient, this
2326 make it impossible to know which method was actually called. Of
2327 course, this is not a big deal. But still, it would be nice to let
2328 the user know which method was called when something go
2329 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2330 don't actually have to check for a __reduce__ method. */
2331
2332 /* Check for a __reduce_ex__ method. */
2333 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2334 if (reduce_func != NULL) {
2335 PyObject *proto;
2336 proto = PyLong_FromLong(self->proto);
2337 if (proto != NULL) {
2338 reduce_value = pickler_call(self, reduce_func, proto);
2339 }
2340 }
2341 else {
2342 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2343 PyErr_Clear();
2344 else
2345 goto error;
2346 /* Check for a __reduce__ method. */
2347 reduce_func = PyObject_GetAttr(obj, reduce_str);
2348 if (reduce_func != NULL) {
2349 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2350 }
2351 else {
2352 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2353 type->tp_name, obj);
2354 goto error;
2355 }
2356 }
2357 }
2358
2359 if (reduce_value == NULL)
2360 goto error;
2361
2362 if (PyUnicode_Check(reduce_value)) {
2363 status = save_global(self, obj, reduce_value);
2364 goto done;
2365 }
2366
2367 if (!PyTuple_Check(reduce_value)) {
2368 PyErr_SetString(PicklingError,
2369 "__reduce__ must return a string or tuple");
2370 goto error;
2371 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002372
2373 status = save_reduce(self, reduce_value, obj);
2374
2375 if (0) {
2376 error:
2377 status = -1;
2378 }
2379 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002380 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002381 Py_XDECREF(memo_key);
2382 Py_XDECREF(reduce_func);
2383 Py_XDECREF(reduce_value);
2384
2385 return status;
2386}
2387
2388static int
2389dump(PicklerObject *self, PyObject *obj)
2390{
2391 const char stop_op = STOP;
2392
2393 if (self->proto >= 2) {
2394 char header[2];
2395
2396 header[0] = PROTO;
2397 assert(self->proto >= 0 && self->proto < 256);
2398 header[1] = (unsigned char)self->proto;
2399 if (pickler_write(self, header, 2) < 0)
2400 return -1;
2401 }
2402
2403 if (save(self, obj, 0) < 0 ||
2404 pickler_write(self, &stop_op, 1) < 0 ||
2405 pickler_write(self, NULL, 0) < 0)
2406 return -1;
2407
2408 return 0;
2409}
2410
2411PyDoc_STRVAR(Pickler_clear_memo_doc,
2412"clear_memo() -> None. Clears the pickler's \"memo\"."
2413"\n"
2414"The memo is the data structure that remembers which objects the\n"
2415"pickler has already seen, so that shared or recursive objects are\n"
2416"pickled by reference and not by value. This method is useful when\n"
2417"re-using picklers.");
2418
2419static PyObject *
2420Pickler_clear_memo(PicklerObject *self)
2421{
2422 if (self->memo)
2423 PyDict_Clear(self->memo);
2424
2425 Py_RETURN_NONE;
2426}
2427
2428PyDoc_STRVAR(Pickler_dump_doc,
2429"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2430
2431static PyObject *
2432Pickler_dump(PicklerObject *self, PyObject *args)
2433{
2434 PyObject *obj;
2435
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002436 /* Check whether the Pickler was initialized correctly (issue3664).
2437 Developers often forget to call __init__() in their subclasses, which
2438 would trigger a segfault without this check. */
2439 if (self->write == NULL) {
2440 PyErr_Format(PicklingError,
2441 "Pickler.__init__() was not called by %s.__init__()",
2442 Py_TYPE(self)->tp_name);
2443 return NULL;
2444 }
2445
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002446 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2447 return NULL;
2448
2449 if (dump(self, obj) < 0)
2450 return NULL;
2451
2452 Py_RETURN_NONE;
2453}
2454
/* Method table for Pickler objects (installed as tp_methods of
   Pickler_Type).  Each entry gives the Python-visible name, the C
   implementation, the calling convention and the docstring. */
static struct PyMethodDef Pickler_methods[] = {
    /* dump(obj): takes a positional-argument tuple. */
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    /* clear_memo(): takes no arguments. */
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2462
2463static void
2464Pickler_dealloc(PicklerObject *self)
2465{
2466 PyObject_GC_UnTrack(self);
2467
2468 Py_XDECREF(self->write);
2469 Py_XDECREF(self->memo);
2470 Py_XDECREF(self->pers_func);
2471 Py_XDECREF(self->arg);
2472 Py_XDECREF(self->fast_memo);
2473
2474 PyMem_Free(self->write_buf);
2475
2476 Py_TYPE(self)->tp_free((PyObject *)self);
2477}
2478
2479static int
2480Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2481{
2482 Py_VISIT(self->write);
2483 Py_VISIT(self->memo);
2484 Py_VISIT(self->pers_func);
2485 Py_VISIT(self->arg);
2486 Py_VISIT(self->fast_memo);
2487 return 0;
2488}
2489
2490static int
2491Pickler_clear(PicklerObject *self)
2492{
2493 Py_CLEAR(self->write);
2494 Py_CLEAR(self->memo);
2495 Py_CLEAR(self->pers_func);
2496 Py_CLEAR(self->arg);
2497 Py_CLEAR(self->fast_memo);
2498
2499 PyMem_Free(self->write_buf);
2500 self->write_buf = NULL;
2501
2502 return 0;
2503}
2504
/* Docstring of the Pickler type (installed as tp_doc of Pickler_Type).
   It describes the constructor arguments handled by Pickler_init(). */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported. The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
2524
2525static int
2526Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2527{
2528 static char *kwlist[] = {"file", "protocol", 0};
2529 PyObject *file;
2530 PyObject *proto_obj = NULL;
2531 long proto = 0;
2532
2533 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2534 kwlist, &file, &proto_obj))
2535 return -1;
2536
2537 /* In case of multiple __init__() calls, clear previous content. */
2538 if (self->write != NULL)
2539 (void)Pickler_clear(self);
2540
2541 if (proto_obj == NULL || proto_obj == Py_None)
2542 proto = DEFAULT_PROTOCOL;
2543 else
2544 proto = PyLong_AsLong(proto_obj);
2545
2546 if (proto < 0)
2547 proto = HIGHEST_PROTOCOL;
2548 if (proto > HIGHEST_PROTOCOL) {
2549 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2550 HIGHEST_PROTOCOL);
2551 return -1;
2552 }
2553
2554 self->proto = proto;
2555 self->bin = proto > 0;
2556 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002557 self->fast = 0;
2558 self->fast_nesting = 0;
2559 self->fast_memo = NULL;
2560
2561 if (!PyObject_HasAttrString(file, "write")) {
2562 PyErr_SetString(PyExc_TypeError,
2563 "file must have a 'write' attribute");
2564 return -1;
2565 }
2566 self->write = PyObject_GetAttrString(file, "write");
2567 if (self->write == NULL)
2568 return -1;
2569 self->buf_size = 0;
2570 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2571 if (self->write_buf == NULL) {
2572 PyErr_NoMemory();
2573 return -1;
2574 }
2575 self->pers_func = NULL;
2576 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2577 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2578 "persistent_id");
2579 if (self->pers_func == NULL)
2580 return -1;
2581 }
2582 self->memo = PyDict_New();
2583 if (self->memo == NULL)
2584 return -1;
2585
2586 return 0;
2587}
2588
2589static PyObject *
2590Pickler_get_memo(PicklerObject *self)
2591{
2592 if (self->memo == NULL)
2593 PyErr_SetString(PyExc_AttributeError, "memo");
2594 else
2595 Py_INCREF(self->memo);
2596 return self->memo;
2597}
2598
2599static int
2600Pickler_set_memo(PicklerObject *self, PyObject *value)
2601{
2602 PyObject *tmp;
2603
2604 if (value == NULL) {
2605 PyErr_SetString(PyExc_TypeError,
2606 "attribute deletion is not supported");
2607 return -1;
2608 }
2609 if (!PyDict_Check(value)) {
2610 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2611 return -1;
2612 }
2613
2614 tmp = self->memo;
2615 Py_INCREF(value);
2616 self->memo = value;
2617 Py_XDECREF(tmp);
2618
2619 return 0;
2620}
2621
2622static PyObject *
2623Pickler_get_persid(PicklerObject *self)
2624{
2625 if (self->pers_func == NULL)
2626 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2627 else
2628 Py_INCREF(self->pers_func);
2629 return self->pers_func;
2630}
2631
2632static int
2633Pickler_set_persid(PicklerObject *self, PyObject *value)
2634{
2635 PyObject *tmp;
2636
2637 if (value == NULL) {
2638 PyErr_SetString(PyExc_TypeError,
2639 "attribute deletion is not supported");
2640 return -1;
2641 }
2642 if (!PyCallable_Check(value)) {
2643 PyErr_SetString(PyExc_TypeError,
2644 "persistent_id must be a callable taking one argument");
2645 return -1;
2646 }
2647
2648 tmp = self->pers_func;
2649 Py_INCREF(value);
2650 self->pers_func = value;
2651 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2652
2653 return 0;
2654}
2655
/* Plain data members of Pickler objects (installed as tp_members of
   Pickler_Type).  Both entries expose an int field of PicklerObject under
   the same name. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}                      /* sentinel */
};
2661
/* Computed attributes of Pickler objects (installed as tp_getset of
   Pickler_Type).  Each entry pairs an attribute name with its getter and
   setter functions. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}                      /* sentinel */
};
2669
/* Type object for "_pickle.Pickler".

   The type can be subclassed (Py_TPFLAGS_BASETYPE) and takes part in
   cyclic garbage collection (Py_TPFLAGS_HAVE_GC), which is why it provides
   tp_traverse/tp_clear and frees instances with PyObject_GC_Del.  Slots
   left at 0 are not set by this definition. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2712
2713/* Temporary helper for calling self.find_class().
2714
2715 XXX: It would be nice to able to avoid Python function call overhead, by
2716 using directly the C version of find_class(), when find_class() is not
2717 overridden by a subclass. Although, this could become rather hackish. A
2718 simpler optimization would be to call the C function when self is not a
2719 subclass instance. */
2720static PyObject *
2721find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2722{
2723 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2724 module_name, global_name);
2725}
2726
2727static int
2728marker(UnpicklerObject *self)
2729{
2730 if (self->num_marks < 1) {
2731 PyErr_SetString(UnpicklingError, "could not find MARK");
2732 return -1;
2733 }
2734
2735 return self->marks[--self->num_marks];
2736}
2737
2738static int
2739load_none(UnpicklerObject *self)
2740{
2741 PDATA_APPEND(self->stack, Py_None, -1);
2742 return 0;
2743}
2744
2745static int
2746bad_readline(void)
2747{
2748 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2749 return -1;
2750}
2751
2752static int
2753load_int(UnpicklerObject *self)
2754{
2755 PyObject *value;
2756 char *endptr, *s;
2757 Py_ssize_t len;
2758 long x;
2759
2760 if ((len = unpickler_readline(self, &s)) < 0)
2761 return -1;
2762 if (len < 2)
2763 return bad_readline();
2764
2765 errno = 0;
2766 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2767 x = strtol(s, &endptr, 0);
2768
2769 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2770 /* Hm, maybe we've got something long. Let's try reading
2771 * it as a Python long object. */
2772 errno = 0;
2773 /* XXX: Same thing about the base here. */
2774 value = PyLong_FromString(s, NULL, 0);
2775 if (value == NULL) {
2776 PyErr_SetString(PyExc_ValueError,
2777 "could not convert string to int");
2778 return -1;
2779 }
2780 }
2781 else {
2782 if (len == 3 && (x == 0 || x == 1)) {
2783 if ((value = PyBool_FromLong(x)) == NULL)
2784 return -1;
2785 }
2786 else {
2787 if ((value = PyLong_FromLong(x)) == NULL)
2788 return -1;
2789 }
2790 }
2791
2792 PDATA_PUSH(self->stack, value, -1);
2793 return 0;
2794}
2795
2796static int
2797load_bool(UnpicklerObject *self, PyObject *boolean)
2798{
2799 assert(boolean == Py_True || boolean == Py_False);
2800 PDATA_APPEND(self->stack, boolean, -1);
2801 return 0;
2802}
2803
/* bytes holds a little-endian integer of `size` bytes.  Return its value
 * as a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it is a signed one.  This is a
 * historical source of cross-platform bugs.
 *
 * The value is assembled in an unsigned long so that no bit is ever
 * shifted into the sign bit of a signed type (undefined behaviour where
 * long is 32 bits wide), and the 4-byte sign extension is done
 * arithmetically so that it does not depend on the width of long.
 *
 * size must not exceed sizeof(long); the callers in this file pass 1, 2
 * or 4.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long bits = 0;
    int i;

    for (i = 0; i < size; i++) {
        bits |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4) is
     * signed: when bit 31 is set the value is bits - 2**32.  Compute that
     * from the low 31 bits so every intermediate result fits in a long.
     */
    if (size == 4 && (bits & 0x80000000UL) != 0) {
        return (long)(bits & 0x7FFFFFFFUL) - 0x7FFFFFFFL - 1L;
    }

    return (long)bits;
}
2830
2831static int
2832load_binintx(UnpicklerObject *self, char *s, int size)
2833{
2834 PyObject *value;
2835 long x;
2836
2837 x = calc_binint(s, size);
2838
2839 if ((value = PyLong_FromLong(x)) == NULL)
2840 return -1;
2841
2842 PDATA_PUSH(self->stack, value, -1);
2843 return 0;
2844}
2845
2846static int
2847load_binint(UnpicklerObject *self)
2848{
2849 char *s;
2850
2851 if (unpickler_read(self, &s, 4) < 0)
2852 return -1;
2853
2854 return load_binintx(self, s, 4);
2855}
2856
2857static int
2858load_binint1(UnpicklerObject *self)
2859{
2860 char *s;
2861
2862 if (unpickler_read(self, &s, 1) < 0)
2863 return -1;
2864
2865 return load_binintx(self, s, 1);
2866}
2867
2868static int
2869load_binint2(UnpicklerObject *self)
2870{
2871 char *s;
2872
2873 if (unpickler_read(self, &s, 2) < 0)
2874 return -1;
2875
2876 return load_binintx(self, s, 2);
2877}
2878
2879static int
2880load_long(UnpicklerObject *self)
2881{
2882 PyObject *value;
2883 char *s;
2884 Py_ssize_t len;
2885
2886 if ((len = unpickler_readline(self, &s)) < 0)
2887 return -1;
2888 if (len < 2)
2889 return bad_readline();
2890
2891 /* XXX: Should the base argument explicitly set to 10? */
2892 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2893 return -1;
2894
2895 PDATA_PUSH(self->stack, value, -1);
2896 return 0;
2897}
2898
2899/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2900 * data following.
2901 */
2902static int
2903load_counted_long(UnpicklerObject *self, int size)
2904{
2905 PyObject *value;
2906 char *nbytes;
2907 char *pdata;
2908
2909 assert(size == 1 || size == 4);
2910 if (unpickler_read(self, &nbytes, size) < 0)
2911 return -1;
2912
2913 size = calc_binint(nbytes, size);
2914 if (size < 0) {
2915 /* Corrupt or hostile pickle -- we never write one like this */
2916 PyErr_SetString(UnpicklingError,
2917 "LONG pickle has negative byte count");
2918 return -1;
2919 }
2920
2921 if (size == 0)
2922 value = PyLong_FromLong(0L);
2923 else {
2924 /* Read the raw little-endian bytes and convert. */
2925 if (unpickler_read(self, &pdata, size) < 0)
2926 return -1;
2927 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2928 1 /* little endian */ , 1 /* signed */ );
2929 }
2930 if (value == NULL)
2931 return -1;
2932 PDATA_PUSH(self->stack, value, -1);
2933 return 0;
2934}
2935
2936static int
2937load_float(UnpicklerObject *self)
2938{
2939 PyObject *value;
2940 char *endptr, *s;
2941 Py_ssize_t len;
2942 double d;
2943
2944 if ((len = unpickler_readline(self, &s)) < 0)
2945 return -1;
2946 if (len < 2)
2947 return bad_readline();
2948
2949 errno = 0;
2950 d = PyOS_ascii_strtod(s, &endptr);
2951
2952 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2953 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2954 return -1;
2955 }
2956
2957 if ((value = PyFloat_FromDouble(d)) == NULL)
2958 return -1;
2959
2960 PDATA_PUSH(self->stack, value, -1);
2961 return 0;
2962}
2963
2964static int
2965load_binfloat(UnpicklerObject *self)
2966{
2967 PyObject *value;
2968 double x;
2969 char *s;
2970
2971 if (unpickler_read(self, &s, 8) < 0)
2972 return -1;
2973
2974 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2975 if (x == -1.0 && PyErr_Occurred())
2976 return -1;
2977
2978 if ((value = PyFloat_FromDouble(x)) == NULL)
2979 return -1;
2980
2981 PDATA_PUSH(self->stack, value, -1);
2982 return 0;
2983}
2984
2985static int
2986load_string(UnpicklerObject *self)
2987{
2988 PyObject *bytes;
2989 PyObject *str = NULL;
2990 Py_ssize_t len;
2991 char *s, *p;
2992
2993 if ((len = unpickler_readline(self, &s)) < 0)
2994 return -1;
2995 if (len < 3)
2996 return bad_readline();
2997 if ((s = strdup(s)) == NULL) {
2998 PyErr_NoMemory();
2999 return -1;
3000 }
3001
3002 /* Strip outermost quotes */
3003 while (s[len - 1] <= ' ')
3004 len--;
3005 if (s[0] == '"' && s[len - 1] == '"') {
3006 s[len - 1] = '\0';
3007 p = s + 1;
3008 len -= 2;
3009 }
3010 else if (s[0] == '\'' && s[len - 1] == '\'') {
3011 s[len - 1] = '\0';
3012 p = s + 1;
3013 len -= 2;
3014 }
3015 else {
3016 free(s);
3017 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3018 return -1;
3019 }
3020
3021 /* Use the PyBytes API to decode the string, since that is what is used
3022 to encode, and then coerce the result to Unicode. */
3023 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3024 free(s);
3025 if (bytes == NULL)
3026 return -1;
3027 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3028 Py_DECREF(bytes);
3029 if (str == NULL)
3030 return -1;
3031
3032 PDATA_PUSH(self->stack, str, -1);
3033 return 0;
3034}
3035
3036static int
3037load_binbytes(UnpicklerObject *self)
3038{
3039 PyObject *bytes;
3040 long x;
3041 char *s;
3042
3043 if (unpickler_read(self, &s, 4) < 0)
3044 return -1;
3045
3046 x = calc_binint(s, 4);
3047 if (x < 0) {
3048 PyErr_SetString(UnpicklingError,
3049 "BINBYTES pickle has negative byte count");
3050 return -1;
3051 }
3052
3053 if (unpickler_read(self, &s, x) < 0)
3054 return -1;
3055 bytes = PyBytes_FromStringAndSize(s, x);
3056 if (bytes == NULL)
3057 return -1;
3058
3059 PDATA_PUSH(self->stack, bytes, -1);
3060 return 0;
3061}
3062
3063static int
3064load_short_binbytes(UnpicklerObject *self)
3065{
3066 PyObject *bytes;
3067 unsigned char x;
3068 char *s;
3069
3070 if (unpickler_read(self, &s, 1) < 0)
3071 return -1;
3072
3073 x = (unsigned char)s[0];
3074
3075 if (unpickler_read(self, &s, x) < 0)
3076 return -1;
3077
3078 bytes = PyBytes_FromStringAndSize(s, x);
3079 if (bytes == NULL)
3080 return -1;
3081
3082 PDATA_PUSH(self->stack, bytes, -1);
3083 return 0;
3084}
3085
3086static int
3087load_binstring(UnpicklerObject *self)
3088{
3089 PyObject *str;
3090 long x;
3091 char *s;
3092
3093 if (unpickler_read(self, &s, 4) < 0)
3094 return -1;
3095
3096 x = calc_binint(s, 4);
3097 if (x < 0) {
3098 PyErr_SetString(UnpicklingError,
3099 "BINSTRING pickle has negative byte count");
3100 return -1;
3101 }
3102
3103 if (unpickler_read(self, &s, x) < 0)
3104 return -1;
3105
3106 /* Convert Python 2.x strings to unicode. */
3107 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3108 if (str == NULL)
3109 return -1;
3110
3111 PDATA_PUSH(self->stack, str, -1);
3112 return 0;
3113}
3114
3115static int
3116load_short_binstring(UnpicklerObject *self)
3117{
3118 PyObject *str;
3119 unsigned char x;
3120 char *s;
3121
3122 if (unpickler_read(self, &s, 1) < 0)
3123 return -1;
3124
3125 x = (unsigned char)s[0];
3126
3127 if (unpickler_read(self, &s, x) < 0)
3128 return -1;
3129
3130 /* Convert Python 2.x strings to unicode. */
3131 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3132 if (str == NULL)
3133 return -1;
3134
3135 PDATA_PUSH(self->stack, str, -1);
3136 return 0;
3137}
3138
3139static int
3140load_unicode(UnpicklerObject *self)
3141{
3142 PyObject *str;
3143 Py_ssize_t len;
3144 char *s;
3145
3146 if ((len = unpickler_readline(self, &s)) < 0)
3147 return -1;
3148 if (len < 1)
3149 return bad_readline();
3150
3151 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3152 if (str == NULL)
3153 return -1;
3154
3155 PDATA_PUSH(self->stack, str, -1);
3156 return 0;
3157}
3158
3159static int
3160load_binunicode(UnpicklerObject *self)
3161{
3162 PyObject *str;
3163 long size;
3164 char *s;
3165
3166 if (unpickler_read(self, &s, 4) < 0)
3167 return -1;
3168
3169 size = calc_binint(s, 4);
3170 if (size < 0) {
3171 PyErr_SetString(UnpicklingError,
3172 "BINUNICODE pickle has negative byte count");
3173 return -1;
3174 }
3175
3176 if (unpickler_read(self, &s, size) < 0)
3177 return -1;
3178
3179 str = PyUnicode_DecodeUTF8(s, size, NULL);
3180 if (str == NULL)
3181 return -1;
3182
3183 PDATA_PUSH(self->stack, str, -1);
3184 return 0;
3185}
3186
3187static int
3188load_tuple(UnpicklerObject *self)
3189{
3190 PyObject *tuple;
3191 int i;
3192
3193 if ((i = marker(self)) < 0)
3194 return -1;
3195
3196 tuple = Pdata_poptuple(self->stack, i);
3197 if (tuple == NULL)
3198 return -1;
3199 PDATA_PUSH(self->stack, tuple, -1);
3200 return 0;
3201}
3202
3203static int
3204load_counted_tuple(UnpicklerObject *self, int len)
3205{
3206 PyObject *tuple;
3207
3208 tuple = PyTuple_New(len);
3209 if (tuple == NULL)
3210 return -1;
3211
3212 while (--len >= 0) {
3213 PyObject *item;
3214
3215 PDATA_POP(self->stack, item);
3216 if (item == NULL)
3217 return -1;
3218 PyTuple_SET_ITEM(tuple, len, item);
3219 }
3220 PDATA_PUSH(self->stack, tuple, -1);
3221 return 0;
3222}
3223
3224static int
3225load_empty_list(UnpicklerObject *self)
3226{
3227 PyObject *list;
3228
3229 if ((list = PyList_New(0)) == NULL)
3230 return -1;
3231 PDATA_PUSH(self->stack, list, -1);
3232 return 0;
3233}
3234
3235static int
3236load_empty_dict(UnpicklerObject *self)
3237{
3238 PyObject *dict;
3239
3240 if ((dict = PyDict_New()) == NULL)
3241 return -1;
3242 PDATA_PUSH(self->stack, dict, -1);
3243 return 0;
3244}
3245
3246static int
3247load_list(UnpicklerObject *self)
3248{
3249 PyObject *list;
3250 int i;
3251
3252 if ((i = marker(self)) < 0)
3253 return -1;
3254
3255 list = Pdata_poplist(self->stack, i);
3256 if (list == NULL)
3257 return -1;
3258 PDATA_PUSH(self->stack, list, -1);
3259 return 0;
3260}
3261
3262static int
3263load_dict(UnpicklerObject *self)
3264{
3265 PyObject *dict, *key, *value;
3266 int i, j, k;
3267
3268 if ((i = marker(self)) < 0)
3269 return -1;
3270 j = self->stack->length;
3271
3272 if ((dict = PyDict_New()) == NULL)
3273 return -1;
3274
3275 for (k = i + 1; k < j; k += 2) {
3276 key = self->stack->data[k - 1];
3277 value = self->stack->data[k];
3278 if (PyDict_SetItem(dict, key, value) < 0) {
3279 Py_DECREF(dict);
3280 return -1;
3281 }
3282 }
3283 Pdata_clear(self->stack, i);
3284 PDATA_PUSH(self->stack, dict, -1);
3285 return 0;
3286}
3287
3288static PyObject *
3289instantiate(PyObject *cls, PyObject *args)
3290{
3291 PyObject *r = NULL;
3292
3293 /* XXX: The pickle.py module does not create instances this way when the
3294 args tuple is empty. See Unpickler._instantiate(). */
3295 if ((r = PyObject_CallObject(cls, args)))
3296 return r;
3297
3298 /* XXX: Is this still nescessary? */
3299 {
3300 PyObject *tp, *v, *tb, *tmp_value;
3301
3302 PyErr_Fetch(&tp, &v, &tb);
3303 tmp_value = v;
3304 /* NULL occurs when there was a KeyboardInterrupt */
3305 if (tmp_value == NULL)
3306 tmp_value = Py_None;
3307 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3308 Py_XDECREF(v);
3309 v = r;
3310 }
3311 PyErr_Restore(tp, v, tb);
3312 }
3313 return NULL;
3314}
3315
3316static int
3317load_obj(UnpicklerObject *self)
3318{
3319 PyObject *cls, *args, *obj = NULL;
3320 int i;
3321
3322 if ((i = marker(self)) < 0)
3323 return -1;
3324
3325 args = Pdata_poptuple(self->stack, i + 1);
3326 if (args == NULL)
3327 return -1;
3328
3329 PDATA_POP(self->stack, cls);
3330 if (cls) {
3331 obj = instantiate(cls, args);
3332 Py_DECREF(cls);
3333 }
3334 Py_DECREF(args);
3335 if (obj == NULL)
3336 return -1;
3337
3338 PDATA_PUSH(self->stack, obj, -1);
3339 return 0;
3340}
3341
3342static int
3343load_inst(UnpicklerObject *self)
3344{
3345 PyObject *cls = NULL;
3346 PyObject *args = NULL;
3347 PyObject *obj = NULL;
3348 PyObject *module_name;
3349 PyObject *class_name;
3350 Py_ssize_t len;
3351 int i;
3352 char *s;
3353
3354 if ((i = marker(self)) < 0)
3355 return -1;
3356 if ((len = unpickler_readline(self, &s)) < 0)
3357 return -1;
3358 if (len < 2)
3359 return bad_readline();
3360
3361 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3362 identifiers are permitted in Python 3.0, since the INST opcode is only
3363 supported by older protocols on Python 2.x. */
3364 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3365 if (module_name == NULL)
3366 return -1;
3367
3368 if ((len = unpickler_readline(self, &s)) >= 0) {
3369 if (len < 2)
3370 return bad_readline();
3371 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3372 if (class_name == NULL) {
3373 cls = find_class(self, module_name, class_name);
3374 Py_DECREF(class_name);
3375 }
3376 }
3377 Py_DECREF(module_name);
3378
3379 if (cls == NULL)
3380 return -1;
3381
3382 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3383 obj = instantiate(cls, args);
3384 Py_DECREF(args);
3385 }
3386 Py_DECREF(cls);
3387
3388 if (obj == NULL)
3389 return -1;
3390
3391 PDATA_PUSH(self->stack, obj, -1);
3392 return 0;
3393}
3394
3395static int
3396load_newobj(UnpicklerObject *self)
3397{
3398 PyObject *args = NULL;
3399 PyObject *clsraw = NULL;
3400 PyTypeObject *cls; /* clsraw cast to its true type */
3401 PyObject *obj;
3402
3403 /* Stack is ... cls argtuple, and we want to call
3404 * cls.__new__(cls, *argtuple).
3405 */
3406 PDATA_POP(self->stack, args);
3407 if (args == NULL)
3408 goto error;
3409 if (!PyTuple_Check(args)) {
3410 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3411 goto error;
3412 }
3413
3414 PDATA_POP(self->stack, clsraw);
3415 cls = (PyTypeObject *)clsraw;
3416 if (cls == NULL)
3417 goto error;
3418 if (!PyType_Check(cls)) {
3419 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3420 "isn't a type object");
3421 goto error;
3422 }
3423 if (cls->tp_new == NULL) {
3424 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3425 "has NULL tp_new");
3426 goto error;
3427 }
3428
3429 /* Call __new__. */
3430 obj = cls->tp_new(cls, args, NULL);
3431 if (obj == NULL)
3432 goto error;
3433
3434 Py_DECREF(args);
3435 Py_DECREF(clsraw);
3436 PDATA_PUSH(self->stack, obj, -1);
3437 return 0;
3438
3439 error:
3440 Py_XDECREF(args);
3441 Py_XDECREF(clsraw);
3442 return -1;
3443}
3444
3445static int
3446load_global(UnpicklerObject *self)
3447{
3448 PyObject *global = NULL;
3449 PyObject *module_name;
3450 PyObject *global_name;
3451 Py_ssize_t len;
3452 char *s;
3453
3454 if ((len = unpickler_readline(self, &s)) < 0)
3455 return -1;
3456 if (len < 2)
3457 return bad_readline();
3458 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3459 if (!module_name)
3460 return -1;
3461
3462 if ((len = unpickler_readline(self, &s)) >= 0) {
3463 if (len < 2) {
3464 Py_DECREF(module_name);
3465 return bad_readline();
3466 }
3467 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3468 if (global_name) {
3469 global = find_class(self, module_name, global_name);
3470 Py_DECREF(global_name);
3471 }
3472 }
3473 Py_DECREF(module_name);
3474
3475 if (global == NULL)
3476 return -1;
3477 PDATA_PUSH(self->stack, global, -1);
3478 return 0;
3479}
3480
3481static int
3482load_persid(UnpicklerObject *self)
3483{
3484 PyObject *pid;
3485 Py_ssize_t len;
3486 char *s;
3487
3488 if (self->pers_func) {
3489 if ((len = unpickler_readline(self, &s)) < 0)
3490 return -1;
3491 if (len < 2)
3492 return bad_readline();
3493
3494 pid = PyBytes_FromStringAndSize(s, len - 1);
3495 if (pid == NULL)
3496 return -1;
3497
3498 /* Ugh... this does not leak since unpickler_call() steals the
3499 reference to pid first. */
3500 pid = unpickler_call(self, self->pers_func, pid);
3501 if (pid == NULL)
3502 return -1;
3503
3504 PDATA_PUSH(self->stack, pid, -1);
3505 return 0;
3506 }
3507 else {
3508 PyErr_SetString(UnpicklingError,
3509 "A load persistent id instruction was encountered,\n"
3510 "but no persistent_load function was specified.");
3511 return -1;
3512 }
3513}
3514
3515static int
3516load_binpersid(UnpicklerObject *self)
3517{
3518 PyObject *pid;
3519
3520 if (self->pers_func) {
3521 PDATA_POP(self->stack, pid);
3522 if (pid == NULL)
3523 return -1;
3524
3525 /* Ugh... this does not leak since unpickler_call() steals the
3526 reference to pid first. */
3527 pid = unpickler_call(self, self->pers_func, pid);
3528 if (pid == NULL)
3529 return -1;
3530
3531 PDATA_PUSH(self->stack, pid, -1);
3532 return 0;
3533 }
3534 else {
3535 PyErr_SetString(UnpicklingError,
3536 "A load persistent id instruction was encountered,\n"
3537 "but no persistent_load function was specified.");
3538 return -1;
3539 }
3540}
3541
3542static int
3543load_pop(UnpicklerObject *self)
3544{
3545 int len;
3546
3547 if ((len = self->stack->length) <= 0)
3548 return stack_underflow();
3549
3550 /* Note that we split the (pickle.py) stack into two stacks,
3551 * an object stack and a mark stack. We have to be clever and
3552 * pop the right one. We do this by looking at the top of the
3553 * mark stack.
3554 */
3555
3556 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3557 self->num_marks--;
3558 else {
3559 len--;
3560 Py_DECREF(self->stack->data[len]);
3561 self->stack->length = len;
3562 }
3563
3564 return 0;
3565}
3566
3567static int
3568load_pop_mark(UnpicklerObject *self)
3569{
3570 int i;
3571
3572 if ((i = marker(self)) < 0)
3573 return -1;
3574
3575 Pdata_clear(self->stack, i);
3576
3577 return 0;
3578}
3579
3580static int
3581load_dup(UnpicklerObject *self)
3582{
3583 PyObject *last;
3584 int len;
3585
3586 if ((len = self->stack->length) <= 0)
3587 return stack_underflow();
3588 last = self->stack->data[len - 1];
3589 PDATA_APPEND(self->stack, last, -1);
3590 return 0;
3591}
3592
3593static int
3594load_get(UnpicklerObject *self)
3595{
3596 PyObject *key, *value;
3597 Py_ssize_t len;
3598 char *s;
3599
3600 if ((len = unpickler_readline(self, &s)) < 0)
3601 return -1;
3602 if (len < 2)
3603 return bad_readline();
3604
3605 key = PyLong_FromString(s, NULL, 10);
3606 if (key == NULL)
3607 return -1;
3608
3609 value = PyDict_GetItemWithError(self->memo, key);
3610 if (value == NULL) {
3611 if (!PyErr_Occurred())
3612 PyErr_SetObject(PyExc_KeyError, key);
3613 Py_DECREF(key);
3614 return -1;
3615 }
3616 Py_DECREF(key);
3617
3618 PDATA_APPEND(self->stack, value, -1);
3619 return 0;
3620}
3621
3622static int
3623load_binget(UnpicklerObject *self)
3624{
3625 PyObject *key, *value;
3626 char *s;
3627
3628 if (unpickler_read(self, &s, 1) < 0)
3629 return -1;
3630
3631 /* Here, the unsigned cast is necessary to avoid negative values. */
3632 key = PyLong_FromLong((long)(unsigned char)s[0]);
3633 if (key == NULL)
3634 return -1;
3635
3636 value = PyDict_GetItemWithError(self->memo, key);
3637 if (value == NULL) {
3638 if (!PyErr_Occurred())
3639 PyErr_SetObject(PyExc_KeyError, key);
3640 Py_DECREF(key);
3641 return -1;
3642 }
3643 Py_DECREF(key);
3644
3645 PDATA_APPEND(self->stack, value, -1);
3646 return 0;
3647}
3648
3649static int
3650load_long_binget(UnpicklerObject *self)
3651{
3652 PyObject *key, *value;
3653 char *s;
3654 long k;
3655
3656 if (unpickler_read(self, &s, 4) < 0)
3657 return -1;
3658
3659 k = (long)(unsigned char)s[0];
3660 k |= (long)(unsigned char)s[1] << 8;
3661 k |= (long)(unsigned char)s[2] << 16;
3662 k |= (long)(unsigned char)s[3] << 24;
3663
3664 key = PyLong_FromLong(k);
3665 if (key == NULL)
3666 return -1;
3667
3668 value = PyDict_GetItemWithError(self->memo, key);
3669 if (value == NULL) {
3670 if (!PyErr_Occurred())
3671 PyErr_SetObject(PyExc_KeyError, key);
3672 Py_DECREF(key);
3673 return -1;
3674 }
3675 Py_DECREF(key);
3676
3677 PDATA_APPEND(self->stack, value, -1);
3678 return 0;
3679}
3680
3681/* Push an object from the extension registry (EXT[124]). nbytes is
3682 * the number of bytes following the opcode, holding the index (code) value.
3683 */
3684static int
3685load_extension(UnpicklerObject *self, int nbytes)
3686{
3687 char *codebytes; /* the nbytes bytes after the opcode */
3688 long code; /* calc_binint returns long */
3689 PyObject *py_code; /* code as a Python int */
3690 PyObject *obj; /* the object to push */
3691 PyObject *pair; /* (module_name, class_name) */
3692 PyObject *module_name, *class_name;
3693
3694 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3695 if (unpickler_read(self, &codebytes, nbytes) < 0)
3696 return -1;
3697 code = calc_binint(codebytes, nbytes);
3698 if (code <= 0) { /* note that 0 is forbidden */
3699 /* Corrupt or hostile pickle. */
3700 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3701 return -1;
3702 }
3703
3704 /* Look for the code in the cache. */
3705 py_code = PyLong_FromLong(code);
3706 if (py_code == NULL)
3707 return -1;
3708 obj = PyDict_GetItem(extension_cache, py_code);
3709 if (obj != NULL) {
3710 /* Bingo. */
3711 Py_DECREF(py_code);
3712 PDATA_APPEND(self->stack, obj, -1);
3713 return 0;
3714 }
3715
3716 /* Look up the (module_name, class_name) pair. */
3717 pair = PyDict_GetItem(inverted_registry, py_code);
3718 if (pair == NULL) {
3719 Py_DECREF(py_code);
3720 PyErr_Format(PyExc_ValueError, "unregistered extension "
3721 "code %ld", code);
3722 return -1;
3723 }
3724 /* Since the extension registry is manipulable via Python code,
3725 * confirm that pair is really a 2-tuple of strings.
3726 */
3727 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3728 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3729 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3730 Py_DECREF(py_code);
3731 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3732 "isn't a 2-tuple of strings", code);
3733 return -1;
3734 }
3735 /* Load the object. */
3736 obj = find_class(self, module_name, class_name);
3737 if (obj == NULL) {
3738 Py_DECREF(py_code);
3739 return -1;
3740 }
3741 /* Cache code -> obj. */
3742 code = PyDict_SetItem(extension_cache, py_code, obj);
3743 Py_DECREF(py_code);
3744 if (code < 0) {
3745 Py_DECREF(obj);
3746 return -1;
3747 }
3748 PDATA_PUSH(self->stack, obj, -1);
3749 return 0;
3750}
3751
3752static int
3753load_put(UnpicklerObject *self)
3754{
3755 PyObject *key, *value;
3756 Py_ssize_t len;
3757 char *s;
3758 int x;
3759
3760 if ((len = unpickler_readline(self, &s)) < 0)
3761 return -1;
3762 if (len < 2)
3763 return bad_readline();
3764 if ((x = self->stack->length) <= 0)
3765 return stack_underflow();
3766
3767 key = PyLong_FromString(s, NULL, 10);
3768 if (key == NULL)
3769 return -1;
3770 value = self->stack->data[x - 1];
3771
3772 x = PyDict_SetItem(self->memo, key, value);
3773 Py_DECREF(key);
3774 return x;
3775}
3776
3777static int
3778load_binput(UnpicklerObject *self)
3779{
3780 PyObject *key, *value;
3781 char *s;
3782 int x;
3783
3784 if (unpickler_read(self, &s, 1) < 0)
3785 return -1;
3786 if ((x = self->stack->length) <= 0)
3787 return stack_underflow();
3788
3789 key = PyLong_FromLong((long)(unsigned char)s[0]);
3790 if (key == NULL)
3791 return -1;
3792 value = self->stack->data[x - 1];
3793
3794 x = PyDict_SetItem(self->memo, key, value);
3795 Py_DECREF(key);
3796 return x;
3797}
3798
3799static int
3800load_long_binput(UnpicklerObject *self)
3801{
3802 PyObject *key, *value;
3803 long k;
3804 char *s;
3805 int x;
3806
3807 if (unpickler_read(self, &s, 4) < 0)
3808 return -1;
3809 if ((x = self->stack->length) <= 0)
3810 return stack_underflow();
3811
3812 k = (long)(unsigned char)s[0];
3813 k |= (long)(unsigned char)s[1] << 8;
3814 k |= (long)(unsigned char)s[2] << 16;
3815 k |= (long)(unsigned char)s[3] << 24;
3816
3817 key = PyLong_FromLong(k);
3818 if (key == NULL)
3819 return -1;
3820 value = self->stack->data[x - 1];
3821
3822 x = PyDict_SetItem(self->memo, key, value);
3823 Py_DECREF(key);
3824 return x;
3825}
3826
3827static int
3828do_append(UnpicklerObject *self, int x)
3829{
3830 PyObject *value;
3831 PyObject *list;
3832 int len, i;
3833
3834 len = self->stack->length;
3835 if (x > len || x <= 0)
3836 return stack_underflow();
3837 if (len == x) /* nothing to do */
3838 return 0;
3839
3840 list = self->stack->data[x - 1];
3841
3842 if (PyList_Check(list)) {
3843 PyObject *slice;
3844 Py_ssize_t list_len;
3845
3846 slice = Pdata_poplist(self->stack, x);
3847 if (!slice)
3848 return -1;
3849 list_len = PyList_GET_SIZE(list);
3850 i = PyList_SetSlice(list, list_len, list_len, slice);
3851 Py_DECREF(slice);
3852 return i;
3853 }
3854 else {
3855 PyObject *append_func;
3856
3857 append_func = PyObject_GetAttrString(list, "append");
3858 if (append_func == NULL)
3859 return -1;
3860 for (i = x; i < len; i++) {
3861 PyObject *result;
3862
3863 value = self->stack->data[i];
3864 result = unpickler_call(self, append_func, value);
3865 if (result == NULL) {
3866 Pdata_clear(self->stack, i + 1);
3867 self->stack->length = x;
3868 return -1;
3869 }
3870 Py_DECREF(result);
3871 }
3872 self->stack->length = x;
3873 }
3874
3875 return 0;
3876}
3877
3878static int
3879load_append(UnpicklerObject *self)
3880{
3881 return do_append(self, self->stack->length - 1);
3882}
3883
3884static int
3885load_appends(UnpicklerObject *self)
3886{
3887 return do_append(self, marker(self));
3888}
3889
3890static int
3891do_setitems(UnpicklerObject *self, int x)
3892{
3893 PyObject *value, *key;
3894 PyObject *dict;
3895 int len, i;
3896 int status = 0;
3897
3898 len = self->stack->length;
3899 if (x > len || x <= 0)
3900 return stack_underflow();
3901 if (len == x) /* nothing to do */
3902 return 0;
3903 if ((len - x) % 2 != 0) {
3904 /* Currupt or hostile pickle -- we never write one like this. */
3905 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3906 return -1;
3907 }
3908
3909 /* Here, dict does not actually need to be a PyDict; it could be anything
3910 that supports the __setitem__ attribute. */
3911 dict = self->stack->data[x - 1];
3912
3913 for (i = x + 1; i < len; i += 2) {
3914 key = self->stack->data[i - 1];
3915 value = self->stack->data[i];
3916 if (PyObject_SetItem(dict, key, value) < 0) {
3917 status = -1;
3918 break;
3919 }
3920 }
3921
3922 Pdata_clear(self->stack, x);
3923 return status;
3924}
3925
3926static int
3927load_setitem(UnpicklerObject *self)
3928{
3929 return do_setitems(self, self->stack->length - 2);
3930}
3931
3932static int
3933load_setitems(UnpicklerObject *self)
3934{
3935 return do_setitems(self, marker(self));
3936}
3937
3938static int
3939load_build(UnpicklerObject *self)
3940{
3941 PyObject *state, *inst, *slotstate;
3942 PyObject *setstate;
3943 int status = 0;
3944
3945 /* Stack is ... instance, state. We want to leave instance at
3946 * the stack top, possibly mutated via instance.__setstate__(state).
3947 */
3948 if (self->stack->length < 2)
3949 return stack_underflow();
3950
3951 PDATA_POP(self->stack, state);
3952 if (state == NULL)
3953 return -1;
3954
3955 inst = self->stack->data[self->stack->length - 1];
3956
3957 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003958 if (setstate == NULL) {
3959 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3960 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003961 else {
3962 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003963 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003964 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003965 }
3966 else {
3967 PyObject *result;
3968
3969 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003970 /* Ugh... this does not leak since unpickler_call() steals the
3971 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003972 result = unpickler_call(self, setstate, state);
3973 Py_DECREF(setstate);
3974 if (result == NULL)
3975 return -1;
3976 Py_DECREF(result);
3977 return 0;
3978 }
3979
3980 /* A default __setstate__. First see whether state embeds a
3981 * slot state dict too (a proto 2 addition).
3982 */
3983 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3984 PyObject *tmp = state;
3985
3986 state = PyTuple_GET_ITEM(tmp, 0);
3987 slotstate = PyTuple_GET_ITEM(tmp, 1);
3988 Py_INCREF(state);
3989 Py_INCREF(slotstate);
3990 Py_DECREF(tmp);
3991 }
3992 else
3993 slotstate = NULL;
3994
3995 /* Set inst.__dict__ from the state dict (if any). */
3996 if (state != Py_None) {
3997 PyObject *dict;
3998
3999 if (!PyDict_Check(state)) {
4000 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4001 goto error;
4002 }
4003 dict = PyObject_GetAttrString(inst, "__dict__");
4004 if (dict == NULL)
4005 goto error;
4006
4007 PyDict_Update(dict, state);
4008 Py_DECREF(dict);
4009 }
4010
4011 /* Also set instance attributes from the slotstate dict (if any). */
4012 if (slotstate != NULL) {
4013 PyObject *d_key, *d_value;
4014 Py_ssize_t i;
4015
4016 if (!PyDict_Check(slotstate)) {
4017 PyErr_SetString(UnpicklingError,
4018 "slot state is not a dictionary");
4019 goto error;
4020 }
4021 i = 0;
4022 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4023 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4024 goto error;
4025 }
4026 }
4027
4028 if (0) {
4029 error:
4030 status = -1;
4031 }
4032
4033 Py_DECREF(state);
4034 Py_XDECREF(slotstate);
4035 return status;
4036}
4037
4038static int
4039load_mark(UnpicklerObject *self)
4040{
4041
4042 /* Note that we split the (pickle.py) stack into two stacks, an
4043 * object stack and a mark stack. Here we push a mark onto the
4044 * mark stack.
4045 */
4046
4047 if ((self->num_marks + 1) >= self->marks_size) {
4048 size_t alloc;
4049 int *marks;
4050
4051 /* Use the size_t type to check for overflow. */
4052 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004053 if (alloc > PY_SSIZE_T_MAX ||
4054 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004055 PyErr_NoMemory();
4056 return -1;
4057 }
4058
4059 if (self->marks == NULL)
4060 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4061 else
4062 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4063 if (marks == NULL) {
4064 PyErr_NoMemory();
4065 return -1;
4066 }
4067 self->marks = marks;
4068 self->marks_size = (Py_ssize_t)alloc;
4069 }
4070
4071 self->marks[self->num_marks++] = self->stack->length;
4072
4073 return 0;
4074}
4075
4076static int
4077load_reduce(UnpicklerObject *self)
4078{
4079 PyObject *callable = NULL;
4080 PyObject *argtup = NULL;
4081 PyObject *obj = NULL;
4082
4083 PDATA_POP(self->stack, argtup);
4084 if (argtup == NULL)
4085 return -1;
4086 PDATA_POP(self->stack, callable);
4087 if (callable) {
4088 obj = instantiate(callable, argtup);
4089 Py_DECREF(callable);
4090 }
4091 Py_DECREF(argtup);
4092
4093 if (obj == NULL)
4094 return -1;
4095
4096 PDATA_PUSH(self->stack, obj, -1);
4097 return 0;
4098}
4099
4100/* Just raises an error if we don't know the protocol specified. PROTO
4101 * is the first opcode for protocols >= 2.
4102 */
4103static int
4104load_proto(UnpicklerObject *self)
4105{
4106 char *s;
4107 int i;
4108
4109 if (unpickler_read(self, &s, 1) < 0)
4110 return -1;
4111
4112 i = (unsigned char)s[0];
4113 if (i <= HIGHEST_PROTOCOL)
4114 return 0;
4115
4116 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4117 return -1;
4118}
4119
/* Run the unpickling machine: reset the mark and object stacks, then read
 * one opcode byte at a time and dispatch to the matching load_* handler
 * until STOP is seen or a handler fails.
 *
 * Returns the object popped from the top of the stack on success, or NULL
 * with an exception set on failure.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    /* Start from a clean state: no marks, empty object stack. */
    self->num_marks = 0;
    if (self->stack->length)
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
     * On handler failure the `break` leaves the switch and falls onto the
     * trailing `break` that ends the while loop; on success `continue`
     * goes straight to reading the next opcode.
     */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (unpickler_read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        case STOP:
            /* Normal end of the pickle: leave the switch, then the loop. */
            break;

        case '\0':
            /* A NUL opcode byte is reported as end of file. */
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* The loop is left both on STOP and on failure; a pending exception
       tells the two apart. */
    /* XXX: It is not clear what this is actually for. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    /* The finished object is whatever is left on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
4225
/* Docstring for the Unpickler.load() method (see Unpickler_methods). */
PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");
4232
4233static PyObject *
4234Unpickler_load(UnpicklerObject *self)
4235{
4236 /* Check whether the Unpickler was initialized correctly. This prevents
4237 segfaulting if a subclass overridden __init__ with a function that does
4238 not call Unpickler.__init__(). Here, we simply ensure that self->read
4239 is not NULL. */
4240 if (self->read == NULL) {
4241 PyErr_Format(UnpicklingError,
4242 "Unpickler.__init__() was not called by %s.__init__()",
4243 Py_TYPE(self)->tp_name);
4244 return NULL;
4245 }
4246
4247 return load(self);
4248}
4249
/* The name of find_class() is misleading. In newer pickle protocols, this
   function is used for loading any global (i.e., functions), not just
   classes. The name is kept only for backward compatibility. */

/* Docstring for the Unpickler.find_class() method (see Unpickler_methods). */
PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary.  Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed.  Both arguments passed are str objects.\n");
4263
4264static PyObject *
4265Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4266{
4267 PyObject *global;
4268 PyObject *modules_dict;
4269 PyObject *module;
4270 PyObject *module_name, *global_name;
4271
4272 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4273 &module_name, &global_name))
4274 return NULL;
4275
4276 modules_dict = PySys_GetObject("modules");
4277 if (modules_dict == NULL)
4278 return NULL;
4279
4280 module = PyDict_GetItem(modules_dict, module_name);
4281 if (module == NULL) {
4282 module = PyImport_Import(module_name);
4283 if (module == NULL)
4284 return NULL;
4285 global = PyObject_GetAttr(module, global_name);
4286 Py_DECREF(module);
4287 }
4288 else {
4289 global = PyObject_GetAttr(module, global_name);
4290 }
4291 return global;
4292}
4293
4294static struct PyMethodDef Unpickler_methods[] = {
4295 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4296 Unpickler_load_doc},
4297 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4298 Unpickler_find_class_doc},
4299 {NULL, NULL} /* sentinel */
4300};
4301
/* tp_dealloc for Unpickler: release every owned reference and buffer, then
 * free the object itself through the type's tp_free.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop the cyclic GC from looking at the object while it is torn
       down. */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->last_string);

    /* marks comes from PyMem_Malloc/PyMem_Realloc (see load_mark);
       encoding and errors come from strdup() (see Unpickler_init), hence
       the plain free(). */
    PyMem_Free(self->marks);
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4320
/* tp_traverse for Unpickler: report every PyObject reference held by the
 * object to the cyclic garbage collector.  Py_VISIT relies on the parameter
 * names `visit` and `arg`.
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->memo);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->last_string);
    return 0;
}
4333
4334static int
4335Unpickler_clear(UnpicklerObject *self)
4336{
4337 Py_CLEAR(self->readline);
4338 Py_CLEAR(self->read);
4339 Py_CLEAR(self->memo);
4340 Py_CLEAR(self->stack);
4341 Py_CLEAR(self->pers_func);
4342 Py_CLEAR(self->arg);
4343 Py_CLEAR(self->last_string);
4344
4345 PyMem_Free(self->marks);
4346 self->marks = NULL;
4347 free(self->encoding);
4348 self->encoding = NULL;
4349 free(self->errors);
4350 self->errors = NULL;
4351
4352 return 0;
4353}
4354
/* Class docstring of the Unpickler type (tp_doc of Unpickler_Type).
 * NOTE(review): only a single newline separates the signature line from the
 * description below it; confirm whether a blank line was intended.
 */
PyDoc_STRVAR(Unpickler_doc,
"Unpickler(file, *, encoding='ASCII', errors='strict')"
"\n"
"This takes a binary file for reading a pickle data stream.\n"
"\n"
"The protocol version of the pickle is detected automatically, so no\n"
"proto argument is needed.\n"
"\n"
"The file-like object must have two methods, a read() method\n"
"that takes an integer argument, and a readline() method that\n"
"requires no arguments.  Both methods should return bytes.\n"
"Thus file-like object can be a binary file object opened for\n"
"reading, a BytesIO object, or any other custom object that\n"
"meets this interface.\n"
"\n"
"Optional keyword arguments are encoding and errors, which are\n"
"used to decode 8-bit string instances pickled by Python 2.x.\n"
"These default to 'ASCII' and 'strict', respectively.\n");
4373
4374static int
4375Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4376{
4377 static char *kwlist[] = {"file", "encoding", "errors", 0};
4378 PyObject *file;
4379 char *encoding = NULL;
4380 char *errors = NULL;
4381
4382 /* XXX: That is an horrible error message. But, I don't know how to do
4383 better... */
4384 if (Py_SIZE(args) != 1) {
4385 PyErr_Format(PyExc_TypeError,
4386 "%s takes exactly one positional argument (%zd given)",
4387 Py_TYPE(self)->tp_name, Py_SIZE(args));
4388 return -1;
4389 }
4390
4391 /* Arguments parsing needs to be done in the __init__() method to allow
4392 subclasses to define their own __init__() method, which may (or may
4393 not) support Unpickler arguments. However, this means we need to be
4394 extra careful in the other Unpickler methods, since a subclass could
4395 forget to call Unpickler.__init__() thus breaking our internal
4396 invariants. */
4397 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4398 &file, &encoding, &errors))
4399 return -1;
4400
4401 /* In case of multiple __init__() calls, clear previous content. */
4402 if (self->read != NULL)
4403 (void)Unpickler_clear(self);
4404
4405 self->read = PyObject_GetAttrString(file, "read");
4406 self->readline = PyObject_GetAttrString(file, "readline");
4407 if (self->readline == NULL || self->read == NULL)
4408 return -1;
4409
4410 if (encoding == NULL)
4411 encoding = "ASCII";
4412 if (errors == NULL)
4413 errors = "strict";
4414
4415 self->encoding = strdup(encoding);
4416 self->errors = strdup(errors);
4417 if (self->encoding == NULL || self->errors == NULL) {
4418 PyErr_NoMemory();
4419 return -1;
4420 }
4421
4422 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4423 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4424 "persistent_load");
4425 if (self->pers_func == NULL)
4426 return -1;
4427 }
4428 else {
4429 self->pers_func = NULL;
4430 }
4431
4432 self->stack = (Pdata *)Pdata_New();
4433 if (self->stack == NULL)
4434 return -1;
4435
4436 self->memo = PyDict_New();
4437 if (self->memo == NULL)
4438 return -1;
4439
4440 return 0;
4441}
4442
4443static PyObject *
4444Unpickler_get_memo(UnpicklerObject *self)
4445{
4446 if (self->memo == NULL)
4447 PyErr_SetString(PyExc_AttributeError, "memo");
4448 else
4449 Py_INCREF(self->memo);
4450 return self->memo;
4451}
4452
4453static int
4454Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4455{
4456 PyObject *tmp;
4457
4458 if (value == NULL) {
4459 PyErr_SetString(PyExc_TypeError,
4460 "attribute deletion is not supported");
4461 return -1;
4462 }
4463 if (!PyDict_Check(value)) {
4464 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4465 return -1;
4466 }
4467
4468 tmp = self->memo;
4469 Py_INCREF(value);
4470 self->memo = value;
4471 Py_XDECREF(tmp);
4472
4473 return 0;
4474}
4475
4476static PyObject *
4477Unpickler_get_persload(UnpicklerObject *self)
4478{
4479 if (self->pers_func == NULL)
4480 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4481 else
4482 Py_INCREF(self->pers_func);
4483 return self->pers_func;
4484}
4485
4486static int
4487Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4488{
4489 PyObject *tmp;
4490
4491 if (value == NULL) {
4492 PyErr_SetString(PyExc_TypeError,
4493 "attribute deletion is not supported");
4494 return -1;
4495 }
4496 if (!PyCallable_Check(value)) {
4497 PyErr_SetString(PyExc_TypeError,
4498 "persistent_load must be a callable taking "
4499 "one argument");
4500 return -1;
4501 }
4502
4503 tmp = self->pers_func;
4504 Py_INCREF(value);
4505 self->pers_func = value;
4506 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4507
4508 return 0;
4509}
4510
4511static PyGetSetDef Unpickler_getsets[] = {
4512 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4513 {"persistent_load", (getter)Unpickler_get_persload,
4514 (setter)Unpickler_set_persload},
4515 {NULL}
4516};
4517
/* Type object for _pickle.Unpickler.  The type may be subclassed and takes
 * part in cyclic garbage collection (see the tp_flags line); slots left at 0
 * are not set here.
 */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /*tp_flags*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4560
4561static int
4562init_stuff(void)
4563{
4564 PyObject *copyreg;
4565
4566 copyreg = PyImport_ImportModule("copyreg");
4567 if (!copyreg)
4568 return -1;
4569
4570 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4571 if (!dispatch_table)
4572 goto error;
4573
4574 extension_registry = \
4575 PyObject_GetAttrString(copyreg, "_extension_registry");
4576 if (!extension_registry)
4577 goto error;
4578
4579 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4580 if (!inverted_registry)
4581 goto error;
4582
4583 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4584 if (!extension_cache)
4585 goto error;
4586
4587 Py_DECREF(copyreg);
4588
4589 empty_tuple = PyTuple_New(0);
4590 if (empty_tuple == NULL)
4591 return -1;
4592
4593 two_tuple = PyTuple_New(2);
4594 if (two_tuple == NULL)
4595 return -1;
4596 /* We use this temp container with no regard to refcounts, or to
4597 * keeping containees alive. Exempt from GC, because we don't
4598 * want anything looking at two_tuple() by magic.
4599 */
4600 PyObject_GC_UnTrack(two_tuple);
4601
4602 return 0;
4603
4604 error:
4605 Py_DECREF(copyreg);
4606 return -1;
4607}
4608
/* Module definition passed to PyModule_Create() in PyInit__pickle().  Only
 * the name, the docstring and a size of -1 are provided; every remaining
 * field is NULL.
 */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",              /* module name */
    pickle_module_doc,      /* module docstring */
    -1,                     /* per-module state size */
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
4620
4621PyMODINIT_FUNC
4622PyInit__pickle(void)
4623{
4624 PyObject *m;
4625
4626 if (PyType_Ready(&Unpickler_Type) < 0)
4627 return NULL;
4628 if (PyType_Ready(&Pickler_Type) < 0)
4629 return NULL;
4630 if (PyType_Ready(&Pdata_Type) < 0)
4631 return NULL;
4632
4633 /* Create the module and add the functions. */
4634 m = PyModule_Create(&_picklemodule);
4635 if (m == NULL)
4636 return NULL;
4637
4638 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4639 return NULL;
4640 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4641 return NULL;
4642
4643 /* Initialize the exceptions. */
4644 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4645 if (PickleError == NULL)
4646 return NULL;
4647 PicklingError = \
4648 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4649 if (PicklingError == NULL)
4650 return NULL;
4651 UnpicklingError = \
4652 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4653 if (UnpicklingError == NULL)
4654 return NULL;
4655
4656 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4657 return NULL;
4658 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4659 return NULL;
4660 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4661 return NULL;
4662
4663 if (init_stuff() < 0)
4664 return NULL;
4665
4666 return m;
4667}