blob: a0810b99a9f9086315c7e9ebd457403e7d7b9509 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol version numbers known to this module.
   Bump this when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
13
/* Pickle opcodes. These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py.

   Each enumerator is the single character that identifies the opcode; the
   save_*() and memo_*() routines in this file store these values into the
   first byte of the char buffers they hand to pickler_write().

   NOTE(review): the protocol 2 constants are written as character
   constants with the high bit set ('\x80' ...); their numeric value is
   negative on platforms where plain char is signed.  In the code visible
   here they are only ever stored into char objects. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO       = '\x80',
    NEWOBJ      = '\x81',
    EXT1        = '\x82',
    EXT2        = '\x83',
    EXT4        = '\x84',
    TUPLE1      = '\x85',
    TUPLE2      = '\x86',
    TUPLE3      = '\x87',
    NEWTRUE     = '\x88',
    NEWFALSE    = '\x89',
    LONG1       = '\x8a',
    LONG4       = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES       = 'B',
    SHORT_BINBYTES = 'C',
};
77
/* These aren't opcodes -- they're ways to pickle bools before protocol 2
 * so that unpicklers written before bools were introduced unpickle them
 * as ints, but unpicklers after can recognize that bools were intended.
 * Note that protocol 2 added direct ways to pickle bools.
 *
 * Both strings start with the INT opcode character ('I') and end with a
 * newline; save_bool() writes them verbatim when the protocol is below 2.
 * Any earlier definition of TRUE/FALSE is discarded first.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants for the pickler. */
enum {
    /* Keep in sync with pickle.Pickler._BATCHSIZE.  This is how many elements
       batch_list/dict() pumps out before doing APPENDS/SETITEMS.  Nothing will
       break if this gets out of sync with pickle.py, but it's unclear that
       would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures (see fast_save_enter()
       and fast_save_leave()). */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream.
       pickler_write() sends anything larger than this straight to write()
       instead of copying it into the buffer. */
    WRITE_BUF_SIZE = 256,
};
103
/* Exception classes for pickle. These should override the ones defined in
   pickle.py, when the C-optimized Pickler and Unpickler are used. */
static PyObject *PickleError;
static PyObject *PicklingError;
static PyObject *UnpicklingError;

/* copyreg.dispatch_table, {type_object: pickling_function} */
static PyObject *dispatch_table;
/* For EXT[124] opcodes. */
/* copyreg._extension_registry, {(module_name, function_name): code} */
static PyObject *extension_registry;
/* copyreg._inverted_registry, {code: (module_name, function_name)} */
static PyObject *inverted_registry;
/* copyreg._extension_cache, {code: object} */
static PyObject *extension_cache;

/* XXX: Are these really necessary? */
/* As the name says, an empty tuple.  unpickler_readline() passes it as the
   argument tuple when calling readline(). */
static PyObject *empty_tuple;
/* For looking up name pairs in copyreg._extension_registry. */
static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
/* Internal data type used as the unpickling stack.
   It is a growable array of PyObject pointers: Pdata_push() appends at
   index `length`, Pdata_grow() enlarges `data` when `length == size`. */
typedef struct {
    PyObject_HEAD
    int length;         /* number of initial slots in data currently used */
    int size;           /* number of slots in data allocated */
    PyObject **data;    /* slot array, allocated with PyMem_Malloc() */
} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
/* Type object for Pdata.  Only the name, the instance size and the
   deallocator are given here; every later slot is left zero-initialized. */
static PyTypeObject Pdata_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pdata",              /*tp_name*/
    sizeof(Pdata),                /*tp_basicsize*/
    0,                            /*tp_itemsize*/
    (destructor)Pdata_dealloc,    /*tp_dealloc*/
};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   If the push fails, the macro makes the *enclosing function* return ER.
   Note that the macro itself does not release O on that failure path. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   As with PDATA_PUSH, a failed push returns ER from the enclosing function;
   the reference added by Py_INCREF is not undone by the macro in that
   case. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
/* State of a Pickler instance. */
typedef struct PicklerObject {
    PyObject_HEAD
    PyObject *write;            /* write() method of the output stream */
    PyObject *memo;             /* Memo dictionary, keeps track of the seen
                                   objects to support pickling of
                                   self-referential objects.  memo_put()
                                   stores (memo_id, obj) tuples keyed by the
                                   object's address as a Python int. */
    PyObject *pers_func;        /* persistent_id() method, can be NULL */
    PyObject *arg;              /* Cached 1-tuple reused by pickler_call()
                                   through ARG_TUP; NULL until first use */
    int proto;                  /* Pickle protocol number, >= 0 */
    int bin;                    /* Boolean, true if proto > 0 */
    int buf_size;               /* Size of the current buffered pickle data */
    char *write_buf;            /* Write buffer, this is to avoid calling the
                                   write() method of the output stream too
                                   often. */
    int fast;                   /* Enable fast mode if set to a true value.
                                   The fast mode disables the usage of memo,
                                   therefore speeding up the pickling process
                                   by not generating superfluous PUT opcodes.
                                   It should not be used with
                                   self-referential objects. */
    int fast_nesting;           /* Nesting counter maintained by
                                   fast_save_enter()/fast_save_leave(); set
                                   to -1 by fast_save_enter() on error */
    PyObject *fast_memo;        /* Dict keyed by object address, created
                                   lazily by fast_save_enter() */
} PicklerObject;
320
/* State of an Unpickler instance. */
typedef struct UnpicklerObject {
    PyObject_HEAD
    Pdata *stack;               /* Pickle data stack, store unpickled objects. */
    PyObject *readline;         /* readline() method of the input stream */
    PyObject *read;             /* read() method of the input stream */
    PyObject *memo;             /* Memo dictionary, provide the objects stored
                                   using the PUT opcodes. */
    PyObject *arg;              /* Cached 1-tuple reused by unpickler_call()
                                   through ARG_TUP; NULL until first use */
    PyObject *pers_func;        /* persistent_load() method, can be NULL. */
    PyObject *last_string;      /* Reference to the last bytes object
                                   returned by the read() or readline()
                                   method; keeps the data handed out by
                                   unpickler_read()/unpickler_readline()
                                   alive. */
    char *buffer;               /* Reading buffer. */
    char *encoding;             /* Name of the encoding to be used for
                                   decoding strings pickled using Python
                                   2.x. The default value is "ASCII" */
    char *errors;               /* Name of the error handling scheme to use
                                   when decoding strings. The default value
                                   is "strict". */
    int *marks;                 /* Mark stack, used for unpickling container
                                   objects. */
    Py_ssize_t num_marks;       /* Number of marks in the mark stack. */
    Py_ssize_t marks_size;      /* Current allocated size of the mark stack. */
} UnpicklerObject;
344
/* Forward declarations.  save() and save_reduce() are called by the
   save_*() helpers below before their own definitions appear. */
static int save(PicklerObject *, PyObject *, int);
static int save_reduce(PicklerObject *, PyObject *, PyObject *);
static PyTypeObject Pickler_Type;
static PyTypeObject Unpickler_Type;
350
351
/* Helpers for creating the argument tuple passed to functions. This has the
   performance advantage of calling PyTuple_New() only once. */

/* ARG_TUP(self, obj): make self->arg a 1-tuple whose only item is obj.
   The tuple is created on first use and reused afterwards; the item that
   was previously stored in it (if any) is released.  The reference to obj
   is always consumed: it is either stored in the tuple or, when the tuple
   cannot be created, released.  In the latter case self->arg stays NULL,
   which is how callers detect the failure. */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {  \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* FREE_ARG_TUP(self): if anything else still holds a reference to the
   cached tuple (its reference count is above 1), drop our reference and
   reset self->arg to NULL so that the next ARG_TUP allocates a new tuple. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
369
/* A temporary cleaner API for fast single argument function call.

   XXX: Does caching the argument tuple provide any real performance benefit?

   A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
   glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
   when the tuple is retrieved from the freelist (i.e., call PyTuple_New() then
   immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
   (i.e., call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle
   stream large enough to generate this number of calls would be massively
   overwhelmed by other factors, like I/O throughput, the GC traversal and
   object allocation overhead. So, I really doubt these functions provide any
   real benefits.

   On the other hand, oprofile reports that pickle spends a lot of time in
   these functions. But, that is probably more related to the function call
   overhead than to the argument tuple allocation.

   XXX: And, what is the reference behavior of these? Steal, borrow? At first
   glance, it seems to steal the reference of 'arg' and borrow the reference
   of 'func'.
 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
/* XXX: These read/readline functions ought to be optimized. Buffered I/O
   might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
694 /* In some rare cases (e.g., random.getrandbits), __module__ can be
695 None. If it is so, then search sys.modules for the module of
696 global. */
697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
718 if (PyObject_Compare(module_name, main_str) == 0)
719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
849 pdata[0] = LONG; /* use LONG for consistence with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
/* Pickle a Python int object.
 *
 * Values that fit in a C long are delegated to save_int().  Larger values
 * are written, for protocol 2 and newer, as a LONG1/LONG4 opcode followed by
 * the little-endian two's-complement bytes of the number; for older
 * protocols as a LONG opcode followed by repr(obj) and a newline.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
save_long(PicklerObject *self, PyObject *obj)
{
    PyObject *repr = NULL;
    Py_ssize_t size;
    long val = PyLong_AsLong(obj);
    int status = 0;

    const char long_op = LONG;

    if (val == -1 && PyErr_Occurred()) {
        /* out of range for int pickling */
        PyErr_Clear();
    }
    else
        return save_int(self, val);

    if (self->proto >= 2) {
        /* Linear-time pickling. */
        size_t nbits;
        size_t nbytes;
        unsigned char *pdata;
        char header[5];
        int i;
        int sign = _PyLong_Sign(obj);

        if (sign == 0) {
            header[0] = LONG1;
            header[1] = 0;      /* It's 0 -- an empty bytestring. */
            if (pickler_write(self, header, 2) < 0)
                goto error;
            return 0;
        }
        nbits = _PyLong_NumBits(obj);
        if (nbits == (size_t)-1 && PyErr_Occurred())
            goto error;
        /* How many bytes do we need?  There are nbits >> 3 full
         * bytes of data, and nbits & 7 leftover bits.  If there
         * are any leftover bits, then we clearly need another
         * byte.  What's not so obvious is that we *probably*
         * need another byte even if there aren't any leftovers:
         * the most-significant bit of the most-significant byte
         * acts like a sign bit, and it's usually got a sense
         * opposite of the one we need.  The exception is longs
         * of the form -(2**(8*j-1)) for j > 0.  Such a long is
         * its own 256's-complement, so has the right sign bit
         * even without the extra byte.  That's a pain to check
         * for in advance, though, so we always grab an extra
         * byte at the start, and cut it back later if possible.
         */
        nbytes = (nbits >> 3) + 1;
        if (nbytes > INT_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                            "long too large to pickle");
            goto error;
        }
        /* repr serves as a scratch buffer for the raw bytes here. */
        repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
        if (repr == NULL)
            goto error;
        pdata = (unsigned char *)PyBytes_AS_STRING(repr);
        i = _PyLong_AsByteArray((PyLongObject *)obj,
                                pdata, nbytes,
                                1 /* little endian */ , 1 /* signed */ );
        if (i < 0)
            goto error;
        /* If the long is negative, this may be a byte more than
         * needed.  This is so iff the MSB is all redundant sign
         * bits.
         */
        if (sign < 0 &&
            nbytes > 1 &&
            pdata[nbytes - 1] == 0xff &&
            (pdata[nbytes - 2] & 0x80) != 0) {
            nbytes--;
        }

        if (nbytes < 256) {
            /* LONG1: one length byte. */
            header[0] = LONG1;
            header[1] = (unsigned char)nbytes;
            size = 2;
        }
        else {
            /* LONG4: four little-endian length bytes. */
            header[0] = LONG4;
            size = (int)nbytes;
            for (i = 1; i < 5; i++) {
                header[i] = (unsigned char)(size & 0xff);
                size >>= 8;
            }
            /* size now becomes the length of the header itself. */
            size = 5;
        }
        if (pickler_write(self, header, size) < 0 ||
            pickler_write(self, (char *)pdata, (int)nbytes) < 0)
            goto error;
    }
    else {
        char *string;

        /* proto < 2:  write the repr and newline.  This is quadratic-time
           (in the number of digits), in both directions. */

        repr = PyObject_Repr(obj);
        if (repr == NULL)
            goto error;

        string = _PyUnicode_AsStringAndSize(repr, &size);
        if (string == NULL)
            goto error;

        if (pickler_write(self, &long_op, 1) < 0 ||
            pickler_write(self, string, size) < 0 ||
            pickler_write(self, "\n", 1) < 0)
            goto error;
    }

    /* The label sits inside a never-taken branch so that the success path
       skips the status assignment but shares the cleanup below. */
    if (0) {
  error:
      status = -1;
    }
    Py_XDECREF(repr);

    return status;
}
1005
1006static int
1007save_float(PicklerObject *self, PyObject *obj)
1008{
1009 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1010
1011 if (self->bin) {
1012 char pdata[9];
1013 pdata[0] = BINFLOAT;
1014 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1015 return -1;
1016 if (pickler_write(self, pdata, 9) < 0)
1017 return -1;
1018 }
1019 else {
1020 char pdata[250];
1021 pdata[0] = FLOAT;
1022 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1023 /* Extend the formatted string with a newline character */
1024 strcat(pdata, "\n");
1025
1026 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1027 return -1;
1028 }
1029
1030 return 0;
1031}
1032
1033static int
1034save_bytes(PicklerObject *self, PyObject *obj)
1035{
1036 if (self->proto < 3) {
1037 /* Older pickle protocols do not have an opcode for pickling bytes
1038 objects. Therefore, we need to fake the copy protocol (i.e.,
1039 the __reduce__ method) to permit bytes object unpickling. */
1040 PyObject *reduce_value = NULL;
1041 PyObject *bytelist = NULL;
1042 int status;
1043
1044 bytelist = PySequence_List(obj);
1045 if (bytelist == NULL)
1046 return -1;
1047
1048 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1049 bytelist);
1050 if (reduce_value == NULL) {
1051 Py_DECREF(bytelist);
1052 return -1;
1053 }
1054
1055 /* save_reduce() will memoize the object automatically. */
1056 status = save_reduce(self, reduce_value, obj);
1057 Py_DECREF(reduce_value);
1058 Py_DECREF(bytelist);
1059 return status;
1060 }
1061 else {
1062 Py_ssize_t size;
1063 char header[5];
1064 int len;
1065
1066 size = PyBytes_Size(obj);
1067 if (size < 0)
1068 return -1;
1069
1070 if (size < 256) {
1071 header[0] = SHORT_BINBYTES;
1072 header[1] = (unsigned char)size;
1073 len = 2;
1074 }
1075 else if (size <= 0xffffffffL) {
1076 header[0] = BINBYTES;
1077 header[1] = (unsigned char)(size & 0xff);
1078 header[2] = (unsigned char)((size >> 8) & 0xff);
1079 header[3] = (unsigned char)((size >> 16) & 0xff);
1080 header[4] = (unsigned char)((size >> 24) & 0xff);
1081 len = 5;
1082 }
1083 else {
1084 return -1; /* string too large */
1085 }
1086
1087 if (pickler_write(self, header, len) < 0)
1088 return -1;
1089
1090 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1091 return -1;
1092
1093 if (memo_put(self, obj) < 0)
1094 return -1;
1095
1096 return 0;
1097 }
1098}
1099
1100/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1101 backslash and newline characters to \uXXXX escapes. */
1102static PyObject *
1103raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1104{
1105 PyObject *repr, *result;
1106 char *p;
1107 char *q;
1108
1109 static const char *hexdigits = "0123456789abcdef";
1110
1111#ifdef Py_UNICODE_WIDE
1112 repr = PyBytes_FromStringAndSize(NULL, 10 * size);
1113#else
1114 repr = PyBytes_FromStringAndSize(NULL, 6 * size);
1115#endif
1116 if (repr == NULL)
1117 return NULL;
1118 if (size == 0)
1119 goto done;
1120
1121 p = q = PyBytes_AS_STRING(repr);
1122 while (size-- > 0) {
1123 Py_UNICODE ch = *s++;
1124#ifdef Py_UNICODE_WIDE
1125 /* Map 32-bit characters to '\Uxxxxxxxx' */
1126 if (ch >= 0x10000) {
1127 *p++ = '\\';
1128 *p++ = 'U';
1129 *p++ = hexdigits[(ch >> 28) & 0xf];
1130 *p++ = hexdigits[(ch >> 24) & 0xf];
1131 *p++ = hexdigits[(ch >> 20) & 0xf];
1132 *p++ = hexdigits[(ch >> 16) & 0xf];
1133 *p++ = hexdigits[(ch >> 12) & 0xf];
1134 *p++ = hexdigits[(ch >> 8) & 0xf];
1135 *p++ = hexdigits[(ch >> 4) & 0xf];
1136 *p++ = hexdigits[ch & 15];
1137 }
1138 else
1139#endif
1140 /* Map 16-bit characters to '\uxxxx' */
1141 if (ch >= 256 || ch == '\\' || ch == '\n') {
1142 *p++ = '\\';
1143 *p++ = 'u';
1144 *p++ = hexdigits[(ch >> 12) & 0xf];
1145 *p++ = hexdigits[(ch >> 8) & 0xf];
1146 *p++ = hexdigits[(ch >> 4) & 0xf];
1147 *p++ = hexdigits[ch & 15];
1148 }
1149 /* Copy everything else as-is */
1150 else
1151 *p++ = (char) ch;
1152 }
1153 size = p - q;
1154
1155 done:
1156 result = PyBytes_FromStringAndSize(PyBytes_AS_STRING(repr), size);
1157 Py_DECREF(repr);
1158 return result;
1159}
1160
1161static int
1162save_unicode(PicklerObject *self, PyObject *obj)
1163{
1164 Py_ssize_t size;
1165 PyObject *encoded = NULL;
1166
1167 if (self->bin) {
1168 char pdata[5];
1169
1170 encoded = PyUnicode_AsUTF8String(obj);
1171 if (encoded == NULL)
1172 goto error;
1173
1174 size = PyBytes_GET_SIZE(encoded);
1175 if (size < 0 || size > 0xffffffffL)
1176 goto error; /* string too large */
1177
1178 pdata[0] = BINUNICODE;
1179 pdata[1] = (unsigned char)(size & 0xff);
1180 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1181 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1182 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1183
1184 if (pickler_write(self, pdata, 5) < 0)
1185 goto error;
1186
1187 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1188 goto error;
1189 }
1190 else {
1191 const char unicode_op = UNICODE;
1192
1193 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1194 PyUnicode_GET_SIZE(obj));
1195 if (encoded == NULL)
1196 goto error;
1197
1198 if (pickler_write(self, &unicode_op, 1) < 0)
1199 goto error;
1200
1201 size = PyBytes_GET_SIZE(encoded);
1202 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1203 goto error;
1204
1205 if (pickler_write(self, "\n", 1) < 0)
1206 goto error;
1207 }
1208 if (memo_put(self, obj) < 0)
1209 goto error;
1210
1211 Py_DECREF(encoded);
1212 return 0;
1213
1214 error:
1215 Py_XDECREF(encoded);
1216 return -1;
1217}
1218
1219/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1220static int
1221store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1222{
1223 int i;
1224
1225 assert(PyTuple_Size(t) == len);
1226
1227 for (i = 0; i < len; i++) {
1228 PyObject *element = PyTuple_GET_ITEM(t, i);
1229
1230 if (element == NULL)
1231 return -1;
1232 if (save(self, element, 0) < 0)
1233 return -1;
1234 }
1235
1236 return 0;
1237}
1238
1239/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1240 * used across protocols to minimize the space needed to pickle them.
1241 * Tuples are also the only builtin immutable type that can be recursive
1242 * (a tuple can be reached from itself), and that requires some subtle
1243 * magic so that it works in all cases. IOW, this is a long routine.
1244 */
1245static int
1246save_tuple(PicklerObject *self, PyObject *obj)
1247{
1248 PyObject *memo_key = NULL;
1249 int len, i;
1250 int status = 0;
1251
1252 const char mark_op = MARK;
1253 const char tuple_op = TUPLE;
1254 const char pop_op = POP;
1255 const char pop_mark_op = POP_MARK;
1256 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1257
1258 if ((len = PyTuple_Size(obj)) < 0)
1259 return -1;
1260
1261 if (len == 0) {
1262 char pdata[2];
1263
1264 if (self->proto) {
1265 pdata[0] = EMPTY_TUPLE;
1266 len = 1;
1267 }
1268 else {
1269 pdata[0] = MARK;
1270 pdata[1] = TUPLE;
1271 len = 2;
1272 }
1273 if (pickler_write(self, pdata, len) < 0)
1274 return -1;
1275 return 0;
1276 }
1277
1278 /* id(tuple) isn't in the memo now. If it shows up there after
1279 * saving the tuple elements, the tuple must be recursive, in
1280 * which case we'll pop everything we put on the stack, and fetch
1281 * its value from the memo.
1282 */
1283 memo_key = PyLong_FromVoidPtr(obj);
1284 if (memo_key == NULL)
1285 return -1;
1286
1287 if (len <= 3 && self->proto >= 2) {
1288 /* Use TUPLE{1,2,3} opcodes. */
1289 if (store_tuple_elements(self, obj, len) < 0)
1290 goto error;
1291
1292 if (PyDict_GetItem(self->memo, memo_key)) {
1293 /* pop the len elements */
1294 for (i = 0; i < len; i++)
1295 if (pickler_write(self, &pop_op, 1) < 0)
1296 goto error;
1297 /* fetch from memo */
1298 if (memo_get(self, memo_key) < 0)
1299 goto error;
1300
1301 Py_DECREF(memo_key);
1302 return 0;
1303 }
1304 else { /* Not recursive. */
1305 if (pickler_write(self, len2opcode + len, 1) < 0)
1306 goto error;
1307 }
1308 goto memoize;
1309 }
1310
1311 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1312 * Generate MARK e1 e2 ... TUPLE
1313 */
1314 if (pickler_write(self, &mark_op, 1) < 0)
1315 goto error;
1316
1317 if (store_tuple_elements(self, obj, len) < 0)
1318 goto error;
1319
1320 if (PyDict_GetItem(self->memo, memo_key)) {
1321 /* pop the stack stuff we pushed */
1322 if (self->bin) {
1323 if (pickler_write(self, &pop_mark_op, 1) < 0)
1324 goto error;
1325 }
1326 else {
1327 /* Note that we pop one more than len, to remove
1328 * the MARK too.
1329 */
1330 for (i = 0; i <= len; i++)
1331 if (pickler_write(self, &pop_op, 1) < 0)
1332 goto error;
1333 }
1334 /* fetch from memo */
1335 if (memo_get(self, memo_key) < 0)
1336 goto error;
1337
1338 Py_DECREF(memo_key);
1339 return 0;
1340 }
1341 else { /* Not recursive. */
1342 if (pickler_write(self, &tuple_op, 1) < 0)
1343 goto error;
1344 }
1345
1346 memoize:
1347 if (memo_put(self, obj) < 0)
1348 goto error;
1349
1350 if (0) {
1351 error:
1352 status = -1;
1353 }
1354
1355 Py_DECREF(memo_key);
1356 return status;
1357}
1358
1359/* iter is an iterator giving items, and we batch up chunks of
1360 * MARK item item ... item APPENDS
1361 * opcode sequences. Calling code should have arranged to first create an
1362 * empty list, or list-like object, for the APPENDS to operate on.
1363 * Returns 0 on success, <0 on error.
1364 */
1365static int
1366batch_list(PicklerObject *self, PyObject *iter)
1367{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001368 PyObject *obj = NULL;
1369 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001370 int i, n;
1371
1372 const char mark_op = MARK;
1373 const char append_op = APPEND;
1374 const char appends_op = APPENDS;
1375
1376 assert(iter != NULL);
1377
1378 /* XXX: I think this function could be made faster by avoiding the
1379 iterator interface and fetching objects directly from list using
1380 PyList_GET_ITEM.
1381 */
1382
1383 if (self->proto == 0) {
1384 /* APPENDS isn't available; do one at a time. */
1385 for (;;) {
1386 obj = PyIter_Next(iter);
1387 if (obj == NULL) {
1388 if (PyErr_Occurred())
1389 return -1;
1390 break;
1391 }
1392 i = save(self, obj, 0);
1393 Py_DECREF(obj);
1394 if (i < 0)
1395 return -1;
1396 if (pickler_write(self, &append_op, 1) < 0)
1397 return -1;
1398 }
1399 return 0;
1400 }
1401
1402 /* proto > 0: write in batches of BATCHSIZE. */
1403 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001404 /* Get first item */
1405 firstitem = PyIter_Next(iter);
1406 if (firstitem == NULL) {
1407 if (PyErr_Occurred())
1408 goto error;
1409
1410 /* nothing more to add */
1411 break;
1412 }
1413
1414 /* Try to get a second item */
1415 obj = PyIter_Next(iter);
1416 if (obj == NULL) {
1417 if (PyErr_Occurred())
1418 goto error;
1419
1420 /* Only one item to write */
1421 if (save(self, firstitem, 0) < 0)
1422 goto error;
1423 if (pickler_write(self, &append_op, 1) < 0)
1424 goto error;
1425 Py_CLEAR(firstitem);
1426 break;
1427 }
1428
1429 /* More than one item to write */
1430
1431 /* Pump out MARK, items, APPENDS. */
1432 if (pickler_write(self, &mark_op, 1) < 0)
1433 goto error;
1434
1435 if (save(self, firstitem, 0) < 0)
1436 goto error;
1437 Py_CLEAR(firstitem);
1438 n = 1;
1439
1440 /* Fetch and save up to BATCHSIZE items */
1441 while (obj) {
1442 if (save(self, obj, 0) < 0)
1443 goto error;
1444 Py_CLEAR(obj);
1445 n += 1;
1446
1447 if (n == BATCHSIZE)
1448 break;
1449
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001450 obj = PyIter_Next(iter);
1451 if (obj == NULL) {
1452 if (PyErr_Occurred())
1453 goto error;
1454 break;
1455 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001456 }
1457
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001458 if (pickler_write(self, &appends_op, 1) < 0)
1459 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001460
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001461 } while (n == BATCHSIZE);
1462 return 0;
1463
1464 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001465 Py_XDECREF(firstitem);
1466 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001467 return -1;
1468}
1469
1470static int
1471save_list(PicklerObject *self, PyObject *obj)
1472{
1473 PyObject *iter;
1474 char header[3];
1475 int len;
1476 int status = 0;
1477
1478 if (self->fast && !fast_save_enter(self, obj))
1479 goto error;
1480
1481 /* Create an empty list. */
1482 if (self->bin) {
1483 header[0] = EMPTY_LIST;
1484 len = 1;
1485 }
1486 else {
1487 header[0] = MARK;
1488 header[1] = LIST;
1489 len = 2;
1490 }
1491
1492 if (pickler_write(self, header, len) < 0)
1493 goto error;
1494
1495 /* Get list length, and bow out early if empty. */
1496 if ((len = PyList_Size(obj)) < 0)
1497 goto error;
1498
1499 if (memo_put(self, obj) < 0)
1500 goto error;
1501
1502 if (len != 0) {
1503 /* Save the list elements. */
1504 iter = PyObject_GetIter(obj);
1505 if (iter == NULL)
1506 goto error;
1507 status = batch_list(self, iter);
1508 Py_DECREF(iter);
1509 }
1510
1511 if (0) {
1512 error:
1513 status = -1;
1514 }
1515
1516 if (self->fast && !fast_save_leave(self, obj))
1517 status = -1;
1518
1519 return status;
1520}
1521
1522/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1523 * MARK key value ... key value SETITEMS
1524 * opcode sequences. Calling code should have arranged to first create an
1525 * empty dict, or dict-like object, for the SETITEMS to operate on.
1526 * Returns 0 on success, <0 on error.
1527 *
1528 * This is very much like batch_list(). The difference between saving
1529 * elements directly, and picking apart two-tuples, is so long-winded at
1530 * the C level, though, that attempts to combine these routines were too
1531 * ugly to bear.
1532 */
1533static int
1534batch_dict(PicklerObject *self, PyObject *iter)
1535{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001536 PyObject *obj = NULL;
1537 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001538 int i, n;
1539
1540 const char mark_op = MARK;
1541 const char setitem_op = SETITEM;
1542 const char setitems_op = SETITEMS;
1543
1544 assert(iter != NULL);
1545
1546 if (self->proto == 0) {
1547 /* SETITEMS isn't available; do one at a time. */
1548 for (;;) {
1549 obj = PyIter_Next(iter);
1550 if (obj == NULL) {
1551 if (PyErr_Occurred())
1552 return -1;
1553 break;
1554 }
1555 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1556 PyErr_SetString(PyExc_TypeError, "dict items "
1557 "iterator must return 2-tuples");
1558 return -1;
1559 }
1560 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1561 if (i >= 0)
1562 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1563 Py_DECREF(obj);
1564 if (i < 0)
1565 return -1;
1566 if (pickler_write(self, &setitem_op, 1) < 0)
1567 return -1;
1568 }
1569 return 0;
1570 }
1571
1572 /* proto > 0: write in batches of BATCHSIZE. */
1573 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001574 /* Get first item */
1575 firstitem = PyIter_Next(iter);
1576 if (firstitem == NULL) {
1577 if (PyErr_Occurred())
1578 goto error;
1579
1580 /* nothing more to add */
1581 break;
1582 }
1583 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1584 PyErr_SetString(PyExc_TypeError, "dict items "
1585 "iterator must return 2-tuples");
1586 goto error;
1587 }
1588
1589 /* Try to get a second item */
1590 obj = PyIter_Next(iter);
1591 if (obj == NULL) {
1592 if (PyErr_Occurred())
1593 goto error;
1594
1595 /* Only one item to write */
1596 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1597 goto error;
1598 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1599 goto error;
1600 if (pickler_write(self, &setitem_op, 1) < 0)
1601 goto error;
1602 Py_CLEAR(firstitem);
1603 break;
1604 }
1605
1606 /* More than one item to write */
1607
1608 /* Pump out MARK, items, SETITEMS. */
1609 if (pickler_write(self, &mark_op, 1) < 0)
1610 goto error;
1611
1612 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1613 goto error;
1614 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1615 goto error;
1616 Py_CLEAR(firstitem);
1617 n = 1;
1618
1619 /* Fetch and save up to BATCHSIZE items */
1620 while (obj) {
1621 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1622 PyErr_SetString(PyExc_TypeError, "dict items "
1623 "iterator must return 2-tuples");
1624 goto error;
1625 }
1626 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1627 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1628 goto error;
1629 Py_CLEAR(obj);
1630 n += 1;
1631
1632 if (n == BATCHSIZE)
1633 break;
1634
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001635 obj = PyIter_Next(iter);
1636 if (obj == NULL) {
1637 if (PyErr_Occurred())
1638 goto error;
1639 break;
1640 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001641 }
1642
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001643 if (pickler_write(self, &setitems_op, 1) < 0)
1644 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001645
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001646 } while (n == BATCHSIZE);
1647 return 0;
1648
1649 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001650 Py_XDECREF(firstitem);
1651 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001652 return -1;
1653}
1654
1655static int
1656save_dict(PicklerObject *self, PyObject *obj)
1657{
1658 PyObject *items, *iter;
1659 char header[3];
1660 int len;
1661 int status = 0;
1662
1663 if (self->fast && !fast_save_enter(self, obj))
1664 goto error;
1665
1666 /* Create an empty dict. */
1667 if (self->bin) {
1668 header[0] = EMPTY_DICT;
1669 len = 1;
1670 }
1671 else {
1672 header[0] = MARK;
1673 header[1] = DICT;
1674 len = 2;
1675 }
1676
1677 if (pickler_write(self, header, len) < 0)
1678 goto error;
1679
1680 /* Get dict size, and bow out early if empty. */
1681 if ((len = PyDict_Size(obj)) < 0)
1682 goto error;
1683
1684 if (memo_put(self, obj) < 0)
1685 goto error;
1686
1687 if (len != 0) {
1688 /* Save the dict items. */
1689 items = PyObject_CallMethod(obj, "items", "()");
1690 if (items == NULL)
1691 goto error;
1692 iter = PyObject_GetIter(items);
1693 Py_DECREF(items);
1694 if (iter == NULL)
1695 goto error;
1696 status = batch_dict(self, iter);
1697 Py_DECREF(iter);
1698 }
1699
1700 if (0) {
1701 error:
1702 status = -1;
1703 }
1704
1705 if (self->fast && !fast_save_leave(self, obj))
1706 status = -1;
1707
1708 return status;
1709}
1710
1711static int
1712save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1713{
1714 static PyObject *name_str = NULL;
1715 PyObject *global_name = NULL;
1716 PyObject *module_name = NULL;
1717 PyObject *module = NULL;
1718 PyObject *cls;
1719 int status = 0;
1720
1721 const char global_op = GLOBAL;
1722
1723 if (name_str == NULL) {
1724 name_str = PyUnicode_InternFromString("__name__");
1725 if (name_str == NULL)
1726 goto error;
1727 }
1728
1729 if (name) {
1730 global_name = name;
1731 Py_INCREF(global_name);
1732 }
1733 else {
1734 global_name = PyObject_GetAttr(obj, name_str);
1735 if (global_name == NULL)
1736 goto error;
1737 }
1738
1739 module_name = whichmodule(obj, global_name);
1740 if (module_name == NULL)
1741 goto error;
1742
1743 /* XXX: Change to use the import C API directly with level=0 to disallow
1744 relative imports.
1745
1746 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1747 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1748 custom import functions (IMHO, this would be a nice security
1749 feature). The import C API would need to be extended to support the
1750 extra parameters of __import__ to fix that. */
1751 module = PyImport_Import(module_name);
1752 if (module == NULL) {
1753 PyErr_Format(PicklingError,
1754 "Can't pickle %R: import of module %R failed",
1755 obj, module_name);
1756 goto error;
1757 }
1758 cls = PyObject_GetAttr(module, global_name);
1759 if (cls == NULL) {
1760 PyErr_Format(PicklingError,
1761 "Can't pickle %R: attribute lookup %S.%S failed",
1762 obj, module_name, global_name);
1763 goto error;
1764 }
1765 if (cls != obj) {
1766 Py_DECREF(cls);
1767 PyErr_Format(PicklingError,
1768 "Can't pickle %R: it's not the same object as %S.%S",
1769 obj, module_name, global_name);
1770 goto error;
1771 }
1772 Py_DECREF(cls);
1773
1774 if (self->proto >= 2) {
1775 /* See whether this is in the extension registry, and if
1776 * so generate an EXT opcode.
1777 */
1778 PyObject *code_obj; /* extension code as Python object */
1779 long code; /* extension code as C value */
1780 char pdata[5];
1781 int n;
1782
1783 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1784 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1785 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1786 /* The object is not registered in the extension registry.
1787 This is the most likely code path. */
1788 if (code_obj == NULL)
1789 goto gen_global;
1790
1791 /* XXX: pickle.py doesn't check neither the type, nor the range
1792 of the value returned by the extension_registry. It should for
1793 consistency. */
1794
1795 /* Verify code_obj has the right type and value. */
1796 if (!PyLong_Check(code_obj)) {
1797 PyErr_Format(PicklingError,
1798 "Can't pickle %R: extension code %R isn't an integer",
1799 obj, code_obj);
1800 goto error;
1801 }
1802 code = PyLong_AS_LONG(code_obj);
1803 if (code <= 0 || code > 0x7fffffffL) {
1804 PyErr_Format(PicklingError,
1805 "Can't pickle %R: extension code %ld is out of range",
1806 obj, code);
1807 goto error;
1808 }
1809
1810 /* Generate an EXT opcode. */
1811 if (code <= 0xff) {
1812 pdata[0] = EXT1;
1813 pdata[1] = (unsigned char)code;
1814 n = 2;
1815 }
1816 else if (code <= 0xffff) {
1817 pdata[0] = EXT2;
1818 pdata[1] = (unsigned char)(code & 0xff);
1819 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1820 n = 3;
1821 }
1822 else {
1823 pdata[0] = EXT4;
1824 pdata[1] = (unsigned char)(code & 0xff);
1825 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1826 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1827 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1828 n = 5;
1829 }
1830
1831 if (pickler_write(self, pdata, n) < 0)
1832 goto error;
1833 }
1834 else {
1835 /* Generate a normal global opcode if we are using a pickle
1836 protocol <= 2, or if the object is not registered in the
1837 extension registry. */
1838 PyObject *encoded;
1839 PyObject *(*unicode_encoder)(PyObject *);
1840
1841 gen_global:
1842 if (pickler_write(self, &global_op, 1) < 0)
1843 goto error;
1844
1845 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1846 the module name and the global name using UTF-8. We do so only when
1847 we are using the pickle protocol newer than version 3. This is to
1848 ensure compatibility with older Unpickler running on Python 2.x. */
1849 if (self->proto >= 3) {
1850 unicode_encoder = PyUnicode_AsUTF8String;
1851 }
1852 else {
1853 unicode_encoder = PyUnicode_AsASCIIString;
1854 }
1855
1856 /* Save the name of the module. */
1857 encoded = unicode_encoder(module_name);
1858 if (encoded == NULL) {
1859 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1860 PyErr_Format(PicklingError,
1861 "can't pickle module identifier '%S' using "
1862 "pickle protocol %i", module_name, self->proto);
1863 goto error;
1864 }
1865 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1866 PyBytes_GET_SIZE(encoded)) < 0) {
1867 Py_DECREF(encoded);
1868 goto error;
1869 }
1870 Py_DECREF(encoded);
1871 if(pickler_write(self, "\n", 1) < 0)
1872 goto error;
1873
1874 /* Save the name of the module. */
1875 encoded = unicode_encoder(global_name);
1876 if (encoded == NULL) {
1877 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1878 PyErr_Format(PicklingError,
1879 "can't pickle global identifier '%S' using "
1880 "pickle protocol %i", global_name, self->proto);
1881 goto error;
1882 }
1883 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1884 PyBytes_GET_SIZE(encoded)) < 0) {
1885 Py_DECREF(encoded);
1886 goto error;
1887 }
1888 Py_DECREF(encoded);
1889 if(pickler_write(self, "\n", 1) < 0)
1890 goto error;
1891
1892 /* Memoize the object. */
1893 if (memo_put(self, obj) < 0)
1894 goto error;
1895 }
1896
1897 if (0) {
1898 error:
1899 status = -1;
1900 }
1901 Py_XDECREF(module_name);
1902 Py_XDECREF(global_name);
1903 Py_XDECREF(module);
1904
1905 return status;
1906}
1907
1908static int
1909save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1910{
1911 PyObject *pid = NULL;
1912 int status = 0;
1913
1914 const char persid_op = PERSID;
1915 const char binpersid_op = BINPERSID;
1916
1917 Py_INCREF(obj);
1918 pid = pickler_call(self, func, obj);
1919 if (pid == NULL)
1920 return -1;
1921
1922 if (pid != Py_None) {
1923 if (self->bin) {
1924 if (save(self, pid, 1) < 0 ||
1925 pickler_write(self, &binpersid_op, 1) < 0)
1926 goto error;
1927 }
1928 else {
1929 PyObject *pid_str = NULL;
1930 char *pid_ascii_bytes;
1931 Py_ssize_t size;
1932
1933 pid_str = PyObject_Str(pid);
1934 if (pid_str == NULL)
1935 goto error;
1936
1937 /* XXX: Should it check whether the persistent id only contains
1938 ASCII characters? And what if the pid contains embedded
1939 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001940 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001941 Py_DECREF(pid_str);
1942 if (pid_ascii_bytes == NULL)
1943 goto error;
1944
1945 if (pickler_write(self, &persid_op, 1) < 0 ||
1946 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1947 pickler_write(self, "\n", 1) < 0)
1948 goto error;
1949 }
1950 status = 1;
1951 }
1952
1953 if (0) {
1954 error:
1955 status = -1;
1956 }
1957 Py_XDECREF(pid);
1958
1959 return status;
1960}
1961
1962/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1963 * appropriate __reduce__ method for obj.
1964 */
1965static int
1966save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1967{
1968 PyObject *callable;
1969 PyObject *argtup;
1970 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001971 PyObject *listitems = Py_None;
1972 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00001973 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001974
1975 int use_newobj = self->proto >= 2;
1976
1977 const char reduce_op = REDUCE;
1978 const char build_op = BUILD;
1979 const char newobj_op = NEWOBJ;
1980
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00001981 size = PyTuple_Size(args);
1982 if (size < 2 || size > 5) {
1983 PyErr_SetString(PicklingError, "tuple returned by "
1984 "__reduce__ must contain 2 through 5 elements");
1985 return -1;
1986 }
1987
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001988 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
1989 &callable, &argtup, &state, &listitems, &dictitems))
1990 return -1;
1991
1992 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001993 PyErr_SetString(PicklingError, "first item of the tuple "
1994 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001995 return -1;
1996 }
1997 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00001998 PyErr_SetString(PicklingError, "second item of the tuple "
1999 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002000 return -1;
2001 }
2002
2003 if (state == Py_None)
2004 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002005
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006 if (listitems == Py_None)
2007 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002008 else if (!PyIter_Check(listitems)) {
2009 PyErr_Format(PicklingError, "Fourth element of tuple"
2010 "returned by __reduce__ must be an iterator, not %s",
2011 Py_TYPE(listitems)->tp_name);
2012 return -1;
2013 }
2014
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002015 if (dictitems == Py_None)
2016 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002017 else if (!PyIter_Check(dictitems)) {
2018 PyErr_Format(PicklingError, "Fifth element of tuple"
2019 "returned by __reduce__ must be an iterator, not %s",
2020 Py_TYPE(dictitems)->tp_name);
2021 return -1;
2022 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002023
2024 /* Protocol 2 special case: if callable's name is __newobj__, use
2025 NEWOBJ. */
2026 if (use_newobj) {
2027 static PyObject *newobj_str = NULL;
2028 PyObject *name_str;
2029
2030 if (newobj_str == NULL) {
2031 newobj_str = PyUnicode_InternFromString("__newobj__");
2032 }
2033
2034 name_str = PyObject_GetAttrString(callable, "__name__");
2035 if (name_str == NULL) {
2036 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2037 PyErr_Clear();
2038 else
2039 return -1;
2040 use_newobj = 0;
2041 }
2042 else {
2043 use_newobj = PyUnicode_Check(name_str) &&
2044 PyUnicode_Compare(name_str, newobj_str) == 0;
2045 Py_DECREF(name_str);
2046 }
2047 }
2048 if (use_newobj) {
2049 PyObject *cls;
2050 PyObject *newargtup;
2051 PyObject *obj_class;
2052 int p;
2053
2054 /* Sanity checks. */
2055 if (Py_SIZE(argtup) < 1) {
2056 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2057 return -1;
2058 }
2059
2060 cls = PyTuple_GET_ITEM(argtup, 0);
2061 if (!PyObject_HasAttrString(cls, "__new__")) {
2062 PyErr_SetString(PicklingError, "args[0] from "
2063 "__newobj__ args has no __new__");
2064 return -1;
2065 }
2066
2067 if (obj != NULL) {
2068 obj_class = PyObject_GetAttrString(obj, "__class__");
2069 if (obj_class == NULL) {
2070 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2071 PyErr_Clear();
2072 else
2073 return -1;
2074 }
2075 p = obj_class != cls; /* true iff a problem */
2076 Py_DECREF(obj_class);
2077 if (p) {
2078 PyErr_SetString(PicklingError, "args[0] from "
2079 "__newobj__ args has the wrong class");
2080 return -1;
2081 }
2082 }
2083 /* XXX: These calls save() are prone to infinite recursion. Imagine
2084 what happen if the value returned by the __reduce__() method of
2085 some extension type contains another object of the same type. Ouch!
2086
2087 Here is a quick example, that I ran into, to illustrate what I
2088 mean:
2089
2090 >>> import pickle, copyreg
2091 >>> copyreg.dispatch_table.pop(complex)
2092 >>> pickle.dumps(1+2j)
2093 Traceback (most recent call last):
2094 ...
2095 RuntimeError: maximum recursion depth exceeded
2096
2097 Removing the complex class from copyreg.dispatch_table made the
2098 __reduce_ex__() method emit another complex object:
2099
2100 >>> (1+1j).__reduce_ex__(2)
2101 (<function __newobj__ at 0xb7b71c3c>,
2102 (<class 'complex'>, (1+1j)), None, None, None)
2103
2104 Thus when save() was called on newargstup (the 2nd item) recursion
2105 ensued. Of course, the bug was in the complex class which had a
2106 broken __getnewargs__() that emitted another complex object. But,
2107 the point, here, is it is quite easy to end up with a broken reduce
2108 function. */
2109
2110 /* Save the class and its __new__ arguments. */
2111 if (save(self, cls, 0) < 0)
2112 return -1;
2113
2114 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2115 if (newargtup == NULL)
2116 return -1;
2117
2118 p = save(self, newargtup, 0);
2119 Py_DECREF(newargtup);
2120 if (p < 0)
2121 return -1;
2122
2123 /* Add NEWOBJ opcode. */
2124 if (pickler_write(self, &newobj_op, 1) < 0)
2125 return -1;
2126 }
2127 else { /* Not using NEWOBJ. */
2128 if (save(self, callable, 0) < 0 ||
2129 save(self, argtup, 0) < 0 ||
2130 pickler_write(self, &reduce_op, 1) < 0)
2131 return -1;
2132 }
2133
2134 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2135 the caller do not want to memoize the object. Not particularly useful,
2136 but that is to mimic the behavior save_reduce() in pickle.py when
2137 obj is None. */
2138 if (obj && memo_put(self, obj) < 0)
2139 return -1;
2140
2141 if (listitems && batch_list(self, listitems) < 0)
2142 return -1;
2143
2144 if (dictitems && batch_dict(self, dictitems) < 0)
2145 return -1;
2146
2147 if (state) {
2148 if (save(self, state, 0) < 0 ||
2149 pickler_write(self, &build_op, 1) < 0)
2150 return -1;
2151 }
2152
2153 return 0;
2154}
2155
2156static int
2157save(PicklerObject *self, PyObject *obj, int pers_save)
2158{
2159 PyTypeObject *type;
2160 PyObject *reduce_func = NULL;
2161 PyObject *reduce_value = NULL;
2162 PyObject *memo_key = NULL;
2163 int status = 0;
2164
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002165 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2166 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002167
2168 /* The extra pers_save argument is necessary to avoid calling save_pers()
2169 on its returned object. */
2170 if (!pers_save && self->pers_func) {
2171 /* save_pers() returns:
2172 -1 to signal an error;
2173 0 if it did nothing successfully;
2174 1 if a persistent id was saved.
2175 */
2176 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2177 goto done;
2178 }
2179
2180 type = Py_TYPE(obj);
2181
2182 /* XXX: The old cPickle had an optimization that used switch-case
2183 statement dispatching on the first letter of the type name. It was
2184 probably not a bad idea after all. If benchmarks shows that particular
2185 optimization had some real benefits, it would be nice to add it
2186 back. */
2187
2188 /* Atom types; these aren't memoized, so don't check the memo. */
2189
2190 if (obj == Py_None) {
2191 status = save_none(self, obj);
2192 goto done;
2193 }
2194 else if (obj == Py_False || obj == Py_True) {
2195 status = save_bool(self, obj);
2196 goto done;
2197 }
2198 else if (type == &PyLong_Type) {
2199 status = save_long(self, obj);
2200 goto done;
2201 }
2202 else if (type == &PyFloat_Type) {
2203 status = save_float(self, obj);
2204 goto done;
2205 }
2206
2207 /* Check the memo to see if it has the object. If so, generate
2208 a GET (or BINGET) opcode, instead of pickling the object
2209 once again. */
2210 memo_key = PyLong_FromVoidPtr(obj);
2211 if (memo_key == NULL)
2212 goto error;
2213 if (PyDict_GetItem(self->memo, memo_key)) {
2214 if (memo_get(self, memo_key) < 0)
2215 goto error;
2216 goto done;
2217 }
2218
2219 if (type == &PyBytes_Type) {
2220 status = save_bytes(self, obj);
2221 goto done;
2222 }
2223 else if (type == &PyUnicode_Type) {
2224 status = save_unicode(self, obj);
2225 goto done;
2226 }
2227 else if (type == &PyDict_Type) {
2228 status = save_dict(self, obj);
2229 goto done;
2230 }
2231 else if (type == &PyList_Type) {
2232 status = save_list(self, obj);
2233 goto done;
2234 }
2235 else if (type == &PyTuple_Type) {
2236 status = save_tuple(self, obj);
2237 goto done;
2238 }
2239 else if (type == &PyType_Type) {
2240 status = save_global(self, obj, NULL);
2241 goto done;
2242 }
2243 else if (type == &PyFunction_Type) {
2244 status = save_global(self, obj, NULL);
2245 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2246 /* fall back to reduce */
2247 PyErr_Clear();
2248 }
2249 else {
2250 goto done;
2251 }
2252 }
2253 else if (type == &PyCFunction_Type) {
2254 status = save_global(self, obj, NULL);
2255 goto done;
2256 }
2257 else if (PyType_IsSubtype(type, &PyType_Type)) {
2258 status = save_global(self, obj, NULL);
2259 goto done;
2260 }
2261
2262 /* XXX: This part needs some unit tests. */
2263
2264 /* Get a reduction callable, and call it. This may come from
2265 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2266 * or the object's __reduce__ method.
2267 */
2268 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2269 if (reduce_func != NULL) {
2270 /* Here, the reference count of the reduce_func object returned by
2271 PyDict_GetItem needs to be increased to be consistent with the one
2272 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2273 reduce_func at the end of the save() routine.
2274 */
2275 Py_INCREF(reduce_func);
2276 Py_INCREF(obj);
2277 reduce_value = pickler_call(self, reduce_func, obj);
2278 }
2279 else {
2280 static PyObject *reduce_str = NULL;
2281 static PyObject *reduce_ex_str = NULL;
2282
2283 /* Cache the name of the reduce methods. */
2284 if (reduce_str == NULL) {
2285 reduce_str = PyUnicode_InternFromString("__reduce__");
2286 if (reduce_str == NULL)
2287 goto error;
2288 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2289 if (reduce_ex_str == NULL)
2290 goto error;
2291 }
2292
2293 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2294 automatically defined as __reduce__. While this is convenient, this
2295 make it impossible to know which method was actually called. Of
2296 course, this is not a big deal. But still, it would be nice to let
2297 the user know which method was called when something go
2298 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2299 don't actually have to check for a __reduce__ method. */
2300
2301 /* Check for a __reduce_ex__ method. */
2302 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2303 if (reduce_func != NULL) {
2304 PyObject *proto;
2305 proto = PyLong_FromLong(self->proto);
2306 if (proto != NULL) {
2307 reduce_value = pickler_call(self, reduce_func, proto);
2308 }
2309 }
2310 else {
2311 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2312 PyErr_Clear();
2313 else
2314 goto error;
2315 /* Check for a __reduce__ method. */
2316 reduce_func = PyObject_GetAttr(obj, reduce_str);
2317 if (reduce_func != NULL) {
2318 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2319 }
2320 else {
2321 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2322 type->tp_name, obj);
2323 goto error;
2324 }
2325 }
2326 }
2327
2328 if (reduce_value == NULL)
2329 goto error;
2330
2331 if (PyUnicode_Check(reduce_value)) {
2332 status = save_global(self, obj, reduce_value);
2333 goto done;
2334 }
2335
2336 if (!PyTuple_Check(reduce_value)) {
2337 PyErr_SetString(PicklingError,
2338 "__reduce__ must return a string or tuple");
2339 goto error;
2340 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002341
2342 status = save_reduce(self, reduce_value, obj);
2343
2344 if (0) {
2345 error:
2346 status = -1;
2347 }
2348 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002349 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002350 Py_XDECREF(memo_key);
2351 Py_XDECREF(reduce_func);
2352 Py_XDECREF(reduce_value);
2353
2354 return status;
2355}
2356
2357static int
2358dump(PicklerObject *self, PyObject *obj)
2359{
2360 const char stop_op = STOP;
2361
2362 if (self->proto >= 2) {
2363 char header[2];
2364
2365 header[0] = PROTO;
2366 assert(self->proto >= 0 && self->proto < 256);
2367 header[1] = (unsigned char)self->proto;
2368 if (pickler_write(self, header, 2) < 0)
2369 return -1;
2370 }
2371
2372 if (save(self, obj, 0) < 0 ||
2373 pickler_write(self, &stop_op, 1) < 0 ||
2374 pickler_write(self, NULL, 0) < 0)
2375 return -1;
2376
2377 return 0;
2378}
2379
2380PyDoc_STRVAR(Pickler_clear_memo_doc,
2381"clear_memo() -> None. Clears the pickler's \"memo\"."
2382"\n"
2383"The memo is the data structure that remembers which objects the\n"
2384"pickler has already seen, so that shared or recursive objects are\n"
2385"pickled by reference and not by value. This method is useful when\n"
2386"re-using picklers.");
2387
2388static PyObject *
2389Pickler_clear_memo(PicklerObject *self)
2390{
2391 if (self->memo)
2392 PyDict_Clear(self->memo);
2393
2394 Py_RETURN_NONE;
2395}
2396
2397PyDoc_STRVAR(Pickler_dump_doc,
2398"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2399
2400static PyObject *
2401Pickler_dump(PicklerObject *self, PyObject *args)
2402{
2403 PyObject *obj;
2404
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002405 /* Check whether the Pickler was initialized correctly (issue3664).
2406 Developers often forget to call __init__() in their subclasses, which
2407 would trigger a segfault without this check. */
2408 if (self->write == NULL) {
2409 PyErr_Format(PicklingError,
2410 "Pickler.__init__() was not called by %s.__init__()",
2411 Py_TYPE(self)->tp_name);
2412 return NULL;
2413 }
2414
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002415 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2416 return NULL;
2417
2418 if (dump(self, obj) < 0)
2419 return NULL;
2420
2421 Py_RETURN_NONE;
2422}
2423
/* Method table for the Pickler type: maps the Python-visible method names
   to their C implementations, calling conventions and docstrings. */
static struct PyMethodDef Pickler_methods[] = {
    /* dump(obj): takes positional arguments (METH_VARARGS). */
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    /* clear_memo(): takes no arguments (METH_NOARGS). */
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2431
2432static void
2433Pickler_dealloc(PicklerObject *self)
2434{
2435 PyObject_GC_UnTrack(self);
2436
2437 Py_XDECREF(self->write);
2438 Py_XDECREF(self->memo);
2439 Py_XDECREF(self->pers_func);
2440 Py_XDECREF(self->arg);
2441 Py_XDECREF(self->fast_memo);
2442
2443 PyMem_Free(self->write_buf);
2444
2445 Py_TYPE(self)->tp_free((PyObject *)self);
2446}
2447
/* tp_traverse slot for Pickler: visit each object reference held by the
   pickler.  Note that Py_VISIT relies on the parameters being named
   exactly `visit` and `arg`. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->memo);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
2458
2459static int
2460Pickler_clear(PicklerObject *self)
2461{
2462 Py_CLEAR(self->write);
2463 Py_CLEAR(self->memo);
2464 Py_CLEAR(self->pers_func);
2465 Py_CLEAR(self->arg);
2466 Py_CLEAR(self->fast_memo);
2467
2468 PyMem_Free(self->write_buf);
2469 self->write_buf = NULL;
2470
2471 return 0;
2472}
2473
/* Docstring for the Pickler type (installed as tp_doc in Pickler_Type).
   It describes the constructor arguments handled by Pickler_init(). */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported. The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
2493
2494static int
2495Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2496{
2497 static char *kwlist[] = {"file", "protocol", 0};
2498 PyObject *file;
2499 PyObject *proto_obj = NULL;
2500 long proto = 0;
2501
2502 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2503 kwlist, &file, &proto_obj))
2504 return -1;
2505
2506 /* In case of multiple __init__() calls, clear previous content. */
2507 if (self->write != NULL)
2508 (void)Pickler_clear(self);
2509
2510 if (proto_obj == NULL || proto_obj == Py_None)
2511 proto = DEFAULT_PROTOCOL;
2512 else
2513 proto = PyLong_AsLong(proto_obj);
2514
2515 if (proto < 0)
2516 proto = HIGHEST_PROTOCOL;
2517 if (proto > HIGHEST_PROTOCOL) {
2518 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2519 HIGHEST_PROTOCOL);
2520 return -1;
2521 }
2522
2523 self->proto = proto;
2524 self->bin = proto > 0;
2525 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002526 self->fast = 0;
2527 self->fast_nesting = 0;
2528 self->fast_memo = NULL;
2529
2530 if (!PyObject_HasAttrString(file, "write")) {
2531 PyErr_SetString(PyExc_TypeError,
2532 "file must have a 'write' attribute");
2533 return -1;
2534 }
2535 self->write = PyObject_GetAttrString(file, "write");
2536 if (self->write == NULL)
2537 return -1;
2538 self->buf_size = 0;
2539 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2540 if (self->write_buf == NULL) {
2541 PyErr_NoMemory();
2542 return -1;
2543 }
2544 self->pers_func = NULL;
2545 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2546 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2547 "persistent_id");
2548 if (self->pers_func == NULL)
2549 return -1;
2550 }
2551 self->memo = PyDict_New();
2552 if (self->memo == NULL)
2553 return -1;
2554
2555 return 0;
2556}
2557
2558static PyObject *
2559Pickler_get_memo(PicklerObject *self)
2560{
2561 if (self->memo == NULL)
2562 PyErr_SetString(PyExc_AttributeError, "memo");
2563 else
2564 Py_INCREF(self->memo);
2565 return self->memo;
2566}
2567
2568static int
2569Pickler_set_memo(PicklerObject *self, PyObject *value)
2570{
2571 PyObject *tmp;
2572
2573 if (value == NULL) {
2574 PyErr_SetString(PyExc_TypeError,
2575 "attribute deletion is not supported");
2576 return -1;
2577 }
2578 if (!PyDict_Check(value)) {
2579 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2580 return -1;
2581 }
2582
2583 tmp = self->memo;
2584 Py_INCREF(value);
2585 self->memo = value;
2586 Py_XDECREF(tmp);
2587
2588 return 0;
2589}
2590
2591static PyObject *
2592Pickler_get_persid(PicklerObject *self)
2593{
2594 if (self->pers_func == NULL)
2595 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2596 else
2597 Py_INCREF(self->pers_func);
2598 return self->pers_func;
2599}
2600
2601static int
2602Pickler_set_persid(PicklerObject *self, PyObject *value)
2603{
2604 PyObject *tmp;
2605
2606 if (value == NULL) {
2607 PyErr_SetString(PyExc_TypeError,
2608 "attribute deletion is not supported");
2609 return -1;
2610 }
2611 if (!PyCallable_Check(value)) {
2612 PyErr_SetString(PyExc_TypeError,
2613 "persistent_id must be a callable taking one argument");
2614 return -1;
2615 }
2616
2617 tmp = self->pers_func;
2618 Py_INCREF(value);
2619 self->pers_func = value;
2620 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2621
2622 return 0;
2623}
2624
/* Struct members of PicklerObject exposed directly as Python attributes.
   Both are plain C ints read from / written to the object in place. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}                      /* sentinel */
};
2630
/* Computed attributes of Pickler objects: each entry pairs an attribute
   name with its getter and setter functions. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}                      /* sentinel */
};
2638
/* Type object for _pickle.Pickler.  The type is subclassable
   (Py_TPFLAGS_BASETYPE) and participates in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC), hence the traverse/clear slots and the use of
   PyObject_GC_Del as tp_free. */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2681
2682/* Temporary helper for calling self.find_class().
2683
2684 XXX: It would be nice to able to avoid Python function call overhead, by
2685 using directly the C version of find_class(), when find_class() is not
2686 overridden by a subclass. Although, this could become rather hackish. A
2687 simpler optimization would be to call the C function when self is not a
2688 subclass instance. */
2689static PyObject *
2690find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2691{
2692 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2693 module_name, global_name);
2694}
2695
2696static int
2697marker(UnpicklerObject *self)
2698{
2699 if (self->num_marks < 1) {
2700 PyErr_SetString(UnpicklingError, "could not find MARK");
2701 return -1;
2702 }
2703
2704 return self->marks[--self->num_marks];
2705}
2706
/* Handler that appends Py_None to the unpickler's object stack.
   Returns 0 on success.  NOTE(review): the -1 passed to PDATA_APPEND is
   presumably the value returned if the append fails -- confirm against
   the macro definition. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
2713
/* Set the UnpicklingError used when a line read from the pickle is too
   short, and return -1 so callers can write `return bad_readline();`. */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
2720
2721static int
2722load_int(UnpicklerObject *self)
2723{
2724 PyObject *value;
2725 char *endptr, *s;
2726 Py_ssize_t len;
2727 long x;
2728
2729 if ((len = unpickler_readline(self, &s)) < 0)
2730 return -1;
2731 if (len < 2)
2732 return bad_readline();
2733
2734 errno = 0;
2735 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2736 x = strtol(s, &endptr, 0);
2737
2738 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2739 /* Hm, maybe we've got something long. Let's try reading
2740 * it as a Python long object. */
2741 errno = 0;
2742 /* XXX: Same thing about the base here. */
2743 value = PyLong_FromString(s, NULL, 0);
2744 if (value == NULL) {
2745 PyErr_SetString(PyExc_ValueError,
2746 "could not convert string to int");
2747 return -1;
2748 }
2749 }
2750 else {
2751 if (len == 3 && (x == 0 || x == 1)) {
2752 if ((value = PyBool_FromLong(x)) == NULL)
2753 return -1;
2754 }
2755 else {
2756 if ((value = PyLong_FromLong(x)) == NULL)
2757 return -1;
2758 }
2759 }
2760
2761 PDATA_PUSH(self->stack, value, -1);
2762 return 0;
2763}
2764
/* Append the given boolean singleton to the unpickler's object stack.
   boolean must be Py_True or Py_False (checked by assertion only).
   Returns 0 on success. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
2772
/* bytes holds size bytes of a little-endian integer.  Return its value as
 * a C long.  Obscure: when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The value is accumulated in an unsigned long so that a set top bit never
 * shifts into the sign bit of a signed type (undefined behaviour where
 * long is 32 bits wide).  Callers in this file pass size 1, 2 or 4.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long ux = 0;
    int i;

    for (i = 0; i < size; i++) {
        ux |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed.  Compute the two's-complement value arithmetically so the
     * result is the same whether long is 32 bits or wider:
     * value = ux - 2**32 = -(0xFFFFFFFF - ux) - 1.
     */
    if (size == 4 && (ux & 0x80000000UL)) {
        return -(long)(0xFFFFFFFFUL - ux) - 1;
    }

    return (long)ux;
}
2799
2800static int
2801load_binintx(UnpicklerObject *self, char *s, int size)
2802{
2803 PyObject *value;
2804 long x;
2805
2806 x = calc_binint(s, size);
2807
2808 if ((value = PyLong_FromLong(x)) == NULL)
2809 return -1;
2810
2811 PDATA_PUSH(self->stack, value, -1);
2812 return 0;
2813}
2814
2815static int
2816load_binint(UnpicklerObject *self)
2817{
2818 char *s;
2819
2820 if (unpickler_read(self, &s, 4) < 0)
2821 return -1;
2822
2823 return load_binintx(self, s, 4);
2824}
2825
2826static int
2827load_binint1(UnpicklerObject *self)
2828{
2829 char *s;
2830
2831 if (unpickler_read(self, &s, 1) < 0)
2832 return -1;
2833
2834 return load_binintx(self, s, 1);
2835}
2836
2837static int
2838load_binint2(UnpicklerObject *self)
2839{
2840 char *s;
2841
2842 if (unpickler_read(self, &s, 2) < 0)
2843 return -1;
2844
2845 return load_binintx(self, s, 2);
2846}
2847
2848static int
2849load_long(UnpicklerObject *self)
2850{
2851 PyObject *value;
2852 char *s;
2853 Py_ssize_t len;
2854
2855 if ((len = unpickler_readline(self, &s)) < 0)
2856 return -1;
2857 if (len < 2)
2858 return bad_readline();
2859
2860 /* XXX: Should the base argument explicitly set to 10? */
2861 if ((value = PyLong_FromString(s, NULL, 0)) == NULL)
2862 return -1;
2863
2864 PDATA_PUSH(self->stack, value, -1);
2865 return 0;
2866}
2867
2868/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2869 * data following.
2870 */
2871static int
2872load_counted_long(UnpicklerObject *self, int size)
2873{
2874 PyObject *value;
2875 char *nbytes;
2876 char *pdata;
2877
2878 assert(size == 1 || size == 4);
2879 if (unpickler_read(self, &nbytes, size) < 0)
2880 return -1;
2881
2882 size = calc_binint(nbytes, size);
2883 if (size < 0) {
2884 /* Corrupt or hostile pickle -- we never write one like this */
2885 PyErr_SetString(UnpicklingError,
2886 "LONG pickle has negative byte count");
2887 return -1;
2888 }
2889
2890 if (size == 0)
2891 value = PyLong_FromLong(0L);
2892 else {
2893 /* Read the raw little-endian bytes and convert. */
2894 if (unpickler_read(self, &pdata, size) < 0)
2895 return -1;
2896 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2897 1 /* little endian */ , 1 /* signed */ );
2898 }
2899 if (value == NULL)
2900 return -1;
2901 PDATA_PUSH(self->stack, value, -1);
2902 return 0;
2903}
2904
2905static int
2906load_float(UnpicklerObject *self)
2907{
2908 PyObject *value;
2909 char *endptr, *s;
2910 Py_ssize_t len;
2911 double d;
2912
2913 if ((len = unpickler_readline(self, &s)) < 0)
2914 return -1;
2915 if (len < 2)
2916 return bad_readline();
2917
2918 errno = 0;
2919 d = PyOS_ascii_strtod(s, &endptr);
2920
2921 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2922 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2923 return -1;
2924 }
2925
2926 if ((value = PyFloat_FromDouble(d)) == NULL)
2927 return -1;
2928
2929 PDATA_PUSH(self->stack, value, -1);
2930 return 0;
2931}
2932
2933static int
2934load_binfloat(UnpicklerObject *self)
2935{
2936 PyObject *value;
2937 double x;
2938 char *s;
2939
2940 if (unpickler_read(self, &s, 8) < 0)
2941 return -1;
2942
2943 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2944 if (x == -1.0 && PyErr_Occurred())
2945 return -1;
2946
2947 if ((value = PyFloat_FromDouble(x)) == NULL)
2948 return -1;
2949
2950 PDATA_PUSH(self->stack, value, -1);
2951 return 0;
2952}
2953
2954static int
2955load_string(UnpicklerObject *self)
2956{
2957 PyObject *bytes;
2958 PyObject *str = NULL;
2959 Py_ssize_t len;
2960 char *s, *p;
2961
2962 if ((len = unpickler_readline(self, &s)) < 0)
2963 return -1;
2964 if (len < 3)
2965 return bad_readline();
2966 if ((s = strdup(s)) == NULL) {
2967 PyErr_NoMemory();
2968 return -1;
2969 }
2970
2971 /* Strip outermost quotes */
2972 while (s[len - 1] <= ' ')
2973 len--;
2974 if (s[0] == '"' && s[len - 1] == '"') {
2975 s[len - 1] = '\0';
2976 p = s + 1;
2977 len -= 2;
2978 }
2979 else if (s[0] == '\'' && s[len - 1] == '\'') {
2980 s[len - 1] = '\0';
2981 p = s + 1;
2982 len -= 2;
2983 }
2984 else {
2985 free(s);
2986 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
2987 return -1;
2988 }
2989
2990 /* Use the PyBytes API to decode the string, since that is what is used
2991 to encode, and then coerce the result to Unicode. */
2992 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
2993 free(s);
2994 if (bytes == NULL)
2995 return -1;
2996 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
2997 Py_DECREF(bytes);
2998 if (str == NULL)
2999 return -1;
3000
3001 PDATA_PUSH(self->stack, str, -1);
3002 return 0;
3003}
3004
3005static int
3006load_binbytes(UnpicklerObject *self)
3007{
3008 PyObject *bytes;
3009 long x;
3010 char *s;
3011
3012 if (unpickler_read(self, &s, 4) < 0)
3013 return -1;
3014
3015 x = calc_binint(s, 4);
3016 if (x < 0) {
3017 PyErr_SetString(UnpicklingError,
3018 "BINBYTES pickle has negative byte count");
3019 return -1;
3020 }
3021
3022 if (unpickler_read(self, &s, x) < 0)
3023 return -1;
3024 bytes = PyBytes_FromStringAndSize(s, x);
3025 if (bytes == NULL)
3026 return -1;
3027
3028 PDATA_PUSH(self->stack, bytes, -1);
3029 return 0;
3030}
3031
3032static int
3033load_short_binbytes(UnpicklerObject *self)
3034{
3035 PyObject *bytes;
3036 unsigned char x;
3037 char *s;
3038
3039 if (unpickler_read(self, &s, 1) < 0)
3040 return -1;
3041
3042 x = (unsigned char)s[0];
3043
3044 if (unpickler_read(self, &s, x) < 0)
3045 return -1;
3046
3047 bytes = PyBytes_FromStringAndSize(s, x);
3048 if (bytes == NULL)
3049 return -1;
3050
3051 PDATA_PUSH(self->stack, bytes, -1);
3052 return 0;
3053}
3054
3055static int
3056load_binstring(UnpicklerObject *self)
3057{
3058 PyObject *str;
3059 long x;
3060 char *s;
3061
3062 if (unpickler_read(self, &s, 4) < 0)
3063 return -1;
3064
3065 x = calc_binint(s, 4);
3066 if (x < 0) {
3067 PyErr_SetString(UnpicklingError,
3068 "BINSTRING pickle has negative byte count");
3069 return -1;
3070 }
3071
3072 if (unpickler_read(self, &s, x) < 0)
3073 return -1;
3074
3075 /* Convert Python 2.x strings to unicode. */
3076 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3077 if (str == NULL)
3078 return -1;
3079
3080 PDATA_PUSH(self->stack, str, -1);
3081 return 0;
3082}
3083
3084static int
3085load_short_binstring(UnpicklerObject *self)
3086{
3087 PyObject *str;
3088 unsigned char x;
3089 char *s;
3090
3091 if (unpickler_read(self, &s, 1) < 0)
3092 return -1;
3093
3094 x = (unsigned char)s[0];
3095
3096 if (unpickler_read(self, &s, x) < 0)
3097 return -1;
3098
3099 /* Convert Python 2.x strings to unicode. */
3100 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3101 if (str == NULL)
3102 return -1;
3103
3104 PDATA_PUSH(self->stack, str, -1);
3105 return 0;
3106}
3107
3108static int
3109load_unicode(UnpicklerObject *self)
3110{
3111 PyObject *str;
3112 Py_ssize_t len;
3113 char *s;
3114
3115 if ((len = unpickler_readline(self, &s)) < 0)
3116 return -1;
3117 if (len < 1)
3118 return bad_readline();
3119
3120 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3121 if (str == NULL)
3122 return -1;
3123
3124 PDATA_PUSH(self->stack, str, -1);
3125 return 0;
3126}
3127
3128static int
3129load_binunicode(UnpicklerObject *self)
3130{
3131 PyObject *str;
3132 long size;
3133 char *s;
3134
3135 if (unpickler_read(self, &s, 4) < 0)
3136 return -1;
3137
3138 size = calc_binint(s, 4);
3139 if (size < 0) {
3140 PyErr_SetString(UnpicklingError,
3141 "BINUNICODE pickle has negative byte count");
3142 return -1;
3143 }
3144
3145 if (unpickler_read(self, &s, size) < 0)
3146 return -1;
3147
3148 str = PyUnicode_DecodeUTF8(s, size, NULL);
3149 if (str == NULL)
3150 return -1;
3151
3152 PDATA_PUSH(self->stack, str, -1);
3153 return 0;
3154}
3155
3156static int
3157load_tuple(UnpicklerObject *self)
3158{
3159 PyObject *tuple;
3160 int i;
3161
3162 if ((i = marker(self)) < 0)
3163 return -1;
3164
3165 tuple = Pdata_poptuple(self->stack, i);
3166 if (tuple == NULL)
3167 return -1;
3168 PDATA_PUSH(self->stack, tuple, -1);
3169 return 0;
3170}
3171
3172static int
3173load_counted_tuple(UnpicklerObject *self, int len)
3174{
3175 PyObject *tuple;
3176
3177 tuple = PyTuple_New(len);
3178 if (tuple == NULL)
3179 return -1;
3180
3181 while (--len >= 0) {
3182 PyObject *item;
3183
3184 PDATA_POP(self->stack, item);
3185 if (item == NULL)
3186 return -1;
3187 PyTuple_SET_ITEM(tuple, len, item);
3188 }
3189 PDATA_PUSH(self->stack, tuple, -1);
3190 return 0;
3191}
3192
3193static int
3194load_empty_list(UnpicklerObject *self)
3195{
3196 PyObject *list;
3197
3198 if ((list = PyList_New(0)) == NULL)
3199 return -1;
3200 PDATA_PUSH(self->stack, list, -1);
3201 return 0;
3202}
3203
3204static int
3205load_empty_dict(UnpicklerObject *self)
3206{
3207 PyObject *dict;
3208
3209 if ((dict = PyDict_New()) == NULL)
3210 return -1;
3211 PDATA_PUSH(self->stack, dict, -1);
3212 return 0;
3213}
3214
3215static int
3216load_list(UnpicklerObject *self)
3217{
3218 PyObject *list;
3219 int i;
3220
3221 if ((i = marker(self)) < 0)
3222 return -1;
3223
3224 list = Pdata_poplist(self->stack, i);
3225 if (list == NULL)
3226 return -1;
3227 PDATA_PUSH(self->stack, list, -1);
3228 return 0;
3229}
3230
3231static int
3232load_dict(UnpicklerObject *self)
3233{
3234 PyObject *dict, *key, *value;
3235 int i, j, k;
3236
3237 if ((i = marker(self)) < 0)
3238 return -1;
3239 j = self->stack->length;
3240
3241 if ((dict = PyDict_New()) == NULL)
3242 return -1;
3243
3244 for (k = i + 1; k < j; k += 2) {
3245 key = self->stack->data[k - 1];
3246 value = self->stack->data[k];
3247 if (PyDict_SetItem(dict, key, value) < 0) {
3248 Py_DECREF(dict);
3249 return -1;
3250 }
3251 }
3252 Pdata_clear(self->stack, i);
3253 PDATA_PUSH(self->stack, dict, -1);
3254 return 0;
3255}
3256
/* Call cls with the positional arguments in args and return the new
   object.

   On failure, NULL is returned and the pending exception's value is
   replaced by the tuple (original_value, cls, args) -- with None standing
   in for a missing original value -- while the exception type and
   traceback are kept. */
static PyObject *
instantiate(PyObject *cls, PyObject *args)
{
    PyObject *r = NULL;

    /* XXX: The pickle.py module does not create instances this way when the
       args tuple is empty. See Unpickler._instantiate(). */
    if ((r = PyObject_CallObject(cls, args)))
        return r;

    /* XXX: Is this still necessary? */
    {
        PyObject *tp, *v, *tb, *tmp_value;

        /* Take ownership of the pending exception so it can be amended. */
        PyErr_Fetch(&tp, &v, &tb);
        tmp_value = v;
        /* NULL occurs when there was a KeyboardInterrupt */
        if (tmp_value == NULL)
            tmp_value = Py_None;
        /* If building the tuple fails, the original value is kept as is. */
        if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
            Py_XDECREF(v);
            v = r;
        }
        PyErr_Restore(tp, v, tb);
    }
    return NULL;
}
3284
3285static int
3286load_obj(UnpicklerObject *self)
3287{
3288 PyObject *cls, *args, *obj = NULL;
3289 int i;
3290
3291 if ((i = marker(self)) < 0)
3292 return -1;
3293
3294 args = Pdata_poptuple(self->stack, i + 1);
3295 if (args == NULL)
3296 return -1;
3297
3298 PDATA_POP(self->stack, cls);
3299 if (cls) {
3300 obj = instantiate(cls, args);
3301 Py_DECREF(cls);
3302 }
3303 Py_DECREF(args);
3304 if (obj == NULL)
3305 return -1;
3306
3307 PDATA_PUSH(self->stack, obj, -1);
3308 return 0;
3309}
3310
3311static int
3312load_inst(UnpicklerObject *self)
3313{
3314 PyObject *cls = NULL;
3315 PyObject *args = NULL;
3316 PyObject *obj = NULL;
3317 PyObject *module_name;
3318 PyObject *class_name;
3319 Py_ssize_t len;
3320 int i;
3321 char *s;
3322
3323 if ((i = marker(self)) < 0)
3324 return -1;
3325 if ((len = unpickler_readline(self, &s)) < 0)
3326 return -1;
3327 if (len < 2)
3328 return bad_readline();
3329
3330 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3331 identifiers are permitted in Python 3.0, since the INST opcode is only
3332 supported by older protocols on Python 2.x. */
3333 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3334 if (module_name == NULL)
3335 return -1;
3336
3337 if ((len = unpickler_readline(self, &s)) >= 0) {
3338 if (len < 2)
3339 return bad_readline();
3340 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3341 if (class_name == NULL) {
3342 cls = find_class(self, module_name, class_name);
3343 Py_DECREF(class_name);
3344 }
3345 }
3346 Py_DECREF(module_name);
3347
3348 if (cls == NULL)
3349 return -1;
3350
3351 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3352 obj = instantiate(cls, args);
3353 Py_DECREF(args);
3354 }
3355 Py_DECREF(cls);
3356
3357 if (obj == NULL)
3358 return -1;
3359
3360 PDATA_PUSH(self->stack, obj, -1);
3361 return 0;
3362}
3363
3364static int
3365load_newobj(UnpicklerObject *self)
3366{
3367 PyObject *args = NULL;
3368 PyObject *clsraw = NULL;
3369 PyTypeObject *cls; /* clsraw cast to its true type */
3370 PyObject *obj;
3371
3372 /* Stack is ... cls argtuple, and we want to call
3373 * cls.__new__(cls, *argtuple).
3374 */
3375 PDATA_POP(self->stack, args);
3376 if (args == NULL)
3377 goto error;
3378 if (!PyTuple_Check(args)) {
3379 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3380 goto error;
3381 }
3382
3383 PDATA_POP(self->stack, clsraw);
3384 cls = (PyTypeObject *)clsraw;
3385 if (cls == NULL)
3386 goto error;
3387 if (!PyType_Check(cls)) {
3388 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3389 "isn't a type object");
3390 goto error;
3391 }
3392 if (cls->tp_new == NULL) {
3393 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3394 "has NULL tp_new");
3395 goto error;
3396 }
3397
3398 /* Call __new__. */
3399 obj = cls->tp_new(cls, args, NULL);
3400 if (obj == NULL)
3401 goto error;
3402
3403 Py_DECREF(args);
3404 Py_DECREF(clsraw);
3405 PDATA_PUSH(self->stack, obj, -1);
3406 return 0;
3407
3408 error:
3409 Py_XDECREF(args);
3410 Py_XDECREF(clsraw);
3411 return -1;
3412}
3413
3414static int
3415load_global(UnpicklerObject *self)
3416{
3417 PyObject *global = NULL;
3418 PyObject *module_name;
3419 PyObject *global_name;
3420 Py_ssize_t len;
3421 char *s;
3422
3423 if ((len = unpickler_readline(self, &s)) < 0)
3424 return -1;
3425 if (len < 2)
3426 return bad_readline();
3427 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3428 if (!module_name)
3429 return -1;
3430
3431 if ((len = unpickler_readline(self, &s)) >= 0) {
3432 if (len < 2) {
3433 Py_DECREF(module_name);
3434 return bad_readline();
3435 }
3436 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3437 if (global_name) {
3438 global = find_class(self, module_name, global_name);
3439 Py_DECREF(global_name);
3440 }
3441 }
3442 Py_DECREF(module_name);
3443
3444 if (global == NULL)
3445 return -1;
3446 PDATA_PUSH(self->stack, global, -1);
3447 return 0;
3448}
3449
3450static int
3451load_persid(UnpicklerObject *self)
3452{
3453 PyObject *pid;
3454 Py_ssize_t len;
3455 char *s;
3456
3457 if (self->pers_func) {
3458 if ((len = unpickler_readline(self, &s)) < 0)
3459 return -1;
3460 if (len < 2)
3461 return bad_readline();
3462
3463 pid = PyBytes_FromStringAndSize(s, len - 1);
3464 if (pid == NULL)
3465 return -1;
3466
3467 /* Ugh... this does not leak since unpickler_call() steals the
3468 reference to pid first. */
3469 pid = unpickler_call(self, self->pers_func, pid);
3470 if (pid == NULL)
3471 return -1;
3472
3473 PDATA_PUSH(self->stack, pid, -1);
3474 return 0;
3475 }
3476 else {
3477 PyErr_SetString(UnpicklingError,
3478 "A load persistent id instruction was encountered,\n"
3479 "but no persistent_load function was specified.");
3480 return -1;
3481 }
3482}
3483
3484static int
3485load_binpersid(UnpicklerObject *self)
3486{
3487 PyObject *pid;
3488
3489 if (self->pers_func) {
3490 PDATA_POP(self->stack, pid);
3491 if (pid == NULL)
3492 return -1;
3493
3494 /* Ugh... this does not leak since unpickler_call() steals the
3495 reference to pid first. */
3496 pid = unpickler_call(self, self->pers_func, pid);
3497 if (pid == NULL)
3498 return -1;
3499
3500 PDATA_PUSH(self->stack, pid, -1);
3501 return 0;
3502 }
3503 else {
3504 PyErr_SetString(UnpicklingError,
3505 "A load persistent id instruction was encountered,\n"
3506 "but no persistent_load function was specified.");
3507 return -1;
3508 }
3509}
3510
3511static int
3512load_pop(UnpicklerObject *self)
3513{
3514 int len;
3515
3516 if ((len = self->stack->length) <= 0)
3517 return stack_underflow();
3518
3519 /* Note that we split the (pickle.py) stack into two stacks,
3520 * an object stack and a mark stack. We have to be clever and
3521 * pop the right one. We do this by looking at the top of the
3522 * mark stack.
3523 */
3524
3525 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3526 self->num_marks--;
3527 else {
3528 len--;
3529 Py_DECREF(self->stack->data[len]);
3530 self->stack->length = len;
3531 }
3532
3533 return 0;
3534}
3535
3536static int
3537load_pop_mark(UnpicklerObject *self)
3538{
3539 int i;
3540
3541 if ((i = marker(self)) < 0)
3542 return -1;
3543
3544 Pdata_clear(self->stack, i);
3545
3546 return 0;
3547}
3548
3549static int
3550load_dup(UnpicklerObject *self)
3551{
3552 PyObject *last;
3553 int len;
3554
3555 if ((len = self->stack->length) <= 0)
3556 return stack_underflow();
3557 last = self->stack->data[len - 1];
3558 PDATA_APPEND(self->stack, last, -1);
3559 return 0;
3560}
3561
3562static int
3563load_get(UnpicklerObject *self)
3564{
3565 PyObject *key, *value;
3566 Py_ssize_t len;
3567 char *s;
3568
3569 if ((len = unpickler_readline(self, &s)) < 0)
3570 return -1;
3571 if (len < 2)
3572 return bad_readline();
3573
3574 key = PyLong_FromString(s, NULL, 10);
3575 if (key == NULL)
3576 return -1;
3577
3578 value = PyDict_GetItemWithError(self->memo, key);
3579 if (value == NULL) {
3580 if (!PyErr_Occurred())
3581 PyErr_SetObject(PyExc_KeyError, key);
3582 Py_DECREF(key);
3583 return -1;
3584 }
3585 Py_DECREF(key);
3586
3587 PDATA_APPEND(self->stack, value, -1);
3588 return 0;
3589}
3590
3591static int
3592load_binget(UnpicklerObject *self)
3593{
3594 PyObject *key, *value;
3595 char *s;
3596
3597 if (unpickler_read(self, &s, 1) < 0)
3598 return -1;
3599
3600 /* Here, the unsigned cast is necessary to avoid negative values. */
3601 key = PyLong_FromLong((long)(unsigned char)s[0]);
3602 if (key == NULL)
3603 return -1;
3604
3605 value = PyDict_GetItemWithError(self->memo, key);
3606 if (value == NULL) {
3607 if (!PyErr_Occurred())
3608 PyErr_SetObject(PyExc_KeyError, key);
3609 Py_DECREF(key);
3610 return -1;
3611 }
3612 Py_DECREF(key);
3613
3614 PDATA_APPEND(self->stack, value, -1);
3615 return 0;
3616}
3617
3618static int
3619load_long_binget(UnpicklerObject *self)
3620{
3621 PyObject *key, *value;
3622 char *s;
3623 long k;
3624
3625 if (unpickler_read(self, &s, 4) < 0)
3626 return -1;
3627
3628 k = (long)(unsigned char)s[0];
3629 k |= (long)(unsigned char)s[1] << 8;
3630 k |= (long)(unsigned char)s[2] << 16;
3631 k |= (long)(unsigned char)s[3] << 24;
3632
3633 key = PyLong_FromLong(k);
3634 if (key == NULL)
3635 return -1;
3636
3637 value = PyDict_GetItemWithError(self->memo, key);
3638 if (value == NULL) {
3639 if (!PyErr_Occurred())
3640 PyErr_SetObject(PyExc_KeyError, key);
3641 Py_DECREF(key);
3642 return -1;
3643 }
3644 Py_DECREF(key);
3645
3646 PDATA_APPEND(self->stack, value, -1);
3647 return 0;
3648}
3649
3650/* Push an object from the extension registry (EXT[124]). nbytes is
3651 * the number of bytes following the opcode, holding the index (code) value.
3652 */
3653static int
3654load_extension(UnpicklerObject *self, int nbytes)
3655{
3656 char *codebytes; /* the nbytes bytes after the opcode */
3657 long code; /* calc_binint returns long */
3658 PyObject *py_code; /* code as a Python int */
3659 PyObject *obj; /* the object to push */
3660 PyObject *pair; /* (module_name, class_name) */
3661 PyObject *module_name, *class_name;
3662
3663 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3664 if (unpickler_read(self, &codebytes, nbytes) < 0)
3665 return -1;
3666 code = calc_binint(codebytes, nbytes);
3667 if (code <= 0) { /* note that 0 is forbidden */
3668 /* Corrupt or hostile pickle. */
3669 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3670 return -1;
3671 }
3672
3673 /* Look for the code in the cache. */
3674 py_code = PyLong_FromLong(code);
3675 if (py_code == NULL)
3676 return -1;
3677 obj = PyDict_GetItem(extension_cache, py_code);
3678 if (obj != NULL) {
3679 /* Bingo. */
3680 Py_DECREF(py_code);
3681 PDATA_APPEND(self->stack, obj, -1);
3682 return 0;
3683 }
3684
3685 /* Look up the (module_name, class_name) pair. */
3686 pair = PyDict_GetItem(inverted_registry, py_code);
3687 if (pair == NULL) {
3688 Py_DECREF(py_code);
3689 PyErr_Format(PyExc_ValueError, "unregistered extension "
3690 "code %ld", code);
3691 return -1;
3692 }
3693 /* Since the extension registry is manipulable via Python code,
3694 * confirm that pair is really a 2-tuple of strings.
3695 */
3696 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3697 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3698 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3699 Py_DECREF(py_code);
3700 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3701 "isn't a 2-tuple of strings", code);
3702 return -1;
3703 }
3704 /* Load the object. */
3705 obj = find_class(self, module_name, class_name);
3706 if (obj == NULL) {
3707 Py_DECREF(py_code);
3708 return -1;
3709 }
3710 /* Cache code -> obj. */
3711 code = PyDict_SetItem(extension_cache, py_code, obj);
3712 Py_DECREF(py_code);
3713 if (code < 0) {
3714 Py_DECREF(obj);
3715 return -1;
3716 }
3717 PDATA_PUSH(self->stack, obj, -1);
3718 return 0;
3719}
3720
3721static int
3722load_put(UnpicklerObject *self)
3723{
3724 PyObject *key, *value;
3725 Py_ssize_t len;
3726 char *s;
3727 int x;
3728
3729 if ((len = unpickler_readline(self, &s)) < 0)
3730 return -1;
3731 if (len < 2)
3732 return bad_readline();
3733 if ((x = self->stack->length) <= 0)
3734 return stack_underflow();
3735
3736 key = PyLong_FromString(s, NULL, 10);
3737 if (key == NULL)
3738 return -1;
3739 value = self->stack->data[x - 1];
3740
3741 x = PyDict_SetItem(self->memo, key, value);
3742 Py_DECREF(key);
3743 return x;
3744}
3745
3746static int
3747load_binput(UnpicklerObject *self)
3748{
3749 PyObject *key, *value;
3750 char *s;
3751 int x;
3752
3753 if (unpickler_read(self, &s, 1) < 0)
3754 return -1;
3755 if ((x = self->stack->length) <= 0)
3756 return stack_underflow();
3757
3758 key = PyLong_FromLong((long)(unsigned char)s[0]);
3759 if (key == NULL)
3760 return -1;
3761 value = self->stack->data[x - 1];
3762
3763 x = PyDict_SetItem(self->memo, key, value);
3764 Py_DECREF(key);
3765 return x;
3766}
3767
3768static int
3769load_long_binput(UnpicklerObject *self)
3770{
3771 PyObject *key, *value;
3772 long k;
3773 char *s;
3774 int x;
3775
3776 if (unpickler_read(self, &s, 4) < 0)
3777 return -1;
3778 if ((x = self->stack->length) <= 0)
3779 return stack_underflow();
3780
3781 k = (long)(unsigned char)s[0];
3782 k |= (long)(unsigned char)s[1] << 8;
3783 k |= (long)(unsigned char)s[2] << 16;
3784 k |= (long)(unsigned char)s[3] << 24;
3785
3786 key = PyLong_FromLong(k);
3787 if (key == NULL)
3788 return -1;
3789 value = self->stack->data[x - 1];
3790
3791 x = PyDict_SetItem(self->memo, key, value);
3792 Py_DECREF(key);
3793 return x;
3794}
3795
3796static int
3797do_append(UnpicklerObject *self, int x)
3798{
3799 PyObject *value;
3800 PyObject *list;
3801 int len, i;
3802
3803 len = self->stack->length;
3804 if (x > len || x <= 0)
3805 return stack_underflow();
3806 if (len == x) /* nothing to do */
3807 return 0;
3808
3809 list = self->stack->data[x - 1];
3810
3811 if (PyList_Check(list)) {
3812 PyObject *slice;
3813 Py_ssize_t list_len;
3814
3815 slice = Pdata_poplist(self->stack, x);
3816 if (!slice)
3817 return -1;
3818 list_len = PyList_GET_SIZE(list);
3819 i = PyList_SetSlice(list, list_len, list_len, slice);
3820 Py_DECREF(slice);
3821 return i;
3822 }
3823 else {
3824 PyObject *append_func;
3825
3826 append_func = PyObject_GetAttrString(list, "append");
3827 if (append_func == NULL)
3828 return -1;
3829 for (i = x; i < len; i++) {
3830 PyObject *result;
3831
3832 value = self->stack->data[i];
3833 result = unpickler_call(self, append_func, value);
3834 if (result == NULL) {
3835 Pdata_clear(self->stack, i + 1);
3836 self->stack->length = x;
3837 return -1;
3838 }
3839 Py_DECREF(result);
3840 }
3841 self->stack->length = x;
3842 }
3843
3844 return 0;
3845}
3846
3847static int
3848load_append(UnpicklerObject *self)
3849{
3850 return do_append(self, self->stack->length - 1);
3851}
3852
3853static int
3854load_appends(UnpicklerObject *self)
3855{
3856 return do_append(self, marker(self));
3857}
3858
3859static int
3860do_setitems(UnpicklerObject *self, int x)
3861{
3862 PyObject *value, *key;
3863 PyObject *dict;
3864 int len, i;
3865 int status = 0;
3866
3867 len = self->stack->length;
3868 if (x > len || x <= 0)
3869 return stack_underflow();
3870 if (len == x) /* nothing to do */
3871 return 0;
3872 if ((len - x) % 2 != 0) {
3873 /* Currupt or hostile pickle -- we never write one like this. */
3874 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3875 return -1;
3876 }
3877
3878 /* Here, dict does not actually need to be a PyDict; it could be anything
3879 that supports the __setitem__ attribute. */
3880 dict = self->stack->data[x - 1];
3881
3882 for (i = x + 1; i < len; i += 2) {
3883 key = self->stack->data[i - 1];
3884 value = self->stack->data[i];
3885 if (PyObject_SetItem(dict, key, value) < 0) {
3886 status = -1;
3887 break;
3888 }
3889 }
3890
3891 Pdata_clear(self->stack, x);
3892 return status;
3893}
3894
3895static int
3896load_setitem(UnpicklerObject *self)
3897{
3898 return do_setitems(self, self->stack->length - 2);
3899}
3900
3901static int
3902load_setitems(UnpicklerObject *self)
3903{
3904 return do_setitems(self, marker(self));
3905}
3906
3907static int
3908load_build(UnpicklerObject *self)
3909{
3910 PyObject *state, *inst, *slotstate;
3911 PyObject *setstate;
3912 int status = 0;
3913
3914 /* Stack is ... instance, state. We want to leave instance at
3915 * the stack top, possibly mutated via instance.__setstate__(state).
3916 */
3917 if (self->stack->length < 2)
3918 return stack_underflow();
3919
3920 PDATA_POP(self->stack, state);
3921 if (state == NULL)
3922 return -1;
3923
3924 inst = self->stack->data[self->stack->length - 1];
3925
3926 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003927 if (setstate == NULL) {
3928 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3929 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003930 else {
3931 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003932 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003933 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003934 }
3935 else {
3936 PyObject *result;
3937
3938 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003939 /* Ugh... this does not leak since unpickler_call() steals the
3940 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003941 result = unpickler_call(self, setstate, state);
3942 Py_DECREF(setstate);
3943 if (result == NULL)
3944 return -1;
3945 Py_DECREF(result);
3946 return 0;
3947 }
3948
3949 /* A default __setstate__. First see whether state embeds a
3950 * slot state dict too (a proto 2 addition).
3951 */
3952 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3953 PyObject *tmp = state;
3954
3955 state = PyTuple_GET_ITEM(tmp, 0);
3956 slotstate = PyTuple_GET_ITEM(tmp, 1);
3957 Py_INCREF(state);
3958 Py_INCREF(slotstate);
3959 Py_DECREF(tmp);
3960 }
3961 else
3962 slotstate = NULL;
3963
3964 /* Set inst.__dict__ from the state dict (if any). */
3965 if (state != Py_None) {
3966 PyObject *dict;
3967
3968 if (!PyDict_Check(state)) {
3969 PyErr_SetString(UnpicklingError, "state is not a dictionary");
3970 goto error;
3971 }
3972 dict = PyObject_GetAttrString(inst, "__dict__");
3973 if (dict == NULL)
3974 goto error;
3975
3976 PyDict_Update(dict, state);
3977 Py_DECREF(dict);
3978 }
3979
3980 /* Also set instance attributes from the slotstate dict (if any). */
3981 if (slotstate != NULL) {
3982 PyObject *d_key, *d_value;
3983 Py_ssize_t i;
3984
3985 if (!PyDict_Check(slotstate)) {
3986 PyErr_SetString(UnpicklingError,
3987 "slot state is not a dictionary");
3988 goto error;
3989 }
3990 i = 0;
3991 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
3992 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
3993 goto error;
3994 }
3995 }
3996
3997 if (0) {
3998 error:
3999 status = -1;
4000 }
4001
4002 Py_DECREF(state);
4003 Py_XDECREF(slotstate);
4004 return status;
4005}
4006
4007static int
4008load_mark(UnpicklerObject *self)
4009{
4010
4011 /* Note that we split the (pickle.py) stack into two stacks, an
4012 * object stack and a mark stack. Here we push a mark onto the
4013 * mark stack.
4014 */
4015
4016 if ((self->num_marks + 1) >= self->marks_size) {
4017 size_t alloc;
4018 int *marks;
4019
4020 /* Use the size_t type to check for overflow. */
4021 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004022 if (alloc > PY_SSIZE_T_MAX ||
4023 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004024 PyErr_NoMemory();
4025 return -1;
4026 }
4027
4028 if (self->marks == NULL)
4029 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4030 else
4031 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4032 if (marks == NULL) {
4033 PyErr_NoMemory();
4034 return -1;
4035 }
4036 self->marks = marks;
4037 self->marks_size = (Py_ssize_t)alloc;
4038 }
4039
4040 self->marks[self->num_marks++] = self->stack->length;
4041
4042 return 0;
4043}
4044
4045static int
4046load_reduce(UnpicklerObject *self)
4047{
4048 PyObject *callable = NULL;
4049 PyObject *argtup = NULL;
4050 PyObject *obj = NULL;
4051
4052 PDATA_POP(self->stack, argtup);
4053 if (argtup == NULL)
4054 return -1;
4055 PDATA_POP(self->stack, callable);
4056 if (callable) {
4057 obj = instantiate(callable, argtup);
4058 Py_DECREF(callable);
4059 }
4060 Py_DECREF(argtup);
4061
4062 if (obj == NULL)
4063 return -1;
4064
4065 PDATA_PUSH(self->stack, obj, -1);
4066 return 0;
4067}
4068
4069/* Just raises an error if we don't know the protocol specified. PROTO
4070 * is the first opcode for protocols >= 2.
4071 */
4072static int
4073load_proto(UnpicklerObject *self)
4074{
4075 char *s;
4076 int i;
4077
4078 if (unpickler_read(self, &s, 1) < 0)
4079 return -1;
4080
4081 i = (unsigned char)s[0];
4082 if (i <= HIGHEST_PROTOCOL)
4083 return 0;
4084
4085 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4086 return -1;
4087}
4088
/* Main unpickling loop.
 *
 * Resets the mark count and empties the object stack, then reads one
 * opcode byte at a time and dispatches it to the matching load_* handler.
 * The loop ends when STOP is read, when reading fails, or when a handler
 * returns a negative value.
 *
 * Returns the object popped from the top of the stack (the value is
 * handed to the caller as popped by PDATA_POP), or NULL when an exception
 * is pending.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    self->num_marks = 0;
    if (self->stack->length)
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below. */
    /* In both macros, "break" leaves the switch and reaches the trailing
     * "break" that ends the while loop (handler failed), whereas
     * "continue" goes on to read the next opcode (handler succeeded).
     */
#define OP(opcode, load_func) \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg) \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (unpickler_read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* Normal end of the pickle: fall out of the loop. */
        case STOP:
            break;

        /* A NUL byte where an opcode is expected is reported as EOF. */
        case '\0':
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* XXX: It is not clear what this is actually for. */
    /* Whatever the reason for the inner test, any pending exception makes
     * this function return NULL here.
     */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    /* The result of the pickle is whatever is left on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
4194
/* Docstring of Unpickler.load(); referenced from Unpickler_methods. */
PyDoc_STRVAR(Unpickler_load_doc,
"load() -> object. Load a pickle."
"\n"
"Read a pickled object representation from the open file object given in\n"
"the constructor, and return the reconstituted object hierarchy specified\n"
"therein.\n");
4201
4202static PyObject *
4203Unpickler_load(UnpicklerObject *self)
4204{
4205 /* Check whether the Unpickler was initialized correctly. This prevents
4206 segfaulting if a subclass overridden __init__ with a function that does
4207 not call Unpickler.__init__(). Here, we simply ensure that self->read
4208 is not NULL. */
4209 if (self->read == NULL) {
4210 PyErr_Format(UnpicklingError,
4211 "Unpickler.__init__() was not called by %s.__init__()",
4212 Py_TYPE(self)->tp_name);
4213 return NULL;
4214 }
4215
4216 return load(self);
4217}
4218
/* The name of find_class() is misleading.  In newer pickle protocols, this
   function is used for loading any global (e.g., functions), not just
   classes.  The name is kept only for backward compatibility. */

PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary.  Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed.  Both arguments passed are str objects.\n");
4232
4233static PyObject *
4234Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4235{
4236 PyObject *global;
4237 PyObject *modules_dict;
4238 PyObject *module;
4239 PyObject *module_name, *global_name;
4240
4241 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4242 &module_name, &global_name))
4243 return NULL;
4244
4245 modules_dict = PySys_GetObject("modules");
4246 if (modules_dict == NULL)
4247 return NULL;
4248
4249 module = PyDict_GetItem(modules_dict, module_name);
4250 if (module == NULL) {
4251 module = PyImport_Import(module_name);
4252 if (module == NULL)
4253 return NULL;
4254 global = PyObject_GetAttr(module, global_name);
4255 Py_DECREF(module);
4256 }
4257 else {
4258 global = PyObject_GetAttr(module, global_name);
4259 }
4260 return global;
4261}
4262
/* Method table of the Unpickler type (installed as tp_methods in
 * Unpickler_Type): load() takes no arguments, find_class() takes its
 * arguments as a tuple.
 */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
4270
/* tp_dealloc of the Unpickler type: release everything the instance owns
 * and then free the instance itself.
 */
static void
Unpickler_dealloc(UnpicklerObject *self)
{
    /* Stop the garbage collector from tracking the object before its
     * fields are torn down.
     */
    PyObject_GC_UnTrack((PyObject *)self);
    Py_XDECREF(self->readline);
    Py_XDECREF(self->read);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->stack);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->last_string);

    /* marks comes from PyMem_Malloc()/PyMem_Realloc() (see load_mark());
     * encoding and errors come from strdup() (see Unpickler_init()).
     */
    PyMem_Free(self->marks);
    free(self->encoding);
    free(self->errors);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
4289
/* tp_traverse of the Unpickler type: report every object reference held
 * by the instance to the garbage collector.  Py_VISIT relies on the
 * parameters being named "visit" and "arg".
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->memo);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->last_string);
    return 0;
}
4302
4303static int
4304Unpickler_clear(UnpicklerObject *self)
4305{
4306 Py_CLEAR(self->readline);
4307 Py_CLEAR(self->read);
4308 Py_CLEAR(self->memo);
4309 Py_CLEAR(self->stack);
4310 Py_CLEAR(self->pers_func);
4311 Py_CLEAR(self->arg);
4312 Py_CLEAR(self->last_string);
4313
4314 PyMem_Free(self->marks);
4315 self->marks = NULL;
4316 free(self->encoding);
4317 self->encoding = NULL;
4318 free(self->errors);
4319 self->errors = NULL;
4320
4321 return 0;
4322}
4323
/* Class docstring of the Unpickler type (installed as tp_doc in
 * Unpickler_Type).
 */
PyDoc_STRVAR(Unpickler_doc,
"Unpickler(file, *, encoding='ASCII', errors='strict')"
"\n"
"This takes a binary file for reading a pickle data stream.\n"
"\n"
"The protocol version of the pickle is detected automatically, so no\n"
"proto argument is needed.\n"
"\n"
"The file-like object must have two methods, a read() method\n"
"that takes an integer argument, and a readline() method that\n"
"requires no arguments.  Both methods should return bytes.\n"
"Thus file-like object can be a binary file object opened for\n"
"reading, a BytesIO object, or any other custom object that\n"
"meets this interface.\n"
"\n"
"Optional keyword arguments are encoding and errors, which are\n"
"used to decode 8-bit string instances pickled by Python 2.x.\n"
"These default to 'ASCII' and 'strict', respectively.\n");
4342
4343static int
4344Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4345{
4346 static char *kwlist[] = {"file", "encoding", "errors", 0};
4347 PyObject *file;
4348 char *encoding = NULL;
4349 char *errors = NULL;
4350
4351 /* XXX: That is an horrible error message. But, I don't know how to do
4352 better... */
4353 if (Py_SIZE(args) != 1) {
4354 PyErr_Format(PyExc_TypeError,
4355 "%s takes exactly one positional argument (%zd given)",
4356 Py_TYPE(self)->tp_name, Py_SIZE(args));
4357 return -1;
4358 }
4359
4360 /* Arguments parsing needs to be done in the __init__() method to allow
4361 subclasses to define their own __init__() method, which may (or may
4362 not) support Unpickler arguments. However, this means we need to be
4363 extra careful in the other Unpickler methods, since a subclass could
4364 forget to call Unpickler.__init__() thus breaking our internal
4365 invariants. */
4366 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4367 &file, &encoding, &errors))
4368 return -1;
4369
4370 /* In case of multiple __init__() calls, clear previous content. */
4371 if (self->read != NULL)
4372 (void)Unpickler_clear(self);
4373
4374 self->read = PyObject_GetAttrString(file, "read");
4375 self->readline = PyObject_GetAttrString(file, "readline");
4376 if (self->readline == NULL || self->read == NULL)
4377 return -1;
4378
4379 if (encoding == NULL)
4380 encoding = "ASCII";
4381 if (errors == NULL)
4382 errors = "strict";
4383
4384 self->encoding = strdup(encoding);
4385 self->errors = strdup(errors);
4386 if (self->encoding == NULL || self->errors == NULL) {
4387 PyErr_NoMemory();
4388 return -1;
4389 }
4390
4391 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4392 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4393 "persistent_load");
4394 if (self->pers_func == NULL)
4395 return -1;
4396 }
4397 else {
4398 self->pers_func = NULL;
4399 }
4400
4401 self->stack = (Pdata *)Pdata_New();
4402 if (self->stack == NULL)
4403 return -1;
4404
4405 self->memo = PyDict_New();
4406 if (self->memo == NULL)
4407 return -1;
4408
4409 return 0;
4410}
4411
4412static PyObject *
4413Unpickler_get_memo(UnpicklerObject *self)
4414{
4415 if (self->memo == NULL)
4416 PyErr_SetString(PyExc_AttributeError, "memo");
4417 else
4418 Py_INCREF(self->memo);
4419 return self->memo;
4420}
4421
4422static int
4423Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4424{
4425 PyObject *tmp;
4426
4427 if (value == NULL) {
4428 PyErr_SetString(PyExc_TypeError,
4429 "attribute deletion is not supported");
4430 return -1;
4431 }
4432 if (!PyDict_Check(value)) {
4433 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4434 return -1;
4435 }
4436
4437 tmp = self->memo;
4438 Py_INCREF(value);
4439 self->memo = value;
4440 Py_XDECREF(tmp);
4441
4442 return 0;
4443}
4444
4445static PyObject *
4446Unpickler_get_persload(UnpicklerObject *self)
4447{
4448 if (self->pers_func == NULL)
4449 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4450 else
4451 Py_INCREF(self->pers_func);
4452 return self->pers_func;
4453}
4454
4455static int
4456Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4457{
4458 PyObject *tmp;
4459
4460 if (value == NULL) {
4461 PyErr_SetString(PyExc_TypeError,
4462 "attribute deletion is not supported");
4463 return -1;
4464 }
4465 if (!PyCallable_Check(value)) {
4466 PyErr_SetString(PyExc_TypeError,
4467 "persistent_load must be a callable taking "
4468 "one argument");
4469 return -1;
4470 }
4471
4472 tmp = self->pers_func;
4473 Py_INCREF(value);
4474 self->pers_func = value;
4475 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4476
4477 return 0;
4478}
4479
/* Computed attributes of the Unpickler type (installed as tp_getset in
 * Unpickler_Type).  Both attributes are readable and writable.
 */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
     (setter)Unpickler_set_persload},
    {NULL}                      /* sentinel */
};
4486
/* Type object of _pickle.Unpickler.  The type can be subclassed
 * (Py_TPFLAGS_BASETYPE) and takes part in cyclic garbage collection
 * (Py_TPFLAGS_HAVE_GC, with the traverse/clear slots filled in).
 */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /*tp_flags*/
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4529
4530static int
4531init_stuff(void)
4532{
4533 PyObject *copyreg;
4534
4535 copyreg = PyImport_ImportModule("copyreg");
4536 if (!copyreg)
4537 return -1;
4538
4539 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4540 if (!dispatch_table)
4541 goto error;
4542
4543 extension_registry = \
4544 PyObject_GetAttrString(copyreg, "_extension_registry");
4545 if (!extension_registry)
4546 goto error;
4547
4548 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4549 if (!inverted_registry)
4550 goto error;
4551
4552 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4553 if (!extension_cache)
4554 goto error;
4555
4556 Py_DECREF(copyreg);
4557
4558 empty_tuple = PyTuple_New(0);
4559 if (empty_tuple == NULL)
4560 return -1;
4561
4562 two_tuple = PyTuple_New(2);
4563 if (two_tuple == NULL)
4564 return -1;
4565 /* We use this temp container with no regard to refcounts, or to
4566 * keeping containees alive. Exempt from GC, because we don't
4567 * want anything looking at two_tuple() by magic.
4568 */
4569 PyObject_GC_UnTrack(two_tuple);
4570
4571 return 0;
4572
4573 error:
4574 Py_DECREF(copyreg);
4575 return -1;
4576}
4577
/* Definition record of the _pickle module, passed to PyModule_Create()
 * in PyInit__pickle().  Only the name and the docstring are filled in;
 * the remaining optional slots are left NULL.
 */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",                  /* module name */
    pickle_module_doc,          /* module docstring */
    -1,                         /* NOTE(review): presumably m_size; -1
                                   should mean "state is kept in globals"
                                   -- confirm against PyModuleDef docs */
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
4589
4590PyMODINIT_FUNC
4591PyInit__pickle(void)
4592{
4593 PyObject *m;
4594
4595 if (PyType_Ready(&Unpickler_Type) < 0)
4596 return NULL;
4597 if (PyType_Ready(&Pickler_Type) < 0)
4598 return NULL;
4599 if (PyType_Ready(&Pdata_Type) < 0)
4600 return NULL;
4601
4602 /* Create the module and add the functions. */
4603 m = PyModule_Create(&_picklemodule);
4604 if (m == NULL)
4605 return NULL;
4606
4607 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4608 return NULL;
4609 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4610 return NULL;
4611
4612 /* Initialize the exceptions. */
4613 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4614 if (PickleError == NULL)
4615 return NULL;
4616 PicklingError = \
4617 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4618 if (PicklingError == NULL)
4619 return NULL;
4620 UnpicklingError = \
4621 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4622 if (UnpicklingError == NULL)
4623 return NULL;
4624
4625 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4626 return NULL;
4627 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4628 return NULL;
4629 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4630 return NULL;
4631
4632 if (init_stuff() < 0)
4633 return NULL;
4634
4635 return m;
4636}