blob: 02a3e447d9072c392396eeea2e13634c7e272b5f [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol versions.  Raise these whenever new opcodes are added to
   the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
13
/* Pickle opcodes.  Each opcode is a single byte; this table must stay in
   step with pickle.py.  pickletools.py carries the detailed documentation. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Opcodes introduced by protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Opcodes introduced by protocol 3 (Python 3.x). */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* Not opcodes: these are the byte strings used to pickle bools before
 * protocol 2.  Unpicklers that predate bools load them as ints, while
 * newer unpicklers can tell that a bool was intended.  Protocol 2 added
 * direct ways to pickle bools.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
enum {
    /* Number of elements batch_list/dict() emits before issuing
       APPENDS/SETITEMS.  Should mirror pickle.Pickler._BATCHSIZE; nothing
       breaks if the two drift apart, but there is no known benefit to
       letting them differ either. */
    BATCHSIZE = 1000,

    /* Nesting depth at which a Pickler running in "fast mode" begins to
       check for self-referential data structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the Pickler's write buffer.  A larger buffer means fewer
       calls to the output stream's write() method. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER; the object's reference is
   not released by this macro. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference added here is dropped again before the
   enclosing function returns ER, so nothing is leaked.  O is evaluated
   more than once and must be free of side effects. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        }                                                       \
    } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers that build the argument tuple passed to single-argument calls.
   The 1-tuple is cached in self->arg so PyTuple_New() is normally called
   only once. */

/* Store `obj` as the sole item of self->arg, creating the tuple on first
   use and releasing any previous item.  The reference to `obj` is always
   consumed: it is either stored in the tuple or, if the tuple could not be
   created, released (self->arg is then still NULL). */
#define ARG_TUP(self, obj) do {                             \
        if ((self)->arg == NULL)                            \
            (self)->arg = PyTuple_New(1);                   \
        if ((self)->arg != NULL) {                          \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));   \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));        \
        }                                                   \
        else {                                              \
            Py_DECREF((obj));                               \
        }                                                   \
    } while (0)

/* Give up the cached tuple when something else still holds a reference to
   it, so that it is not modified behind that holder's back. */
#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1) {       \
            Py_CLEAR((self)->arg);              \
        }                                       \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
   XXX: Does caching the argument tuple provide any real performance benefit?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
   one second (wall clock time). Either way, the loading time of a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
463/* XXX: These read/readline functions ought to be optimized. Buffered I/O
464 might help a lot, especially with the new (but much slower) io library.
   On the other hand, the added complexity might not be worth it.
466 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
694 /* In some rare cases (e.g., random.getrandbits), __module__ can be
695 None. If it is so, then search sys.modules for the module of
696 global. */
697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
718 if (PyObject_Compare(module_name, main_str) == 0)
719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000849 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
883static int
884save_long(PicklerObject *self, PyObject *obj)
885{
886 PyObject *repr = NULL;
887 Py_ssize_t size;
888 long val = PyLong_AsLong(obj);
889 int status = 0;
890
891 const char long_op = LONG;
892
893 if (val == -1 && PyErr_Occurred()) {
894 /* out of range for int pickling */
895 PyErr_Clear();
896 }
897 else
898 return save_int(self, val);
899
900 if (self->proto >= 2) {
901 /* Linear-time pickling. */
902 size_t nbits;
903 size_t nbytes;
904 unsigned char *pdata;
905 char header[5];
906 int i;
907 int sign = _PyLong_Sign(obj);
908
909 if (sign == 0) {
910 header[0] = LONG1;
911 header[1] = 0; /* It's 0 -- an empty bytestring. */
912 if (pickler_write(self, header, 2) < 0)
913 goto error;
914 return 0;
915 }
916 nbits = _PyLong_NumBits(obj);
917 if (nbits == (size_t)-1 && PyErr_Occurred())
918 goto error;
919 /* How many bytes do we need? There are nbits >> 3 full
920 * bytes of data, and nbits & 7 leftover bits. If there
921 * are any leftover bits, then we clearly need another
922 * byte. Wnat's not so obvious is that we *probably*
923 * need another byte even if there aren't any leftovers:
924 * the most-significant bit of the most-significant byte
925 * acts like a sign bit, and it's usually got a sense
926 * opposite of the one we need. The exception is longs
927 * of the form -(2**(8*j-1)) for j > 0. Such a long is
928 * its own 256's-complement, so has the right sign bit
929 * even without the extra byte. That's a pain to check
930 * for in advance, though, so we always grab an extra
931 * byte at the start, and cut it back later if possible.
932 */
933 nbytes = (nbits >> 3) + 1;
934 if (nbytes > INT_MAX) {
935 PyErr_SetString(PyExc_OverflowError,
936 "long too large to pickle");
937 goto error;
938 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000939 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000940 if (repr == NULL)
941 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000942 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000943 i = _PyLong_AsByteArray((PyLongObject *)obj,
944 pdata, nbytes,
945 1 /* little endian */ , 1 /* signed */ );
946 if (i < 0)
947 goto error;
948 /* If the long is negative, this may be a byte more than
949 * needed. This is so iff the MSB is all redundant sign
950 * bits.
951 */
952 if (sign < 0 &&
953 nbytes > 1 &&
954 pdata[nbytes - 1] == 0xff &&
955 (pdata[nbytes - 2] & 0x80) != 0) {
956 nbytes--;
957 }
958
959 if (nbytes < 256) {
960 header[0] = LONG1;
961 header[1] = (unsigned char)nbytes;
962 size = 2;
963 }
964 else {
965 header[0] = LONG4;
966 size = (int)nbytes;
967 for (i = 1; i < 5; i++) {
968 header[i] = (unsigned char)(size & 0xff);
969 size >>= 8;
970 }
971 size = 5;
972 }
973 if (pickler_write(self, header, size) < 0 ||
974 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
975 goto error;
976 }
977 else {
978 char *string;
979
Mark Dickinson8dd05142009-01-20 20:43:58 +0000980 /* proto < 2: write the repr and newline. This is quadratic-time (in
981 the number of digits), in both directions. We add a trailing 'L'
982 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000983
984 repr = PyObject_Repr(obj);
985 if (repr == NULL)
986 goto error;
987
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000988 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000989 if (string == NULL)
990 goto error;
991
992 if (pickler_write(self, &long_op, 1) < 0 ||
993 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +0000994 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000995 goto error;
996 }
997
998 if (0) {
999 error:
1000 status = -1;
1001 }
1002 Py_XDECREF(repr);
1003
1004 return status;
1005}
1006
1007static int
1008save_float(PicklerObject *self, PyObject *obj)
1009{
1010 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1011
1012 if (self->bin) {
1013 char pdata[9];
1014 pdata[0] = BINFLOAT;
1015 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1016 return -1;
1017 if (pickler_write(self, pdata, 9) < 0)
1018 return -1;
1019 }
1020 else {
1021 char pdata[250];
1022 pdata[0] = FLOAT;
1023 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1024 /* Extend the formatted string with a newline character */
1025 strcat(pdata, "\n");
1026
1027 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1028 return -1;
1029 }
1030
1031 return 0;
1032}
1033
1034static int
1035save_bytes(PicklerObject *self, PyObject *obj)
1036{
1037 if (self->proto < 3) {
1038 /* Older pickle protocols do not have an opcode for pickling bytes
1039 objects. Therefore, we need to fake the copy protocol (i.e.,
1040 the __reduce__ method) to permit bytes object unpickling. */
1041 PyObject *reduce_value = NULL;
1042 PyObject *bytelist = NULL;
1043 int status;
1044
1045 bytelist = PySequence_List(obj);
1046 if (bytelist == NULL)
1047 return -1;
1048
1049 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1050 bytelist);
1051 if (reduce_value == NULL) {
1052 Py_DECREF(bytelist);
1053 return -1;
1054 }
1055
1056 /* save_reduce() will memoize the object automatically. */
1057 status = save_reduce(self, reduce_value, obj);
1058 Py_DECREF(reduce_value);
1059 Py_DECREF(bytelist);
1060 return status;
1061 }
1062 else {
1063 Py_ssize_t size;
1064 char header[5];
1065 int len;
1066
1067 size = PyBytes_Size(obj);
1068 if (size < 0)
1069 return -1;
1070
1071 if (size < 256) {
1072 header[0] = SHORT_BINBYTES;
1073 header[1] = (unsigned char)size;
1074 len = 2;
1075 }
1076 else if (size <= 0xffffffffL) {
1077 header[0] = BINBYTES;
1078 header[1] = (unsigned char)(size & 0xff);
1079 header[2] = (unsigned char)((size >> 8) & 0xff);
1080 header[3] = (unsigned char)((size >> 16) & 0xff);
1081 header[4] = (unsigned char)((size >> 24) & 0xff);
1082 len = 5;
1083 }
1084 else {
1085 return -1; /* string too large */
1086 }
1087
1088 if (pickler_write(self, header, len) < 0)
1089 return -1;
1090
1091 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1092 return -1;
1093
1094 if (memo_put(self, obj) < 0)
1095 return -1;
1096
1097 return 0;
1098 }
1099}
1100
1101/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1102 backslash and newline characters to \uXXXX escapes. */
1103static PyObject *
1104raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1105{
1106 PyObject *repr, *result;
1107 char *p;
1108 char *q;
1109
1110 static const char *hexdigits = "0123456789abcdef";
1111
1112#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001113 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001114#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001115 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001116#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001117
1118 if (size > PY_SSIZE_T_MAX / expandsize)
1119 return PyErr_NoMemory();
1120
1121 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001122 if (repr == NULL)
1123 return NULL;
1124 if (size == 0)
1125 goto done;
1126
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001127 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001128 while (size-- > 0) {
1129 Py_UNICODE ch = *s++;
1130#ifdef Py_UNICODE_WIDE
1131 /* Map 32-bit characters to '\Uxxxxxxxx' */
1132 if (ch >= 0x10000) {
1133 *p++ = '\\';
1134 *p++ = 'U';
1135 *p++ = hexdigits[(ch >> 28) & 0xf];
1136 *p++ = hexdigits[(ch >> 24) & 0xf];
1137 *p++ = hexdigits[(ch >> 20) & 0xf];
1138 *p++ = hexdigits[(ch >> 16) & 0xf];
1139 *p++ = hexdigits[(ch >> 12) & 0xf];
1140 *p++ = hexdigits[(ch >> 8) & 0xf];
1141 *p++ = hexdigits[(ch >> 4) & 0xf];
1142 *p++ = hexdigits[ch & 15];
1143 }
1144 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001145#else
1146 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1147 if (ch >= 0xD800 && ch < 0xDC00) {
1148 Py_UNICODE ch2;
1149 Py_UCS4 ucs;
1150
1151 ch2 = *s++;
1152 size--;
1153 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1154 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1155 *p++ = '\\';
1156 *p++ = 'U';
1157 *p++ = hexdigits[(ucs >> 28) & 0xf];
1158 *p++ = hexdigits[(ucs >> 24) & 0xf];
1159 *p++ = hexdigits[(ucs >> 20) & 0xf];
1160 *p++ = hexdigits[(ucs >> 16) & 0xf];
1161 *p++ = hexdigits[(ucs >> 12) & 0xf];
1162 *p++ = hexdigits[(ucs >> 8) & 0xf];
1163 *p++ = hexdigits[(ucs >> 4) & 0xf];
1164 *p++ = hexdigits[ucs & 0xf];
1165 continue;
1166 }
1167 /* Fall through: isolated surrogates are copied as-is */
1168 s--;
1169 size++;
1170 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001171#endif
1172 /* Map 16-bit characters to '\uxxxx' */
1173 if (ch >= 256 || ch == '\\' || ch == '\n') {
1174 *p++ = '\\';
1175 *p++ = 'u';
1176 *p++ = hexdigits[(ch >> 12) & 0xf];
1177 *p++ = hexdigits[(ch >> 8) & 0xf];
1178 *p++ = hexdigits[(ch >> 4) & 0xf];
1179 *p++ = hexdigits[ch & 15];
1180 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001181 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001182 else
1183 *p++ = (char) ch;
1184 }
1185 size = p - q;
1186
1187 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001188 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001189 Py_DECREF(repr);
1190 return result;
1191}
1192
1193static int
1194save_unicode(PicklerObject *self, PyObject *obj)
1195{
1196 Py_ssize_t size;
1197 PyObject *encoded = NULL;
1198
1199 if (self->bin) {
1200 char pdata[5];
1201
1202 encoded = PyUnicode_AsUTF8String(obj);
1203 if (encoded == NULL)
1204 goto error;
1205
1206 size = PyBytes_GET_SIZE(encoded);
1207 if (size < 0 || size > 0xffffffffL)
1208 goto error; /* string too large */
1209
1210 pdata[0] = BINUNICODE;
1211 pdata[1] = (unsigned char)(size & 0xff);
1212 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1213 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1214 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1215
1216 if (pickler_write(self, pdata, 5) < 0)
1217 goto error;
1218
1219 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1220 goto error;
1221 }
1222 else {
1223 const char unicode_op = UNICODE;
1224
1225 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1226 PyUnicode_GET_SIZE(obj));
1227 if (encoded == NULL)
1228 goto error;
1229
1230 if (pickler_write(self, &unicode_op, 1) < 0)
1231 goto error;
1232
1233 size = PyBytes_GET_SIZE(encoded);
1234 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1235 goto error;
1236
1237 if (pickler_write(self, "\n", 1) < 0)
1238 goto error;
1239 }
1240 if (memo_put(self, obj) < 0)
1241 goto error;
1242
1243 Py_DECREF(encoded);
1244 return 0;
1245
1246 error:
1247 Py_XDECREF(encoded);
1248 return -1;
1249}
1250
1251/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1252static int
1253store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1254{
1255 int i;
1256
1257 assert(PyTuple_Size(t) == len);
1258
1259 for (i = 0; i < len; i++) {
1260 PyObject *element = PyTuple_GET_ITEM(t, i);
1261
1262 if (element == NULL)
1263 return -1;
1264 if (save(self, element, 0) < 0)
1265 return -1;
1266 }
1267
1268 return 0;
1269}
1270
1271/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1272 * used across protocols to minimize the space needed to pickle them.
1273 * Tuples are also the only builtin immutable type that can be recursive
1274 * (a tuple can be reached from itself), and that requires some subtle
1275 * magic so that it works in all cases. IOW, this is a long routine.
1276 */
1277static int
1278save_tuple(PicklerObject *self, PyObject *obj)
1279{
1280 PyObject *memo_key = NULL;
1281 int len, i;
1282 int status = 0;
1283
1284 const char mark_op = MARK;
1285 const char tuple_op = TUPLE;
1286 const char pop_op = POP;
1287 const char pop_mark_op = POP_MARK;
1288 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1289
1290 if ((len = PyTuple_Size(obj)) < 0)
1291 return -1;
1292
1293 if (len == 0) {
1294 char pdata[2];
1295
1296 if (self->proto) {
1297 pdata[0] = EMPTY_TUPLE;
1298 len = 1;
1299 }
1300 else {
1301 pdata[0] = MARK;
1302 pdata[1] = TUPLE;
1303 len = 2;
1304 }
1305 if (pickler_write(self, pdata, len) < 0)
1306 return -1;
1307 return 0;
1308 }
1309
1310 /* id(tuple) isn't in the memo now. If it shows up there after
1311 * saving the tuple elements, the tuple must be recursive, in
1312 * which case we'll pop everything we put on the stack, and fetch
1313 * its value from the memo.
1314 */
1315 memo_key = PyLong_FromVoidPtr(obj);
1316 if (memo_key == NULL)
1317 return -1;
1318
1319 if (len <= 3 && self->proto >= 2) {
1320 /* Use TUPLE{1,2,3} opcodes. */
1321 if (store_tuple_elements(self, obj, len) < 0)
1322 goto error;
1323
1324 if (PyDict_GetItem(self->memo, memo_key)) {
1325 /* pop the len elements */
1326 for (i = 0; i < len; i++)
1327 if (pickler_write(self, &pop_op, 1) < 0)
1328 goto error;
1329 /* fetch from memo */
1330 if (memo_get(self, memo_key) < 0)
1331 goto error;
1332
1333 Py_DECREF(memo_key);
1334 return 0;
1335 }
1336 else { /* Not recursive. */
1337 if (pickler_write(self, len2opcode + len, 1) < 0)
1338 goto error;
1339 }
1340 goto memoize;
1341 }
1342
1343 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1344 * Generate MARK e1 e2 ... TUPLE
1345 */
1346 if (pickler_write(self, &mark_op, 1) < 0)
1347 goto error;
1348
1349 if (store_tuple_elements(self, obj, len) < 0)
1350 goto error;
1351
1352 if (PyDict_GetItem(self->memo, memo_key)) {
1353 /* pop the stack stuff we pushed */
1354 if (self->bin) {
1355 if (pickler_write(self, &pop_mark_op, 1) < 0)
1356 goto error;
1357 }
1358 else {
1359 /* Note that we pop one more than len, to remove
1360 * the MARK too.
1361 */
1362 for (i = 0; i <= len; i++)
1363 if (pickler_write(self, &pop_op, 1) < 0)
1364 goto error;
1365 }
1366 /* fetch from memo */
1367 if (memo_get(self, memo_key) < 0)
1368 goto error;
1369
1370 Py_DECREF(memo_key);
1371 return 0;
1372 }
1373 else { /* Not recursive. */
1374 if (pickler_write(self, &tuple_op, 1) < 0)
1375 goto error;
1376 }
1377
1378 memoize:
1379 if (memo_put(self, obj) < 0)
1380 goto error;
1381
1382 if (0) {
1383 error:
1384 status = -1;
1385 }
1386
1387 Py_DECREF(memo_key);
1388 return status;
1389}
1390
1391/* iter is an iterator giving items, and we batch up chunks of
1392 * MARK item item ... item APPENDS
1393 * opcode sequences. Calling code should have arranged to first create an
1394 * empty list, or list-like object, for the APPENDS to operate on.
1395 * Returns 0 on success, <0 on error.
1396 */
1397static int
1398batch_list(PicklerObject *self, PyObject *iter)
1399{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001400 PyObject *obj = NULL;
1401 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001402 int i, n;
1403
1404 const char mark_op = MARK;
1405 const char append_op = APPEND;
1406 const char appends_op = APPENDS;
1407
1408 assert(iter != NULL);
1409
1410 /* XXX: I think this function could be made faster by avoiding the
1411 iterator interface and fetching objects directly from list using
1412 PyList_GET_ITEM.
1413 */
1414
1415 if (self->proto == 0) {
1416 /* APPENDS isn't available; do one at a time. */
1417 for (;;) {
1418 obj = PyIter_Next(iter);
1419 if (obj == NULL) {
1420 if (PyErr_Occurred())
1421 return -1;
1422 break;
1423 }
1424 i = save(self, obj, 0);
1425 Py_DECREF(obj);
1426 if (i < 0)
1427 return -1;
1428 if (pickler_write(self, &append_op, 1) < 0)
1429 return -1;
1430 }
1431 return 0;
1432 }
1433
1434 /* proto > 0: write in batches of BATCHSIZE. */
1435 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001436 /* Get first item */
1437 firstitem = PyIter_Next(iter);
1438 if (firstitem == NULL) {
1439 if (PyErr_Occurred())
1440 goto error;
1441
1442 /* nothing more to add */
1443 break;
1444 }
1445
1446 /* Try to get a second item */
1447 obj = PyIter_Next(iter);
1448 if (obj == NULL) {
1449 if (PyErr_Occurred())
1450 goto error;
1451
1452 /* Only one item to write */
1453 if (save(self, firstitem, 0) < 0)
1454 goto error;
1455 if (pickler_write(self, &append_op, 1) < 0)
1456 goto error;
1457 Py_CLEAR(firstitem);
1458 break;
1459 }
1460
1461 /* More than one item to write */
1462
1463 /* Pump out MARK, items, APPENDS. */
1464 if (pickler_write(self, &mark_op, 1) < 0)
1465 goto error;
1466
1467 if (save(self, firstitem, 0) < 0)
1468 goto error;
1469 Py_CLEAR(firstitem);
1470 n = 1;
1471
1472 /* Fetch and save up to BATCHSIZE items */
1473 while (obj) {
1474 if (save(self, obj, 0) < 0)
1475 goto error;
1476 Py_CLEAR(obj);
1477 n += 1;
1478
1479 if (n == BATCHSIZE)
1480 break;
1481
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001482 obj = PyIter_Next(iter);
1483 if (obj == NULL) {
1484 if (PyErr_Occurred())
1485 goto error;
1486 break;
1487 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488 }
1489
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001490 if (pickler_write(self, &appends_op, 1) < 0)
1491 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001492
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493 } while (n == BATCHSIZE);
1494 return 0;
1495
1496 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001497 Py_XDECREF(firstitem);
1498 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499 return -1;
1500}
1501
1502static int
1503save_list(PicklerObject *self, PyObject *obj)
1504{
1505 PyObject *iter;
1506 char header[3];
1507 int len;
1508 int status = 0;
1509
1510 if (self->fast && !fast_save_enter(self, obj))
1511 goto error;
1512
1513 /* Create an empty list. */
1514 if (self->bin) {
1515 header[0] = EMPTY_LIST;
1516 len = 1;
1517 }
1518 else {
1519 header[0] = MARK;
1520 header[1] = LIST;
1521 len = 2;
1522 }
1523
1524 if (pickler_write(self, header, len) < 0)
1525 goto error;
1526
1527 /* Get list length, and bow out early if empty. */
1528 if ((len = PyList_Size(obj)) < 0)
1529 goto error;
1530
1531 if (memo_put(self, obj) < 0)
1532 goto error;
1533
1534 if (len != 0) {
1535 /* Save the list elements. */
1536 iter = PyObject_GetIter(obj);
1537 if (iter == NULL)
1538 goto error;
1539 status = batch_list(self, iter);
1540 Py_DECREF(iter);
1541 }
1542
1543 if (0) {
1544 error:
1545 status = -1;
1546 }
1547
1548 if (self->fast && !fast_save_leave(self, obj))
1549 status = -1;
1550
1551 return status;
1552}
1553
1554/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1555 * MARK key value ... key value SETITEMS
1556 * opcode sequences. Calling code should have arranged to first create an
1557 * empty dict, or dict-like object, for the SETITEMS to operate on.
1558 * Returns 0 on success, <0 on error.
1559 *
1560 * This is very much like batch_list(). The difference between saving
1561 * elements directly, and picking apart two-tuples, is so long-winded at
1562 * the C level, though, that attempts to combine these routines were too
1563 * ugly to bear.
1564 */
1565static int
1566batch_dict(PicklerObject *self, PyObject *iter)
1567{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001568 PyObject *obj = NULL;
1569 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001570 int i, n;
1571
1572 const char mark_op = MARK;
1573 const char setitem_op = SETITEM;
1574 const char setitems_op = SETITEMS;
1575
1576 assert(iter != NULL);
1577
1578 if (self->proto == 0) {
1579 /* SETITEMS isn't available; do one at a time. */
1580 for (;;) {
1581 obj = PyIter_Next(iter);
1582 if (obj == NULL) {
1583 if (PyErr_Occurred())
1584 return -1;
1585 break;
1586 }
1587 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1588 PyErr_SetString(PyExc_TypeError, "dict items "
1589 "iterator must return 2-tuples");
1590 return -1;
1591 }
1592 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1593 if (i >= 0)
1594 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1595 Py_DECREF(obj);
1596 if (i < 0)
1597 return -1;
1598 if (pickler_write(self, &setitem_op, 1) < 0)
1599 return -1;
1600 }
1601 return 0;
1602 }
1603
1604 /* proto > 0: write in batches of BATCHSIZE. */
1605 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001606 /* Get first item */
1607 firstitem = PyIter_Next(iter);
1608 if (firstitem == NULL) {
1609 if (PyErr_Occurred())
1610 goto error;
1611
1612 /* nothing more to add */
1613 break;
1614 }
1615 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1616 PyErr_SetString(PyExc_TypeError, "dict items "
1617 "iterator must return 2-tuples");
1618 goto error;
1619 }
1620
1621 /* Try to get a second item */
1622 obj = PyIter_Next(iter);
1623 if (obj == NULL) {
1624 if (PyErr_Occurred())
1625 goto error;
1626
1627 /* Only one item to write */
1628 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1629 goto error;
1630 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1631 goto error;
1632 if (pickler_write(self, &setitem_op, 1) < 0)
1633 goto error;
1634 Py_CLEAR(firstitem);
1635 break;
1636 }
1637
1638 /* More than one item to write */
1639
1640 /* Pump out MARK, items, SETITEMS. */
1641 if (pickler_write(self, &mark_op, 1) < 0)
1642 goto error;
1643
1644 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1645 goto error;
1646 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1647 goto error;
1648 Py_CLEAR(firstitem);
1649 n = 1;
1650
1651 /* Fetch and save up to BATCHSIZE items */
1652 while (obj) {
1653 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1654 PyErr_SetString(PyExc_TypeError, "dict items "
1655 "iterator must return 2-tuples");
1656 goto error;
1657 }
1658 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1659 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1660 goto error;
1661 Py_CLEAR(obj);
1662 n += 1;
1663
1664 if (n == BATCHSIZE)
1665 break;
1666
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 obj = PyIter_Next(iter);
1668 if (obj == NULL) {
1669 if (PyErr_Occurred())
1670 goto error;
1671 break;
1672 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001673 }
1674
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001675 if (pickler_write(self, &setitems_op, 1) < 0)
1676 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001677
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001678 } while (n == BATCHSIZE);
1679 return 0;
1680
1681 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001682 Py_XDECREF(firstitem);
1683 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684 return -1;
1685}
1686
1687static int
1688save_dict(PicklerObject *self, PyObject *obj)
1689{
1690 PyObject *items, *iter;
1691 char header[3];
1692 int len;
1693 int status = 0;
1694
1695 if (self->fast && !fast_save_enter(self, obj))
1696 goto error;
1697
1698 /* Create an empty dict. */
1699 if (self->bin) {
1700 header[0] = EMPTY_DICT;
1701 len = 1;
1702 }
1703 else {
1704 header[0] = MARK;
1705 header[1] = DICT;
1706 len = 2;
1707 }
1708
1709 if (pickler_write(self, header, len) < 0)
1710 goto error;
1711
1712 /* Get dict size, and bow out early if empty. */
1713 if ((len = PyDict_Size(obj)) < 0)
1714 goto error;
1715
1716 if (memo_put(self, obj) < 0)
1717 goto error;
1718
1719 if (len != 0) {
1720 /* Save the dict items. */
1721 items = PyObject_CallMethod(obj, "items", "()");
1722 if (items == NULL)
1723 goto error;
1724 iter = PyObject_GetIter(items);
1725 Py_DECREF(items);
1726 if (iter == NULL)
1727 goto error;
1728 status = batch_dict(self, iter);
1729 Py_DECREF(iter);
1730 }
1731
1732 if (0) {
1733 error:
1734 status = -1;
1735 }
1736
1737 if (self->fast && !fast_save_leave(self, obj))
1738 status = -1;
1739
1740 return status;
1741}
1742
1743static int
1744save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1745{
1746 static PyObject *name_str = NULL;
1747 PyObject *global_name = NULL;
1748 PyObject *module_name = NULL;
1749 PyObject *module = NULL;
1750 PyObject *cls;
1751 int status = 0;
1752
1753 const char global_op = GLOBAL;
1754
1755 if (name_str == NULL) {
1756 name_str = PyUnicode_InternFromString("__name__");
1757 if (name_str == NULL)
1758 goto error;
1759 }
1760
1761 if (name) {
1762 global_name = name;
1763 Py_INCREF(global_name);
1764 }
1765 else {
1766 global_name = PyObject_GetAttr(obj, name_str);
1767 if (global_name == NULL)
1768 goto error;
1769 }
1770
1771 module_name = whichmodule(obj, global_name);
1772 if (module_name == NULL)
1773 goto error;
1774
1775 /* XXX: Change to use the import C API directly with level=0 to disallow
1776 relative imports.
1777
1778 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1779 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1780 custom import functions (IMHO, this would be a nice security
1781 feature). The import C API would need to be extended to support the
1782 extra parameters of __import__ to fix that. */
1783 module = PyImport_Import(module_name);
1784 if (module == NULL) {
1785 PyErr_Format(PicklingError,
1786 "Can't pickle %R: import of module %R failed",
1787 obj, module_name);
1788 goto error;
1789 }
1790 cls = PyObject_GetAttr(module, global_name);
1791 if (cls == NULL) {
1792 PyErr_Format(PicklingError,
1793 "Can't pickle %R: attribute lookup %S.%S failed",
1794 obj, module_name, global_name);
1795 goto error;
1796 }
1797 if (cls != obj) {
1798 Py_DECREF(cls);
1799 PyErr_Format(PicklingError,
1800 "Can't pickle %R: it's not the same object as %S.%S",
1801 obj, module_name, global_name);
1802 goto error;
1803 }
1804 Py_DECREF(cls);
1805
1806 if (self->proto >= 2) {
1807 /* See whether this is in the extension registry, and if
1808 * so generate an EXT opcode.
1809 */
1810 PyObject *code_obj; /* extension code as Python object */
1811 long code; /* extension code as C value */
1812 char pdata[5];
1813 int n;
1814
1815 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1816 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1817 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1818 /* The object is not registered in the extension registry.
1819 This is the most likely code path. */
1820 if (code_obj == NULL)
1821 goto gen_global;
1822
1823 /* XXX: pickle.py doesn't check neither the type, nor the range
1824 of the value returned by the extension_registry. It should for
1825 consistency. */
1826
1827 /* Verify code_obj has the right type and value. */
1828 if (!PyLong_Check(code_obj)) {
1829 PyErr_Format(PicklingError,
1830 "Can't pickle %R: extension code %R isn't an integer",
1831 obj, code_obj);
1832 goto error;
1833 }
1834 code = PyLong_AS_LONG(code_obj);
1835 if (code <= 0 || code > 0x7fffffffL) {
1836 PyErr_Format(PicklingError,
1837 "Can't pickle %R: extension code %ld is out of range",
1838 obj, code);
1839 goto error;
1840 }
1841
1842 /* Generate an EXT opcode. */
1843 if (code <= 0xff) {
1844 pdata[0] = EXT1;
1845 pdata[1] = (unsigned char)code;
1846 n = 2;
1847 }
1848 else if (code <= 0xffff) {
1849 pdata[0] = EXT2;
1850 pdata[1] = (unsigned char)(code & 0xff);
1851 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1852 n = 3;
1853 }
1854 else {
1855 pdata[0] = EXT4;
1856 pdata[1] = (unsigned char)(code & 0xff);
1857 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1858 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1859 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1860 n = 5;
1861 }
1862
1863 if (pickler_write(self, pdata, n) < 0)
1864 goto error;
1865 }
1866 else {
1867 /* Generate a normal global opcode if we are using a pickle
1868 protocol <= 2, or if the object is not registered in the
1869 extension registry. */
1870 PyObject *encoded;
1871 PyObject *(*unicode_encoder)(PyObject *);
1872
1873 gen_global:
1874 if (pickler_write(self, &global_op, 1) < 0)
1875 goto error;
1876
1877 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1878 the module name and the global name using UTF-8. We do so only when
1879 we are using the pickle protocol newer than version 3. This is to
1880 ensure compatibility with older Unpickler running on Python 2.x. */
1881 if (self->proto >= 3) {
1882 unicode_encoder = PyUnicode_AsUTF8String;
1883 }
1884 else {
1885 unicode_encoder = PyUnicode_AsASCIIString;
1886 }
1887
1888 /* Save the name of the module. */
1889 encoded = unicode_encoder(module_name);
1890 if (encoded == NULL) {
1891 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1892 PyErr_Format(PicklingError,
1893 "can't pickle module identifier '%S' using "
1894 "pickle protocol %i", module_name, self->proto);
1895 goto error;
1896 }
1897 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1898 PyBytes_GET_SIZE(encoded)) < 0) {
1899 Py_DECREF(encoded);
1900 goto error;
1901 }
1902 Py_DECREF(encoded);
1903 if(pickler_write(self, "\n", 1) < 0)
1904 goto error;
1905
1906 /* Save the name of the module. */
1907 encoded = unicode_encoder(global_name);
1908 if (encoded == NULL) {
1909 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1910 PyErr_Format(PicklingError,
1911 "can't pickle global identifier '%S' using "
1912 "pickle protocol %i", global_name, self->proto);
1913 goto error;
1914 }
1915 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1916 PyBytes_GET_SIZE(encoded)) < 0) {
1917 Py_DECREF(encoded);
1918 goto error;
1919 }
1920 Py_DECREF(encoded);
1921 if(pickler_write(self, "\n", 1) < 0)
1922 goto error;
1923
1924 /* Memoize the object. */
1925 if (memo_put(self, obj) < 0)
1926 goto error;
1927 }
1928
1929 if (0) {
1930 error:
1931 status = -1;
1932 }
1933 Py_XDECREF(module_name);
1934 Py_XDECREF(global_name);
1935 Py_XDECREF(module);
1936
1937 return status;
1938}
1939
1940static int
1941save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1942{
1943 PyObject *pid = NULL;
1944 int status = 0;
1945
1946 const char persid_op = PERSID;
1947 const char binpersid_op = BINPERSID;
1948
1949 Py_INCREF(obj);
1950 pid = pickler_call(self, func, obj);
1951 if (pid == NULL)
1952 return -1;
1953
1954 if (pid != Py_None) {
1955 if (self->bin) {
1956 if (save(self, pid, 1) < 0 ||
1957 pickler_write(self, &binpersid_op, 1) < 0)
1958 goto error;
1959 }
1960 else {
1961 PyObject *pid_str = NULL;
1962 char *pid_ascii_bytes;
1963 Py_ssize_t size;
1964
1965 pid_str = PyObject_Str(pid);
1966 if (pid_str == NULL)
1967 goto error;
1968
1969 /* XXX: Should it check whether the persistent id only contains
1970 ASCII characters? And what if the pid contains embedded
1971 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001972 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 Py_DECREF(pid_str);
1974 if (pid_ascii_bytes == NULL)
1975 goto error;
1976
1977 if (pickler_write(self, &persid_op, 1) < 0 ||
1978 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1979 pickler_write(self, "\n", 1) < 0)
1980 goto error;
1981 }
1982 status = 1;
1983 }
1984
1985 if (0) {
1986 error:
1987 status = -1;
1988 }
1989 Py_XDECREF(pid);
1990
1991 return status;
1992}
1993
1994/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1995 * appropriate __reduce__ method for obj.
1996 */
1997static int
1998save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1999{
2000 PyObject *callable;
2001 PyObject *argtup;
2002 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002003 PyObject *listitems = Py_None;
2004 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002005 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006
2007 int use_newobj = self->proto >= 2;
2008
2009 const char reduce_op = REDUCE;
2010 const char build_op = BUILD;
2011 const char newobj_op = NEWOBJ;
2012
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002013 size = PyTuple_Size(args);
2014 if (size < 2 || size > 5) {
2015 PyErr_SetString(PicklingError, "tuple returned by "
2016 "__reduce__ must contain 2 through 5 elements");
2017 return -1;
2018 }
2019
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002020 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2021 &callable, &argtup, &state, &listitems, &dictitems))
2022 return -1;
2023
2024 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002025 PyErr_SetString(PicklingError, "first item of the tuple "
2026 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return -1;
2028 }
2029 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002030 PyErr_SetString(PicklingError, "second item of the tuple "
2031 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 return -1;
2033 }
2034
2035 if (state == Py_None)
2036 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002037
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 if (listitems == Py_None)
2039 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002040 else if (!PyIter_Check(listitems)) {
2041 PyErr_Format(PicklingError, "Fourth element of tuple"
2042 "returned by __reduce__ must be an iterator, not %s",
2043 Py_TYPE(listitems)->tp_name);
2044 return -1;
2045 }
2046
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 if (dictitems == Py_None)
2048 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002049 else if (!PyIter_Check(dictitems)) {
2050 PyErr_Format(PicklingError, "Fifth element of tuple"
2051 "returned by __reduce__ must be an iterator, not %s",
2052 Py_TYPE(dictitems)->tp_name);
2053 return -1;
2054 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055
2056 /* Protocol 2 special case: if callable's name is __newobj__, use
2057 NEWOBJ. */
2058 if (use_newobj) {
2059 static PyObject *newobj_str = NULL;
2060 PyObject *name_str;
2061
2062 if (newobj_str == NULL) {
2063 newobj_str = PyUnicode_InternFromString("__newobj__");
2064 }
2065
2066 name_str = PyObject_GetAttrString(callable, "__name__");
2067 if (name_str == NULL) {
2068 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2069 PyErr_Clear();
2070 else
2071 return -1;
2072 use_newobj = 0;
2073 }
2074 else {
2075 use_newobj = PyUnicode_Check(name_str) &&
2076 PyUnicode_Compare(name_str, newobj_str) == 0;
2077 Py_DECREF(name_str);
2078 }
2079 }
2080 if (use_newobj) {
2081 PyObject *cls;
2082 PyObject *newargtup;
2083 PyObject *obj_class;
2084 int p;
2085
2086 /* Sanity checks. */
2087 if (Py_SIZE(argtup) < 1) {
2088 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2089 return -1;
2090 }
2091
2092 cls = PyTuple_GET_ITEM(argtup, 0);
2093 if (!PyObject_HasAttrString(cls, "__new__")) {
2094 PyErr_SetString(PicklingError, "args[0] from "
2095 "__newobj__ args has no __new__");
2096 return -1;
2097 }
2098
2099 if (obj != NULL) {
2100 obj_class = PyObject_GetAttrString(obj, "__class__");
2101 if (obj_class == NULL) {
2102 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2103 PyErr_Clear();
2104 else
2105 return -1;
2106 }
2107 p = obj_class != cls; /* true iff a problem */
2108 Py_DECREF(obj_class);
2109 if (p) {
2110 PyErr_SetString(PicklingError, "args[0] from "
2111 "__newobj__ args has the wrong class");
2112 return -1;
2113 }
2114 }
2115 /* XXX: These calls save() are prone to infinite recursion. Imagine
2116 what happen if the value returned by the __reduce__() method of
2117 some extension type contains another object of the same type. Ouch!
2118
2119 Here is a quick example, that I ran into, to illustrate what I
2120 mean:
2121
2122 >>> import pickle, copyreg
2123 >>> copyreg.dispatch_table.pop(complex)
2124 >>> pickle.dumps(1+2j)
2125 Traceback (most recent call last):
2126 ...
2127 RuntimeError: maximum recursion depth exceeded
2128
2129 Removing the complex class from copyreg.dispatch_table made the
2130 __reduce_ex__() method emit another complex object:
2131
2132 >>> (1+1j).__reduce_ex__(2)
2133 (<function __newobj__ at 0xb7b71c3c>,
2134 (<class 'complex'>, (1+1j)), None, None, None)
2135
2136 Thus when save() was called on newargstup (the 2nd item) recursion
2137 ensued. Of course, the bug was in the complex class which had a
2138 broken __getnewargs__() that emitted another complex object. But,
2139 the point, here, is it is quite easy to end up with a broken reduce
2140 function. */
2141
2142 /* Save the class and its __new__ arguments. */
2143 if (save(self, cls, 0) < 0)
2144 return -1;
2145
2146 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2147 if (newargtup == NULL)
2148 return -1;
2149
2150 p = save(self, newargtup, 0);
2151 Py_DECREF(newargtup);
2152 if (p < 0)
2153 return -1;
2154
2155 /* Add NEWOBJ opcode. */
2156 if (pickler_write(self, &newobj_op, 1) < 0)
2157 return -1;
2158 }
2159 else { /* Not using NEWOBJ. */
2160 if (save(self, callable, 0) < 0 ||
2161 save(self, argtup, 0) < 0 ||
2162 pickler_write(self, &reduce_op, 1) < 0)
2163 return -1;
2164 }
2165
2166 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2167 the caller do not want to memoize the object. Not particularly useful,
2168 but that is to mimic the behavior save_reduce() in pickle.py when
2169 obj is None. */
2170 if (obj && memo_put(self, obj) < 0)
2171 return -1;
2172
2173 if (listitems && batch_list(self, listitems) < 0)
2174 return -1;
2175
2176 if (dictitems && batch_dict(self, dictitems) < 0)
2177 return -1;
2178
2179 if (state) {
2180 if (save(self, state, 0) < 0 ||
2181 pickler_write(self, &build_op, 1) < 0)
2182 return -1;
2183 }
2184
2185 return 0;
2186}
2187
2188static int
2189save(PicklerObject *self, PyObject *obj, int pers_save)
2190{
2191 PyTypeObject *type;
2192 PyObject *reduce_func = NULL;
2193 PyObject *reduce_value = NULL;
2194 PyObject *memo_key = NULL;
2195 int status = 0;
2196
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002197 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2198 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199
2200 /* The extra pers_save argument is necessary to avoid calling save_pers()
2201 on its returned object. */
2202 if (!pers_save && self->pers_func) {
2203 /* save_pers() returns:
2204 -1 to signal an error;
2205 0 if it did nothing successfully;
2206 1 if a persistent id was saved.
2207 */
2208 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2209 goto done;
2210 }
2211
2212 type = Py_TYPE(obj);
2213
2214 /* XXX: The old cPickle had an optimization that used switch-case
2215 statement dispatching on the first letter of the type name. It was
2216 probably not a bad idea after all. If benchmarks shows that particular
2217 optimization had some real benefits, it would be nice to add it
2218 back. */
2219
2220 /* Atom types; these aren't memoized, so don't check the memo. */
2221
2222 if (obj == Py_None) {
2223 status = save_none(self, obj);
2224 goto done;
2225 }
2226 else if (obj == Py_False || obj == Py_True) {
2227 status = save_bool(self, obj);
2228 goto done;
2229 }
2230 else if (type == &PyLong_Type) {
2231 status = save_long(self, obj);
2232 goto done;
2233 }
2234 else if (type == &PyFloat_Type) {
2235 status = save_float(self, obj);
2236 goto done;
2237 }
2238
2239 /* Check the memo to see if it has the object. If so, generate
2240 a GET (or BINGET) opcode, instead of pickling the object
2241 once again. */
2242 memo_key = PyLong_FromVoidPtr(obj);
2243 if (memo_key == NULL)
2244 goto error;
2245 if (PyDict_GetItem(self->memo, memo_key)) {
2246 if (memo_get(self, memo_key) < 0)
2247 goto error;
2248 goto done;
2249 }
2250
2251 if (type == &PyBytes_Type) {
2252 status = save_bytes(self, obj);
2253 goto done;
2254 }
2255 else if (type == &PyUnicode_Type) {
2256 status = save_unicode(self, obj);
2257 goto done;
2258 }
2259 else if (type == &PyDict_Type) {
2260 status = save_dict(self, obj);
2261 goto done;
2262 }
2263 else if (type == &PyList_Type) {
2264 status = save_list(self, obj);
2265 goto done;
2266 }
2267 else if (type == &PyTuple_Type) {
2268 status = save_tuple(self, obj);
2269 goto done;
2270 }
2271 else if (type == &PyType_Type) {
2272 status = save_global(self, obj, NULL);
2273 goto done;
2274 }
2275 else if (type == &PyFunction_Type) {
2276 status = save_global(self, obj, NULL);
2277 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2278 /* fall back to reduce */
2279 PyErr_Clear();
2280 }
2281 else {
2282 goto done;
2283 }
2284 }
2285 else if (type == &PyCFunction_Type) {
2286 status = save_global(self, obj, NULL);
2287 goto done;
2288 }
2289 else if (PyType_IsSubtype(type, &PyType_Type)) {
2290 status = save_global(self, obj, NULL);
2291 goto done;
2292 }
2293
2294 /* XXX: This part needs some unit tests. */
2295
2296 /* Get a reduction callable, and call it. This may come from
2297 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2298 * or the object's __reduce__ method.
2299 */
2300 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2301 if (reduce_func != NULL) {
2302 /* Here, the reference count of the reduce_func object returned by
2303 PyDict_GetItem needs to be increased to be consistent with the one
2304 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2305 reduce_func at the end of the save() routine.
2306 */
2307 Py_INCREF(reduce_func);
2308 Py_INCREF(obj);
2309 reduce_value = pickler_call(self, reduce_func, obj);
2310 }
2311 else {
2312 static PyObject *reduce_str = NULL;
2313 static PyObject *reduce_ex_str = NULL;
2314
2315 /* Cache the name of the reduce methods. */
2316 if (reduce_str == NULL) {
2317 reduce_str = PyUnicode_InternFromString("__reduce__");
2318 if (reduce_str == NULL)
2319 goto error;
2320 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2321 if (reduce_ex_str == NULL)
2322 goto error;
2323 }
2324
2325 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2326 automatically defined as __reduce__. While this is convenient, this
2327 make it impossible to know which method was actually called. Of
2328 course, this is not a big deal. But still, it would be nice to let
2329 the user know which method was called when something go
2330 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2331 don't actually have to check for a __reduce__ method. */
2332
2333 /* Check for a __reduce_ex__ method. */
2334 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2335 if (reduce_func != NULL) {
2336 PyObject *proto;
2337 proto = PyLong_FromLong(self->proto);
2338 if (proto != NULL) {
2339 reduce_value = pickler_call(self, reduce_func, proto);
2340 }
2341 }
2342 else {
2343 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2344 PyErr_Clear();
2345 else
2346 goto error;
2347 /* Check for a __reduce__ method. */
2348 reduce_func = PyObject_GetAttr(obj, reduce_str);
2349 if (reduce_func != NULL) {
2350 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2351 }
2352 else {
2353 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2354 type->tp_name, obj);
2355 goto error;
2356 }
2357 }
2358 }
2359
2360 if (reduce_value == NULL)
2361 goto error;
2362
2363 if (PyUnicode_Check(reduce_value)) {
2364 status = save_global(self, obj, reduce_value);
2365 goto done;
2366 }
2367
2368 if (!PyTuple_Check(reduce_value)) {
2369 PyErr_SetString(PicklingError,
2370 "__reduce__ must return a string or tuple");
2371 goto error;
2372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002373
2374 status = save_reduce(self, reduce_value, obj);
2375
2376 if (0) {
2377 error:
2378 status = -1;
2379 }
2380 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002381 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002382 Py_XDECREF(memo_key);
2383 Py_XDECREF(reduce_func);
2384 Py_XDECREF(reduce_value);
2385
2386 return status;
2387}
2388
2389static int
2390dump(PicklerObject *self, PyObject *obj)
2391{
2392 const char stop_op = STOP;
2393
2394 if (self->proto >= 2) {
2395 char header[2];
2396
2397 header[0] = PROTO;
2398 assert(self->proto >= 0 && self->proto < 256);
2399 header[1] = (unsigned char)self->proto;
2400 if (pickler_write(self, header, 2) < 0)
2401 return -1;
2402 }
2403
2404 if (save(self, obj, 0) < 0 ||
2405 pickler_write(self, &stop_op, 1) < 0 ||
2406 pickler_write(self, NULL, 0) < 0)
2407 return -1;
2408
2409 return 0;
2410}
2411
2412PyDoc_STRVAR(Pickler_clear_memo_doc,
2413"clear_memo() -> None. Clears the pickler's \"memo\"."
2414"\n"
2415"The memo is the data structure that remembers which objects the\n"
2416"pickler has already seen, so that shared or recursive objects are\n"
2417"pickled by reference and not by value. This method is useful when\n"
2418"re-using picklers.");
2419
2420static PyObject *
2421Pickler_clear_memo(PicklerObject *self)
2422{
2423 if (self->memo)
2424 PyDict_Clear(self->memo);
2425
2426 Py_RETURN_NONE;
2427}
2428
2429PyDoc_STRVAR(Pickler_dump_doc,
2430"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2431
2432static PyObject *
2433Pickler_dump(PicklerObject *self, PyObject *args)
2434{
2435 PyObject *obj;
2436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002437 /* Check whether the Pickler was initialized correctly (issue3664).
2438 Developers often forget to call __init__() in their subclasses, which
2439 would trigger a segfault without this check. */
2440 if (self->write == NULL) {
2441 PyErr_Format(PicklingError,
2442 "Pickler.__init__() was not called by %s.__init__()",
2443 Py_TYPE(self)->tp_name);
2444 return NULL;
2445 }
2446
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002447 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2448 return NULL;
2449
2450 if (dump(self, obj) < 0)
2451 return NULL;
2452
2453 Py_RETURN_NONE;
2454}
2455
2456static struct PyMethodDef Pickler_methods[] = {
2457 {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
2458 Pickler_dump_doc},
2459 {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
2460 Pickler_clear_memo_doc},
2461 {NULL, NULL} /* sentinel */
2462};
2463
2464static void
2465Pickler_dealloc(PicklerObject *self)
2466{
2467 PyObject_GC_UnTrack(self);
2468
2469 Py_XDECREF(self->write);
2470 Py_XDECREF(self->memo);
2471 Py_XDECREF(self->pers_func);
2472 Py_XDECREF(self->arg);
2473 Py_XDECREF(self->fast_memo);
2474
2475 PyMem_Free(self->write_buf);
2476
2477 Py_TYPE(self)->tp_free((PyObject *)self);
2478}
2479
2480static int
2481Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
2482{
2483 Py_VISIT(self->write);
2484 Py_VISIT(self->memo);
2485 Py_VISIT(self->pers_func);
2486 Py_VISIT(self->arg);
2487 Py_VISIT(self->fast_memo);
2488 return 0;
2489}
2490
2491static int
2492Pickler_clear(PicklerObject *self)
2493{
2494 Py_CLEAR(self->write);
2495 Py_CLEAR(self->memo);
2496 Py_CLEAR(self->pers_func);
2497 Py_CLEAR(self->arg);
2498 Py_CLEAR(self->fast_memo);
2499
2500 PyMem_Free(self->write_buf);
2501 self->write_buf = NULL;
2502
2503 return 0;
2504}
2505
2506PyDoc_STRVAR(Pickler_doc,
2507"Pickler(file, protocol=None)"
2508"\n"
2509"This takes a binary file for writing a pickle data stream.\n"
2510"\n"
2511"The optional protocol argument tells the pickler to use the\n"
2512"given protocol; supported protocols are 0, 1, 2, 3. The default\n"
2513"protocol is 3; a backward-incompatible protocol designed for\n"
2514"Python 3.0.\n"
2515"\n"
2516"Specifying a negative protocol version selects the highest\n"
2517"protocol version supported. The higher the protocol used, the\n"
2518"more recent the version of Python needed to read the pickle\n"
2519"produced.\n"
2520"\n"
2521"The file argument must have a write() method that accepts a single\n"
2522"bytes argument. It can thus be a file object opened for binary\n"
2523"writing, a io.BytesIO instance, or any other custom object that\n"
2524"meets this interface.\n");
2525
2526static int
2527Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2528{
2529 static char *kwlist[] = {"file", "protocol", 0};
2530 PyObject *file;
2531 PyObject *proto_obj = NULL;
2532 long proto = 0;
2533
2534 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2535 kwlist, &file, &proto_obj))
2536 return -1;
2537
2538 /* In case of multiple __init__() calls, clear previous content. */
2539 if (self->write != NULL)
2540 (void)Pickler_clear(self);
2541
2542 if (proto_obj == NULL || proto_obj == Py_None)
2543 proto = DEFAULT_PROTOCOL;
2544 else
2545 proto = PyLong_AsLong(proto_obj);
2546
2547 if (proto < 0)
2548 proto = HIGHEST_PROTOCOL;
2549 if (proto > HIGHEST_PROTOCOL) {
2550 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2551 HIGHEST_PROTOCOL);
2552 return -1;
2553 }
2554
2555 self->proto = proto;
2556 self->bin = proto > 0;
2557 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002558 self->fast = 0;
2559 self->fast_nesting = 0;
2560 self->fast_memo = NULL;
2561
2562 if (!PyObject_HasAttrString(file, "write")) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "file must have a 'write' attribute");
2565 return -1;
2566 }
2567 self->write = PyObject_GetAttrString(file, "write");
2568 if (self->write == NULL)
2569 return -1;
2570 self->buf_size = 0;
2571 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2572 if (self->write_buf == NULL) {
2573 PyErr_NoMemory();
2574 return -1;
2575 }
2576 self->pers_func = NULL;
2577 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2578 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2579 "persistent_id");
2580 if (self->pers_func == NULL)
2581 return -1;
2582 }
2583 self->memo = PyDict_New();
2584 if (self->memo == NULL)
2585 return -1;
2586
2587 return 0;
2588}
2589
2590static PyObject *
2591Pickler_get_memo(PicklerObject *self)
2592{
2593 if (self->memo == NULL)
2594 PyErr_SetString(PyExc_AttributeError, "memo");
2595 else
2596 Py_INCREF(self->memo);
2597 return self->memo;
2598}
2599
2600static int
2601Pickler_set_memo(PicklerObject *self, PyObject *value)
2602{
2603 PyObject *tmp;
2604
2605 if (value == NULL) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "attribute deletion is not supported");
2608 return -1;
2609 }
2610 if (!PyDict_Check(value)) {
2611 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2612 return -1;
2613 }
2614
2615 tmp = self->memo;
2616 Py_INCREF(value);
2617 self->memo = value;
2618 Py_XDECREF(tmp);
2619
2620 return 0;
2621}
2622
2623static PyObject *
2624Pickler_get_persid(PicklerObject *self)
2625{
2626 if (self->pers_func == NULL)
2627 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2628 else
2629 Py_INCREF(self->pers_func);
2630 return self->pers_func;
2631}
2632
2633static int
2634Pickler_set_persid(PicklerObject *self, PyObject *value)
2635{
2636 PyObject *tmp;
2637
2638 if (value == NULL) {
2639 PyErr_SetString(PyExc_TypeError,
2640 "attribute deletion is not supported");
2641 return -1;
2642 }
2643 if (!PyCallable_Check(value)) {
2644 PyErr_SetString(PyExc_TypeError,
2645 "persistent_id must be a callable taking one argument");
2646 return -1;
2647 }
2648
2649 tmp = self->pers_func;
2650 Py_INCREF(value);
2651 self->pers_func = value;
2652 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2653
2654 return 0;
2655}
2656
2657static PyMemberDef Pickler_members[] = {
2658 {"bin", T_INT, offsetof(PicklerObject, bin)},
2659 {"fast", T_INT, offsetof(PicklerObject, fast)},
2660 {NULL}
2661};
2662
2663static PyGetSetDef Pickler_getsets[] = {
2664 {"memo", (getter)Pickler_get_memo,
2665 (setter)Pickler_set_memo},
2666 {"persistent_id", (getter)Pickler_get_persid,
2667 (setter)Pickler_set_persid},
2668 {NULL}
2669};
2670
2671static PyTypeObject Pickler_Type = {
2672 PyVarObject_HEAD_INIT(NULL, 0)
2673 "_pickle.Pickler" , /*tp_name*/
2674 sizeof(PicklerObject), /*tp_basicsize*/
2675 0, /*tp_itemsize*/
2676 (destructor)Pickler_dealloc, /*tp_dealloc*/
2677 0, /*tp_print*/
2678 0, /*tp_getattr*/
2679 0, /*tp_setattr*/
2680 0, /*tp_compare*/
2681 0, /*tp_repr*/
2682 0, /*tp_as_number*/
2683 0, /*tp_as_sequence*/
2684 0, /*tp_as_mapping*/
2685 0, /*tp_hash*/
2686 0, /*tp_call*/
2687 0, /*tp_str*/
2688 0, /*tp_getattro*/
2689 0, /*tp_setattro*/
2690 0, /*tp_as_buffer*/
2691 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
2692 Pickler_doc, /*tp_doc*/
2693 (traverseproc)Pickler_traverse, /*tp_traverse*/
2694 (inquiry)Pickler_clear, /*tp_clear*/
2695 0, /*tp_richcompare*/
2696 0, /*tp_weaklistoffset*/
2697 0, /*tp_iter*/
2698 0, /*tp_iternext*/
2699 Pickler_methods, /*tp_methods*/
2700 Pickler_members, /*tp_members*/
2701 Pickler_getsets, /*tp_getset*/
2702 0, /*tp_base*/
2703 0, /*tp_dict*/
2704 0, /*tp_descr_get*/
2705 0, /*tp_descr_set*/
2706 0, /*tp_dictoffset*/
2707 (initproc)Pickler_init, /*tp_init*/
2708 PyType_GenericAlloc, /*tp_alloc*/
2709 PyType_GenericNew, /*tp_new*/
2710 PyObject_GC_Del, /*tp_free*/
2711 0, /*tp_is_gc*/
2712};
2713
2714/* Temporary helper for calling self.find_class().
2715
2716 XXX: It would be nice to able to avoid Python function call overhead, by
2717 using directly the C version of find_class(), when find_class() is not
2718 overridden by a subclass. Although, this could become rather hackish. A
2719 simpler optimization would be to call the C function when self is not a
2720 subclass instance. */
2721static PyObject *
2722find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2723{
2724 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2725 module_name, global_name);
2726}
2727
2728static int
2729marker(UnpicklerObject *self)
2730{
2731 if (self->num_marks < 1) {
2732 PyErr_SetString(UnpicklingError, "could not find MARK");
2733 return -1;
2734 }
2735
2736 return self->marks[--self->num_marks];
2737}
2738
2739static int
2740load_none(UnpicklerObject *self)
2741{
2742 PDATA_APPEND(self->stack, Py_None, -1);
2743 return 0;
2744}
2745
2746static int
2747bad_readline(void)
2748{
2749 PyErr_SetString(UnpicklingError, "pickle data was truncated");
2750 return -1;
2751}
2752
2753static int
2754load_int(UnpicklerObject *self)
2755{
2756 PyObject *value;
2757 char *endptr, *s;
2758 Py_ssize_t len;
2759 long x;
2760
2761 if ((len = unpickler_readline(self, &s)) < 0)
2762 return -1;
2763 if (len < 2)
2764 return bad_readline();
2765
2766 errno = 0;
2767 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2768 x = strtol(s, &endptr, 0);
2769
2770 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2771 /* Hm, maybe we've got something long. Let's try reading
2772 * it as a Python long object. */
2773 errno = 0;
2774 /* XXX: Same thing about the base here. */
2775 value = PyLong_FromString(s, NULL, 0);
2776 if (value == NULL) {
2777 PyErr_SetString(PyExc_ValueError,
2778 "could not convert string to int");
2779 return -1;
2780 }
2781 }
2782 else {
2783 if (len == 3 && (x == 0 || x == 1)) {
2784 if ((value = PyBool_FromLong(x)) == NULL)
2785 return -1;
2786 }
2787 else {
2788 if ((value = PyLong_FromLong(x)) == NULL)
2789 return -1;
2790 }
2791 }
2792
2793 PDATA_PUSH(self->stack, value, -1);
2794 return 0;
2795}
2796
2797static int
2798load_bool(UnpicklerObject *self, PyObject *boolean)
2799{
2800 assert(boolean == Py_True || boolean == Py_False);
2801 PDATA_APPEND(self->stack, boolean, -1);
2802 return 0;
2803}
2804
/* bytes contains size bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is an
 * historical source of x-platform bugs.
 *
 * The bytes are accumulated in an unsigned long so that no set bit is ever
 * shifted into the sign bit of a signed type, which is undefined behavior
 * when long is 32 bits wide.  Callers in this file pass a size of 1, 2
 * or 4.
 */
static long
calc_binint(char *bytes, int size)
{
    const unsigned char *s = (const unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed:  when bit 31 is set the value is x - 2**32.  Compute that
     * as (low 31 bits) - 2**31, split in two steps so the arithmetic also
     * fits in a 32-bit long.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return (long)(x & 0x7fffffffUL) - 0x7fffffffL - 1L;
    }

    return (long)x;
}
2831
2832static int
2833load_binintx(UnpicklerObject *self, char *s, int size)
2834{
2835 PyObject *value;
2836 long x;
2837
2838 x = calc_binint(s, size);
2839
2840 if ((value = PyLong_FromLong(x)) == NULL)
2841 return -1;
2842
2843 PDATA_PUSH(self->stack, value, -1);
2844 return 0;
2845}
2846
2847static int
2848load_binint(UnpicklerObject *self)
2849{
2850 char *s;
2851
2852 if (unpickler_read(self, &s, 4) < 0)
2853 return -1;
2854
2855 return load_binintx(self, s, 4);
2856}
2857
2858static int
2859load_binint1(UnpicklerObject *self)
2860{
2861 char *s;
2862
2863 if (unpickler_read(self, &s, 1) < 0)
2864 return -1;
2865
2866 return load_binintx(self, s, 1);
2867}
2868
2869static int
2870load_binint2(UnpicklerObject *self)
2871{
2872 char *s;
2873
2874 if (unpickler_read(self, &s, 2) < 0)
2875 return -1;
2876
2877 return load_binintx(self, s, 2);
2878}
2879
2880static int
2881load_long(UnpicklerObject *self)
2882{
2883 PyObject *value;
Mark Dickinson8dd05142009-01-20 20:43:58 +00002884 char *s, *ss;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 Py_ssize_t len;
2886
2887 if ((len = unpickler_readline(self, &s)) < 0)
2888 return -1;
2889 if (len < 2)
2890 return bad_readline();
2891
Mark Dickinson8dd05142009-01-20 20:43:58 +00002892 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
2893 the 'L' before calling PyLong_FromString. In order to maintain
2894 compatibility with Python 3.0.0, we don't actually *require*
2895 the 'L' to be present. */
2896 if (s[len-2] == 'L') {
2897 ss = (char *)PyMem_Malloc(len-1);
2898 if (ss == NULL) {
2899 PyErr_NoMemory();
2900 return -1;
2901 }
2902 strncpy(ss, s, len-2);
2903 ss[len-2] = '\0';
2904
2905 /* XXX: Should the base argument explicitly set to 10? */
2906 value = PyLong_FromString(ss, NULL, 0);
2907 PyMem_Free(ss);
2908 }
2909 else {
2910 value = PyLong_FromString(s, NULL, 0);
2911 }
2912 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002913 return -1;
2914
2915 PDATA_PUSH(self->stack, value, -1);
2916 return 0;
2917}
2918
2919/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2920 * data following.
2921 */
2922static int
2923load_counted_long(UnpicklerObject *self, int size)
2924{
2925 PyObject *value;
2926 char *nbytes;
2927 char *pdata;
2928
2929 assert(size == 1 || size == 4);
2930 if (unpickler_read(self, &nbytes, size) < 0)
2931 return -1;
2932
2933 size = calc_binint(nbytes, size);
2934 if (size < 0) {
2935 /* Corrupt or hostile pickle -- we never write one like this */
2936 PyErr_SetString(UnpicklingError,
2937 "LONG pickle has negative byte count");
2938 return -1;
2939 }
2940
2941 if (size == 0)
2942 value = PyLong_FromLong(0L);
2943 else {
2944 /* Read the raw little-endian bytes and convert. */
2945 if (unpickler_read(self, &pdata, size) < 0)
2946 return -1;
2947 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2948 1 /* little endian */ , 1 /* signed */ );
2949 }
2950 if (value == NULL)
2951 return -1;
2952 PDATA_PUSH(self->stack, value, -1);
2953 return 0;
2954}
2955
2956static int
2957load_float(UnpicklerObject *self)
2958{
2959 PyObject *value;
2960 char *endptr, *s;
2961 Py_ssize_t len;
2962 double d;
2963
2964 if ((len = unpickler_readline(self, &s)) < 0)
2965 return -1;
2966 if (len < 2)
2967 return bad_readline();
2968
2969 errno = 0;
2970 d = PyOS_ascii_strtod(s, &endptr);
2971
2972 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2973 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2974 return -1;
2975 }
2976
2977 if ((value = PyFloat_FromDouble(d)) == NULL)
2978 return -1;
2979
2980 PDATA_PUSH(self->stack, value, -1);
2981 return 0;
2982}
2983
2984static int
2985load_binfloat(UnpicklerObject *self)
2986{
2987 PyObject *value;
2988 double x;
2989 char *s;
2990
2991 if (unpickler_read(self, &s, 8) < 0)
2992 return -1;
2993
2994 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2995 if (x == -1.0 && PyErr_Occurred())
2996 return -1;
2997
2998 if ((value = PyFloat_FromDouble(x)) == NULL)
2999 return -1;
3000
3001 PDATA_PUSH(self->stack, value, -1);
3002 return 0;
3003}
3004
3005static int
3006load_string(UnpicklerObject *self)
3007{
3008 PyObject *bytes;
3009 PyObject *str = NULL;
3010 Py_ssize_t len;
3011 char *s, *p;
3012
3013 if ((len = unpickler_readline(self, &s)) < 0)
3014 return -1;
3015 if (len < 3)
3016 return bad_readline();
3017 if ((s = strdup(s)) == NULL) {
3018 PyErr_NoMemory();
3019 return -1;
3020 }
3021
3022 /* Strip outermost quotes */
3023 while (s[len - 1] <= ' ')
3024 len--;
3025 if (s[0] == '"' && s[len - 1] == '"') {
3026 s[len - 1] = '\0';
3027 p = s + 1;
3028 len -= 2;
3029 }
3030 else if (s[0] == '\'' && s[len - 1] == '\'') {
3031 s[len - 1] = '\0';
3032 p = s + 1;
3033 len -= 2;
3034 }
3035 else {
3036 free(s);
3037 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3038 return -1;
3039 }
3040
3041 /* Use the PyBytes API to decode the string, since that is what is used
3042 to encode, and then coerce the result to Unicode. */
3043 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3044 free(s);
3045 if (bytes == NULL)
3046 return -1;
3047 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3048 Py_DECREF(bytes);
3049 if (str == NULL)
3050 return -1;
3051
3052 PDATA_PUSH(self->stack, str, -1);
3053 return 0;
3054}
3055
3056static int
3057load_binbytes(UnpicklerObject *self)
3058{
3059 PyObject *bytes;
3060 long x;
3061 char *s;
3062
3063 if (unpickler_read(self, &s, 4) < 0)
3064 return -1;
3065
3066 x = calc_binint(s, 4);
3067 if (x < 0) {
3068 PyErr_SetString(UnpicklingError,
3069 "BINBYTES pickle has negative byte count");
3070 return -1;
3071 }
3072
3073 if (unpickler_read(self, &s, x) < 0)
3074 return -1;
3075 bytes = PyBytes_FromStringAndSize(s, x);
3076 if (bytes == NULL)
3077 return -1;
3078
3079 PDATA_PUSH(self->stack, bytes, -1);
3080 return 0;
3081}
3082
3083static int
3084load_short_binbytes(UnpicklerObject *self)
3085{
3086 PyObject *bytes;
3087 unsigned char x;
3088 char *s;
3089
3090 if (unpickler_read(self, &s, 1) < 0)
3091 return -1;
3092
3093 x = (unsigned char)s[0];
3094
3095 if (unpickler_read(self, &s, x) < 0)
3096 return -1;
3097
3098 bytes = PyBytes_FromStringAndSize(s, x);
3099 if (bytes == NULL)
3100 return -1;
3101
3102 PDATA_PUSH(self->stack, bytes, -1);
3103 return 0;
3104}
3105
3106static int
3107load_binstring(UnpicklerObject *self)
3108{
3109 PyObject *str;
3110 long x;
3111 char *s;
3112
3113 if (unpickler_read(self, &s, 4) < 0)
3114 return -1;
3115
3116 x = calc_binint(s, 4);
3117 if (x < 0) {
3118 PyErr_SetString(UnpicklingError,
3119 "BINSTRING pickle has negative byte count");
3120 return -1;
3121 }
3122
3123 if (unpickler_read(self, &s, x) < 0)
3124 return -1;
3125
3126 /* Convert Python 2.x strings to unicode. */
3127 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3128 if (str == NULL)
3129 return -1;
3130
3131 PDATA_PUSH(self->stack, str, -1);
3132 return 0;
3133}
3134
3135static int
3136load_short_binstring(UnpicklerObject *self)
3137{
3138 PyObject *str;
3139 unsigned char x;
3140 char *s;
3141
3142 if (unpickler_read(self, &s, 1) < 0)
3143 return -1;
3144
3145 x = (unsigned char)s[0];
3146
3147 if (unpickler_read(self, &s, x) < 0)
3148 return -1;
3149
3150 /* Convert Python 2.x strings to unicode. */
3151 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3152 if (str == NULL)
3153 return -1;
3154
3155 PDATA_PUSH(self->stack, str, -1);
3156 return 0;
3157}
3158
3159static int
3160load_unicode(UnpicklerObject *self)
3161{
3162 PyObject *str;
3163 Py_ssize_t len;
3164 char *s;
3165
3166 if ((len = unpickler_readline(self, &s)) < 0)
3167 return -1;
3168 if (len < 1)
3169 return bad_readline();
3170
3171 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3172 if (str == NULL)
3173 return -1;
3174
3175 PDATA_PUSH(self->stack, str, -1);
3176 return 0;
3177}
3178
3179static int
3180load_binunicode(UnpicklerObject *self)
3181{
3182 PyObject *str;
3183 long size;
3184 char *s;
3185
3186 if (unpickler_read(self, &s, 4) < 0)
3187 return -1;
3188
3189 size = calc_binint(s, 4);
3190 if (size < 0) {
3191 PyErr_SetString(UnpicklingError,
3192 "BINUNICODE pickle has negative byte count");
3193 return -1;
3194 }
3195
3196 if (unpickler_read(self, &s, size) < 0)
3197 return -1;
3198
3199 str = PyUnicode_DecodeUTF8(s, size, NULL);
3200 if (str == NULL)
3201 return -1;
3202
3203 PDATA_PUSH(self->stack, str, -1);
3204 return 0;
3205}
3206
3207static int
3208load_tuple(UnpicklerObject *self)
3209{
3210 PyObject *tuple;
3211 int i;
3212
3213 if ((i = marker(self)) < 0)
3214 return -1;
3215
3216 tuple = Pdata_poptuple(self->stack, i);
3217 if (tuple == NULL)
3218 return -1;
3219 PDATA_PUSH(self->stack, tuple, -1);
3220 return 0;
3221}
3222
3223static int
3224load_counted_tuple(UnpicklerObject *self, int len)
3225{
3226 PyObject *tuple;
3227
3228 tuple = PyTuple_New(len);
3229 if (tuple == NULL)
3230 return -1;
3231
3232 while (--len >= 0) {
3233 PyObject *item;
3234
3235 PDATA_POP(self->stack, item);
3236 if (item == NULL)
3237 return -1;
3238 PyTuple_SET_ITEM(tuple, len, item);
3239 }
3240 PDATA_PUSH(self->stack, tuple, -1);
3241 return 0;
3242}
3243
3244static int
3245load_empty_list(UnpicklerObject *self)
3246{
3247 PyObject *list;
3248
3249 if ((list = PyList_New(0)) == NULL)
3250 return -1;
3251 PDATA_PUSH(self->stack, list, -1);
3252 return 0;
3253}
3254
3255static int
3256load_empty_dict(UnpicklerObject *self)
3257{
3258 PyObject *dict;
3259
3260 if ((dict = PyDict_New()) == NULL)
3261 return -1;
3262 PDATA_PUSH(self->stack, dict, -1);
3263 return 0;
3264}
3265
3266static int
3267load_list(UnpicklerObject *self)
3268{
3269 PyObject *list;
3270 int i;
3271
3272 if ((i = marker(self)) < 0)
3273 return -1;
3274
3275 list = Pdata_poplist(self->stack, i);
3276 if (list == NULL)
3277 return -1;
3278 PDATA_PUSH(self->stack, list, -1);
3279 return 0;
3280}
3281
3282static int
3283load_dict(UnpicklerObject *self)
3284{
3285 PyObject *dict, *key, *value;
3286 int i, j, k;
3287
3288 if ((i = marker(self)) < 0)
3289 return -1;
3290 j = self->stack->length;
3291
3292 if ((dict = PyDict_New()) == NULL)
3293 return -1;
3294
3295 for (k = i + 1; k < j; k += 2) {
3296 key = self->stack->data[k - 1];
3297 value = self->stack->data[k];
3298 if (PyDict_SetItem(dict, key, value) < 0) {
3299 Py_DECREF(dict);
3300 return -1;
3301 }
3302 }
3303 Pdata_clear(self->stack, i);
3304 PDATA_PUSH(self->stack, dict, -1);
3305 return 0;
3306}
3307
3308static PyObject *
3309instantiate(PyObject *cls, PyObject *args)
3310{
3311 PyObject *r = NULL;
3312
3313 /* XXX: The pickle.py module does not create instances this way when the
3314 args tuple is empty. See Unpickler._instantiate(). */
3315 if ((r = PyObject_CallObject(cls, args)))
3316 return r;
3317
3318 /* XXX: Is this still nescessary? */
3319 {
3320 PyObject *tp, *v, *tb, *tmp_value;
3321
3322 PyErr_Fetch(&tp, &v, &tb);
3323 tmp_value = v;
3324 /* NULL occurs when there was a KeyboardInterrupt */
3325 if (tmp_value == NULL)
3326 tmp_value = Py_None;
3327 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3328 Py_XDECREF(v);
3329 v = r;
3330 }
3331 PyErr_Restore(tp, v, tb);
3332 }
3333 return NULL;
3334}
3335
3336static int
3337load_obj(UnpicklerObject *self)
3338{
3339 PyObject *cls, *args, *obj = NULL;
3340 int i;
3341
3342 if ((i = marker(self)) < 0)
3343 return -1;
3344
3345 args = Pdata_poptuple(self->stack, i + 1);
3346 if (args == NULL)
3347 return -1;
3348
3349 PDATA_POP(self->stack, cls);
3350 if (cls) {
3351 obj = instantiate(cls, args);
3352 Py_DECREF(cls);
3353 }
3354 Py_DECREF(args);
3355 if (obj == NULL)
3356 return -1;
3357
3358 PDATA_PUSH(self->stack, obj, -1);
3359 return 0;
3360}
3361
3362static int
3363load_inst(UnpicklerObject *self)
3364{
3365 PyObject *cls = NULL;
3366 PyObject *args = NULL;
3367 PyObject *obj = NULL;
3368 PyObject *module_name;
3369 PyObject *class_name;
3370 Py_ssize_t len;
3371 int i;
3372 char *s;
3373
3374 if ((i = marker(self)) < 0)
3375 return -1;
3376 if ((len = unpickler_readline(self, &s)) < 0)
3377 return -1;
3378 if (len < 2)
3379 return bad_readline();
3380
3381 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3382 identifiers are permitted in Python 3.0, since the INST opcode is only
3383 supported by older protocols on Python 2.x. */
3384 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3385 if (module_name == NULL)
3386 return -1;
3387
3388 if ((len = unpickler_readline(self, &s)) >= 0) {
3389 if (len < 2)
3390 return bad_readline();
3391 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3392 if (class_name == NULL) {
3393 cls = find_class(self, module_name, class_name);
3394 Py_DECREF(class_name);
3395 }
3396 }
3397 Py_DECREF(module_name);
3398
3399 if (cls == NULL)
3400 return -1;
3401
3402 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3403 obj = instantiate(cls, args);
3404 Py_DECREF(args);
3405 }
3406 Py_DECREF(cls);
3407
3408 if (obj == NULL)
3409 return -1;
3410
3411 PDATA_PUSH(self->stack, obj, -1);
3412 return 0;
3413}
3414
3415static int
3416load_newobj(UnpicklerObject *self)
3417{
3418 PyObject *args = NULL;
3419 PyObject *clsraw = NULL;
3420 PyTypeObject *cls; /* clsraw cast to its true type */
3421 PyObject *obj;
3422
3423 /* Stack is ... cls argtuple, and we want to call
3424 * cls.__new__(cls, *argtuple).
3425 */
3426 PDATA_POP(self->stack, args);
3427 if (args == NULL)
3428 goto error;
3429 if (!PyTuple_Check(args)) {
3430 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3431 goto error;
3432 }
3433
3434 PDATA_POP(self->stack, clsraw);
3435 cls = (PyTypeObject *)clsraw;
3436 if (cls == NULL)
3437 goto error;
3438 if (!PyType_Check(cls)) {
3439 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3440 "isn't a type object");
3441 goto error;
3442 }
3443 if (cls->tp_new == NULL) {
3444 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3445 "has NULL tp_new");
3446 goto error;
3447 }
3448
3449 /* Call __new__. */
3450 obj = cls->tp_new(cls, args, NULL);
3451 if (obj == NULL)
3452 goto error;
3453
3454 Py_DECREF(args);
3455 Py_DECREF(clsraw);
3456 PDATA_PUSH(self->stack, obj, -1);
3457 return 0;
3458
3459 error:
3460 Py_XDECREF(args);
3461 Py_XDECREF(clsraw);
3462 return -1;
3463}
3464
3465static int
3466load_global(UnpicklerObject *self)
3467{
3468 PyObject *global = NULL;
3469 PyObject *module_name;
3470 PyObject *global_name;
3471 Py_ssize_t len;
3472 char *s;
3473
3474 if ((len = unpickler_readline(self, &s)) < 0)
3475 return -1;
3476 if (len < 2)
3477 return bad_readline();
3478 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3479 if (!module_name)
3480 return -1;
3481
3482 if ((len = unpickler_readline(self, &s)) >= 0) {
3483 if (len < 2) {
3484 Py_DECREF(module_name);
3485 return bad_readline();
3486 }
3487 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3488 if (global_name) {
3489 global = find_class(self, module_name, global_name);
3490 Py_DECREF(global_name);
3491 }
3492 }
3493 Py_DECREF(module_name);
3494
3495 if (global == NULL)
3496 return -1;
3497 PDATA_PUSH(self->stack, global, -1);
3498 return 0;
3499}
3500
3501static int
3502load_persid(UnpicklerObject *self)
3503{
3504 PyObject *pid;
3505 Py_ssize_t len;
3506 char *s;
3507
3508 if (self->pers_func) {
3509 if ((len = unpickler_readline(self, &s)) < 0)
3510 return -1;
3511 if (len < 2)
3512 return bad_readline();
3513
3514 pid = PyBytes_FromStringAndSize(s, len - 1);
3515 if (pid == NULL)
3516 return -1;
3517
3518 /* Ugh... this does not leak since unpickler_call() steals the
3519 reference to pid first. */
3520 pid = unpickler_call(self, self->pers_func, pid);
3521 if (pid == NULL)
3522 return -1;
3523
3524 PDATA_PUSH(self->stack, pid, -1);
3525 return 0;
3526 }
3527 else {
3528 PyErr_SetString(UnpicklingError,
3529 "A load persistent id instruction was encountered,\n"
3530 "but no persistent_load function was specified.");
3531 return -1;
3532 }
3533}
3534
3535static int
3536load_binpersid(UnpicklerObject *self)
3537{
3538 PyObject *pid;
3539
3540 if (self->pers_func) {
3541 PDATA_POP(self->stack, pid);
3542 if (pid == NULL)
3543 return -1;
3544
3545 /* Ugh... this does not leak since unpickler_call() steals the
3546 reference to pid first. */
3547 pid = unpickler_call(self, self->pers_func, pid);
3548 if (pid == NULL)
3549 return -1;
3550
3551 PDATA_PUSH(self->stack, pid, -1);
3552 return 0;
3553 }
3554 else {
3555 PyErr_SetString(UnpicklingError,
3556 "A load persistent id instruction was encountered,\n"
3557 "but no persistent_load function was specified.");
3558 return -1;
3559 }
3560}
3561
3562static int
3563load_pop(UnpicklerObject *self)
3564{
3565 int len;
3566
3567 if ((len = self->stack->length) <= 0)
3568 return stack_underflow();
3569
3570 /* Note that we split the (pickle.py) stack into two stacks,
3571 * an object stack and a mark stack. We have to be clever and
3572 * pop the right one. We do this by looking at the top of the
3573 * mark stack.
3574 */
3575
3576 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3577 self->num_marks--;
3578 else {
3579 len--;
3580 Py_DECREF(self->stack->data[len]);
3581 self->stack->length = len;
3582 }
3583
3584 return 0;
3585}
3586
3587static int
3588load_pop_mark(UnpicklerObject *self)
3589{
3590 int i;
3591
3592 if ((i = marker(self)) < 0)
3593 return -1;
3594
3595 Pdata_clear(self->stack, i);
3596
3597 return 0;
3598}
3599
3600static int
3601load_dup(UnpicklerObject *self)
3602{
3603 PyObject *last;
3604 int len;
3605
3606 if ((len = self->stack->length) <= 0)
3607 return stack_underflow();
3608 last = self->stack->data[len - 1];
3609 PDATA_APPEND(self->stack, last, -1);
3610 return 0;
3611}
3612
3613static int
3614load_get(UnpicklerObject *self)
3615{
3616 PyObject *key, *value;
3617 Py_ssize_t len;
3618 char *s;
3619
3620 if ((len = unpickler_readline(self, &s)) < 0)
3621 return -1;
3622 if (len < 2)
3623 return bad_readline();
3624
3625 key = PyLong_FromString(s, NULL, 10);
3626 if (key == NULL)
3627 return -1;
3628
3629 value = PyDict_GetItemWithError(self->memo, key);
3630 if (value == NULL) {
3631 if (!PyErr_Occurred())
3632 PyErr_SetObject(PyExc_KeyError, key);
3633 Py_DECREF(key);
3634 return -1;
3635 }
3636 Py_DECREF(key);
3637
3638 PDATA_APPEND(self->stack, value, -1);
3639 return 0;
3640}
3641
3642static int
3643load_binget(UnpicklerObject *self)
3644{
3645 PyObject *key, *value;
3646 char *s;
3647
3648 if (unpickler_read(self, &s, 1) < 0)
3649 return -1;
3650
3651 /* Here, the unsigned cast is necessary to avoid negative values. */
3652 key = PyLong_FromLong((long)(unsigned char)s[0]);
3653 if (key == NULL)
3654 return -1;
3655
3656 value = PyDict_GetItemWithError(self->memo, key);
3657 if (value == NULL) {
3658 if (!PyErr_Occurred())
3659 PyErr_SetObject(PyExc_KeyError, key);
3660 Py_DECREF(key);
3661 return -1;
3662 }
3663 Py_DECREF(key);
3664
3665 PDATA_APPEND(self->stack, value, -1);
3666 return 0;
3667}
3668
3669static int
3670load_long_binget(UnpicklerObject *self)
3671{
3672 PyObject *key, *value;
3673 char *s;
3674 long k;
3675
3676 if (unpickler_read(self, &s, 4) < 0)
3677 return -1;
3678
3679 k = (long)(unsigned char)s[0];
3680 k |= (long)(unsigned char)s[1] << 8;
3681 k |= (long)(unsigned char)s[2] << 16;
3682 k |= (long)(unsigned char)s[3] << 24;
3683
3684 key = PyLong_FromLong(k);
3685 if (key == NULL)
3686 return -1;
3687
3688 value = PyDict_GetItemWithError(self->memo, key);
3689 if (value == NULL) {
3690 if (!PyErr_Occurred())
3691 PyErr_SetObject(PyExc_KeyError, key);
3692 Py_DECREF(key);
3693 return -1;
3694 }
3695 Py_DECREF(key);
3696
3697 PDATA_APPEND(self->stack, value, -1);
3698 return 0;
3699}
3700
3701/* Push an object from the extension registry (EXT[124]). nbytes is
3702 * the number of bytes following the opcode, holding the index (code) value.
3703 */
3704static int
3705load_extension(UnpicklerObject *self, int nbytes)
3706{
3707 char *codebytes; /* the nbytes bytes after the opcode */
3708 long code; /* calc_binint returns long */
3709 PyObject *py_code; /* code as a Python int */
3710 PyObject *obj; /* the object to push */
3711 PyObject *pair; /* (module_name, class_name) */
3712 PyObject *module_name, *class_name;
3713
3714 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3715 if (unpickler_read(self, &codebytes, nbytes) < 0)
3716 return -1;
3717 code = calc_binint(codebytes, nbytes);
3718 if (code <= 0) { /* note that 0 is forbidden */
3719 /* Corrupt or hostile pickle. */
3720 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3721 return -1;
3722 }
3723
3724 /* Look for the code in the cache. */
3725 py_code = PyLong_FromLong(code);
3726 if (py_code == NULL)
3727 return -1;
3728 obj = PyDict_GetItem(extension_cache, py_code);
3729 if (obj != NULL) {
3730 /* Bingo. */
3731 Py_DECREF(py_code);
3732 PDATA_APPEND(self->stack, obj, -1);
3733 return 0;
3734 }
3735
3736 /* Look up the (module_name, class_name) pair. */
3737 pair = PyDict_GetItem(inverted_registry, py_code);
3738 if (pair == NULL) {
3739 Py_DECREF(py_code);
3740 PyErr_Format(PyExc_ValueError, "unregistered extension "
3741 "code %ld", code);
3742 return -1;
3743 }
3744 /* Since the extension registry is manipulable via Python code,
3745 * confirm that pair is really a 2-tuple of strings.
3746 */
3747 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3748 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3749 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3750 Py_DECREF(py_code);
3751 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3752 "isn't a 2-tuple of strings", code);
3753 return -1;
3754 }
3755 /* Load the object. */
3756 obj = find_class(self, module_name, class_name);
3757 if (obj == NULL) {
3758 Py_DECREF(py_code);
3759 return -1;
3760 }
3761 /* Cache code -> obj. */
3762 code = PyDict_SetItem(extension_cache, py_code, obj);
3763 Py_DECREF(py_code);
3764 if (code < 0) {
3765 Py_DECREF(obj);
3766 return -1;
3767 }
3768 PDATA_PUSH(self->stack, obj, -1);
3769 return 0;
3770}
3771
3772static int
3773load_put(UnpicklerObject *self)
3774{
3775 PyObject *key, *value;
3776 Py_ssize_t len;
3777 char *s;
3778 int x;
3779
3780 if ((len = unpickler_readline(self, &s)) < 0)
3781 return -1;
3782 if (len < 2)
3783 return bad_readline();
3784 if ((x = self->stack->length) <= 0)
3785 return stack_underflow();
3786
3787 key = PyLong_FromString(s, NULL, 10);
3788 if (key == NULL)
3789 return -1;
3790 value = self->stack->data[x - 1];
3791
3792 x = PyDict_SetItem(self->memo, key, value);
3793 Py_DECREF(key);
3794 return x;
3795}
3796
3797static int
3798load_binput(UnpicklerObject *self)
3799{
3800 PyObject *key, *value;
3801 char *s;
3802 int x;
3803
3804 if (unpickler_read(self, &s, 1) < 0)
3805 return -1;
3806 if ((x = self->stack->length) <= 0)
3807 return stack_underflow();
3808
3809 key = PyLong_FromLong((long)(unsigned char)s[0]);
3810 if (key == NULL)
3811 return -1;
3812 value = self->stack->data[x - 1];
3813
3814 x = PyDict_SetItem(self->memo, key, value);
3815 Py_DECREF(key);
3816 return x;
3817}
3818
3819static int
3820load_long_binput(UnpicklerObject *self)
3821{
3822 PyObject *key, *value;
3823 long k;
3824 char *s;
3825 int x;
3826
3827 if (unpickler_read(self, &s, 4) < 0)
3828 return -1;
3829 if ((x = self->stack->length) <= 0)
3830 return stack_underflow();
3831
3832 k = (long)(unsigned char)s[0];
3833 k |= (long)(unsigned char)s[1] << 8;
3834 k |= (long)(unsigned char)s[2] << 16;
3835 k |= (long)(unsigned char)s[3] << 24;
3836
3837 key = PyLong_FromLong(k);
3838 if (key == NULL)
3839 return -1;
3840 value = self->stack->data[x - 1];
3841
3842 x = PyDict_SetItem(self->memo, key, value);
3843 Py_DECREF(key);
3844 return x;
3845}
3846
3847static int
3848do_append(UnpicklerObject *self, int x)
3849{
3850 PyObject *value;
3851 PyObject *list;
3852 int len, i;
3853
3854 len = self->stack->length;
3855 if (x > len || x <= 0)
3856 return stack_underflow();
3857 if (len == x) /* nothing to do */
3858 return 0;
3859
3860 list = self->stack->data[x - 1];
3861
3862 if (PyList_Check(list)) {
3863 PyObject *slice;
3864 Py_ssize_t list_len;
3865
3866 slice = Pdata_poplist(self->stack, x);
3867 if (!slice)
3868 return -1;
3869 list_len = PyList_GET_SIZE(list);
3870 i = PyList_SetSlice(list, list_len, list_len, slice);
3871 Py_DECREF(slice);
3872 return i;
3873 }
3874 else {
3875 PyObject *append_func;
3876
3877 append_func = PyObject_GetAttrString(list, "append");
3878 if (append_func == NULL)
3879 return -1;
3880 for (i = x; i < len; i++) {
3881 PyObject *result;
3882
3883 value = self->stack->data[i];
3884 result = unpickler_call(self, append_func, value);
3885 if (result == NULL) {
3886 Pdata_clear(self->stack, i + 1);
3887 self->stack->length = x;
3888 return -1;
3889 }
3890 Py_DECREF(result);
3891 }
3892 self->stack->length = x;
3893 }
3894
3895 return 0;
3896}
3897
3898static int
3899load_append(UnpicklerObject *self)
3900{
3901 return do_append(self, self->stack->length - 1);
3902}
3903
3904static int
3905load_appends(UnpicklerObject *self)
3906{
3907 return do_append(self, marker(self));
3908}
3909
3910static int
3911do_setitems(UnpicklerObject *self, int x)
3912{
3913 PyObject *value, *key;
3914 PyObject *dict;
3915 int len, i;
3916 int status = 0;
3917
3918 len = self->stack->length;
3919 if (x > len || x <= 0)
3920 return stack_underflow();
3921 if (len == x) /* nothing to do */
3922 return 0;
3923 if ((len - x) % 2 != 0) {
3924 /* Currupt or hostile pickle -- we never write one like this. */
3925 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3926 return -1;
3927 }
3928
3929 /* Here, dict does not actually need to be a PyDict; it could be anything
3930 that supports the __setitem__ attribute. */
3931 dict = self->stack->data[x - 1];
3932
3933 for (i = x + 1; i < len; i += 2) {
3934 key = self->stack->data[i - 1];
3935 value = self->stack->data[i];
3936 if (PyObject_SetItem(dict, key, value) < 0) {
3937 status = -1;
3938 break;
3939 }
3940 }
3941
3942 Pdata_clear(self->stack, x);
3943 return status;
3944}
3945
3946static int
3947load_setitem(UnpicklerObject *self)
3948{
3949 return do_setitems(self, self->stack->length - 2);
3950}
3951
3952static int
3953load_setitems(UnpicklerObject *self)
3954{
3955 return do_setitems(self, marker(self));
3956}
3957
3958static int
3959load_build(UnpicklerObject *self)
3960{
3961 PyObject *state, *inst, *slotstate;
3962 PyObject *setstate;
3963 int status = 0;
3964
3965 /* Stack is ... instance, state. We want to leave instance at
3966 * the stack top, possibly mutated via instance.__setstate__(state).
3967 */
3968 if (self->stack->length < 2)
3969 return stack_underflow();
3970
3971 PDATA_POP(self->stack, state);
3972 if (state == NULL)
3973 return -1;
3974
3975 inst = self->stack->data[self->stack->length - 1];
3976
3977 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003978 if (setstate == NULL) {
3979 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3980 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003981 else {
3982 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003983 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003984 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003985 }
3986 else {
3987 PyObject *result;
3988
3989 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003990 /* Ugh... this does not leak since unpickler_call() steals the
3991 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003992 result = unpickler_call(self, setstate, state);
3993 Py_DECREF(setstate);
3994 if (result == NULL)
3995 return -1;
3996 Py_DECREF(result);
3997 return 0;
3998 }
3999
4000 /* A default __setstate__. First see whether state embeds a
4001 * slot state dict too (a proto 2 addition).
4002 */
4003 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
4004 PyObject *tmp = state;
4005
4006 state = PyTuple_GET_ITEM(tmp, 0);
4007 slotstate = PyTuple_GET_ITEM(tmp, 1);
4008 Py_INCREF(state);
4009 Py_INCREF(slotstate);
4010 Py_DECREF(tmp);
4011 }
4012 else
4013 slotstate = NULL;
4014
4015 /* Set inst.__dict__ from the state dict (if any). */
4016 if (state != Py_None) {
4017 PyObject *dict;
4018
4019 if (!PyDict_Check(state)) {
4020 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4021 goto error;
4022 }
4023 dict = PyObject_GetAttrString(inst, "__dict__");
4024 if (dict == NULL)
4025 goto error;
4026
4027 PyDict_Update(dict, state);
4028 Py_DECREF(dict);
4029 }
4030
4031 /* Also set instance attributes from the slotstate dict (if any). */
4032 if (slotstate != NULL) {
4033 PyObject *d_key, *d_value;
4034 Py_ssize_t i;
4035
4036 if (!PyDict_Check(slotstate)) {
4037 PyErr_SetString(UnpicklingError,
4038 "slot state is not a dictionary");
4039 goto error;
4040 }
4041 i = 0;
4042 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4043 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4044 goto error;
4045 }
4046 }
4047
4048 if (0) {
4049 error:
4050 status = -1;
4051 }
4052
4053 Py_DECREF(state);
4054 Py_XDECREF(slotstate);
4055 return status;
4056}
4057
4058static int
4059load_mark(UnpicklerObject *self)
4060{
4061
4062 /* Note that we split the (pickle.py) stack into two stacks, an
4063 * object stack and a mark stack. Here we push a mark onto the
4064 * mark stack.
4065 */
4066
4067 if ((self->num_marks + 1) >= self->marks_size) {
4068 size_t alloc;
4069 int *marks;
4070
4071 /* Use the size_t type to check for overflow. */
4072 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004073 if (alloc > PY_SSIZE_T_MAX ||
4074 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004075 PyErr_NoMemory();
4076 return -1;
4077 }
4078
4079 if (self->marks == NULL)
4080 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4081 else
4082 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4083 if (marks == NULL) {
4084 PyErr_NoMemory();
4085 return -1;
4086 }
4087 self->marks = marks;
4088 self->marks_size = (Py_ssize_t)alloc;
4089 }
4090
4091 self->marks[self->num_marks++] = self->stack->length;
4092
4093 return 0;
4094}
4095
4096static int
4097load_reduce(UnpicklerObject *self)
4098{
4099 PyObject *callable = NULL;
4100 PyObject *argtup = NULL;
4101 PyObject *obj = NULL;
4102
4103 PDATA_POP(self->stack, argtup);
4104 if (argtup == NULL)
4105 return -1;
4106 PDATA_POP(self->stack, callable);
4107 if (callable) {
4108 obj = instantiate(callable, argtup);
4109 Py_DECREF(callable);
4110 }
4111 Py_DECREF(argtup);
4112
4113 if (obj == NULL)
4114 return -1;
4115
4116 PDATA_PUSH(self->stack, obj, -1);
4117 return 0;
4118}
4119
4120/* Just raises an error if we don't know the protocol specified. PROTO
4121 * is the first opcode for protocols >= 2.
4122 */
4123static int
4124load_proto(UnpicklerObject *self)
4125{
4126 char *s;
4127 int i;
4128
4129 if (unpickler_read(self, &s, 1) < 0)
4130 return -1;
4131
4132 i = (unsigned char)s[0];
4133 if (i <= HIGHEST_PROTOCOL)
4134 return 0;
4135
4136 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4137 return -1;
4138}
4139
4140static PyObject *
4141load(UnpicklerObject *self)
4142{
4143 PyObject *err;
4144 PyObject *value = NULL;
4145 char *s;
4146
4147 self->num_marks = 0;
4148 if (self->stack->length)
4149 Pdata_clear(self->stack, 0);
4150
4151 /* Convenient macros for the dispatch while-switch loop just below. */
4152#define OP(opcode, load_func) \
4153 case opcode: if (load_func(self) < 0) break; continue;
4154
4155#define OP_ARG(opcode, load_func, arg) \
4156 case opcode: if (load_func(self, (arg)) < 0) break; continue;
4157
4158 while (1) {
4159 if (unpickler_read(self, &s, 1) < 0)
4160 break;
4161
4162 switch ((enum opcode)s[0]) {
4163 OP(NONE, load_none)
4164 OP(BININT, load_binint)
4165 OP(BININT1, load_binint1)
4166 OP(BININT2, load_binint2)
4167 OP(INT, load_int)
4168 OP(LONG, load_long)
4169 OP_ARG(LONG1, load_counted_long, 1)
4170 OP_ARG(LONG4, load_counted_long, 4)
4171 OP(FLOAT, load_float)
4172 OP(BINFLOAT, load_binfloat)
4173 OP(BINBYTES, load_binbytes)
4174 OP(SHORT_BINBYTES, load_short_binbytes)
4175 OP(BINSTRING, load_binstring)
4176 OP(SHORT_BINSTRING, load_short_binstring)
4177 OP(STRING, load_string)
4178 OP(UNICODE, load_unicode)
4179 OP(BINUNICODE, load_binunicode)
4180 OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
4181 OP_ARG(TUPLE1, load_counted_tuple, 1)
4182 OP_ARG(TUPLE2, load_counted_tuple, 2)
4183 OP_ARG(TUPLE3, load_counted_tuple, 3)
4184 OP(TUPLE, load_tuple)
4185 OP(EMPTY_LIST, load_empty_list)
4186 OP(LIST, load_list)
4187 OP(EMPTY_DICT, load_empty_dict)
4188 OP(DICT, load_dict)
4189 OP(OBJ, load_obj)
4190 OP(INST, load_inst)
4191 OP(NEWOBJ, load_newobj)
4192 OP(GLOBAL, load_global)
4193 OP(APPEND, load_append)
4194 OP(APPENDS, load_appends)
4195 OP(BUILD, load_build)
4196 OP(DUP, load_dup)
4197 OP(BINGET, load_binget)
4198 OP(LONG_BINGET, load_long_binget)
4199 OP(GET, load_get)
4200 OP(MARK, load_mark)
4201 OP(BINPUT, load_binput)
4202 OP(LONG_BINPUT, load_long_binput)
4203 OP(PUT, load_put)
4204 OP(POP, load_pop)
4205 OP(POP_MARK, load_pop_mark)
4206 OP(SETITEM, load_setitem)
4207 OP(SETITEMS, load_setitems)
4208 OP(PERSID, load_persid)
4209 OP(BINPERSID, load_binpersid)
4210 OP(REDUCE, load_reduce)
4211 OP(PROTO, load_proto)
4212 OP_ARG(EXT1, load_extension, 1)
4213 OP_ARG(EXT2, load_extension, 2)
4214 OP_ARG(EXT4, load_extension, 4)
4215 OP_ARG(NEWTRUE, load_bool, Py_True)
4216 OP_ARG(NEWFALSE, load_bool, Py_False)
4217
4218 case STOP:
4219 break;
4220
4221 case '\0':
4222 PyErr_SetNone(PyExc_EOFError);
4223 return NULL;
4224
4225 default:
4226 PyErr_Format(UnpicklingError,
4227 "invalid load key, '%c'.", s[0]);
4228 return NULL;
4229 }
4230
4231 break; /* and we are done! */
4232 }
4233
4234 /* XXX: It is not clear what this is actually for. */
4235 if ((err = PyErr_Occurred())) {
4236 if (err == PyExc_EOFError) {
4237 PyErr_SetNone(PyExc_EOFError);
4238 }
4239 return NULL;
4240 }
4241
4242 PDATA_POP(self->stack, value);
4243 return value;
4244}
4245
4246PyDoc_STRVAR(Unpickler_load_doc,
4247"load() -> object. Load a pickle."
4248"\n"
4249"Read a pickled object representation from the open file object given in\n"
4250"the constructor, and return the reconstituted object hierarchy specified\n"
4251"therein.\n");
4252
4253static PyObject *
4254Unpickler_load(UnpicklerObject *self)
4255{
4256 /* Check whether the Unpickler was initialized correctly. This prevents
4257 segfaulting if a subclass overridden __init__ with a function that does
4258 not call Unpickler.__init__(). Here, we simply ensure that self->read
4259 is not NULL. */
4260 if (self->read == NULL) {
4261 PyErr_Format(UnpicklingError,
4262 "Unpickler.__init__() was not called by %s.__init__()",
4263 Py_TYPE(self)->tp_name);
4264 return NULL;
4265 }
4266
4267 return load(self);
4268}
4269
4270/* The name of find_class() is misleading. In newer pickle protocols, this
4271 function is used for loading any global (i.e., functions), not just
4272 classes. The name is kept only for backward compatibility. */
4273
4274PyDoc_STRVAR(Unpickler_find_class_doc,
4275"find_class(module_name, global_name) -> object.\n"
4276"\n"
4277"Return an object from a specified module, importing the module if\n"
4278"necessary. Subclasses may override this method (e.g. to restrict\n"
4279"unpickling of arbitrary classes and functions).\n"
4280"\n"
4281"This method is called whenever a class or a function object is\n"
4282"needed. Both arguments passed are str objects.\n");
4283
4284static PyObject *
4285Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4286{
4287 PyObject *global;
4288 PyObject *modules_dict;
4289 PyObject *module;
4290 PyObject *module_name, *global_name;
4291
4292 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4293 &module_name, &global_name))
4294 return NULL;
4295
4296 modules_dict = PySys_GetObject("modules");
4297 if (modules_dict == NULL)
4298 return NULL;
4299
4300 module = PyDict_GetItem(modules_dict, module_name);
4301 if (module == NULL) {
4302 module = PyImport_Import(module_name);
4303 if (module == NULL)
4304 return NULL;
4305 global = PyObject_GetAttr(module, global_name);
4306 Py_DECREF(module);
4307 }
4308 else {
4309 global = PyObject_GetAttr(module, global_name);
4310 }
4311 return global;
4312}
4313
4314static struct PyMethodDef Unpickler_methods[] = {
4315 {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
4316 Unpickler_load_doc},
4317 {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
4318 Unpickler_find_class_doc},
4319 {NULL, NULL} /* sentinel */
4320};
4321
4322static void
4323Unpickler_dealloc(UnpicklerObject *self)
4324{
4325 PyObject_GC_UnTrack((PyObject *)self);
4326 Py_XDECREF(self->readline);
4327 Py_XDECREF(self->read);
4328 Py_XDECREF(self->memo);
4329 Py_XDECREF(self->stack);
4330 Py_XDECREF(self->pers_func);
4331 Py_XDECREF(self->arg);
4332 Py_XDECREF(self->last_string);
4333
4334 PyMem_Free(self->marks);
4335 free(self->encoding);
4336 free(self->errors);
4337
4338 Py_TYPE(self)->tp_free((PyObject *)self);
4339}
4340
4341static int
4342Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
4343{
4344 Py_VISIT(self->readline);
4345 Py_VISIT(self->read);
4346 Py_VISIT(self->memo);
4347 Py_VISIT(self->stack);
4348 Py_VISIT(self->pers_func);
4349 Py_VISIT(self->arg);
4350 Py_VISIT(self->last_string);
4351 return 0;
4352}
4353
4354static int
4355Unpickler_clear(UnpicklerObject *self)
4356{
4357 Py_CLEAR(self->readline);
4358 Py_CLEAR(self->read);
4359 Py_CLEAR(self->memo);
4360 Py_CLEAR(self->stack);
4361 Py_CLEAR(self->pers_func);
4362 Py_CLEAR(self->arg);
4363 Py_CLEAR(self->last_string);
4364
4365 PyMem_Free(self->marks);
4366 self->marks = NULL;
4367 free(self->encoding);
4368 self->encoding = NULL;
4369 free(self->errors);
4370 self->errors = NULL;
4371
4372 return 0;
4373}
4374
4375PyDoc_STRVAR(Unpickler_doc,
4376"Unpickler(file, *, encoding='ASCII', errors='strict')"
4377"\n"
4378"This takes a binary file for reading a pickle data stream.\n"
4379"\n"
4380"The protocol version of the pickle is detected automatically, so no\n"
4381"proto argument is needed.\n"
4382"\n"
4383"The file-like object must have two methods, a read() method\n"
4384"that takes an integer argument, and a readline() method that\n"
4385"requires no arguments. Both methods should return bytes.\n"
4386"Thus file-like object can be a binary file object opened for\n"
4387"reading, a BytesIO object, or any other custom object that\n"
4388"meets this interface.\n"
4389"\n"
4390"Optional keyword arguments are encoding and errors, which are\n"
4391"used to decode 8-bit string instances pickled by Python 2.x.\n"
4392"These default to 'ASCII' and 'strict', respectively.\n");
4393
4394static int
4395Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4396{
4397 static char *kwlist[] = {"file", "encoding", "errors", 0};
4398 PyObject *file;
4399 char *encoding = NULL;
4400 char *errors = NULL;
4401
4402 /* XXX: That is an horrible error message. But, I don't know how to do
4403 better... */
4404 if (Py_SIZE(args) != 1) {
4405 PyErr_Format(PyExc_TypeError,
4406 "%s takes exactly one positional argument (%zd given)",
4407 Py_TYPE(self)->tp_name, Py_SIZE(args));
4408 return -1;
4409 }
4410
4411 /* Arguments parsing needs to be done in the __init__() method to allow
4412 subclasses to define their own __init__() method, which may (or may
4413 not) support Unpickler arguments. However, this means we need to be
4414 extra careful in the other Unpickler methods, since a subclass could
4415 forget to call Unpickler.__init__() thus breaking our internal
4416 invariants. */
4417 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4418 &file, &encoding, &errors))
4419 return -1;
4420
4421 /* In case of multiple __init__() calls, clear previous content. */
4422 if (self->read != NULL)
4423 (void)Unpickler_clear(self);
4424
4425 self->read = PyObject_GetAttrString(file, "read");
4426 self->readline = PyObject_GetAttrString(file, "readline");
4427 if (self->readline == NULL || self->read == NULL)
4428 return -1;
4429
4430 if (encoding == NULL)
4431 encoding = "ASCII";
4432 if (errors == NULL)
4433 errors = "strict";
4434
4435 self->encoding = strdup(encoding);
4436 self->errors = strdup(errors);
4437 if (self->encoding == NULL || self->errors == NULL) {
4438 PyErr_NoMemory();
4439 return -1;
4440 }
4441
4442 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4443 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4444 "persistent_load");
4445 if (self->pers_func == NULL)
4446 return -1;
4447 }
4448 else {
4449 self->pers_func = NULL;
4450 }
4451
4452 self->stack = (Pdata *)Pdata_New();
4453 if (self->stack == NULL)
4454 return -1;
4455
4456 self->memo = PyDict_New();
4457 if (self->memo == NULL)
4458 return -1;
4459
4460 return 0;
4461}
4462
4463static PyObject *
4464Unpickler_get_memo(UnpicklerObject *self)
4465{
4466 if (self->memo == NULL)
4467 PyErr_SetString(PyExc_AttributeError, "memo");
4468 else
4469 Py_INCREF(self->memo);
4470 return self->memo;
4471}
4472
4473static int
4474Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4475{
4476 PyObject *tmp;
4477
4478 if (value == NULL) {
4479 PyErr_SetString(PyExc_TypeError,
4480 "attribute deletion is not supported");
4481 return -1;
4482 }
4483 if (!PyDict_Check(value)) {
4484 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4485 return -1;
4486 }
4487
4488 tmp = self->memo;
4489 Py_INCREF(value);
4490 self->memo = value;
4491 Py_XDECREF(tmp);
4492
4493 return 0;
4494}
4495
4496static PyObject *
4497Unpickler_get_persload(UnpicklerObject *self)
4498{
4499 if (self->pers_func == NULL)
4500 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4501 else
4502 Py_INCREF(self->pers_func);
4503 return self->pers_func;
4504}
4505
4506static int
4507Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4508{
4509 PyObject *tmp;
4510
4511 if (value == NULL) {
4512 PyErr_SetString(PyExc_TypeError,
4513 "attribute deletion is not supported");
4514 return -1;
4515 }
4516 if (!PyCallable_Check(value)) {
4517 PyErr_SetString(PyExc_TypeError,
4518 "persistent_load must be a callable taking "
4519 "one argument");
4520 return -1;
4521 }
4522
4523 tmp = self->pers_func;
4524 Py_INCREF(value);
4525 self->pers_func = value;
4526 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4527
4528 return 0;
4529}
4530
4531static PyGetSetDef Unpickler_getsets[] = {
4532 {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
4533 {"persistent_load", (getter)Unpickler_get_persload,
4534 (setter)Unpickler_set_persload},
4535 {NULL}
4536};
4537
4538static PyTypeObject Unpickler_Type = {
4539 PyVarObject_HEAD_INIT(NULL, 0)
4540 "_pickle.Unpickler", /*tp_name*/
4541 sizeof(UnpicklerObject), /*tp_basicsize*/
4542 0, /*tp_itemsize*/
4543 (destructor)Unpickler_dealloc, /*tp_dealloc*/
4544 0, /*tp_print*/
4545 0, /*tp_getattr*/
4546 0, /*tp_setattr*/
4547 0, /*tp_compare*/
4548 0, /*tp_repr*/
4549 0, /*tp_as_number*/
4550 0, /*tp_as_sequence*/
4551 0, /*tp_as_mapping*/
4552 0, /*tp_hash*/
4553 0, /*tp_call*/
4554 0, /*tp_str*/
4555 0, /*tp_getattro*/
4556 0, /*tp_setattro*/
4557 0, /*tp_as_buffer*/
4558 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
4559 Unpickler_doc, /*tp_doc*/
4560 (traverseproc)Unpickler_traverse, /*tp_traverse*/
4561 (inquiry)Unpickler_clear, /*tp_clear*/
4562 0, /*tp_richcompare*/
4563 0, /*tp_weaklistoffset*/
4564 0, /*tp_iter*/
4565 0, /*tp_iternext*/
4566 Unpickler_methods, /*tp_methods*/
4567 0, /*tp_members*/
4568 Unpickler_getsets, /*tp_getset*/
4569 0, /*tp_base*/
4570 0, /*tp_dict*/
4571 0, /*tp_descr_get*/
4572 0, /*tp_descr_set*/
4573 0, /*tp_dictoffset*/
4574 (initproc)Unpickler_init, /*tp_init*/
4575 PyType_GenericAlloc, /*tp_alloc*/
4576 PyType_GenericNew, /*tp_new*/
4577 PyObject_GC_Del, /*tp_free*/
4578 0, /*tp_is_gc*/
4579};
4580
4581static int
4582init_stuff(void)
4583{
4584 PyObject *copyreg;
4585
4586 copyreg = PyImport_ImportModule("copyreg");
4587 if (!copyreg)
4588 return -1;
4589
4590 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4591 if (!dispatch_table)
4592 goto error;
4593
4594 extension_registry = \
4595 PyObject_GetAttrString(copyreg, "_extension_registry");
4596 if (!extension_registry)
4597 goto error;
4598
4599 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4600 if (!inverted_registry)
4601 goto error;
4602
4603 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4604 if (!extension_cache)
4605 goto error;
4606
4607 Py_DECREF(copyreg);
4608
4609 empty_tuple = PyTuple_New(0);
4610 if (empty_tuple == NULL)
4611 return -1;
4612
4613 two_tuple = PyTuple_New(2);
4614 if (two_tuple == NULL)
4615 return -1;
4616 /* We use this temp container with no regard to refcounts, or to
4617 * keeping containees alive. Exempt from GC, because we don't
4618 * want anything looking at two_tuple() by magic.
4619 */
4620 PyObject_GC_UnTrack(two_tuple);
4621
4622 return 0;
4623
4624 error:
4625 Py_DECREF(copyreg);
4626 return -1;
4627}
4628
4629static struct PyModuleDef _picklemodule = {
4630 PyModuleDef_HEAD_INIT,
4631 "_pickle",
4632 pickle_module_doc,
4633 -1,
4634 NULL,
4635 NULL,
4636 NULL,
4637 NULL,
4638 NULL
4639};
4640
4641PyMODINIT_FUNC
4642PyInit__pickle(void)
4643{
4644 PyObject *m;
4645
4646 if (PyType_Ready(&Unpickler_Type) < 0)
4647 return NULL;
4648 if (PyType_Ready(&Pickler_Type) < 0)
4649 return NULL;
4650 if (PyType_Ready(&Pdata_Type) < 0)
4651 return NULL;
4652
4653 /* Create the module and add the functions. */
4654 m = PyModule_Create(&_picklemodule);
4655 if (m == NULL)
4656 return NULL;
4657
4658 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4659 return NULL;
4660 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4661 return NULL;
4662
4663 /* Initialize the exceptions. */
4664 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4665 if (PickleError == NULL)
4666 return NULL;
4667 PicklingError = \
4668 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4669 if (PicklingError == NULL)
4670 return NULL;
4671 UnpicklingError = \
4672 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4673 if (UnpicklingError == NULL)
4674 return NULL;
4675
4676 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4677 return NULL;
4678 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4679 return NULL;
4680 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4681 return NULL;
4682
4683 if (init_stuff() < 0)
4684 return NULL;
4685
4686 return m;
4687}