blob: 435969d6762e92bb777f9b17bd8dd1868221cf23 [file] [log] [blame]
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001#include "Python.h"
2#include "structmember.h"
3
4PyDoc_STRVAR(pickle_module_doc,
5"Optimized C implementation for the Python pickle module.");
6
/* Pickle protocol numbers known to this implementation.
   Bump these when new opcodes are added to the pickle protocol. */
enum {
    HIGHEST_PROTOCOL = 3,
    DEFAULT_PROTOCOL = 3
};
12
13
/* Pickle opcodes.  Each opcode is a single byte in the pickle stream.
   These must be kept updated with pickle.py.
   Extensive docs are in pickletools.py. */
enum opcode {
    MARK            = '(',
    STOP            = '.',
    POP             = '0',
    POP_MARK        = '1',
    DUP             = '2',
    FLOAT           = 'F',
    INT             = 'I',
    BININT          = 'J',
    BININT1         = 'K',
    LONG            = 'L',
    BININT2         = 'M',
    NONE            = 'N',
    PERSID          = 'P',
    BINPERSID       = 'Q',
    REDUCE          = 'R',
    STRING          = 'S',
    BINSTRING       = 'T',
    SHORT_BINSTRING = 'U',
    UNICODE         = 'V',
    BINUNICODE      = 'X',
    APPEND          = 'a',
    BUILD           = 'b',
    GLOBAL          = 'c',
    DICT            = 'd',
    EMPTY_DICT      = '}',
    APPENDS         = 'e',
    GET             = 'g',
    BINGET          = 'h',
    INST            = 'i',
    LONG_BINGET     = 'j',
    LIST            = 'l',
    EMPTY_LIST      = ']',
    OBJ             = 'o',
    PUT             = 'p',
    BINPUT          = 'q',
    LONG_BINPUT     = 'r',
    SETITEM         = 's',
    TUPLE           = 't',
    EMPTY_TUPLE     = ')',
    SETITEMS        = 'u',
    BINFLOAT        = 'G',

    /* Protocol 2. */
    PROTO           = '\x80',
    NEWOBJ          = '\x81',
    EXT1            = '\x82',
    EXT2            = '\x83',
    EXT4            = '\x84',
    TUPLE1          = '\x85',
    TUPLE2          = '\x86',
    TUPLE3          = '\x87',
    NEWTRUE         = '\x88',
    NEWFALSE        = '\x89',
    LONG1           = '\x8a',
    LONG4           = '\x8b',

    /* Protocol 3 (Python 3.x) */
    BINBYTES        = 'B',
    SHORT_BINBYTES  = 'C',
};
77
/* TRUE and FALSE are not opcodes.  They are the text used to pickle bools
 * before protocol 2: unpicklers written before bools were introduced read
 * them as ints, while newer unpicklers can recognize that bools were
 * intended.  Protocol 2 added direct ways to pickle bools.
 *
 * Any earlier definition of TRUE/FALSE is dropped first.
 */
#undef TRUE
#define TRUE "I01\n"
#undef FALSE
#define FALSE "I00\n"
87
/* Tuning constants. */
enum {
    /* Keep in synch with pickle.Pickler._BATCHSIZE.  This is how many
       elements batch_list/dict() pumps out before doing APPENDS/SETITEMS.
       Nothing will break if this gets out of synch with pickle.py, but
       it's unclear that would help anything either. */
    BATCHSIZE = 1000,

    /* Nesting limit until Pickler, when running in "fast mode", starts
       checking for self-referential data-structures. */
    FAST_NESTING_LIMIT = 50,

    /* Size of the write buffer of Pickler.  Higher values will reduce the
       number of calls to the write() method of the output stream. */
    WRITE_BUF_SIZE = 256,
};
103
104/* Exception classes for pickle. These should override the ones defined in
105 pickle.py, when the C-optimized Pickler and Unpickler are used. */
106static PyObject *PickleError;
107static PyObject *PicklingError;
108static PyObject *UnpicklingError;
109
110/* copyreg.dispatch_table, {type_object: pickling_function} */
111static PyObject *dispatch_table;
112/* For EXT[124] opcodes. */
113/* copyreg._extension_registry, {(module_name, function_name): code} */
114static PyObject *extension_registry;
115/* copyreg._inverted_registry, {code: (module_name, function_name)} */
116static PyObject *inverted_registry;
117/* copyreg._extension_cache, {code: object} */
118static PyObject *extension_cache;
119
120/* XXX: Are these really nescessary? */
121/* As the name says, an empty tuple. */
122static PyObject *empty_tuple;
123/* For looking up name pairs in copyreg._extension_registry. */
124static PyObject *two_tuple;
125
126static int
127stack_underflow(void)
128{
129 PyErr_SetString(UnpicklingError, "unpickling stack underflow");
130 return -1;
131}
132
133/* Internal data type used as the unpickling stack. */
134typedef struct {
135 PyObject_HEAD
136 int length; /* number of initial slots in data currently used */
137 int size; /* number of slots in data allocated */
138 PyObject **data;
139} Pdata;
140
141static void
142Pdata_dealloc(Pdata *self)
143{
144 int i;
145 PyObject **p;
146
147 for (i = self->length, p = self->data; --i >= 0; p++) {
148 Py_DECREF(*p);
149 }
150 if (self->data)
151 PyMem_Free(self->data);
152 PyObject_Del(self);
153}
154
155static PyTypeObject Pdata_Type = {
156 PyVarObject_HEAD_INIT(NULL, 0)
157 "_pickle.Pdata", /*tp_name*/
158 sizeof(Pdata), /*tp_basicsize*/
159 0, /*tp_itemsize*/
160 (destructor)Pdata_dealloc, /*tp_dealloc*/
161};
162
163static PyObject *
164Pdata_New(void)
165{
166 Pdata *self;
167
168 if (!(self = PyObject_New(Pdata, &Pdata_Type)))
169 return NULL;
170 self->size = 8;
171 self->length = 0;
172 self->data = PyMem_Malloc(self->size * sizeof(PyObject *));
173 if (self->data)
174 return (PyObject *)self;
175 Py_DECREF(self);
176 return PyErr_NoMemory();
177}
178
179
180/* Retain only the initial clearto items. If clearto >= the current
181 * number of items, this is a (non-erroneous) NOP.
182 */
183static int
184Pdata_clear(Pdata *self, int clearto)
185{
186 int i;
187 PyObject **p;
188
189 if (clearto < 0)
190 return stack_underflow();
191 if (clearto >= self->length)
192 return 0;
193
194 for (i = self->length, p = self->data + clearto; --i >= clearto; p++) {
195 Py_CLEAR(*p);
196 }
197 self->length = clearto;
198
199 return 0;
200}
201
202static int
203Pdata_grow(Pdata *self)
204{
205 int bigger;
206 size_t nbytes;
207 PyObject **tmp;
208
209 bigger = (self->size << 1) + 1;
210 if (bigger <= 0) /* was 0, or new value overflows */
211 goto nomemory;
212 if ((int)(size_t)bigger != bigger)
213 goto nomemory;
214 nbytes = (size_t)bigger * sizeof(PyObject *);
215 if (nbytes / sizeof(PyObject *) != (size_t)bigger)
216 goto nomemory;
217 tmp = PyMem_Realloc(self->data, nbytes);
218 if (tmp == NULL)
219 goto nomemory;
220 self->data = tmp;
221 self->size = bigger;
222 return 0;
223
224 nomemory:
225 PyErr_NoMemory();
226 return -1;
227}
228
229/* D is a Pdata*. Pop the topmost element and store it into V, which
230 * must be an lvalue holding PyObject*. On stack underflow, UnpicklingError
231 * is raised and V is set to NULL.
232 */
233static PyObject *
234Pdata_pop(Pdata *self)
235{
236 if (self->length == 0) {
237 PyErr_SetString(UnpicklingError, "bad pickle data");
238 return NULL;
239 }
240 return self->data[--(self->length)];
241}
242#define PDATA_POP(D, V) do { (V) = Pdata_pop((D)); } while (0)
243
244static int
245Pdata_push(Pdata *self, PyObject *obj)
246{
247 if (self->length == self->size && Pdata_grow(self) < 0) {
248 return -1;
249 }
250 self->data[self->length++] = obj;
251 return 0;
252}
253
/* Push an object on stack, transferring its ownership to the stack.
   On failure the enclosing function returns ER immediately.
   NOTE(review): the reference to O is not released when the push fails;
   confirm whether callers expect that. */
#define PDATA_PUSH(D, O, ER) do {                               \
        if (Pdata_push((D), (O)) < 0) return (ER); } while(0)

/* Push an object on stack, adding a new reference to the object.
   If the push fails, the reference added here is released again before
   the enclosing function returns ER, so the object's reference count is
   left as the caller had it.  O is evaluated more than once. */
#define PDATA_APPEND(D, O, ER) do {                             \
        Py_INCREF((O));                                         \
        if (Pdata_push((D), (O)) < 0) {                         \
            Py_DECREF((O));                                     \
            return (ER);                                        \
        } } while(0)
262
263static PyObject *
264Pdata_poptuple(Pdata *self, Py_ssize_t start)
265{
266 PyObject *tuple;
267 Py_ssize_t len, i, j;
268
269 len = self->length - start;
270 tuple = PyTuple_New(len);
271 if (tuple == NULL)
272 return NULL;
273 for (i = start, j = 0; j < len; i++, j++)
274 PyTuple_SET_ITEM(tuple, j, self->data[i]);
275
276 self->length = start;
277 return tuple;
278}
279
280static PyObject *
281Pdata_poplist(Pdata *self, Py_ssize_t start)
282{
283 PyObject *list;
284 Py_ssize_t len, i, j;
285
286 len = self->length - start;
287 list = PyList_New(len);
288 if (list == NULL)
289 return NULL;
290 for (i = start, j = 0; j < len; i++, j++)
291 PyList_SET_ITEM(list, j, self->data[i]);
292
293 self->length = start;
294 return list;
295}
296
297typedef struct PicklerObject {
298 PyObject_HEAD
299 PyObject *write; /* write() method of the output stream */
300 PyObject *memo; /* Memo dictionary, keep track of the seen
301 objects to support self-referential objects
302 pickling. */
303 PyObject *pers_func; /* persistent_id() method, can be NULL */
304 PyObject *arg;
305 int proto; /* Pickle protocol number, >= 0 */
306 int bin; /* Boolean, true if proto > 0 */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000307 int buf_size; /* Size of the current buffered pickle data */
308 char *write_buf; /* Write buffer, this is to avoid calling the
309 write() method of the output stream too
310 often. */
311 int fast; /* Enable fast mode if set to a true value.
312 The fast mode disable the usage of memo,
313 therefore speeding the pickling process by
314 not generating superfluous PUT opcodes. It
315 should not be used if with self-referential
316 objects. */
317 int fast_nesting;
318 PyObject *fast_memo;
319} PicklerObject;
320
321typedef struct UnpicklerObject {
322 PyObject_HEAD
323 Pdata *stack; /* Pickle data stack, store unpickled objects. */
324 PyObject *readline; /* readline() method of the output stream */
325 PyObject *read; /* read() method of the output stream */
326 PyObject *memo; /* Memo dictionary, provide the objects stored
327 using the PUT opcodes. */
328 PyObject *arg;
329 PyObject *pers_func; /* persistent_load() method, can be NULL. */
330 PyObject *last_string; /* Reference to the last string read by the
331 readline() method. */
332 char *buffer; /* Reading buffer. */
333 char *encoding; /* Name of the encoding to be used for
334 decoding strings pickled using Python
335 2.x. The default value is "ASCII" */
336 char *errors; /* Name of errors handling scheme to used when
337 decoding strings. The default value is
338 "strict". */
339 int *marks; /* Mark stack, used for unpickling container
340 objects. */
341 Py_ssize_t num_marks; /* Number of marks in the mark stack. */
342 Py_ssize_t marks_size; /* Current allocated size of the mark stack. */
343} UnpicklerObject;
344
345/* Forward declarations */
346static int save(PicklerObject *, PyObject *, int);
347static int save_reduce(PicklerObject *, PyObject *, PyObject *);
348static PyTypeObject Pickler_Type;
349static PyTypeObject Unpickler_Type;
350
351
/* Helpers for creating the argument tuple passed to functions.  This has the
   performance advantage of calling PyTuple_New() only once.

   ARG_TUP(self, obj) stores obj in slot 0 of self->arg, creating the 1-tuple
   on first use and releasing whatever slot 0 held before.  The reference to
   obj is consumed either way: if the tuple cannot be created, obj is
   released and self->arg stays NULL.

   FREE_ARG_TUP(self) drops self->arg when somebody else still holds a
   reference to it, so the next ARG_TUP() starts from a fresh tuple. */

#define ARG_TUP(self, obj) do {                                 \
        if ((self)->arg || ((self)->arg=PyTuple_New(1))) {      \
            Py_XDECREF(PyTuple_GET_ITEM((self)->arg, 0));       \
            PyTuple_SET_ITEM((self)->arg, 0, (obj));            \
        }                                                       \
        else {                                                  \
            Py_DECREF((obj));                                   \
        }                                                       \
    } while (0)

#define FREE_ARG_TUP(self) do {                 \
        if ((self)->arg->ob_refcnt > 1)         \
            Py_CLEAR((self)->arg);              \
    } while (0)
369
370/* A temporary cleaner API for fast single argument function call.
371
372 XXX: Does caching the argument tuple provides any real performance benefits?
373
374 A quick benchmark, on a 2.0GHz Athlon64 3200+ running Linux 2.6.24 with
375 glibc 2.7, tells me that it takes roughly 20,000,000 PyTuple_New(1) calls
376 when the tuple is retrieved from the freelist (i.e, call PyTuple_New() then
377 immediately DECREF it) and 1,200,000 calls when allocating brand new tuples
378 (i.e, call PyTuple_New() and store the returned value in an array), to save
379 one second (wall clock time). Either ways, the loading time a pickle stream
380 large enough to generate this number of calls would be massively
381 overwhelmed by other factors, like I/O throughput, the GC traversal and
382 object allocation overhead. So, I really doubt these functions provide any
383 real benefits.
384
385 On the other hand, oprofile reports that pickle spends a lot of time in
386 these functions. But, that is probably more related to the function call
387 overhead, than the argument tuple allocation.
388
389 XXX: And, what is the reference behavior of these? Steal, borrow? At first
390 glance, it seems to steal the reference of 'arg' and borrow the reference
391 of 'func'.
392 */
393static PyObject *
394pickler_call(PicklerObject *self, PyObject *func, PyObject *arg)
395{
396 PyObject *result = NULL;
397
398 ARG_TUP(self, arg);
399 if (self->arg) {
400 result = PyObject_Call(func, self->arg, NULL);
401 FREE_ARG_TUP(self);
402 }
403 return result;
404}
405
406static PyObject *
407unpickler_call(UnpicklerObject *self, PyObject *func, PyObject *arg)
408{
409 PyObject *result = NULL;
410
411 ARG_TUP(self, arg);
412 if (self->arg) {
413 result = PyObject_Call(func, self->arg, NULL);
414 FREE_ARG_TUP(self);
415 }
416 return result;
417}
418
419static Py_ssize_t
420pickler_write(PicklerObject *self, const char *s, Py_ssize_t n)
421{
422 PyObject *data, *result;
423
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +0000424 if (self->write_buf == NULL) {
425 PyErr_SetString(PyExc_SystemError, "invalid write buffer");
426 return -1;
427 }
428
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000429 if (s == NULL) {
430 if (!(self->buf_size))
431 return 0;
432 data = PyBytes_FromStringAndSize(self->write_buf, self->buf_size);
433 if (data == NULL)
434 return -1;
435 }
436 else {
437 if (self->buf_size && (n + self->buf_size) > WRITE_BUF_SIZE) {
438 if (pickler_write(self, NULL, 0) < 0)
439 return -1;
440 }
441
442 if (n > WRITE_BUF_SIZE) {
443 if (!(data = PyBytes_FromStringAndSize(s, n)))
444 return -1;
445 }
446 else {
447 memcpy(self->write_buf + self->buf_size, s, n);
448 self->buf_size += n;
449 return n;
450 }
451 }
452
453 /* object with write method */
454 result = pickler_call(self, self->write, data);
455 if (result == NULL)
456 return -1;
457
458 Py_DECREF(result);
459 self->buf_size = 0;
460 return n;
461}
462
463/* XXX: These read/readline functions ought to be optimized. Buffered I/O
464 might help a lot, especially with the new (but much slower) io library.
465 On the other hand, the added complexity might not worth it.
466 */
467
468/* Read at least n characters from the input stream and set s to the current
469 reading position. */
470static Py_ssize_t
471unpickler_read(UnpicklerObject *self, char **s, Py_ssize_t n)
472{
473 PyObject *len;
474 PyObject *data;
475
476 len = PyLong_FromSsize_t(n);
477 if (len == NULL)
478 return -1;
479
480 data = unpickler_call(self, self->read, len);
481 if (data == NULL)
482 return -1;
483
484 /* XXX: Should bytearray be supported too? */
485 if (!PyBytes_Check(data)) {
486 PyErr_SetString(PyExc_ValueError,
487 "read() from the underlying stream did not"
488 "return bytes");
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000489 Py_DECREF(data);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000490 return -1;
491 }
492
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000493 if (PyBytes_GET_SIZE(data) != n) {
494 PyErr_SetNone(PyExc_EOFError);
Amaury Forgeot d'Arc5f952572008-11-25 21:11:54 +0000495 Py_DECREF(data);
Amaury Forgeot d'Arc3e4e72f2008-11-11 20:05:06 +0000496 return -1;
497 }
498
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000499 Py_XDECREF(self->last_string);
500 self->last_string = data;
501
502 if (!(*s = PyBytes_AS_STRING(data)))
503 return -1;
504
505 return n;
506}
507
508static Py_ssize_t
509unpickler_readline(UnpicklerObject *self, char **s)
510{
511 PyObject *data;
512
513 data = PyObject_CallObject(self->readline, empty_tuple);
514 if (data == NULL)
515 return -1;
516
517 /* XXX: Should bytearray be supported too? */
518 if (!PyBytes_Check(data)) {
519 PyErr_SetString(PyExc_ValueError,
520 "readline() from the underlying stream did not"
521 "return bytes");
522 return -1;
523 }
524
525 Py_XDECREF(self->last_string);
526 self->last_string = data;
527
528 if (!(*s = PyBytes_AS_STRING(data)))
529 return -1;
530
531 return PyBytes_GET_SIZE(data);
532}
533
534/* Generate a GET opcode for an object stored in the memo. The 'key' argument
535 should be the address of the object as returned by PyLong_FromVoidPtr(). */
536static int
537memo_get(PicklerObject *self, PyObject *key)
538{
539 PyObject *value;
540 PyObject *memo_id;
541 long x;
542 char pdata[30];
543 int len;
544
545 value = PyDict_GetItemWithError(self->memo, key);
546 if (value == NULL) {
547 if (!PyErr_Occurred())
548 PyErr_SetObject(PyExc_KeyError, key);
549 return -1;
550 }
551
552 memo_id = PyTuple_GetItem(value, 0);
553 if (memo_id == NULL)
554 return -1;
555
556 if (!PyLong_Check(memo_id)) {
557 PyErr_SetString(PicklingError, "memo id must be an integer");
558 return -1;
559 }
560 x = PyLong_AsLong(memo_id);
561 if (x == -1 && PyErr_Occurred())
562 return -1;
563
564 if (!self->bin) {
565 pdata[0] = GET;
566 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
567 len = (int)strlen(pdata);
568 }
569 else {
570 if (x < 256) {
571 pdata[0] = BINGET;
572 pdata[1] = (unsigned char)(x & 0xff);
573 len = 2;
574 }
575 else if (x <= 0xffffffffL) {
576 pdata[0] = LONG_BINGET;
577 pdata[1] = (unsigned char)(x & 0xff);
578 pdata[2] = (unsigned char)((x >> 8) & 0xff);
579 pdata[3] = (unsigned char)((x >> 16) & 0xff);
580 pdata[4] = (unsigned char)((x >> 24) & 0xff);
581 len = 5;
582 }
583 else { /* unlikely */
584 PyErr_SetString(PicklingError,
585 "memo id too large for LONG_BINGET");
586 return -1;
587 }
588 }
589
590 if (pickler_write(self, pdata, len) < 0)
591 return -1;
592
593 return 0;
594}
595
596/* Store an object in the memo, assign it a new unique ID based on the number
597 of objects currently stored in the memo and generate a PUT opcode. */
598static int
599memo_put(PicklerObject *self, PyObject *obj)
600{
601 PyObject *key = NULL;
602 PyObject *memo_id = NULL;
603 PyObject *tuple = NULL;
604 long x;
605 char pdata[30];
606 int len;
607 int status = 0;
608
609 if (self->fast)
610 return 0;
611
612 key = PyLong_FromVoidPtr(obj);
613 if (key == NULL)
614 goto error;
615 if ((x = PyDict_Size(self->memo)) < 0)
616 goto error;
617 memo_id = PyLong_FromLong(x);
618 if (memo_id == NULL)
619 goto error;
620 tuple = PyTuple_New(2);
621 if (tuple == NULL)
622 goto error;
623
624 Py_INCREF(memo_id);
625 PyTuple_SET_ITEM(tuple, 0, memo_id);
626 Py_INCREF(obj);
627 PyTuple_SET_ITEM(tuple, 1, obj);
628 if (PyDict_SetItem(self->memo, key, tuple) < 0)
629 goto error;
630
631 if (!self->bin) {
632 pdata[0] = PUT;
633 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ld\n", x);
634 len = strlen(pdata);
635 }
636 else {
637 if (x < 256) {
638 pdata[0] = BINPUT;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +0000639 pdata[1] = (unsigned char)x;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000640 len = 2;
641 }
642 else if (x <= 0xffffffffL) {
643 pdata[0] = LONG_BINPUT;
644 pdata[1] = (unsigned char)(x & 0xff);
645 pdata[2] = (unsigned char)((x >> 8) & 0xff);
646 pdata[3] = (unsigned char)((x >> 16) & 0xff);
647 pdata[4] = (unsigned char)((x >> 24) & 0xff);
648 len = 5;
649 }
650 else { /* unlikely */
651 PyErr_SetString(PicklingError,
652 "memo id too large for LONG_BINPUT");
653 return -1;
654 }
655 }
656
657 if (pickler_write(self, pdata, len) < 0)
658 goto error;
659
660 if (0) {
661 error:
662 status = -1;
663 }
664
665 Py_XDECREF(key);
666 Py_XDECREF(memo_id);
667 Py_XDECREF(tuple);
668
669 return status;
670}
671
672static PyObject *
673whichmodule(PyObject *global, PyObject *global_name)
674{
675 Py_ssize_t i, j;
676 static PyObject *module_str = NULL;
677 static PyObject *main_str = NULL;
678 PyObject *module_name;
679 PyObject *modules_dict;
680 PyObject *module;
681 PyObject *obj;
682
683 if (module_str == NULL) {
684 module_str = PyUnicode_InternFromString("__module__");
685 if (module_str == NULL)
686 return NULL;
687 main_str = PyUnicode_InternFromString("__main__");
688 if (main_str == NULL)
689 return NULL;
690 }
691
692 module_name = PyObject_GetAttr(global, module_str);
693
694 /* In some rare cases (e.g., random.getrandbits), __module__ can be
695 None. If it is so, then search sys.modules for the module of
696 global. */
697 if (module_name == Py_None) {
698 Py_DECREF(module_name);
699 goto search;
700 }
701
702 if (module_name) {
703 return module_name;
704 }
705 if (PyErr_ExceptionMatches(PyExc_AttributeError))
706 PyErr_Clear();
707 else
708 return NULL;
709
710 search:
711 modules_dict = PySys_GetObject("modules");
712 if (modules_dict == NULL)
713 return NULL;
714
715 i = 0;
716 module_name = NULL;
717 while ((j = PyDict_Next(modules_dict, &i, &module_name, &module))) {
718 if (PyObject_Compare(module_name, main_str) == 0)
719 continue;
720
721 obj = PyObject_GetAttr(module, global_name);
722 if (obj == NULL) {
723 if (PyErr_ExceptionMatches(PyExc_AttributeError))
724 PyErr_Clear();
725 else
726 return NULL;
727 continue;
728 }
729
730 if (obj != global) {
731 Py_DECREF(obj);
732 continue;
733 }
734
735 Py_DECREF(obj);
736 break;
737 }
738
739 /* If no module is found, use __main__. */
740 if (!j) {
741 module_name = main_str;
742 }
743
744 Py_INCREF(module_name);
745 return module_name;
746}
747
748/* fast_save_enter() and fast_save_leave() are guards against recursive
749 objects when Pickler is used with the "fast mode" (i.e., with object
750 memoization disabled). If the nesting of a list or dict object exceed
751 FAST_NESTING_LIMIT, these guards will start keeping an internal
752 reference to the seen list or dict objects and check whether these objects
753 are recursive. These are not strictly necessary, since save() has a
754 hard-coded recursion limit, but they give a nicer error message than the
755 typical RuntimeError. */
756static int
757fast_save_enter(PicklerObject *self, PyObject *obj)
758{
759 /* if fast_nesting < 0, we're doing an error exit. */
760 if (++self->fast_nesting >= FAST_NESTING_LIMIT) {
761 PyObject *key = NULL;
762 if (self->fast_memo == NULL) {
763 self->fast_memo = PyDict_New();
764 if (self->fast_memo == NULL) {
765 self->fast_nesting = -1;
766 return 0;
767 }
768 }
769 key = PyLong_FromVoidPtr(obj);
770 if (key == NULL)
771 return 0;
772 if (PyDict_GetItem(self->fast_memo, key)) {
773 Py_DECREF(key);
774 PyErr_Format(PyExc_ValueError,
775 "fast mode: can't pickle cyclic objects "
776 "including object type %.200s at %p",
777 obj->ob_type->tp_name, obj);
778 self->fast_nesting = -1;
779 return 0;
780 }
781 if (PyDict_SetItem(self->fast_memo, key, Py_None) < 0) {
782 Py_DECREF(key);
783 self->fast_nesting = -1;
784 return 0;
785 }
786 Py_DECREF(key);
787 }
788 return 1;
789}
790
791static int
792fast_save_leave(PicklerObject *self, PyObject *obj)
793{
794 if (self->fast_nesting-- >= FAST_NESTING_LIMIT) {
795 PyObject *key = PyLong_FromVoidPtr(obj);
796 if (key == NULL)
797 return 0;
798 if (PyDict_DelItem(self->fast_memo, key) < 0) {
799 Py_DECREF(key);
800 return 0;
801 }
802 Py_DECREF(key);
803 }
804 return 1;
805}
806
807static int
808save_none(PicklerObject *self, PyObject *obj)
809{
810 const char none_op = NONE;
811 if (pickler_write(self, &none_op, 1) < 0)
812 return -1;
813
814 return 0;
815}
816
817static int
818save_bool(PicklerObject *self, PyObject *obj)
819{
820 static const char *buf[2] = { FALSE, TRUE };
821 const char len[2] = {sizeof(FALSE) - 1, sizeof(TRUE) - 1};
822 int p = (obj == Py_True);
823
824 if (self->proto >= 2) {
825 const char bool_op = p ? NEWTRUE : NEWFALSE;
826 if (pickler_write(self, &bool_op, 1) < 0)
827 return -1;
828 }
829 else if (pickler_write(self, buf[p], len[p]) < 0)
830 return -1;
831
832 return 0;
833}
834
835static int
836save_int(PicklerObject *self, long x)
837{
838 char pdata[32];
839 int len = 0;
840
841 if (!self->bin
842#if SIZEOF_LONG > 4
843 || x > 0x7fffffffL || x < -0x80000000L
844#endif
845 ) {
846 /* Text-mode pickle, or long too big to fit in the 4-byte
847 * signed BININT format: store as a string.
848 */
Mark Dickinson8dd05142009-01-20 20:43:58 +0000849 pdata[0] = LONG; /* use LONG for consistency with pickle.py */
850 PyOS_snprintf(pdata + 1, sizeof(pdata) - 1, "%ldL\n", x);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000851 if (pickler_write(self, pdata, strlen(pdata)) < 0)
852 return -1;
853 }
854 else {
855 /* Binary pickle and x fits in a signed 4-byte int. */
856 pdata[1] = (unsigned char)(x & 0xff);
857 pdata[2] = (unsigned char)((x >> 8) & 0xff);
858 pdata[3] = (unsigned char)((x >> 16) & 0xff);
859 pdata[4] = (unsigned char)((x >> 24) & 0xff);
860
861 if ((pdata[4] == 0) && (pdata[3] == 0)) {
862 if (pdata[2] == 0) {
863 pdata[0] = BININT1;
864 len = 2;
865 }
866 else {
867 pdata[0] = BININT2;
868 len = 3;
869 }
870 }
871 else {
872 pdata[0] = BININT;
873 len = 5;
874 }
875
876 if (pickler_write(self, pdata, len) < 0)
877 return -1;
878 }
879
880 return 0;
881}
882
883static int
884save_long(PicklerObject *self, PyObject *obj)
885{
886 PyObject *repr = NULL;
887 Py_ssize_t size;
888 long val = PyLong_AsLong(obj);
889 int status = 0;
890
891 const char long_op = LONG;
892
893 if (val == -1 && PyErr_Occurred()) {
894 /* out of range for int pickling */
895 PyErr_Clear();
896 }
897 else
898 return save_int(self, val);
899
900 if (self->proto >= 2) {
901 /* Linear-time pickling. */
902 size_t nbits;
903 size_t nbytes;
904 unsigned char *pdata;
905 char header[5];
906 int i;
907 int sign = _PyLong_Sign(obj);
908
909 if (sign == 0) {
910 header[0] = LONG1;
911 header[1] = 0; /* It's 0 -- an empty bytestring. */
912 if (pickler_write(self, header, 2) < 0)
913 goto error;
914 return 0;
915 }
916 nbits = _PyLong_NumBits(obj);
917 if (nbits == (size_t)-1 && PyErr_Occurred())
918 goto error;
919 /* How many bytes do we need? There are nbits >> 3 full
920 * bytes of data, and nbits & 7 leftover bits. If there
921 * are any leftover bits, then we clearly need another
922 * byte. Wnat's not so obvious is that we *probably*
923 * need another byte even if there aren't any leftovers:
924 * the most-significant bit of the most-significant byte
925 * acts like a sign bit, and it's usually got a sense
926 * opposite of the one we need. The exception is longs
927 * of the form -(2**(8*j-1)) for j > 0. Such a long is
928 * its own 256's-complement, so has the right sign bit
929 * even without the extra byte. That's a pain to check
930 * for in advance, though, so we always grab an extra
931 * byte at the start, and cut it back later if possible.
932 */
933 nbytes = (nbits >> 3) + 1;
934 if (nbytes > INT_MAX) {
935 PyErr_SetString(PyExc_OverflowError,
936 "long too large to pickle");
937 goto error;
938 }
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000939 repr = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)nbytes);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000940 if (repr == NULL)
941 goto error;
Neal Norwitz6ae2eb22008-08-24 23:50:08 +0000942 pdata = (unsigned char *)PyBytes_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000943 i = _PyLong_AsByteArray((PyLongObject *)obj,
944 pdata, nbytes,
945 1 /* little endian */ , 1 /* signed */ );
946 if (i < 0)
947 goto error;
948 /* If the long is negative, this may be a byte more than
949 * needed. This is so iff the MSB is all redundant sign
950 * bits.
951 */
952 if (sign < 0 &&
953 nbytes > 1 &&
954 pdata[nbytes - 1] == 0xff &&
955 (pdata[nbytes - 2] & 0x80) != 0) {
956 nbytes--;
957 }
958
959 if (nbytes < 256) {
960 header[0] = LONG1;
961 header[1] = (unsigned char)nbytes;
962 size = 2;
963 }
964 else {
965 header[0] = LONG4;
966 size = (int)nbytes;
967 for (i = 1; i < 5; i++) {
968 header[i] = (unsigned char)(size & 0xff);
969 size >>= 8;
970 }
971 size = 5;
972 }
973 if (pickler_write(self, header, size) < 0 ||
974 pickler_write(self, (char *)pdata, (int)nbytes) < 0)
975 goto error;
976 }
977 else {
978 char *string;
979
Mark Dickinson8dd05142009-01-20 20:43:58 +0000980 /* proto < 2: write the repr and newline. This is quadratic-time (in
981 the number of digits), in both directions. We add a trailing 'L'
982 to the repr, for compatibility with Python 2.x. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000983
984 repr = PyObject_Repr(obj);
985 if (repr == NULL)
986 goto error;
987
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +0000988 string = _PyUnicode_AsStringAndSize(repr, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000989 if (string == NULL)
990 goto error;
991
992 if (pickler_write(self, &long_op, 1) < 0 ||
993 pickler_write(self, string, size) < 0 ||
Mark Dickinson8dd05142009-01-20 20:43:58 +0000994 pickler_write(self, "L\n", 2) < 0)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +0000995 goto error;
996 }
997
998 if (0) {
999 error:
1000 status = -1;
1001 }
1002 Py_XDECREF(repr);
1003
1004 return status;
1005}
1006
1007static int
1008save_float(PicklerObject *self, PyObject *obj)
1009{
1010 double x = PyFloat_AS_DOUBLE((PyFloatObject *)obj);
1011
1012 if (self->bin) {
1013 char pdata[9];
1014 pdata[0] = BINFLOAT;
1015 if (_PyFloat_Pack8(x, (unsigned char *)&pdata[1], 0) < 0)
1016 return -1;
1017 if (pickler_write(self, pdata, 9) < 0)
1018 return -1;
1019 }
1020 else {
1021 char pdata[250];
1022 pdata[0] = FLOAT;
1023 PyOS_ascii_formatd(pdata + 1, sizeof(pdata) - 2, "%.17g", x);
1024 /* Extend the formatted string with a newline character */
1025 strcat(pdata, "\n");
1026
1027 if (pickler_write(self, pdata, strlen(pdata)) < 0)
1028 return -1;
1029 }
1030
1031 return 0;
1032}
1033
1034static int
1035save_bytes(PicklerObject *self, PyObject *obj)
1036{
1037 if (self->proto < 3) {
1038 /* Older pickle protocols do not have an opcode for pickling bytes
1039 objects. Therefore, we need to fake the copy protocol (i.e.,
1040 the __reduce__ method) to permit bytes object unpickling. */
1041 PyObject *reduce_value = NULL;
1042 PyObject *bytelist = NULL;
1043 int status;
1044
1045 bytelist = PySequence_List(obj);
1046 if (bytelist == NULL)
1047 return -1;
1048
1049 reduce_value = Py_BuildValue("(O(O))", (PyObject *)&PyBytes_Type,
1050 bytelist);
1051 if (reduce_value == NULL) {
1052 Py_DECREF(bytelist);
1053 return -1;
1054 }
1055
1056 /* save_reduce() will memoize the object automatically. */
1057 status = save_reduce(self, reduce_value, obj);
1058 Py_DECREF(reduce_value);
1059 Py_DECREF(bytelist);
1060 return status;
1061 }
1062 else {
1063 Py_ssize_t size;
1064 char header[5];
1065 int len;
1066
1067 size = PyBytes_Size(obj);
1068 if (size < 0)
1069 return -1;
1070
1071 if (size < 256) {
1072 header[0] = SHORT_BINBYTES;
1073 header[1] = (unsigned char)size;
1074 len = 2;
1075 }
1076 else if (size <= 0xffffffffL) {
1077 header[0] = BINBYTES;
1078 header[1] = (unsigned char)(size & 0xff);
1079 header[2] = (unsigned char)((size >> 8) & 0xff);
1080 header[3] = (unsigned char)((size >> 16) & 0xff);
1081 header[4] = (unsigned char)((size >> 24) & 0xff);
1082 len = 5;
1083 }
1084 else {
1085 return -1; /* string too large */
1086 }
1087
1088 if (pickler_write(self, header, len) < 0)
1089 return -1;
1090
1091 if (pickler_write(self, PyBytes_AS_STRING(obj), size) < 0)
1092 return -1;
1093
1094 if (memo_put(self, obj) < 0)
1095 return -1;
1096
1097 return 0;
1098 }
1099}
1100
1101/* A copy of PyUnicode_EncodeRawUnicodeEscape() that also translates
1102 backslash and newline characters to \uXXXX escapes. */
1103static PyObject *
1104raw_unicode_escape(const Py_UNICODE *s, Py_ssize_t size)
1105{
1106 PyObject *repr, *result;
1107 char *p;
1108 char *q;
1109
1110 static const char *hexdigits = "0123456789abcdef";
1111
1112#ifdef Py_UNICODE_WIDE
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001113 const Py_ssize_t expandsize = 10;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001114#else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001115 const Py_ssize_t expandsize = 6;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001116#endif
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001117
1118 if (size > PY_SSIZE_T_MAX / expandsize)
1119 return PyErr_NoMemory();
1120
1121 repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001122 if (repr == NULL)
1123 return NULL;
1124 if (size == 0)
1125 goto done;
1126
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001127 p = q = PyByteArray_AS_STRING(repr);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001128 while (size-- > 0) {
1129 Py_UNICODE ch = *s++;
1130#ifdef Py_UNICODE_WIDE
1131 /* Map 32-bit characters to '\Uxxxxxxxx' */
1132 if (ch >= 0x10000) {
1133 *p++ = '\\';
1134 *p++ = 'U';
1135 *p++ = hexdigits[(ch >> 28) & 0xf];
1136 *p++ = hexdigits[(ch >> 24) & 0xf];
1137 *p++ = hexdigits[(ch >> 20) & 0xf];
1138 *p++ = hexdigits[(ch >> 16) & 0xf];
1139 *p++ = hexdigits[(ch >> 12) & 0xf];
1140 *p++ = hexdigits[(ch >> 8) & 0xf];
1141 *p++ = hexdigits[(ch >> 4) & 0xf];
1142 *p++ = hexdigits[ch & 15];
1143 }
1144 else
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001145#else
1146 /* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
1147 if (ch >= 0xD800 && ch < 0xDC00) {
1148 Py_UNICODE ch2;
1149 Py_UCS4 ucs;
1150
1151 ch2 = *s++;
1152 size--;
1153 if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
1154 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
1155 *p++ = '\\';
1156 *p++ = 'U';
1157 *p++ = hexdigits[(ucs >> 28) & 0xf];
1158 *p++ = hexdigits[(ucs >> 24) & 0xf];
1159 *p++ = hexdigits[(ucs >> 20) & 0xf];
1160 *p++ = hexdigits[(ucs >> 16) & 0xf];
1161 *p++ = hexdigits[(ucs >> 12) & 0xf];
1162 *p++ = hexdigits[(ucs >> 8) & 0xf];
1163 *p++ = hexdigits[(ucs >> 4) & 0xf];
1164 *p++ = hexdigits[ucs & 0xf];
1165 continue;
1166 }
1167 /* Fall through: isolated surrogates are copied as-is */
1168 s--;
1169 size++;
1170 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001171#endif
1172 /* Map 16-bit characters to '\uxxxx' */
1173 if (ch >= 256 || ch == '\\' || ch == '\n') {
1174 *p++ = '\\';
1175 *p++ = 'u';
1176 *p++ = hexdigits[(ch >> 12) & 0xf];
1177 *p++ = hexdigits[(ch >> 8) & 0xf];
1178 *p++ = hexdigits[(ch >> 4) & 0xf];
1179 *p++ = hexdigits[ch & 15];
1180 }
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001181 /* Copy everything else as-is */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001182 else
1183 *p++ = (char) ch;
1184 }
1185 size = p - q;
1186
1187 done:
Alexandre Vassalotti554d8782008-12-27 07:32:41 +00001188 result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001189 Py_DECREF(repr);
1190 return result;
1191}
1192
1193static int
1194save_unicode(PicklerObject *self, PyObject *obj)
1195{
1196 Py_ssize_t size;
1197 PyObject *encoded = NULL;
1198
1199 if (self->bin) {
1200 char pdata[5];
1201
1202 encoded = PyUnicode_AsUTF8String(obj);
1203 if (encoded == NULL)
1204 goto error;
1205
1206 size = PyBytes_GET_SIZE(encoded);
1207 if (size < 0 || size > 0xffffffffL)
1208 goto error; /* string too large */
1209
1210 pdata[0] = BINUNICODE;
1211 pdata[1] = (unsigned char)(size & 0xff);
1212 pdata[2] = (unsigned char)((size >> 8) & 0xff);
1213 pdata[3] = (unsigned char)((size >> 16) & 0xff);
1214 pdata[4] = (unsigned char)((size >> 24) & 0xff);
1215
1216 if (pickler_write(self, pdata, 5) < 0)
1217 goto error;
1218
1219 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1220 goto error;
1221 }
1222 else {
1223 const char unicode_op = UNICODE;
1224
1225 encoded = raw_unicode_escape(PyUnicode_AS_UNICODE(obj),
1226 PyUnicode_GET_SIZE(obj));
1227 if (encoded == NULL)
1228 goto error;
1229
1230 if (pickler_write(self, &unicode_op, 1) < 0)
1231 goto error;
1232
1233 size = PyBytes_GET_SIZE(encoded);
1234 if (pickler_write(self, PyBytes_AS_STRING(encoded), size) < 0)
1235 goto error;
1236
1237 if (pickler_write(self, "\n", 1) < 0)
1238 goto error;
1239 }
1240 if (memo_put(self, obj) < 0)
1241 goto error;
1242
1243 Py_DECREF(encoded);
1244 return 0;
1245
1246 error:
1247 Py_XDECREF(encoded);
1248 return -1;
1249}
1250
1251/* A helper for save_tuple. Push the len elements in tuple t on the stack. */
1252static int
1253store_tuple_elements(PicklerObject *self, PyObject *t, int len)
1254{
1255 int i;
1256
1257 assert(PyTuple_Size(t) == len);
1258
1259 for (i = 0; i < len; i++) {
1260 PyObject *element = PyTuple_GET_ITEM(t, i);
1261
1262 if (element == NULL)
1263 return -1;
1264 if (save(self, element, 0) < 0)
1265 return -1;
1266 }
1267
1268 return 0;
1269}
1270
1271/* Tuples are ubiquitous in the pickle protocols, so many techniques are
1272 * used across protocols to minimize the space needed to pickle them.
1273 * Tuples are also the only builtin immutable type that can be recursive
1274 * (a tuple can be reached from itself), and that requires some subtle
1275 * magic so that it works in all cases. IOW, this is a long routine.
1276 */
1277static int
1278save_tuple(PicklerObject *self, PyObject *obj)
1279{
1280 PyObject *memo_key = NULL;
1281 int len, i;
1282 int status = 0;
1283
1284 const char mark_op = MARK;
1285 const char tuple_op = TUPLE;
1286 const char pop_op = POP;
1287 const char pop_mark_op = POP_MARK;
1288 const char len2opcode[] = {EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3};
1289
1290 if ((len = PyTuple_Size(obj)) < 0)
1291 return -1;
1292
1293 if (len == 0) {
1294 char pdata[2];
1295
1296 if (self->proto) {
1297 pdata[0] = EMPTY_TUPLE;
1298 len = 1;
1299 }
1300 else {
1301 pdata[0] = MARK;
1302 pdata[1] = TUPLE;
1303 len = 2;
1304 }
1305 if (pickler_write(self, pdata, len) < 0)
1306 return -1;
1307 return 0;
1308 }
1309
1310 /* id(tuple) isn't in the memo now. If it shows up there after
1311 * saving the tuple elements, the tuple must be recursive, in
1312 * which case we'll pop everything we put on the stack, and fetch
1313 * its value from the memo.
1314 */
1315 memo_key = PyLong_FromVoidPtr(obj);
1316 if (memo_key == NULL)
1317 return -1;
1318
1319 if (len <= 3 && self->proto >= 2) {
1320 /* Use TUPLE{1,2,3} opcodes. */
1321 if (store_tuple_elements(self, obj, len) < 0)
1322 goto error;
1323
1324 if (PyDict_GetItem(self->memo, memo_key)) {
1325 /* pop the len elements */
1326 for (i = 0; i < len; i++)
1327 if (pickler_write(self, &pop_op, 1) < 0)
1328 goto error;
1329 /* fetch from memo */
1330 if (memo_get(self, memo_key) < 0)
1331 goto error;
1332
1333 Py_DECREF(memo_key);
1334 return 0;
1335 }
1336 else { /* Not recursive. */
1337 if (pickler_write(self, len2opcode + len, 1) < 0)
1338 goto error;
1339 }
1340 goto memoize;
1341 }
1342
1343 /* proto < 2 and len > 0, or proto >= 2 and len > 3.
1344 * Generate MARK e1 e2 ... TUPLE
1345 */
1346 if (pickler_write(self, &mark_op, 1) < 0)
1347 goto error;
1348
1349 if (store_tuple_elements(self, obj, len) < 0)
1350 goto error;
1351
1352 if (PyDict_GetItem(self->memo, memo_key)) {
1353 /* pop the stack stuff we pushed */
1354 if (self->bin) {
1355 if (pickler_write(self, &pop_mark_op, 1) < 0)
1356 goto error;
1357 }
1358 else {
1359 /* Note that we pop one more than len, to remove
1360 * the MARK too.
1361 */
1362 for (i = 0; i <= len; i++)
1363 if (pickler_write(self, &pop_op, 1) < 0)
1364 goto error;
1365 }
1366 /* fetch from memo */
1367 if (memo_get(self, memo_key) < 0)
1368 goto error;
1369
1370 Py_DECREF(memo_key);
1371 return 0;
1372 }
1373 else { /* Not recursive. */
1374 if (pickler_write(self, &tuple_op, 1) < 0)
1375 goto error;
1376 }
1377
1378 memoize:
1379 if (memo_put(self, obj) < 0)
1380 goto error;
1381
1382 if (0) {
1383 error:
1384 status = -1;
1385 }
1386
1387 Py_DECREF(memo_key);
1388 return status;
1389}
1390
1391/* iter is an iterator giving items, and we batch up chunks of
1392 * MARK item item ... item APPENDS
1393 * opcode sequences. Calling code should have arranged to first create an
1394 * empty list, or list-like object, for the APPENDS to operate on.
1395 * Returns 0 on success, <0 on error.
1396 */
1397static int
1398batch_list(PicklerObject *self, PyObject *iter)
1399{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001400 PyObject *obj = NULL;
1401 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001402 int i, n;
1403
1404 const char mark_op = MARK;
1405 const char append_op = APPEND;
1406 const char appends_op = APPENDS;
1407
1408 assert(iter != NULL);
1409
1410 /* XXX: I think this function could be made faster by avoiding the
1411 iterator interface and fetching objects directly from list using
1412 PyList_GET_ITEM.
1413 */
1414
1415 if (self->proto == 0) {
1416 /* APPENDS isn't available; do one at a time. */
1417 for (;;) {
1418 obj = PyIter_Next(iter);
1419 if (obj == NULL) {
1420 if (PyErr_Occurred())
1421 return -1;
1422 break;
1423 }
1424 i = save(self, obj, 0);
1425 Py_DECREF(obj);
1426 if (i < 0)
1427 return -1;
1428 if (pickler_write(self, &append_op, 1) < 0)
1429 return -1;
1430 }
1431 return 0;
1432 }
1433
1434 /* proto > 0: write in batches of BATCHSIZE. */
1435 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001436 /* Get first item */
1437 firstitem = PyIter_Next(iter);
1438 if (firstitem == NULL) {
1439 if (PyErr_Occurred())
1440 goto error;
1441
1442 /* nothing more to add */
1443 break;
1444 }
1445
1446 /* Try to get a second item */
1447 obj = PyIter_Next(iter);
1448 if (obj == NULL) {
1449 if (PyErr_Occurred())
1450 goto error;
1451
1452 /* Only one item to write */
1453 if (save(self, firstitem, 0) < 0)
1454 goto error;
1455 if (pickler_write(self, &append_op, 1) < 0)
1456 goto error;
1457 Py_CLEAR(firstitem);
1458 break;
1459 }
1460
1461 /* More than one item to write */
1462
1463 /* Pump out MARK, items, APPENDS. */
1464 if (pickler_write(self, &mark_op, 1) < 0)
1465 goto error;
1466
1467 if (save(self, firstitem, 0) < 0)
1468 goto error;
1469 Py_CLEAR(firstitem);
1470 n = 1;
1471
1472 /* Fetch and save up to BATCHSIZE items */
1473 while (obj) {
1474 if (save(self, obj, 0) < 0)
1475 goto error;
1476 Py_CLEAR(obj);
1477 n += 1;
1478
1479 if (n == BATCHSIZE)
1480 break;
1481
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001482 obj = PyIter_Next(iter);
1483 if (obj == NULL) {
1484 if (PyErr_Occurred())
1485 goto error;
1486 break;
1487 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001488 }
1489
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001490 if (pickler_write(self, &appends_op, 1) < 0)
1491 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001492
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001493 } while (n == BATCHSIZE);
1494 return 0;
1495
1496 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001497 Py_XDECREF(firstitem);
1498 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001499 return -1;
1500}
1501
1502static int
1503save_list(PicklerObject *self, PyObject *obj)
1504{
1505 PyObject *iter;
1506 char header[3];
1507 int len;
1508 int status = 0;
1509
1510 if (self->fast && !fast_save_enter(self, obj))
1511 goto error;
1512
1513 /* Create an empty list. */
1514 if (self->bin) {
1515 header[0] = EMPTY_LIST;
1516 len = 1;
1517 }
1518 else {
1519 header[0] = MARK;
1520 header[1] = LIST;
1521 len = 2;
1522 }
1523
1524 if (pickler_write(self, header, len) < 0)
1525 goto error;
1526
1527 /* Get list length, and bow out early if empty. */
1528 if ((len = PyList_Size(obj)) < 0)
1529 goto error;
1530
1531 if (memo_put(self, obj) < 0)
1532 goto error;
1533
1534 if (len != 0) {
1535 /* Save the list elements. */
1536 iter = PyObject_GetIter(obj);
1537 if (iter == NULL)
1538 goto error;
1539 status = batch_list(self, iter);
1540 Py_DECREF(iter);
1541 }
1542
1543 if (0) {
1544 error:
1545 status = -1;
1546 }
1547
1548 if (self->fast && !fast_save_leave(self, obj))
1549 status = -1;
1550
1551 return status;
1552}
1553
1554/* iter is an iterator giving (key, value) pairs, and we batch up chunks of
1555 * MARK key value ... key value SETITEMS
1556 * opcode sequences. Calling code should have arranged to first create an
1557 * empty dict, or dict-like object, for the SETITEMS to operate on.
1558 * Returns 0 on success, <0 on error.
1559 *
1560 * This is very much like batch_list(). The difference between saving
1561 * elements directly, and picking apart two-tuples, is so long-winded at
1562 * the C level, though, that attempts to combine these routines were too
1563 * ugly to bear.
1564 */
1565static int
1566batch_dict(PicklerObject *self, PyObject *iter)
1567{
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001568 PyObject *obj = NULL;
1569 PyObject *firstitem = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001570 int i, n;
1571
1572 const char mark_op = MARK;
1573 const char setitem_op = SETITEM;
1574 const char setitems_op = SETITEMS;
1575
1576 assert(iter != NULL);
1577
1578 if (self->proto == 0) {
1579 /* SETITEMS isn't available; do one at a time. */
1580 for (;;) {
1581 obj = PyIter_Next(iter);
1582 if (obj == NULL) {
1583 if (PyErr_Occurred())
1584 return -1;
1585 break;
1586 }
1587 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1588 PyErr_SetString(PyExc_TypeError, "dict items "
1589 "iterator must return 2-tuples");
1590 return -1;
1591 }
1592 i = save(self, PyTuple_GET_ITEM(obj, 0), 0);
1593 if (i >= 0)
1594 i = save(self, PyTuple_GET_ITEM(obj, 1), 0);
1595 Py_DECREF(obj);
1596 if (i < 0)
1597 return -1;
1598 if (pickler_write(self, &setitem_op, 1) < 0)
1599 return -1;
1600 }
1601 return 0;
1602 }
1603
1604 /* proto > 0: write in batches of BATCHSIZE. */
1605 do {
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001606 /* Get first item */
1607 firstitem = PyIter_Next(iter);
1608 if (firstitem == NULL) {
1609 if (PyErr_Occurred())
1610 goto error;
1611
1612 /* nothing more to add */
1613 break;
1614 }
1615 if (!PyTuple_Check(firstitem) || PyTuple_Size(firstitem) != 2) {
1616 PyErr_SetString(PyExc_TypeError, "dict items "
1617 "iterator must return 2-tuples");
1618 goto error;
1619 }
1620
1621 /* Try to get a second item */
1622 obj = PyIter_Next(iter);
1623 if (obj == NULL) {
1624 if (PyErr_Occurred())
1625 goto error;
1626
1627 /* Only one item to write */
1628 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1629 goto error;
1630 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1631 goto error;
1632 if (pickler_write(self, &setitem_op, 1) < 0)
1633 goto error;
1634 Py_CLEAR(firstitem);
1635 break;
1636 }
1637
1638 /* More than one item to write */
1639
1640 /* Pump out MARK, items, SETITEMS. */
1641 if (pickler_write(self, &mark_op, 1) < 0)
1642 goto error;
1643
1644 if (save(self, PyTuple_GET_ITEM(firstitem, 0), 0) < 0)
1645 goto error;
1646 if (save(self, PyTuple_GET_ITEM(firstitem, 1), 0) < 0)
1647 goto error;
1648 Py_CLEAR(firstitem);
1649 n = 1;
1650
1651 /* Fetch and save up to BATCHSIZE items */
1652 while (obj) {
1653 if (!PyTuple_Check(obj) || PyTuple_Size(obj) != 2) {
1654 PyErr_SetString(PyExc_TypeError, "dict items "
1655 "iterator must return 2-tuples");
1656 goto error;
1657 }
1658 if (save(self, PyTuple_GET_ITEM(obj, 0), 0) < 0 ||
1659 save(self, PyTuple_GET_ITEM(obj, 1), 0) < 0)
1660 goto error;
1661 Py_CLEAR(obj);
1662 n += 1;
1663
1664 if (n == BATCHSIZE)
1665 break;
1666
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001667 obj = PyIter_Next(iter);
1668 if (obj == NULL) {
1669 if (PyErr_Occurred())
1670 goto error;
1671 break;
1672 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001673 }
1674
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001675 if (pickler_write(self, &setitems_op, 1) < 0)
1676 goto error;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001677
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001678 } while (n == BATCHSIZE);
1679 return 0;
1680
1681 error:
Amaury Forgeot d'Arcfb1a5eb2008-09-11 21:03:37 +00001682 Py_XDECREF(firstitem);
1683 Py_XDECREF(obj);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001684 return -1;
1685}
1686
1687static int
1688save_dict(PicklerObject *self, PyObject *obj)
1689{
1690 PyObject *items, *iter;
1691 char header[3];
1692 int len;
1693 int status = 0;
1694
1695 if (self->fast && !fast_save_enter(self, obj))
1696 goto error;
1697
1698 /* Create an empty dict. */
1699 if (self->bin) {
1700 header[0] = EMPTY_DICT;
1701 len = 1;
1702 }
1703 else {
1704 header[0] = MARK;
1705 header[1] = DICT;
1706 len = 2;
1707 }
1708
1709 if (pickler_write(self, header, len) < 0)
1710 goto error;
1711
1712 /* Get dict size, and bow out early if empty. */
1713 if ((len = PyDict_Size(obj)) < 0)
1714 goto error;
1715
1716 if (memo_put(self, obj) < 0)
1717 goto error;
1718
1719 if (len != 0) {
1720 /* Save the dict items. */
1721 items = PyObject_CallMethod(obj, "items", "()");
1722 if (items == NULL)
1723 goto error;
1724 iter = PyObject_GetIter(items);
1725 Py_DECREF(items);
1726 if (iter == NULL)
1727 goto error;
1728 status = batch_dict(self, iter);
1729 Py_DECREF(iter);
1730 }
1731
1732 if (0) {
1733 error:
1734 status = -1;
1735 }
1736
1737 if (self->fast && !fast_save_leave(self, obj))
1738 status = -1;
1739
1740 return status;
1741}
1742
1743static int
1744save_global(PicklerObject *self, PyObject *obj, PyObject *name)
1745{
1746 static PyObject *name_str = NULL;
1747 PyObject *global_name = NULL;
1748 PyObject *module_name = NULL;
1749 PyObject *module = NULL;
1750 PyObject *cls;
1751 int status = 0;
1752
1753 const char global_op = GLOBAL;
1754
1755 if (name_str == NULL) {
1756 name_str = PyUnicode_InternFromString("__name__");
1757 if (name_str == NULL)
1758 goto error;
1759 }
1760
1761 if (name) {
1762 global_name = name;
1763 Py_INCREF(global_name);
1764 }
1765 else {
1766 global_name = PyObject_GetAttr(obj, name_str);
1767 if (global_name == NULL)
1768 goto error;
1769 }
1770
1771 module_name = whichmodule(obj, global_name);
1772 if (module_name == NULL)
1773 goto error;
1774
1775 /* XXX: Change to use the import C API directly with level=0 to disallow
1776 relative imports.
1777
1778 XXX: PyImport_ImportModuleLevel could be used. However, this bypasses
1779 builtins.__import__. Therefore, _pickle, unlike pickle.py, will ignore
1780 custom import functions (IMHO, this would be a nice security
1781 feature). The import C API would need to be extended to support the
1782 extra parameters of __import__ to fix that. */
1783 module = PyImport_Import(module_name);
1784 if (module == NULL) {
1785 PyErr_Format(PicklingError,
1786 "Can't pickle %R: import of module %R failed",
1787 obj, module_name);
1788 goto error;
1789 }
1790 cls = PyObject_GetAttr(module, global_name);
1791 if (cls == NULL) {
1792 PyErr_Format(PicklingError,
1793 "Can't pickle %R: attribute lookup %S.%S failed",
1794 obj, module_name, global_name);
1795 goto error;
1796 }
1797 if (cls != obj) {
1798 Py_DECREF(cls);
1799 PyErr_Format(PicklingError,
1800 "Can't pickle %R: it's not the same object as %S.%S",
1801 obj, module_name, global_name);
1802 goto error;
1803 }
1804 Py_DECREF(cls);
1805
1806 if (self->proto >= 2) {
1807 /* See whether this is in the extension registry, and if
1808 * so generate an EXT opcode.
1809 */
1810 PyObject *code_obj; /* extension code as Python object */
1811 long code; /* extension code as C value */
1812 char pdata[5];
1813 int n;
1814
1815 PyTuple_SET_ITEM(two_tuple, 0, module_name);
1816 PyTuple_SET_ITEM(two_tuple, 1, global_name);
1817 code_obj = PyDict_GetItem(extension_registry, two_tuple);
1818 /* The object is not registered in the extension registry.
1819 This is the most likely code path. */
1820 if (code_obj == NULL)
1821 goto gen_global;
1822
1823 /* XXX: pickle.py doesn't check neither the type, nor the range
1824 of the value returned by the extension_registry. It should for
1825 consistency. */
1826
1827 /* Verify code_obj has the right type and value. */
1828 if (!PyLong_Check(code_obj)) {
1829 PyErr_Format(PicklingError,
1830 "Can't pickle %R: extension code %R isn't an integer",
1831 obj, code_obj);
1832 goto error;
1833 }
1834 code = PyLong_AS_LONG(code_obj);
1835 if (code <= 0 || code > 0x7fffffffL) {
1836 PyErr_Format(PicklingError,
1837 "Can't pickle %R: extension code %ld is out of range",
1838 obj, code);
1839 goto error;
1840 }
1841
1842 /* Generate an EXT opcode. */
1843 if (code <= 0xff) {
1844 pdata[0] = EXT1;
1845 pdata[1] = (unsigned char)code;
1846 n = 2;
1847 }
1848 else if (code <= 0xffff) {
1849 pdata[0] = EXT2;
1850 pdata[1] = (unsigned char)(code & 0xff);
1851 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1852 n = 3;
1853 }
1854 else {
1855 pdata[0] = EXT4;
1856 pdata[1] = (unsigned char)(code & 0xff);
1857 pdata[2] = (unsigned char)((code >> 8) & 0xff);
1858 pdata[3] = (unsigned char)((code >> 16) & 0xff);
1859 pdata[4] = (unsigned char)((code >> 24) & 0xff);
1860 n = 5;
1861 }
1862
1863 if (pickler_write(self, pdata, n) < 0)
1864 goto error;
1865 }
1866 else {
1867 /* Generate a normal global opcode if we are using a pickle
1868 protocol <= 2, or if the object is not registered in the
1869 extension registry. */
1870 PyObject *encoded;
1871 PyObject *(*unicode_encoder)(PyObject *);
1872
1873 gen_global:
1874 if (pickler_write(self, &global_op, 1) < 0)
1875 goto error;
1876
1877 /* Since Python 3.0 now supports non-ASCII identifiers, we encode both
1878 the module name and the global name using UTF-8. We do so only when
1879 we are using the pickle protocol newer than version 3. This is to
1880 ensure compatibility with older Unpickler running on Python 2.x. */
1881 if (self->proto >= 3) {
1882 unicode_encoder = PyUnicode_AsUTF8String;
1883 }
1884 else {
1885 unicode_encoder = PyUnicode_AsASCIIString;
1886 }
1887
1888 /* Save the name of the module. */
1889 encoded = unicode_encoder(module_name);
1890 if (encoded == NULL) {
1891 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1892 PyErr_Format(PicklingError,
1893 "can't pickle module identifier '%S' using "
1894 "pickle protocol %i", module_name, self->proto);
1895 goto error;
1896 }
1897 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1898 PyBytes_GET_SIZE(encoded)) < 0) {
1899 Py_DECREF(encoded);
1900 goto error;
1901 }
1902 Py_DECREF(encoded);
1903 if(pickler_write(self, "\n", 1) < 0)
1904 goto error;
1905
1906 /* Save the name of the module. */
1907 encoded = unicode_encoder(global_name);
1908 if (encoded == NULL) {
1909 if (PyErr_ExceptionMatches(PyExc_UnicodeEncodeError))
1910 PyErr_Format(PicklingError,
1911 "can't pickle global identifier '%S' using "
1912 "pickle protocol %i", global_name, self->proto);
1913 goto error;
1914 }
1915 if (pickler_write(self, PyBytes_AS_STRING(encoded),
1916 PyBytes_GET_SIZE(encoded)) < 0) {
1917 Py_DECREF(encoded);
1918 goto error;
1919 }
1920 Py_DECREF(encoded);
1921 if(pickler_write(self, "\n", 1) < 0)
1922 goto error;
1923
1924 /* Memoize the object. */
1925 if (memo_put(self, obj) < 0)
1926 goto error;
1927 }
1928
1929 if (0) {
1930 error:
1931 status = -1;
1932 }
1933 Py_XDECREF(module_name);
1934 Py_XDECREF(global_name);
1935 Py_XDECREF(module);
1936
1937 return status;
1938}
1939
1940static int
1941save_pers(PicklerObject *self, PyObject *obj, PyObject *func)
1942{
1943 PyObject *pid = NULL;
1944 int status = 0;
1945
1946 const char persid_op = PERSID;
1947 const char binpersid_op = BINPERSID;
1948
1949 Py_INCREF(obj);
1950 pid = pickler_call(self, func, obj);
1951 if (pid == NULL)
1952 return -1;
1953
1954 if (pid != Py_None) {
1955 if (self->bin) {
1956 if (save(self, pid, 1) < 0 ||
1957 pickler_write(self, &binpersid_op, 1) < 0)
1958 goto error;
1959 }
1960 else {
1961 PyObject *pid_str = NULL;
1962 char *pid_ascii_bytes;
1963 Py_ssize_t size;
1964
1965 pid_str = PyObject_Str(pid);
1966 if (pid_str == NULL)
1967 goto error;
1968
1969 /* XXX: Should it check whether the persistent id only contains
1970 ASCII characters? And what if the pid contains embedded
1971 newlines? */
Marc-André Lemburg4cc0f242008-08-07 18:54:33 +00001972 pid_ascii_bytes = _PyUnicode_AsStringAndSize(pid_str, &size);
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00001973 Py_DECREF(pid_str);
1974 if (pid_ascii_bytes == NULL)
1975 goto error;
1976
1977 if (pickler_write(self, &persid_op, 1) < 0 ||
1978 pickler_write(self, pid_ascii_bytes, size) < 0 ||
1979 pickler_write(self, "\n", 1) < 0)
1980 goto error;
1981 }
1982 status = 1;
1983 }
1984
1985 if (0) {
1986 error:
1987 status = -1;
1988 }
1989 Py_XDECREF(pid);
1990
1991 return status;
1992}
1993
1994/* We're saving obj, and args is the 2-thru-5 tuple returned by the
1995 * appropriate __reduce__ method for obj.
1996 */
1997static int
1998save_reduce(PicklerObject *self, PyObject *args, PyObject *obj)
1999{
2000 PyObject *callable;
2001 PyObject *argtup;
2002 PyObject *state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002003 PyObject *listitems = Py_None;
2004 PyObject *dictitems = Py_None;
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002005 Py_ssize_t size;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002006
2007 int use_newobj = self->proto >= 2;
2008
2009 const char reduce_op = REDUCE;
2010 const char build_op = BUILD;
2011 const char newobj_op = NEWOBJ;
2012
Hirokazu Yamamotob46a6332008-11-04 00:35:10 +00002013 size = PyTuple_Size(args);
2014 if (size < 2 || size > 5) {
2015 PyErr_SetString(PicklingError, "tuple returned by "
2016 "__reduce__ must contain 2 through 5 elements");
2017 return -1;
2018 }
2019
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002020 if (!PyArg_UnpackTuple(args, "save_reduce", 2, 5,
2021 &callable, &argtup, &state, &listitems, &dictitems))
2022 return -1;
2023
2024 if (!PyCallable_Check(callable)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002025 PyErr_SetString(PicklingError, "first item of the tuple "
2026 "returned by __reduce__ must be callable");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002027 return -1;
2028 }
2029 if (!PyTuple_Check(argtup)) {
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002030 PyErr_SetString(PicklingError, "second item of the tuple "
2031 "returned by __reduce__ must be a tuple");
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002032 return -1;
2033 }
2034
2035 if (state == Py_None)
2036 state = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002037
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002038 if (listitems == Py_None)
2039 listitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002040 else if (!PyIter_Check(listitems)) {
2041 PyErr_Format(PicklingError, "Fourth element of tuple"
2042 "returned by __reduce__ must be an iterator, not %s",
2043 Py_TYPE(listitems)->tp_name);
2044 return -1;
2045 }
2046
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002047 if (dictitems == Py_None)
2048 dictitems = NULL;
Amaury Forgeot d'Arc424b4812008-10-30 22:25:31 +00002049 else if (!PyIter_Check(dictitems)) {
2050 PyErr_Format(PicklingError, "Fifth element of tuple"
2051 "returned by __reduce__ must be an iterator, not %s",
2052 Py_TYPE(dictitems)->tp_name);
2053 return -1;
2054 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002055
2056 /* Protocol 2 special case: if callable's name is __newobj__, use
2057 NEWOBJ. */
2058 if (use_newobj) {
2059 static PyObject *newobj_str = NULL;
2060 PyObject *name_str;
2061
2062 if (newobj_str == NULL) {
2063 newobj_str = PyUnicode_InternFromString("__newobj__");
2064 }
2065
2066 name_str = PyObject_GetAttrString(callable, "__name__");
2067 if (name_str == NULL) {
2068 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2069 PyErr_Clear();
2070 else
2071 return -1;
2072 use_newobj = 0;
2073 }
2074 else {
2075 use_newobj = PyUnicode_Check(name_str) &&
2076 PyUnicode_Compare(name_str, newobj_str) == 0;
2077 Py_DECREF(name_str);
2078 }
2079 }
2080 if (use_newobj) {
2081 PyObject *cls;
2082 PyObject *newargtup;
2083 PyObject *obj_class;
2084 int p;
2085
2086 /* Sanity checks. */
2087 if (Py_SIZE(argtup) < 1) {
2088 PyErr_SetString(PicklingError, "__newobj__ arglist is empty");
2089 return -1;
2090 }
2091
2092 cls = PyTuple_GET_ITEM(argtup, 0);
2093 if (!PyObject_HasAttrString(cls, "__new__")) {
2094 PyErr_SetString(PicklingError, "args[0] from "
2095 "__newobj__ args has no __new__");
2096 return -1;
2097 }
2098
2099 if (obj != NULL) {
2100 obj_class = PyObject_GetAttrString(obj, "__class__");
2101 if (obj_class == NULL) {
2102 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2103 PyErr_Clear();
2104 else
2105 return -1;
2106 }
2107 p = obj_class != cls; /* true iff a problem */
2108 Py_DECREF(obj_class);
2109 if (p) {
2110 PyErr_SetString(PicklingError, "args[0] from "
2111 "__newobj__ args has the wrong class");
2112 return -1;
2113 }
2114 }
2115 /* XXX: These calls save() are prone to infinite recursion. Imagine
2116 what happen if the value returned by the __reduce__() method of
2117 some extension type contains another object of the same type. Ouch!
2118
2119 Here is a quick example, that I ran into, to illustrate what I
2120 mean:
2121
2122 >>> import pickle, copyreg
2123 >>> copyreg.dispatch_table.pop(complex)
2124 >>> pickle.dumps(1+2j)
2125 Traceback (most recent call last):
2126 ...
2127 RuntimeError: maximum recursion depth exceeded
2128
2129 Removing the complex class from copyreg.dispatch_table made the
2130 __reduce_ex__() method emit another complex object:
2131
2132 >>> (1+1j).__reduce_ex__(2)
2133 (<function __newobj__ at 0xb7b71c3c>,
2134 (<class 'complex'>, (1+1j)), None, None, None)
2135
2136 Thus when save() was called on newargstup (the 2nd item) recursion
2137 ensued. Of course, the bug was in the complex class which had a
2138 broken __getnewargs__() that emitted another complex object. But,
2139 the point, here, is it is quite easy to end up with a broken reduce
2140 function. */
2141
2142 /* Save the class and its __new__ arguments. */
2143 if (save(self, cls, 0) < 0)
2144 return -1;
2145
2146 newargtup = PyTuple_GetSlice(argtup, 1, Py_SIZE(argtup));
2147 if (newargtup == NULL)
2148 return -1;
2149
2150 p = save(self, newargtup, 0);
2151 Py_DECREF(newargtup);
2152 if (p < 0)
2153 return -1;
2154
2155 /* Add NEWOBJ opcode. */
2156 if (pickler_write(self, &newobj_op, 1) < 0)
2157 return -1;
2158 }
2159 else { /* Not using NEWOBJ. */
2160 if (save(self, callable, 0) < 0 ||
2161 save(self, argtup, 0) < 0 ||
2162 pickler_write(self, &reduce_op, 1) < 0)
2163 return -1;
2164 }
2165
2166 /* obj can be NULL when save_reduce() is used directly. A NULL obj means
2167 the caller do not want to memoize the object. Not particularly useful,
2168 but that is to mimic the behavior save_reduce() in pickle.py when
2169 obj is None. */
2170 if (obj && memo_put(self, obj) < 0)
2171 return -1;
2172
2173 if (listitems && batch_list(self, listitems) < 0)
2174 return -1;
2175
2176 if (dictitems && batch_dict(self, dictitems) < 0)
2177 return -1;
2178
2179 if (state) {
2180 if (save(self, state, 0) < 0 ||
2181 pickler_write(self, &build_op, 1) < 0)
2182 return -1;
2183 }
2184
2185 return 0;
2186}
2187
2188static int
2189save(PicklerObject *self, PyObject *obj, int pers_save)
2190{
2191 PyTypeObject *type;
2192 PyObject *reduce_func = NULL;
2193 PyObject *reduce_value = NULL;
2194 PyObject *memo_key = NULL;
2195 int status = 0;
2196
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002197 if (Py_EnterRecursiveCall(" while pickling an object") < 0)
2198 return -1;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002199
2200 /* The extra pers_save argument is necessary to avoid calling save_pers()
2201 on its returned object. */
2202 if (!pers_save && self->pers_func) {
2203 /* save_pers() returns:
2204 -1 to signal an error;
2205 0 if it did nothing successfully;
2206 1 if a persistent id was saved.
2207 */
2208 if ((status = save_pers(self, obj, self->pers_func)) != 0)
2209 goto done;
2210 }
2211
2212 type = Py_TYPE(obj);
2213
2214 /* XXX: The old cPickle had an optimization that used switch-case
2215 statement dispatching on the first letter of the type name. It was
2216 probably not a bad idea after all. If benchmarks shows that particular
2217 optimization had some real benefits, it would be nice to add it
2218 back. */
2219
2220 /* Atom types; these aren't memoized, so don't check the memo. */
2221
2222 if (obj == Py_None) {
2223 status = save_none(self, obj);
2224 goto done;
2225 }
2226 else if (obj == Py_False || obj == Py_True) {
2227 status = save_bool(self, obj);
2228 goto done;
2229 }
2230 else if (type == &PyLong_Type) {
2231 status = save_long(self, obj);
2232 goto done;
2233 }
2234 else if (type == &PyFloat_Type) {
2235 status = save_float(self, obj);
2236 goto done;
2237 }
2238
2239 /* Check the memo to see if it has the object. If so, generate
2240 a GET (or BINGET) opcode, instead of pickling the object
2241 once again. */
2242 memo_key = PyLong_FromVoidPtr(obj);
2243 if (memo_key == NULL)
2244 goto error;
2245 if (PyDict_GetItem(self->memo, memo_key)) {
2246 if (memo_get(self, memo_key) < 0)
2247 goto error;
2248 goto done;
2249 }
2250
2251 if (type == &PyBytes_Type) {
2252 status = save_bytes(self, obj);
2253 goto done;
2254 }
2255 else if (type == &PyUnicode_Type) {
2256 status = save_unicode(self, obj);
2257 goto done;
2258 }
2259 else if (type == &PyDict_Type) {
2260 status = save_dict(self, obj);
2261 goto done;
2262 }
2263 else if (type == &PyList_Type) {
2264 status = save_list(self, obj);
2265 goto done;
2266 }
2267 else if (type == &PyTuple_Type) {
2268 status = save_tuple(self, obj);
2269 goto done;
2270 }
2271 else if (type == &PyType_Type) {
2272 status = save_global(self, obj, NULL);
2273 goto done;
2274 }
2275 else if (type == &PyFunction_Type) {
2276 status = save_global(self, obj, NULL);
2277 if (status < 0 && PyErr_ExceptionMatches(PickleError)) {
2278 /* fall back to reduce */
2279 PyErr_Clear();
2280 }
2281 else {
2282 goto done;
2283 }
2284 }
2285 else if (type == &PyCFunction_Type) {
2286 status = save_global(self, obj, NULL);
2287 goto done;
2288 }
2289 else if (PyType_IsSubtype(type, &PyType_Type)) {
2290 status = save_global(self, obj, NULL);
2291 goto done;
2292 }
2293
2294 /* XXX: This part needs some unit tests. */
2295
2296 /* Get a reduction callable, and call it. This may come from
2297 * copyreg.dispatch_table, the object's __reduce_ex__ method,
2298 * or the object's __reduce__ method.
2299 */
2300 reduce_func = PyDict_GetItem(dispatch_table, (PyObject *)type);
2301 if (reduce_func != NULL) {
2302 /* Here, the reference count of the reduce_func object returned by
2303 PyDict_GetItem needs to be increased to be consistent with the one
2304 returned by PyObject_GetAttr. This is allow us to blindly DECREF
2305 reduce_func at the end of the save() routine.
2306 */
2307 Py_INCREF(reduce_func);
2308 Py_INCREF(obj);
2309 reduce_value = pickler_call(self, reduce_func, obj);
2310 }
2311 else {
2312 static PyObject *reduce_str = NULL;
2313 static PyObject *reduce_ex_str = NULL;
2314
2315 /* Cache the name of the reduce methods. */
2316 if (reduce_str == NULL) {
2317 reduce_str = PyUnicode_InternFromString("__reduce__");
2318 if (reduce_str == NULL)
2319 goto error;
2320 reduce_ex_str = PyUnicode_InternFromString("__reduce_ex__");
2321 if (reduce_ex_str == NULL)
2322 goto error;
2323 }
2324
2325 /* XXX: If the __reduce__ method is defined, __reduce_ex__ is
2326 automatically defined as __reduce__. While this is convenient, this
2327 make it impossible to know which method was actually called. Of
2328 course, this is not a big deal. But still, it would be nice to let
2329 the user know which method was called when something go
2330 wrong. Incidentally, this means if __reduce_ex__ is not defined, we
2331 don't actually have to check for a __reduce__ method. */
2332
2333 /* Check for a __reduce_ex__ method. */
2334 reduce_func = PyObject_GetAttr(obj, reduce_ex_str);
2335 if (reduce_func != NULL) {
2336 PyObject *proto;
2337 proto = PyLong_FromLong(self->proto);
2338 if (proto != NULL) {
2339 reduce_value = pickler_call(self, reduce_func, proto);
2340 }
2341 }
2342 else {
2343 if (PyErr_ExceptionMatches(PyExc_AttributeError))
2344 PyErr_Clear();
2345 else
2346 goto error;
2347 /* Check for a __reduce__ method. */
2348 reduce_func = PyObject_GetAttr(obj, reduce_str);
2349 if (reduce_func != NULL) {
2350 reduce_value = PyObject_Call(reduce_func, empty_tuple, NULL);
2351 }
2352 else {
2353 PyErr_Format(PicklingError, "can't pickle '%.200s' object: %R",
2354 type->tp_name, obj);
2355 goto error;
2356 }
2357 }
2358 }
2359
2360 if (reduce_value == NULL)
2361 goto error;
2362
2363 if (PyUnicode_Check(reduce_value)) {
2364 status = save_global(self, obj, reduce_value);
2365 goto done;
2366 }
2367
2368 if (!PyTuple_Check(reduce_value)) {
2369 PyErr_SetString(PicklingError,
2370 "__reduce__ must return a string or tuple");
2371 goto error;
2372 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002373
2374 status = save_reduce(self, reduce_value, obj);
2375
2376 if (0) {
2377 error:
2378 status = -1;
2379 }
2380 done:
Alexandre Vassalottidff18342008-07-13 18:48:30 +00002381 Py_LeaveRecursiveCall();
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002382 Py_XDECREF(memo_key);
2383 Py_XDECREF(reduce_func);
2384 Py_XDECREF(reduce_value);
2385
2386 return status;
2387}
2388
2389static int
2390dump(PicklerObject *self, PyObject *obj)
2391{
2392 const char stop_op = STOP;
2393
2394 if (self->proto >= 2) {
2395 char header[2];
2396
2397 header[0] = PROTO;
2398 assert(self->proto >= 0 && self->proto < 256);
2399 header[1] = (unsigned char)self->proto;
2400 if (pickler_write(self, header, 2) < 0)
2401 return -1;
2402 }
2403
2404 if (save(self, obj, 0) < 0 ||
2405 pickler_write(self, &stop_op, 1) < 0 ||
2406 pickler_write(self, NULL, 0) < 0)
2407 return -1;
2408
2409 return 0;
2410}
2411
2412PyDoc_STRVAR(Pickler_clear_memo_doc,
2413"clear_memo() -> None. Clears the pickler's \"memo\"."
2414"\n"
2415"The memo is the data structure that remembers which objects the\n"
2416"pickler has already seen, so that shared or recursive objects are\n"
2417"pickled by reference and not by value. This method is useful when\n"
2418"re-using picklers.");
2419
2420static PyObject *
2421Pickler_clear_memo(PicklerObject *self)
2422{
2423 if (self->memo)
2424 PyDict_Clear(self->memo);
2425
2426 Py_RETURN_NONE;
2427}
2428
2429PyDoc_STRVAR(Pickler_dump_doc,
2430"dump(obj) -> None. Write a pickled representation of obj to the open file.");
2431
2432static PyObject *
2433Pickler_dump(PicklerObject *self, PyObject *args)
2434{
2435 PyObject *obj;
2436
Amaury Forgeot d'Arc87eee632008-10-17 20:15:53 +00002437 /* Check whether the Pickler was initialized correctly (issue3664).
2438 Developers often forget to call __init__() in their subclasses, which
2439 would trigger a segfault without this check. */
2440 if (self->write == NULL) {
2441 PyErr_Format(PicklingError,
2442 "Pickler.__init__() was not called by %s.__init__()",
2443 Py_TYPE(self)->tp_name);
2444 return NULL;
2445 }
2446
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002447 if (!PyArg_ParseTuple(args, "O:dump", &obj))
2448 return NULL;
2449
2450 if (dump(self, obj) < 0)
2451 return NULL;
2452
2453 Py_RETURN_NONE;
2454}
2455
/* Method table for Pickler objects (installed through the tp_methods slot
   of Pickler_Type): dump(obj) takes positional arguments, clear_memo()
   takes none. */
static struct PyMethodDef Pickler_methods[] = {
    {"dump", (PyCFunction)Pickler_dump, METH_VARARGS,
     Pickler_dump_doc},
    {"clear_memo", (PyCFunction)Pickler_clear_memo, METH_NOARGS,
     Pickler_clear_memo_doc},
    {NULL, NULL}                /* sentinel */
};
2463
/* tp_dealloc for Pickler objects: untrack the object from the GC, drop
   every object reference held in the struct, free the write buffer and
   finally release the object itself through the type's tp_free slot. */
static void
Pickler_dealloc(PicklerObject *self)
{
    /* Untrack first so the collector never sees a half-torn-down object. */
    PyObject_GC_UnTrack(self);

    Py_XDECREF(self->write);
    Py_XDECREF(self->memo);
    Py_XDECREF(self->pers_func);
    Py_XDECREF(self->arg);
    Py_XDECREF(self->fast_memo);

    PyMem_Free(self->write_buf);

    Py_TYPE(self)->tp_free((PyObject *)self);
}
2479
/* tp_traverse for Pickler objects: report every object reference held in
   the struct to the garbage collector.  Py_VISIT relies on the parameter
   names `visit` and `arg` and returns early if the callback returns
   non-zero. */
static int
Pickler_traverse(PicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->write);
    Py_VISIT(self->memo);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->fast_memo);
    return 0;
}
2490
/* tp_clear for Pickler objects; also called by Pickler_init() when
   __init__() runs again on an initialized object.  Drops every held
   reference, frees the write buffer and leaves all of those fields NULL.
   Always returns 0. */
static int
Pickler_clear(PicklerObject *self)
{
    Py_CLEAR(self->write);
    Py_CLEAR(self->memo);
    Py_CLEAR(self->pers_func);
    Py_CLEAR(self->arg);
    Py_CLEAR(self->fast_memo);

    PyMem_Free(self->write_buf);
    /* Reset the pointer so a later clear or dealloc does not free twice. */
    self->write_buf = NULL;

    return 0;
}
2505
/* Docstring of the Pickler type (used as tp_doc in Pickler_Type). */
PyDoc_STRVAR(Pickler_doc,
"Pickler(file, protocol=None)"
"\n"
"This takes a binary file for writing a pickle data stream.\n"
"\n"
"The optional protocol argument tells the pickler to use the\n"
"given protocol; supported protocols are 0, 1, 2, 3.  The default\n"
"protocol is 3; a backward-incompatible protocol designed for\n"
"Python 3.0.\n"
"\n"
"Specifying a negative protocol version selects the highest\n"
"protocol version supported.  The higher the protocol used, the\n"
"more recent the version of Python needed to read the pickle\n"
"produced.\n"
"\n"
"The file argument must have a write() method that accepts a single\n"
"bytes argument. It can thus be a file object opened for binary\n"
"writing, a io.BytesIO instance, or any other custom object that\n"
"meets this interface.\n");
2525
2526static int
2527Pickler_init(PicklerObject *self, PyObject *args, PyObject *kwds)
2528{
2529 static char *kwlist[] = {"file", "protocol", 0};
2530 PyObject *file;
2531 PyObject *proto_obj = NULL;
2532 long proto = 0;
2533
2534 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|O:Pickler",
2535 kwlist, &file, &proto_obj))
2536 return -1;
2537
2538 /* In case of multiple __init__() calls, clear previous content. */
2539 if (self->write != NULL)
2540 (void)Pickler_clear(self);
2541
2542 if (proto_obj == NULL || proto_obj == Py_None)
2543 proto = DEFAULT_PROTOCOL;
2544 else
2545 proto = PyLong_AsLong(proto_obj);
2546
2547 if (proto < 0)
2548 proto = HIGHEST_PROTOCOL;
2549 if (proto > HIGHEST_PROTOCOL) {
2550 PyErr_Format(PyExc_ValueError, "pickle protocol must be <= %d",
2551 HIGHEST_PROTOCOL);
2552 return -1;
2553 }
2554
2555 self->proto = proto;
2556 self->bin = proto > 0;
2557 self->arg = NULL;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002558 self->fast = 0;
2559 self->fast_nesting = 0;
2560 self->fast_memo = NULL;
2561
2562 if (!PyObject_HasAttrString(file, "write")) {
2563 PyErr_SetString(PyExc_TypeError,
2564 "file must have a 'write' attribute");
2565 return -1;
2566 }
2567 self->write = PyObject_GetAttrString(file, "write");
2568 if (self->write == NULL)
2569 return -1;
2570 self->buf_size = 0;
2571 self->write_buf = (char *)PyMem_Malloc(WRITE_BUF_SIZE);
2572 if (self->write_buf == NULL) {
2573 PyErr_NoMemory();
2574 return -1;
2575 }
2576 self->pers_func = NULL;
2577 if (PyObject_HasAttrString((PyObject *)self, "persistent_id")) {
2578 self->pers_func = PyObject_GetAttrString((PyObject *)self,
2579 "persistent_id");
2580 if (self->pers_func == NULL)
2581 return -1;
2582 }
2583 self->memo = PyDict_New();
2584 if (self->memo == NULL)
2585 return -1;
2586
2587 return 0;
2588}
2589
2590static PyObject *
2591Pickler_get_memo(PicklerObject *self)
2592{
2593 if (self->memo == NULL)
2594 PyErr_SetString(PyExc_AttributeError, "memo");
2595 else
2596 Py_INCREF(self->memo);
2597 return self->memo;
2598}
2599
2600static int
2601Pickler_set_memo(PicklerObject *self, PyObject *value)
2602{
2603 PyObject *tmp;
2604
2605 if (value == NULL) {
2606 PyErr_SetString(PyExc_TypeError,
2607 "attribute deletion is not supported");
2608 return -1;
2609 }
2610 if (!PyDict_Check(value)) {
2611 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
2612 return -1;
2613 }
2614
2615 tmp = self->memo;
2616 Py_INCREF(value);
2617 self->memo = value;
2618 Py_XDECREF(tmp);
2619
2620 return 0;
2621}
2622
2623static PyObject *
2624Pickler_get_persid(PicklerObject *self)
2625{
2626 if (self->pers_func == NULL)
2627 PyErr_SetString(PyExc_AttributeError, "persistent_id");
2628 else
2629 Py_INCREF(self->pers_func);
2630 return self->pers_func;
2631}
2632
2633static int
2634Pickler_set_persid(PicklerObject *self, PyObject *value)
2635{
2636 PyObject *tmp;
2637
2638 if (value == NULL) {
2639 PyErr_SetString(PyExc_TypeError,
2640 "attribute deletion is not supported");
2641 return -1;
2642 }
2643 if (!PyCallable_Check(value)) {
2644 PyErr_SetString(PyExc_TypeError,
2645 "persistent_id must be a callable taking one argument");
2646 return -1;
2647 }
2648
2649 tmp = self->pers_func;
2650 Py_INCREF(value);
2651 self->pers_func = value;
2652 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
2653
2654 return 0;
2655}
2656
/* Struct members exposed as attributes of Pickler objects: the C int
   fields `bin` and `fast` of PicklerObject. */
static PyMemberDef Pickler_members[] = {
    {"bin", T_INT, offsetof(PicklerObject, bin)},
    {"fast", T_INT, offsetof(PicklerObject, fast)},
    {NULL}                      /* sentinel */
};
2662
/* Computed attributes of Pickler objects: `memo` and `persistent_id`,
   each backed by a getter/setter pair defined in this file. */
static PyGetSetDef Pickler_getsets[] = {
    {"memo",          (getter)Pickler_get_memo,
                      (setter)Pickler_set_memo},
    {"persistent_id", (getter)Pickler_get_persid,
                      (setter)Pickler_set_persid},
    {NULL}                      /* sentinel */
};
2670
/* Type object for _pickle.Pickler.  The type is subclassable
   (Py_TPFLAGS_BASETYPE) and participates in cyclic garbage collection
   (Py_TPFLAGS_HAVE_GC with tp_traverse/tp_clear and PyObject_GC_Del as
   tp_free). */
static PyTypeObject Pickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Pickler"  ,                /*tp_name*/
    sizeof(PicklerObject),              /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Pickler_dealloc,        /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Pickler_doc,                        /*tp_doc*/
    (traverseproc)Pickler_traverse,     /*tp_traverse*/
    (inquiry)Pickler_clear,             /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Pickler_methods,                    /*tp_methods*/
    Pickler_members,                    /*tp_members*/
    Pickler_getsets,                    /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Pickler_init,             /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
2713
2714/* Temporary helper for calling self.find_class().
2715
2716 XXX: It would be nice to able to avoid Python function call overhead, by
2717 using directly the C version of find_class(), when find_class() is not
2718 overridden by a subclass. Although, this could become rather hackish. A
2719 simpler optimization would be to call the C function when self is not a
2720 subclass instance. */
2721static PyObject *
2722find_class(UnpicklerObject *self, PyObject *module_name, PyObject *global_name)
2723{
2724 return PyObject_CallMethod((PyObject *)self, "find_class", "OO",
2725 module_name, global_name);
2726}
2727
2728static int
2729marker(UnpicklerObject *self)
2730{
2731 if (self->num_marks < 1) {
2732 PyErr_SetString(UnpicklingError, "could not find MARK");
2733 return -1;
2734 }
2735
2736 return self->marks[--self->num_marks];
2737}
2738
/* NONE opcode: append Py_None to the unpickler stack.  Returns 0 on
   success; the -1 passed to PDATA_APPEND is the value the macro makes this
   function return if the append fails. */
static int
load_none(UnpicklerObject *self)
{
    PDATA_APPEND(self->stack, Py_None, -1);
    return 0;
}
2745
/* Raise UnpicklingError for input that ended too early.  Always returns
   -1 so that callers can write `return bad_readline();`. */
static int
bad_readline(void)
{
    PyErr_SetString(UnpicklingError, "pickle data was truncated");
    return -1;
}
2752
2753static int
2754load_int(UnpicklerObject *self)
2755{
2756 PyObject *value;
2757 char *endptr, *s;
2758 Py_ssize_t len;
2759 long x;
2760
2761 if ((len = unpickler_readline(self, &s)) < 0)
2762 return -1;
2763 if (len < 2)
2764 return bad_readline();
2765
2766 errno = 0;
2767 /* XXX: Should the base argument of strtol() be explicitly set to 10? */
2768 x = strtol(s, &endptr, 0);
2769
2770 if (errno || (*endptr != '\n') || (endptr[1] != '\0')) {
2771 /* Hm, maybe we've got something long. Let's try reading
2772 * it as a Python long object. */
2773 errno = 0;
2774 /* XXX: Same thing about the base here. */
2775 value = PyLong_FromString(s, NULL, 0);
2776 if (value == NULL) {
2777 PyErr_SetString(PyExc_ValueError,
2778 "could not convert string to int");
2779 return -1;
2780 }
2781 }
2782 else {
2783 if (len == 3 && (x == 0 || x == 1)) {
2784 if ((value = PyBool_FromLong(x)) == NULL)
2785 return -1;
2786 }
2787 else {
2788 if ((value = PyLong_FromLong(x)) == NULL)
2789 return -1;
2790 }
2791 }
2792
2793 PDATA_PUSH(self->stack, value, -1);
2794 return 0;
2795}
2796
/* NEWTRUE/NEWFALSE helper: append the given bool singleton (Py_True or
   Py_False, enforced by the assert) to the unpickler stack.  Returns 0 on
   success; PDATA_APPEND makes the function return -1 on failure. */
static int
load_bool(UnpicklerObject *self, PyObject *boolean)
{
    assert(boolean == Py_True || boolean == Py_False);
    PDATA_APPEND(self->stack, boolean, -1);
    return 0;
}
2804
/* bytes contains size bytes of a little-endian integer.  Return its value
 * as a C long.  Obscure:  when size is 1 or 2, this is an unsigned
 * little-endian int, but when size is 4 it's a signed one.  This is a
 * historical source of cross-platform bugs.
 *
 * The value is assembled in an unsigned long so that no shift ever moves a
 * bit into the sign position (undefined behaviour when long is 32 bits
 * wide), and the sign of a 4-byte value is then applied arithmetically so
 * the result is the same whatever the width of long.
 */
static long
calc_binint(char *bytes, int size)
{
    unsigned char *s = (unsigned char *)bytes;
    unsigned long x = 0;
    int i;

    for (i = 0; i < size; i++) {
        x |= (unsigned long)s[i] << (i * 8);
    }

    /* Unlike BININT1 and BININT2, BININT (more accurately BININT4)
     * is signed: a set bit 31 means the value is x - 2**32.  Compute that
     * as -(2**32 - 1 - x) - 1, whose intermediate results all fit in a
     * 32-bit long.
     */
    if (size == 4 && (x & 0x80000000UL) != 0) {
        return -(long)(0xFFFFFFFFUL - x) - 1;
    }

    return (long)x;
}
2831
2832static int
2833load_binintx(UnpicklerObject *self, char *s, int size)
2834{
2835 PyObject *value;
2836 long x;
2837
2838 x = calc_binint(s, size);
2839
2840 if ((value = PyLong_FromLong(x)) == NULL)
2841 return -1;
2842
2843 PDATA_PUSH(self->stack, value, -1);
2844 return 0;
2845}
2846
2847static int
2848load_binint(UnpicklerObject *self)
2849{
2850 char *s;
2851
2852 if (unpickler_read(self, &s, 4) < 0)
2853 return -1;
2854
2855 return load_binintx(self, s, 4);
2856}
2857
2858static int
2859load_binint1(UnpicklerObject *self)
2860{
2861 char *s;
2862
2863 if (unpickler_read(self, &s, 1) < 0)
2864 return -1;
2865
2866 return load_binintx(self, s, 1);
2867}
2868
2869static int
2870load_binint2(UnpicklerObject *self)
2871{
2872 char *s;
2873
2874 if (unpickler_read(self, &s, 2) < 0)
2875 return -1;
2876
2877 return load_binintx(self, s, 2);
2878}
2879
2880static int
2881load_long(UnpicklerObject *self)
2882{
2883 PyObject *value;
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002884 char *s;
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002885 Py_ssize_t len;
2886
2887 if ((len = unpickler_readline(self, &s)) < 0)
2888 return -1;
2889 if (len < 2)
2890 return bad_readline();
2891
Mark Dickinson8dd05142009-01-20 20:43:58 +00002892 /* s[len-2] will usually be 'L' (and s[len-1] is '\n'); we need to remove
2893 the 'L' before calling PyLong_FromString. In order to maintain
2894 compatibility with Python 3.0.0, we don't actually *require*
2895 the 'L' to be present. */
2896 if (s[len-2] == 'L') {
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002897 s[len-2] = '\0';
Mark Dickinson8dd05142009-01-20 20:43:58 +00002898 /* XXX: Should the base argument explicitly set to 10? */
Alexandre Vassalotti446f7ff2009-01-23 04:43:46 +00002899 value = PyLong_FromString(s, NULL, 0);
Mark Dickinson8dd05142009-01-20 20:43:58 +00002900 }
2901 else {
2902 value = PyLong_FromString(s, NULL, 0);
2903 }
2904 if (value == NULL)
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00002905 return -1;
2906
2907 PDATA_PUSH(self->stack, value, -1);
2908 return 0;
2909}
2910
2911/* 'size' bytes contain the # of bytes of little-endian 256's-complement
2912 * data following.
2913 */
2914static int
2915load_counted_long(UnpicklerObject *self, int size)
2916{
2917 PyObject *value;
2918 char *nbytes;
2919 char *pdata;
2920
2921 assert(size == 1 || size == 4);
2922 if (unpickler_read(self, &nbytes, size) < 0)
2923 return -1;
2924
2925 size = calc_binint(nbytes, size);
2926 if (size < 0) {
2927 /* Corrupt or hostile pickle -- we never write one like this */
2928 PyErr_SetString(UnpicklingError,
2929 "LONG pickle has negative byte count");
2930 return -1;
2931 }
2932
2933 if (size == 0)
2934 value = PyLong_FromLong(0L);
2935 else {
2936 /* Read the raw little-endian bytes and convert. */
2937 if (unpickler_read(self, &pdata, size) < 0)
2938 return -1;
2939 value = _PyLong_FromByteArray((unsigned char *)pdata, (size_t)size,
2940 1 /* little endian */ , 1 /* signed */ );
2941 }
2942 if (value == NULL)
2943 return -1;
2944 PDATA_PUSH(self->stack, value, -1);
2945 return 0;
2946}
2947
2948static int
2949load_float(UnpicklerObject *self)
2950{
2951 PyObject *value;
2952 char *endptr, *s;
2953 Py_ssize_t len;
2954 double d;
2955
2956 if ((len = unpickler_readline(self, &s)) < 0)
2957 return -1;
2958 if (len < 2)
2959 return bad_readline();
2960
2961 errno = 0;
2962 d = PyOS_ascii_strtod(s, &endptr);
2963
2964 if (errno || (endptr[0] != '\n') || (endptr[1] != '\0')) {
2965 PyErr_SetString(PyExc_ValueError, "could not convert string to float");
2966 return -1;
2967 }
2968
2969 if ((value = PyFloat_FromDouble(d)) == NULL)
2970 return -1;
2971
2972 PDATA_PUSH(self->stack, value, -1);
2973 return 0;
2974}
2975
2976static int
2977load_binfloat(UnpicklerObject *self)
2978{
2979 PyObject *value;
2980 double x;
2981 char *s;
2982
2983 if (unpickler_read(self, &s, 8) < 0)
2984 return -1;
2985
2986 x = _PyFloat_Unpack8((unsigned char *)s, 0);
2987 if (x == -1.0 && PyErr_Occurred())
2988 return -1;
2989
2990 if ((value = PyFloat_FromDouble(x)) == NULL)
2991 return -1;
2992
2993 PDATA_PUSH(self->stack, value, -1);
2994 return 0;
2995}
2996
2997static int
2998load_string(UnpicklerObject *self)
2999{
3000 PyObject *bytes;
3001 PyObject *str = NULL;
3002 Py_ssize_t len;
3003 char *s, *p;
3004
3005 if ((len = unpickler_readline(self, &s)) < 0)
3006 return -1;
3007 if (len < 3)
3008 return bad_readline();
3009 if ((s = strdup(s)) == NULL) {
3010 PyErr_NoMemory();
3011 return -1;
3012 }
3013
3014 /* Strip outermost quotes */
3015 while (s[len - 1] <= ' ')
3016 len--;
3017 if (s[0] == '"' && s[len - 1] == '"') {
3018 s[len - 1] = '\0';
3019 p = s + 1;
3020 len -= 2;
3021 }
3022 else if (s[0] == '\'' && s[len - 1] == '\'') {
3023 s[len - 1] = '\0';
3024 p = s + 1;
3025 len -= 2;
3026 }
3027 else {
3028 free(s);
3029 PyErr_SetString(PyExc_ValueError, "insecure string pickle");
3030 return -1;
3031 }
3032
3033 /* Use the PyBytes API to decode the string, since that is what is used
3034 to encode, and then coerce the result to Unicode. */
3035 bytes = PyBytes_DecodeEscape(p, len, NULL, 0, NULL);
3036 free(s);
3037 if (bytes == NULL)
3038 return -1;
3039 str = PyUnicode_FromEncodedObject(bytes, self->encoding, self->errors);
3040 Py_DECREF(bytes);
3041 if (str == NULL)
3042 return -1;
3043
3044 PDATA_PUSH(self->stack, str, -1);
3045 return 0;
3046}
3047
3048static int
3049load_binbytes(UnpicklerObject *self)
3050{
3051 PyObject *bytes;
3052 long x;
3053 char *s;
3054
3055 if (unpickler_read(self, &s, 4) < 0)
3056 return -1;
3057
3058 x = calc_binint(s, 4);
3059 if (x < 0) {
3060 PyErr_SetString(UnpicklingError,
3061 "BINBYTES pickle has negative byte count");
3062 return -1;
3063 }
3064
3065 if (unpickler_read(self, &s, x) < 0)
3066 return -1;
3067 bytes = PyBytes_FromStringAndSize(s, x);
3068 if (bytes == NULL)
3069 return -1;
3070
3071 PDATA_PUSH(self->stack, bytes, -1);
3072 return 0;
3073}
3074
3075static int
3076load_short_binbytes(UnpicklerObject *self)
3077{
3078 PyObject *bytes;
3079 unsigned char x;
3080 char *s;
3081
3082 if (unpickler_read(self, &s, 1) < 0)
3083 return -1;
3084
3085 x = (unsigned char)s[0];
3086
3087 if (unpickler_read(self, &s, x) < 0)
3088 return -1;
3089
3090 bytes = PyBytes_FromStringAndSize(s, x);
3091 if (bytes == NULL)
3092 return -1;
3093
3094 PDATA_PUSH(self->stack, bytes, -1);
3095 return 0;
3096}
3097
3098static int
3099load_binstring(UnpicklerObject *self)
3100{
3101 PyObject *str;
3102 long x;
3103 char *s;
3104
3105 if (unpickler_read(self, &s, 4) < 0)
3106 return -1;
3107
3108 x = calc_binint(s, 4);
3109 if (x < 0) {
3110 PyErr_SetString(UnpicklingError,
3111 "BINSTRING pickle has negative byte count");
3112 return -1;
3113 }
3114
3115 if (unpickler_read(self, &s, x) < 0)
3116 return -1;
3117
3118 /* Convert Python 2.x strings to unicode. */
3119 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3120 if (str == NULL)
3121 return -1;
3122
3123 PDATA_PUSH(self->stack, str, -1);
3124 return 0;
3125}
3126
3127static int
3128load_short_binstring(UnpicklerObject *self)
3129{
3130 PyObject *str;
3131 unsigned char x;
3132 char *s;
3133
3134 if (unpickler_read(self, &s, 1) < 0)
3135 return -1;
3136
3137 x = (unsigned char)s[0];
3138
3139 if (unpickler_read(self, &s, x) < 0)
3140 return -1;
3141
3142 /* Convert Python 2.x strings to unicode. */
3143 str = PyUnicode_Decode(s, x, self->encoding, self->errors);
3144 if (str == NULL)
3145 return -1;
3146
3147 PDATA_PUSH(self->stack, str, -1);
3148 return 0;
3149}
3150
3151static int
3152load_unicode(UnpicklerObject *self)
3153{
3154 PyObject *str;
3155 Py_ssize_t len;
3156 char *s;
3157
3158 if ((len = unpickler_readline(self, &s)) < 0)
3159 return -1;
3160 if (len < 1)
3161 return bad_readline();
3162
3163 str = PyUnicode_DecodeRawUnicodeEscape(s, len - 1, NULL);
3164 if (str == NULL)
3165 return -1;
3166
3167 PDATA_PUSH(self->stack, str, -1);
3168 return 0;
3169}
3170
3171static int
3172load_binunicode(UnpicklerObject *self)
3173{
3174 PyObject *str;
3175 long size;
3176 char *s;
3177
3178 if (unpickler_read(self, &s, 4) < 0)
3179 return -1;
3180
3181 size = calc_binint(s, 4);
3182 if (size < 0) {
3183 PyErr_SetString(UnpicklingError,
3184 "BINUNICODE pickle has negative byte count");
3185 return -1;
3186 }
3187
3188 if (unpickler_read(self, &s, size) < 0)
3189 return -1;
3190
3191 str = PyUnicode_DecodeUTF8(s, size, NULL);
3192 if (str == NULL)
3193 return -1;
3194
3195 PDATA_PUSH(self->stack, str, -1);
3196 return 0;
3197}
3198
3199static int
3200load_tuple(UnpicklerObject *self)
3201{
3202 PyObject *tuple;
3203 int i;
3204
3205 if ((i = marker(self)) < 0)
3206 return -1;
3207
3208 tuple = Pdata_poptuple(self->stack, i);
3209 if (tuple == NULL)
3210 return -1;
3211 PDATA_PUSH(self->stack, tuple, -1);
3212 return 0;
3213}
3214
3215static int
3216load_counted_tuple(UnpicklerObject *self, int len)
3217{
3218 PyObject *tuple;
3219
3220 tuple = PyTuple_New(len);
3221 if (tuple == NULL)
3222 return -1;
3223
3224 while (--len >= 0) {
3225 PyObject *item;
3226
3227 PDATA_POP(self->stack, item);
3228 if (item == NULL)
3229 return -1;
3230 PyTuple_SET_ITEM(tuple, len, item);
3231 }
3232 PDATA_PUSH(self->stack, tuple, -1);
3233 return 0;
3234}
3235
3236static int
3237load_empty_list(UnpicklerObject *self)
3238{
3239 PyObject *list;
3240
3241 if ((list = PyList_New(0)) == NULL)
3242 return -1;
3243 PDATA_PUSH(self->stack, list, -1);
3244 return 0;
3245}
3246
3247static int
3248load_empty_dict(UnpicklerObject *self)
3249{
3250 PyObject *dict;
3251
3252 if ((dict = PyDict_New()) == NULL)
3253 return -1;
3254 PDATA_PUSH(self->stack, dict, -1);
3255 return 0;
3256}
3257
3258static int
3259load_list(UnpicklerObject *self)
3260{
3261 PyObject *list;
3262 int i;
3263
3264 if ((i = marker(self)) < 0)
3265 return -1;
3266
3267 list = Pdata_poplist(self->stack, i);
3268 if (list == NULL)
3269 return -1;
3270 PDATA_PUSH(self->stack, list, -1);
3271 return 0;
3272}
3273
3274static int
3275load_dict(UnpicklerObject *self)
3276{
3277 PyObject *dict, *key, *value;
3278 int i, j, k;
3279
3280 if ((i = marker(self)) < 0)
3281 return -1;
3282 j = self->stack->length;
3283
3284 if ((dict = PyDict_New()) == NULL)
3285 return -1;
3286
3287 for (k = i + 1; k < j; k += 2) {
3288 key = self->stack->data[k - 1];
3289 value = self->stack->data[k];
3290 if (PyDict_SetItem(dict, key, value) < 0) {
3291 Py_DECREF(dict);
3292 return -1;
3293 }
3294 }
3295 Pdata_clear(self->stack, i);
3296 PDATA_PUSH(self->stack, dict, -1);
3297 return 0;
3298}
3299
3300static PyObject *
3301instantiate(PyObject *cls, PyObject *args)
3302{
3303 PyObject *r = NULL;
3304
3305 /* XXX: The pickle.py module does not create instances this way when the
3306 args tuple is empty. See Unpickler._instantiate(). */
3307 if ((r = PyObject_CallObject(cls, args)))
3308 return r;
3309
3310 /* XXX: Is this still nescessary? */
3311 {
3312 PyObject *tp, *v, *tb, *tmp_value;
3313
3314 PyErr_Fetch(&tp, &v, &tb);
3315 tmp_value = v;
3316 /* NULL occurs when there was a KeyboardInterrupt */
3317 if (tmp_value == NULL)
3318 tmp_value = Py_None;
3319 if ((r = PyTuple_Pack(3, tmp_value, cls, args))) {
3320 Py_XDECREF(v);
3321 v = r;
3322 }
3323 PyErr_Restore(tp, v, tb);
3324 }
3325 return NULL;
3326}
3327
3328static int
3329load_obj(UnpicklerObject *self)
3330{
3331 PyObject *cls, *args, *obj = NULL;
3332 int i;
3333
3334 if ((i = marker(self)) < 0)
3335 return -1;
3336
3337 args = Pdata_poptuple(self->stack, i + 1);
3338 if (args == NULL)
3339 return -1;
3340
3341 PDATA_POP(self->stack, cls);
3342 if (cls) {
3343 obj = instantiate(cls, args);
3344 Py_DECREF(cls);
3345 }
3346 Py_DECREF(args);
3347 if (obj == NULL)
3348 return -1;
3349
3350 PDATA_PUSH(self->stack, obj, -1);
3351 return 0;
3352}
3353
3354static int
3355load_inst(UnpicklerObject *self)
3356{
3357 PyObject *cls = NULL;
3358 PyObject *args = NULL;
3359 PyObject *obj = NULL;
3360 PyObject *module_name;
3361 PyObject *class_name;
3362 Py_ssize_t len;
3363 int i;
3364 char *s;
3365
3366 if ((i = marker(self)) < 0)
3367 return -1;
3368 if ((len = unpickler_readline(self, &s)) < 0)
3369 return -1;
3370 if (len < 2)
3371 return bad_readline();
3372
3373 /* Here it is safe to use PyUnicode_DecodeASCII(), even though non-ASCII
3374 identifiers are permitted in Python 3.0, since the INST opcode is only
3375 supported by older protocols on Python 2.x. */
3376 module_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3377 if (module_name == NULL)
3378 return -1;
3379
3380 if ((len = unpickler_readline(self, &s)) >= 0) {
3381 if (len < 2)
3382 return bad_readline();
3383 class_name = PyUnicode_DecodeASCII(s, len - 1, "strict");
3384 if (class_name == NULL) {
3385 cls = find_class(self, module_name, class_name);
3386 Py_DECREF(class_name);
3387 }
3388 }
3389 Py_DECREF(module_name);
3390
3391 if (cls == NULL)
3392 return -1;
3393
3394 if ((args = Pdata_poptuple(self->stack, i)) != NULL) {
3395 obj = instantiate(cls, args);
3396 Py_DECREF(args);
3397 }
3398 Py_DECREF(cls);
3399
3400 if (obj == NULL)
3401 return -1;
3402
3403 PDATA_PUSH(self->stack, obj, -1);
3404 return 0;
3405}
3406
3407static int
3408load_newobj(UnpicklerObject *self)
3409{
3410 PyObject *args = NULL;
3411 PyObject *clsraw = NULL;
3412 PyTypeObject *cls; /* clsraw cast to its true type */
3413 PyObject *obj;
3414
3415 /* Stack is ... cls argtuple, and we want to call
3416 * cls.__new__(cls, *argtuple).
3417 */
3418 PDATA_POP(self->stack, args);
3419 if (args == NULL)
3420 goto error;
3421 if (!PyTuple_Check(args)) {
3422 PyErr_SetString(UnpicklingError, "NEWOBJ expected an arg " "tuple.");
3423 goto error;
3424 }
3425
3426 PDATA_POP(self->stack, clsraw);
3427 cls = (PyTypeObject *)clsraw;
3428 if (cls == NULL)
3429 goto error;
3430 if (!PyType_Check(cls)) {
3431 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3432 "isn't a type object");
3433 goto error;
3434 }
3435 if (cls->tp_new == NULL) {
3436 PyErr_SetString(UnpicklingError, "NEWOBJ class argument "
3437 "has NULL tp_new");
3438 goto error;
3439 }
3440
3441 /* Call __new__. */
3442 obj = cls->tp_new(cls, args, NULL);
3443 if (obj == NULL)
3444 goto error;
3445
3446 Py_DECREF(args);
3447 Py_DECREF(clsraw);
3448 PDATA_PUSH(self->stack, obj, -1);
3449 return 0;
3450
3451 error:
3452 Py_XDECREF(args);
3453 Py_XDECREF(clsraw);
3454 return -1;
3455}
3456
3457static int
3458load_global(UnpicklerObject *self)
3459{
3460 PyObject *global = NULL;
3461 PyObject *module_name;
3462 PyObject *global_name;
3463 Py_ssize_t len;
3464 char *s;
3465
3466 if ((len = unpickler_readline(self, &s)) < 0)
3467 return -1;
3468 if (len < 2)
3469 return bad_readline();
3470 module_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3471 if (!module_name)
3472 return -1;
3473
3474 if ((len = unpickler_readline(self, &s)) >= 0) {
3475 if (len < 2) {
3476 Py_DECREF(module_name);
3477 return bad_readline();
3478 }
3479 global_name = PyUnicode_DecodeUTF8(s, len - 1, "strict");
3480 if (global_name) {
3481 global = find_class(self, module_name, global_name);
3482 Py_DECREF(global_name);
3483 }
3484 }
3485 Py_DECREF(module_name);
3486
3487 if (global == NULL)
3488 return -1;
3489 PDATA_PUSH(self->stack, global, -1);
3490 return 0;
3491}
3492
3493static int
3494load_persid(UnpicklerObject *self)
3495{
3496 PyObject *pid;
3497 Py_ssize_t len;
3498 char *s;
3499
3500 if (self->pers_func) {
3501 if ((len = unpickler_readline(self, &s)) < 0)
3502 return -1;
3503 if (len < 2)
3504 return bad_readline();
3505
3506 pid = PyBytes_FromStringAndSize(s, len - 1);
3507 if (pid == NULL)
3508 return -1;
3509
3510 /* Ugh... this does not leak since unpickler_call() steals the
3511 reference to pid first. */
3512 pid = unpickler_call(self, self->pers_func, pid);
3513 if (pid == NULL)
3514 return -1;
3515
3516 PDATA_PUSH(self->stack, pid, -1);
3517 return 0;
3518 }
3519 else {
3520 PyErr_SetString(UnpicklingError,
3521 "A load persistent id instruction was encountered,\n"
3522 "but no persistent_load function was specified.");
3523 return -1;
3524 }
3525}
3526
3527static int
3528load_binpersid(UnpicklerObject *self)
3529{
3530 PyObject *pid;
3531
3532 if (self->pers_func) {
3533 PDATA_POP(self->stack, pid);
3534 if (pid == NULL)
3535 return -1;
3536
3537 /* Ugh... this does not leak since unpickler_call() steals the
3538 reference to pid first. */
3539 pid = unpickler_call(self, self->pers_func, pid);
3540 if (pid == NULL)
3541 return -1;
3542
3543 PDATA_PUSH(self->stack, pid, -1);
3544 return 0;
3545 }
3546 else {
3547 PyErr_SetString(UnpicklingError,
3548 "A load persistent id instruction was encountered,\n"
3549 "but no persistent_load function was specified.");
3550 return -1;
3551 }
3552}
3553
3554static int
3555load_pop(UnpicklerObject *self)
3556{
3557 int len;
3558
3559 if ((len = self->stack->length) <= 0)
3560 return stack_underflow();
3561
3562 /* Note that we split the (pickle.py) stack into two stacks,
3563 * an object stack and a mark stack. We have to be clever and
3564 * pop the right one. We do this by looking at the top of the
3565 * mark stack.
3566 */
3567
3568 if ((self->num_marks > 0) && (self->marks[self->num_marks - 1] == len))
3569 self->num_marks--;
3570 else {
3571 len--;
3572 Py_DECREF(self->stack->data[len]);
3573 self->stack->length = len;
3574 }
3575
3576 return 0;
3577}
3578
3579static int
3580load_pop_mark(UnpicklerObject *self)
3581{
3582 int i;
3583
3584 if ((i = marker(self)) < 0)
3585 return -1;
3586
3587 Pdata_clear(self->stack, i);
3588
3589 return 0;
3590}
3591
3592static int
3593load_dup(UnpicklerObject *self)
3594{
3595 PyObject *last;
3596 int len;
3597
3598 if ((len = self->stack->length) <= 0)
3599 return stack_underflow();
3600 last = self->stack->data[len - 1];
3601 PDATA_APPEND(self->stack, last, -1);
3602 return 0;
3603}
3604
3605static int
3606load_get(UnpicklerObject *self)
3607{
3608 PyObject *key, *value;
3609 Py_ssize_t len;
3610 char *s;
3611
3612 if ((len = unpickler_readline(self, &s)) < 0)
3613 return -1;
3614 if (len < 2)
3615 return bad_readline();
3616
3617 key = PyLong_FromString(s, NULL, 10);
3618 if (key == NULL)
3619 return -1;
3620
3621 value = PyDict_GetItemWithError(self->memo, key);
3622 if (value == NULL) {
3623 if (!PyErr_Occurred())
3624 PyErr_SetObject(PyExc_KeyError, key);
3625 Py_DECREF(key);
3626 return -1;
3627 }
3628 Py_DECREF(key);
3629
3630 PDATA_APPEND(self->stack, value, -1);
3631 return 0;
3632}
3633
3634static int
3635load_binget(UnpicklerObject *self)
3636{
3637 PyObject *key, *value;
3638 char *s;
3639
3640 if (unpickler_read(self, &s, 1) < 0)
3641 return -1;
3642
3643 /* Here, the unsigned cast is necessary to avoid negative values. */
3644 key = PyLong_FromLong((long)(unsigned char)s[0]);
3645 if (key == NULL)
3646 return -1;
3647
3648 value = PyDict_GetItemWithError(self->memo, key);
3649 if (value == NULL) {
3650 if (!PyErr_Occurred())
3651 PyErr_SetObject(PyExc_KeyError, key);
3652 Py_DECREF(key);
3653 return -1;
3654 }
3655 Py_DECREF(key);
3656
3657 PDATA_APPEND(self->stack, value, -1);
3658 return 0;
3659}
3660
3661static int
3662load_long_binget(UnpicklerObject *self)
3663{
3664 PyObject *key, *value;
3665 char *s;
3666 long k;
3667
3668 if (unpickler_read(self, &s, 4) < 0)
3669 return -1;
3670
3671 k = (long)(unsigned char)s[0];
3672 k |= (long)(unsigned char)s[1] << 8;
3673 k |= (long)(unsigned char)s[2] << 16;
3674 k |= (long)(unsigned char)s[3] << 24;
3675
3676 key = PyLong_FromLong(k);
3677 if (key == NULL)
3678 return -1;
3679
3680 value = PyDict_GetItemWithError(self->memo, key);
3681 if (value == NULL) {
3682 if (!PyErr_Occurred())
3683 PyErr_SetObject(PyExc_KeyError, key);
3684 Py_DECREF(key);
3685 return -1;
3686 }
3687 Py_DECREF(key);
3688
3689 PDATA_APPEND(self->stack, value, -1);
3690 return 0;
3691}
3692
3693/* Push an object from the extension registry (EXT[124]). nbytes is
3694 * the number of bytes following the opcode, holding the index (code) value.
3695 */
3696static int
3697load_extension(UnpicklerObject *self, int nbytes)
3698{
3699 char *codebytes; /* the nbytes bytes after the opcode */
3700 long code; /* calc_binint returns long */
3701 PyObject *py_code; /* code as a Python int */
3702 PyObject *obj; /* the object to push */
3703 PyObject *pair; /* (module_name, class_name) */
3704 PyObject *module_name, *class_name;
3705
3706 assert(nbytes == 1 || nbytes == 2 || nbytes == 4);
3707 if (unpickler_read(self, &codebytes, nbytes) < 0)
3708 return -1;
3709 code = calc_binint(codebytes, nbytes);
3710 if (code <= 0) { /* note that 0 is forbidden */
3711 /* Corrupt or hostile pickle. */
3712 PyErr_SetString(UnpicklingError, "EXT specifies code <= 0");
3713 return -1;
3714 }
3715
3716 /* Look for the code in the cache. */
3717 py_code = PyLong_FromLong(code);
3718 if (py_code == NULL)
3719 return -1;
3720 obj = PyDict_GetItem(extension_cache, py_code);
3721 if (obj != NULL) {
3722 /* Bingo. */
3723 Py_DECREF(py_code);
3724 PDATA_APPEND(self->stack, obj, -1);
3725 return 0;
3726 }
3727
3728 /* Look up the (module_name, class_name) pair. */
3729 pair = PyDict_GetItem(inverted_registry, py_code);
3730 if (pair == NULL) {
3731 Py_DECREF(py_code);
3732 PyErr_Format(PyExc_ValueError, "unregistered extension "
3733 "code %ld", code);
3734 return -1;
3735 }
3736 /* Since the extension registry is manipulable via Python code,
3737 * confirm that pair is really a 2-tuple of strings.
3738 */
3739 if (!PyTuple_Check(pair) || PyTuple_Size(pair) != 2 ||
3740 !PyUnicode_Check(module_name = PyTuple_GET_ITEM(pair, 0)) ||
3741 !PyUnicode_Check(class_name = PyTuple_GET_ITEM(pair, 1))) {
3742 Py_DECREF(py_code);
3743 PyErr_Format(PyExc_ValueError, "_inverted_registry[%ld] "
3744 "isn't a 2-tuple of strings", code);
3745 return -1;
3746 }
3747 /* Load the object. */
3748 obj = find_class(self, module_name, class_name);
3749 if (obj == NULL) {
3750 Py_DECREF(py_code);
3751 return -1;
3752 }
3753 /* Cache code -> obj. */
3754 code = PyDict_SetItem(extension_cache, py_code, obj);
3755 Py_DECREF(py_code);
3756 if (code < 0) {
3757 Py_DECREF(obj);
3758 return -1;
3759 }
3760 PDATA_PUSH(self->stack, obj, -1);
3761 return 0;
3762}
3763
3764static int
3765load_put(UnpicklerObject *self)
3766{
3767 PyObject *key, *value;
3768 Py_ssize_t len;
3769 char *s;
3770 int x;
3771
3772 if ((len = unpickler_readline(self, &s)) < 0)
3773 return -1;
3774 if (len < 2)
3775 return bad_readline();
3776 if ((x = self->stack->length) <= 0)
3777 return stack_underflow();
3778
3779 key = PyLong_FromString(s, NULL, 10);
3780 if (key == NULL)
3781 return -1;
3782 value = self->stack->data[x - 1];
3783
3784 x = PyDict_SetItem(self->memo, key, value);
3785 Py_DECREF(key);
3786 return x;
3787}
3788
3789static int
3790load_binput(UnpicklerObject *self)
3791{
3792 PyObject *key, *value;
3793 char *s;
3794 int x;
3795
3796 if (unpickler_read(self, &s, 1) < 0)
3797 return -1;
3798 if ((x = self->stack->length) <= 0)
3799 return stack_underflow();
3800
3801 key = PyLong_FromLong((long)(unsigned char)s[0]);
3802 if (key == NULL)
3803 return -1;
3804 value = self->stack->data[x - 1];
3805
3806 x = PyDict_SetItem(self->memo, key, value);
3807 Py_DECREF(key);
3808 return x;
3809}
3810
3811static int
3812load_long_binput(UnpicklerObject *self)
3813{
3814 PyObject *key, *value;
3815 long k;
3816 char *s;
3817 int x;
3818
3819 if (unpickler_read(self, &s, 4) < 0)
3820 return -1;
3821 if ((x = self->stack->length) <= 0)
3822 return stack_underflow();
3823
3824 k = (long)(unsigned char)s[0];
3825 k |= (long)(unsigned char)s[1] << 8;
3826 k |= (long)(unsigned char)s[2] << 16;
3827 k |= (long)(unsigned char)s[3] << 24;
3828
3829 key = PyLong_FromLong(k);
3830 if (key == NULL)
3831 return -1;
3832 value = self->stack->data[x - 1];
3833
3834 x = PyDict_SetItem(self->memo, key, value);
3835 Py_DECREF(key);
3836 return x;
3837}
3838
3839static int
3840do_append(UnpicklerObject *self, int x)
3841{
3842 PyObject *value;
3843 PyObject *list;
3844 int len, i;
3845
3846 len = self->stack->length;
3847 if (x > len || x <= 0)
3848 return stack_underflow();
3849 if (len == x) /* nothing to do */
3850 return 0;
3851
3852 list = self->stack->data[x - 1];
3853
3854 if (PyList_Check(list)) {
3855 PyObject *slice;
3856 Py_ssize_t list_len;
3857
3858 slice = Pdata_poplist(self->stack, x);
3859 if (!slice)
3860 return -1;
3861 list_len = PyList_GET_SIZE(list);
3862 i = PyList_SetSlice(list, list_len, list_len, slice);
3863 Py_DECREF(slice);
3864 return i;
3865 }
3866 else {
3867 PyObject *append_func;
3868
3869 append_func = PyObject_GetAttrString(list, "append");
3870 if (append_func == NULL)
3871 return -1;
3872 for (i = x; i < len; i++) {
3873 PyObject *result;
3874
3875 value = self->stack->data[i];
3876 result = unpickler_call(self, append_func, value);
3877 if (result == NULL) {
3878 Pdata_clear(self->stack, i + 1);
3879 self->stack->length = x;
3880 return -1;
3881 }
3882 Py_DECREF(result);
3883 }
3884 self->stack->length = x;
3885 }
3886
3887 return 0;
3888}
3889
3890static int
3891load_append(UnpicklerObject *self)
3892{
3893 return do_append(self, self->stack->length - 1);
3894}
3895
3896static int
3897load_appends(UnpicklerObject *self)
3898{
3899 return do_append(self, marker(self));
3900}
3901
3902static int
3903do_setitems(UnpicklerObject *self, int x)
3904{
3905 PyObject *value, *key;
3906 PyObject *dict;
3907 int len, i;
3908 int status = 0;
3909
3910 len = self->stack->length;
3911 if (x > len || x <= 0)
3912 return stack_underflow();
3913 if (len == x) /* nothing to do */
3914 return 0;
3915 if ((len - x) % 2 != 0) {
3916 /* Currupt or hostile pickle -- we never write one like this. */
3917 PyErr_SetString(UnpicklingError, "odd number of items for SETITEMS");
3918 return -1;
3919 }
3920
3921 /* Here, dict does not actually need to be a PyDict; it could be anything
3922 that supports the __setitem__ attribute. */
3923 dict = self->stack->data[x - 1];
3924
3925 for (i = x + 1; i < len; i += 2) {
3926 key = self->stack->data[i - 1];
3927 value = self->stack->data[i];
3928 if (PyObject_SetItem(dict, key, value) < 0) {
3929 status = -1;
3930 break;
3931 }
3932 }
3933
3934 Pdata_clear(self->stack, x);
3935 return status;
3936}
3937
3938static int
3939load_setitem(UnpicklerObject *self)
3940{
3941 return do_setitems(self, self->stack->length - 2);
3942}
3943
3944static int
3945load_setitems(UnpicklerObject *self)
3946{
3947 return do_setitems(self, marker(self));
3948}
3949
3950static int
3951load_build(UnpicklerObject *self)
3952{
3953 PyObject *state, *inst, *slotstate;
3954 PyObject *setstate;
3955 int status = 0;
3956
3957 /* Stack is ... instance, state. We want to leave instance at
3958 * the stack top, possibly mutated via instance.__setstate__(state).
3959 */
3960 if (self->stack->length < 2)
3961 return stack_underflow();
3962
3963 PDATA_POP(self->stack, state);
3964 if (state == NULL)
3965 return -1;
3966
3967 inst = self->stack->data[self->stack->length - 1];
3968
3969 setstate = PyObject_GetAttrString(inst, "__setstate__");
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003970 if (setstate == NULL) {
3971 if (PyErr_ExceptionMatches(PyExc_AttributeError))
3972 PyErr_Clear();
Antoine Pitroud79dc622008-09-05 00:03:33 +00003973 else {
3974 Py_DECREF(state);
Alexandre Vassalotti1f9d9072008-08-15 03:07:47 +00003975 return -1;
Antoine Pitroud79dc622008-09-05 00:03:33 +00003976 }
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003977 }
3978 else {
3979 PyObject *result;
3980
3981 /* The explicit __setstate__ is responsible for everything. */
Antoine Pitroud79dc622008-09-05 00:03:33 +00003982 /* Ugh... this does not leak since unpickler_call() steals the
3983 reference to state first. */
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00003984 result = unpickler_call(self, setstate, state);
3985 Py_DECREF(setstate);
3986 if (result == NULL)
3987 return -1;
3988 Py_DECREF(result);
3989 return 0;
3990 }
3991
3992 /* A default __setstate__. First see whether state embeds a
3993 * slot state dict too (a proto 2 addition).
3994 */
3995 if (PyTuple_Check(state) && Py_SIZE(state) == 2) {
3996 PyObject *tmp = state;
3997
3998 state = PyTuple_GET_ITEM(tmp, 0);
3999 slotstate = PyTuple_GET_ITEM(tmp, 1);
4000 Py_INCREF(state);
4001 Py_INCREF(slotstate);
4002 Py_DECREF(tmp);
4003 }
4004 else
4005 slotstate = NULL;
4006
4007 /* Set inst.__dict__ from the state dict (if any). */
4008 if (state != Py_None) {
4009 PyObject *dict;
4010
4011 if (!PyDict_Check(state)) {
4012 PyErr_SetString(UnpicklingError, "state is not a dictionary");
4013 goto error;
4014 }
4015 dict = PyObject_GetAttrString(inst, "__dict__");
4016 if (dict == NULL)
4017 goto error;
4018
4019 PyDict_Update(dict, state);
4020 Py_DECREF(dict);
4021 }
4022
4023 /* Also set instance attributes from the slotstate dict (if any). */
4024 if (slotstate != NULL) {
4025 PyObject *d_key, *d_value;
4026 Py_ssize_t i;
4027
4028 if (!PyDict_Check(slotstate)) {
4029 PyErr_SetString(UnpicklingError,
4030 "slot state is not a dictionary");
4031 goto error;
4032 }
4033 i = 0;
4034 while (PyDict_Next(slotstate, &i, &d_key, &d_value)) {
4035 if (PyObject_SetAttr(inst, d_key, d_value) < 0)
4036 goto error;
4037 }
4038 }
4039
4040 if (0) {
4041 error:
4042 status = -1;
4043 }
4044
4045 Py_DECREF(state);
4046 Py_XDECREF(slotstate);
4047 return status;
4048}
4049
4050static int
4051load_mark(UnpicklerObject *self)
4052{
4053
4054 /* Note that we split the (pickle.py) stack into two stacks, an
4055 * object stack and a mark stack. Here we push a mark onto the
4056 * mark stack.
4057 */
4058
4059 if ((self->num_marks + 1) >= self->marks_size) {
4060 size_t alloc;
4061 int *marks;
4062
4063 /* Use the size_t type to check for overflow. */
4064 alloc = ((size_t)self->num_marks << 1) + 20;
Alexandre Vassalotti7634ff52008-06-13 02:16:06 +00004065 if (alloc > PY_SSIZE_T_MAX ||
4066 alloc <= ((size_t)self->num_marks + 1)) {
Alexandre Vassalottica2d6102008-06-12 18:26:05 +00004067 PyErr_NoMemory();
4068 return -1;
4069 }
4070
4071 if (self->marks == NULL)
4072 marks = (int *)PyMem_Malloc(alloc * sizeof(int));
4073 else
4074 marks = (int *)PyMem_Realloc(self->marks, alloc * sizeof(int));
4075 if (marks == NULL) {
4076 PyErr_NoMemory();
4077 return -1;
4078 }
4079 self->marks = marks;
4080 self->marks_size = (Py_ssize_t)alloc;
4081 }
4082
4083 self->marks[self->num_marks++] = self->stack->length;
4084
4085 return 0;
4086}
4087
4088static int
4089load_reduce(UnpicklerObject *self)
4090{
4091 PyObject *callable = NULL;
4092 PyObject *argtup = NULL;
4093 PyObject *obj = NULL;
4094
4095 PDATA_POP(self->stack, argtup);
4096 if (argtup == NULL)
4097 return -1;
4098 PDATA_POP(self->stack, callable);
4099 if (callable) {
4100 obj = instantiate(callable, argtup);
4101 Py_DECREF(callable);
4102 }
4103 Py_DECREF(argtup);
4104
4105 if (obj == NULL)
4106 return -1;
4107
4108 PDATA_PUSH(self->stack, obj, -1);
4109 return 0;
4110}
4111
4112/* Just raises an error if we don't know the protocol specified. PROTO
4113 * is the first opcode for protocols >= 2.
4114 */
4115static int
4116load_proto(UnpicklerObject *self)
4117{
4118 char *s;
4119 int i;
4120
4121 if (unpickler_read(self, &s, 1) < 0)
4122 return -1;
4123
4124 i = (unsigned char)s[0];
4125 if (i <= HIGHEST_PROTOCOL)
4126 return 0;
4127
4128 PyErr_Format(PyExc_ValueError, "unsupported pickle protocol: %d", i);
4129 return -1;
4130}
4131
/* Run the unpickling loop: read opcodes one byte at a time and dispatch
 * each to its load_* handler until STOP, an error, or end of input.
 *
 * The mark stack and the object stack are reset first.  On success the
 * object popped from the top of the stack is returned; on failure NULL
 * is returned with an exception set.
 */
static PyObject *
load(UnpicklerObject *self)
{
    PyObject *err;
    PyObject *value = NULL;
    char *s;

    /* Start from a clean state in case load() was called before. */
    self->num_marks = 0;
    if (self->stack->length)
        Pdata_clear(self->stack, 0);

    /* Convenient macros for the dispatch while-switch loop just below.
       A failing handler does "break", which leaves the switch and then
       hits the "break" after it, ending the loop; a successful handler
       does "continue" to fetch the next opcode. */
#define OP(opcode, load_func)                                        \
    case opcode: if (load_func(self) < 0) break; continue;

#define OP_ARG(opcode, load_func, arg)                               \
    case opcode: if (load_func(self, (arg)) < 0) break; continue;

    while (1) {
        if (unpickler_read(self, &s, 1) < 0)
            break;

        switch ((enum opcode)s[0]) {
        OP(NONE, load_none)
        OP(BININT, load_binint)
        OP(BININT1, load_binint1)
        OP(BININT2, load_binint2)
        OP(INT, load_int)
        OP(LONG, load_long)
        OP_ARG(LONG1, load_counted_long, 1)
        OP_ARG(LONG4, load_counted_long, 4)
        OP(FLOAT, load_float)
        OP(BINFLOAT, load_binfloat)
        OP(BINBYTES, load_binbytes)
        OP(SHORT_BINBYTES, load_short_binbytes)
        OP(BINSTRING, load_binstring)
        OP(SHORT_BINSTRING, load_short_binstring)
        OP(STRING, load_string)
        OP(UNICODE, load_unicode)
        OP(BINUNICODE, load_binunicode)
        OP_ARG(EMPTY_TUPLE, load_counted_tuple, 0)
        OP_ARG(TUPLE1, load_counted_tuple, 1)
        OP_ARG(TUPLE2, load_counted_tuple, 2)
        OP_ARG(TUPLE3, load_counted_tuple, 3)
        OP(TUPLE, load_tuple)
        OP(EMPTY_LIST, load_empty_list)
        OP(LIST, load_list)
        OP(EMPTY_DICT, load_empty_dict)
        OP(DICT, load_dict)
        OP(OBJ, load_obj)
        OP(INST, load_inst)
        OP(NEWOBJ, load_newobj)
        OP(GLOBAL, load_global)
        OP(APPEND, load_append)
        OP(APPENDS, load_appends)
        OP(BUILD, load_build)
        OP(DUP, load_dup)
        OP(BINGET, load_binget)
        OP(LONG_BINGET, load_long_binget)
        OP(GET, load_get)
        OP(MARK, load_mark)
        OP(BINPUT, load_binput)
        OP(LONG_BINPUT, load_long_binput)
        OP(PUT, load_put)
        OP(POP, load_pop)
        OP(POP_MARK, load_pop_mark)
        OP(SETITEM, load_setitem)
        OP(SETITEMS, load_setitems)
        OP(PERSID, load_persid)
        OP(BINPERSID, load_binpersid)
        OP(REDUCE, load_reduce)
        OP(PROTO, load_proto)
        OP_ARG(EXT1, load_extension, 1)
        OP_ARG(EXT2, load_extension, 2)
        OP_ARG(EXT4, load_extension, 4)
        OP_ARG(NEWTRUE, load_bool, Py_True)
        OP_ARG(NEWFALSE, load_bool, Py_False)

        /* Normal end of the pickle: fall out of the loop. */
        case STOP:
            break;

        /* A NUL byte where an opcode is expected is reported as EOF. */
        case '\0':
            PyErr_SetNone(PyExc_EOFError);
            return NULL;

        default:
            PyErr_Format(UnpicklingError,
                         "invalid load key, '%c'.", s[0]);
            return NULL;
        }

        break;                  /* and we are done! */
    }

    /* XXX: It is not clear what this is actually for.
       What it does: if any exception is pending, fail.  When the pending
       exception's type is exactly EOFError, it is replaced by a fresh
       EOFError without a value. */
    if ((err = PyErr_Occurred())) {
        if (err == PyExc_EOFError) {
            PyErr_SetNone(PyExc_EOFError);
        }
        return NULL;
    }

    /* The finished object is whatever is left on top of the stack. */
    PDATA_POP(self->stack, value);
    return value;
}
4237
4238PyDoc_STRVAR(Unpickler_load_doc,
4239"load() -> object. Load a pickle."
4240"\n"
4241"Read a pickled object representation from the open file object given in\n"
4242"the constructor, and return the reconstituted object hierarchy specified\n"
4243"therein.\n");
4244
4245static PyObject *
4246Unpickler_load(UnpicklerObject *self)
4247{
4248 /* Check whether the Unpickler was initialized correctly. This prevents
4249 segfaulting if a subclass overridden __init__ with a function that does
4250 not call Unpickler.__init__(). Here, we simply ensure that self->read
4251 is not NULL. */
4252 if (self->read == NULL) {
4253 PyErr_Format(UnpicklingError,
4254 "Unpickler.__init__() was not called by %s.__init__()",
4255 Py_TYPE(self)->tp_name);
4256 return NULL;
4257 }
4258
4259 return load(self);
4260}
4261
/* The name of find_class() is misleading.  In newer pickle protocols this
   function is used for loading any global (e.g. functions), not just
   classes.  The name is kept only for backward compatibility. */

/* Docstring of Unpickler.find_class(). */
PyDoc_STRVAR(Unpickler_find_class_doc,
"find_class(module_name, global_name) -> object.\n"
"\n"
"Return an object from a specified module, importing the module if\n"
"necessary.  Subclasses may override this method (e.g. to restrict\n"
"unpickling of arbitrary classes and functions).\n"
"\n"
"This method is called whenever a class or a function object is\n"
"needed.  Both arguments passed are str objects.\n");
4275
4276static PyObject *
4277Unpickler_find_class(UnpicklerObject *self, PyObject *args)
4278{
4279 PyObject *global;
4280 PyObject *modules_dict;
4281 PyObject *module;
4282 PyObject *module_name, *global_name;
4283
4284 if (!PyArg_UnpackTuple(args, "find_class", 2, 2,
4285 &module_name, &global_name))
4286 return NULL;
4287
4288 modules_dict = PySys_GetObject("modules");
4289 if (modules_dict == NULL)
4290 return NULL;
4291
4292 module = PyDict_GetItem(modules_dict, module_name);
4293 if (module == NULL) {
4294 module = PyImport_Import(module_name);
4295 if (module == NULL)
4296 return NULL;
4297 global = PyObject_GetAttr(module, global_name);
4298 Py_DECREF(module);
4299 }
4300 else {
4301 global = PyObject_GetAttr(module, global_name);
4302 }
4303 return global;
4304}
4305
/* Method table of the Unpickler type: load() takes no arguments,
   find_class() takes positional arguments. */
static struct PyMethodDef Unpickler_methods[] = {
    {"load", (PyCFunction)Unpickler_load, METH_NOARGS,
     Unpickler_load_doc},
    {"find_class", (PyCFunction)Unpickler_find_class, METH_VARARGS,
     Unpickler_find_class_doc},
    {NULL, NULL}                /* sentinel */
};
4313
4314static void
4315Unpickler_dealloc(UnpicklerObject *self)
4316{
4317 PyObject_GC_UnTrack((PyObject *)self);
4318 Py_XDECREF(self->readline);
4319 Py_XDECREF(self->read);
4320 Py_XDECREF(self->memo);
4321 Py_XDECREF(self->stack);
4322 Py_XDECREF(self->pers_func);
4323 Py_XDECREF(self->arg);
4324 Py_XDECREF(self->last_string);
4325
4326 PyMem_Free(self->marks);
4327 free(self->encoding);
4328 free(self->errors);
4329
4330 Py_TYPE(self)->tp_free((PyObject *)self);
4331}
4332
/* tp_traverse of Unpickler: report every Python object the unpickler
 * holds a reference to, so the cycle collector can see them.  Py_VISIT
 * relies on the parameter names "visit" and "arg".
 */
static int
Unpickler_traverse(UnpicklerObject *self, visitproc visit, void *arg)
{
    Py_VISIT(self->readline);
    Py_VISIT(self->read);
    Py_VISIT(self->memo);
    Py_VISIT(self->stack);
    Py_VISIT(self->pers_func);
    Py_VISIT(self->arg);
    Py_VISIT(self->last_string);
    return 0;
}
4345
4346static int
4347Unpickler_clear(UnpicklerObject *self)
4348{
4349 Py_CLEAR(self->readline);
4350 Py_CLEAR(self->read);
4351 Py_CLEAR(self->memo);
4352 Py_CLEAR(self->stack);
4353 Py_CLEAR(self->pers_func);
4354 Py_CLEAR(self->arg);
4355 Py_CLEAR(self->last_string);
4356
4357 PyMem_Free(self->marks);
4358 self->marks = NULL;
4359 free(self->encoding);
4360 self->encoding = NULL;
4361 free(self->errors);
4362 self->errors = NULL;
4363
4364 return 0;
4365}
4366
4367PyDoc_STRVAR(Unpickler_doc,
4368"Unpickler(file, *, encoding='ASCII', errors='strict')"
4369"\n"
4370"This takes a binary file for reading a pickle data stream.\n"
4371"\n"
4372"The protocol version of the pickle is detected automatically, so no\n"
4373"proto argument is needed.\n"
4374"\n"
4375"The file-like object must have two methods, a read() method\n"
4376"that takes an integer argument, and a readline() method that\n"
4377"requires no arguments. Both methods should return bytes.\n"
4378"Thus file-like object can be a binary file object opened for\n"
4379"reading, a BytesIO object, or any other custom object that\n"
4380"meets this interface.\n"
4381"\n"
4382"Optional keyword arguments are encoding and errors, which are\n"
4383"used to decode 8-bit string instances pickled by Python 2.x.\n"
4384"These default to 'ASCII' and 'strict', respectively.\n");
4385
4386static int
4387Unpickler_init(UnpicklerObject *self, PyObject *args, PyObject *kwds)
4388{
4389 static char *kwlist[] = {"file", "encoding", "errors", 0};
4390 PyObject *file;
4391 char *encoding = NULL;
4392 char *errors = NULL;
4393
4394 /* XXX: That is an horrible error message. But, I don't know how to do
4395 better... */
4396 if (Py_SIZE(args) != 1) {
4397 PyErr_Format(PyExc_TypeError,
4398 "%s takes exactly one positional argument (%zd given)",
4399 Py_TYPE(self)->tp_name, Py_SIZE(args));
4400 return -1;
4401 }
4402
4403 /* Arguments parsing needs to be done in the __init__() method to allow
4404 subclasses to define their own __init__() method, which may (or may
4405 not) support Unpickler arguments. However, this means we need to be
4406 extra careful in the other Unpickler methods, since a subclass could
4407 forget to call Unpickler.__init__() thus breaking our internal
4408 invariants. */
4409 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|ss:Unpickler", kwlist,
4410 &file, &encoding, &errors))
4411 return -1;
4412
4413 /* In case of multiple __init__() calls, clear previous content. */
4414 if (self->read != NULL)
4415 (void)Unpickler_clear(self);
4416
4417 self->read = PyObject_GetAttrString(file, "read");
4418 self->readline = PyObject_GetAttrString(file, "readline");
4419 if (self->readline == NULL || self->read == NULL)
4420 return -1;
4421
4422 if (encoding == NULL)
4423 encoding = "ASCII";
4424 if (errors == NULL)
4425 errors = "strict";
4426
4427 self->encoding = strdup(encoding);
4428 self->errors = strdup(errors);
4429 if (self->encoding == NULL || self->errors == NULL) {
4430 PyErr_NoMemory();
4431 return -1;
4432 }
4433
4434 if (PyObject_HasAttrString((PyObject *)self, "persistent_load")) {
4435 self->pers_func = PyObject_GetAttrString((PyObject *)self,
4436 "persistent_load");
4437 if (self->pers_func == NULL)
4438 return -1;
4439 }
4440 else {
4441 self->pers_func = NULL;
4442 }
4443
4444 self->stack = (Pdata *)Pdata_New();
4445 if (self->stack == NULL)
4446 return -1;
4447
4448 self->memo = PyDict_New();
4449 if (self->memo == NULL)
4450 return -1;
4451
4452 return 0;
4453}
4454
4455static PyObject *
4456Unpickler_get_memo(UnpicklerObject *self)
4457{
4458 if (self->memo == NULL)
4459 PyErr_SetString(PyExc_AttributeError, "memo");
4460 else
4461 Py_INCREF(self->memo);
4462 return self->memo;
4463}
4464
4465static int
4466Unpickler_set_memo(UnpicklerObject *self, PyObject *value)
4467{
4468 PyObject *tmp;
4469
4470 if (value == NULL) {
4471 PyErr_SetString(PyExc_TypeError,
4472 "attribute deletion is not supported");
4473 return -1;
4474 }
4475 if (!PyDict_Check(value)) {
4476 PyErr_SetString(PyExc_TypeError, "memo must be a dictionary");
4477 return -1;
4478 }
4479
4480 tmp = self->memo;
4481 Py_INCREF(value);
4482 self->memo = value;
4483 Py_XDECREF(tmp);
4484
4485 return 0;
4486}
4487
4488static PyObject *
4489Unpickler_get_persload(UnpicklerObject *self)
4490{
4491 if (self->pers_func == NULL)
4492 PyErr_SetString(PyExc_AttributeError, "persistent_load");
4493 else
4494 Py_INCREF(self->pers_func);
4495 return self->pers_func;
4496}
4497
4498static int
4499Unpickler_set_persload(UnpicklerObject *self, PyObject *value)
4500{
4501 PyObject *tmp;
4502
4503 if (value == NULL) {
4504 PyErr_SetString(PyExc_TypeError,
4505 "attribute deletion is not supported");
4506 return -1;
4507 }
4508 if (!PyCallable_Check(value)) {
4509 PyErr_SetString(PyExc_TypeError,
4510 "persistent_load must be a callable taking "
4511 "one argument");
4512 return -1;
4513 }
4514
4515 tmp = self->pers_func;
4516 Py_INCREF(value);
4517 self->pers_func = value;
4518 Py_XDECREF(tmp); /* self->pers_func can be NULL, so be careful. */
4519
4520 return 0;
4521}
4522
/* Computed attributes of the Unpickler type: "memo" and
   "persistent_load", each with a getter and a setter. */
static PyGetSetDef Unpickler_getsets[] = {
    {"memo", (getter)Unpickler_get_memo, (setter)Unpickler_set_memo},
    {"persistent_load", (getter)Unpickler_get_persload,
                        (setter)Unpickler_set_persload},
    {NULL}                      /* sentinel */
};
4529
/* Type object of _pickle.Unpickler.  The type is subclassable and takes
   part in cyclic garbage collection (traverse/clear slots below); all
   construction work happens in tp_init. */
static PyTypeObject Unpickler_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_pickle.Unpickler",                /*tp_name*/
    sizeof(UnpicklerObject),            /*tp_basicsize*/
    0,                                  /*tp_itemsize*/
    (destructor)Unpickler_dealloc,      /*tp_dealloc*/
    0,                                  /*tp_print*/
    0,                                  /*tp_getattr*/
    0,                                  /*tp_setattr*/
    0,                                  /*tp_compare*/
    0,                                  /*tp_repr*/
    0,                                  /*tp_as_number*/
    0,                                  /*tp_as_sequence*/
    0,                                  /*tp_as_mapping*/
    0,                                  /*tp_hash*/
    0,                                  /*tp_call*/
    0,                                  /*tp_str*/
    0,                                  /*tp_getattro*/
    0,                                  /*tp_setattro*/
    0,                                  /*tp_as_buffer*/
    /* tp_flags: subclassable and GC-aware. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
    Unpickler_doc,                      /*tp_doc*/
    (traverseproc)Unpickler_traverse,   /*tp_traverse*/
    (inquiry)Unpickler_clear,           /*tp_clear*/
    0,                                  /*tp_richcompare*/
    0,                                  /*tp_weaklistoffset*/
    0,                                  /*tp_iter*/
    0,                                  /*tp_iternext*/
    Unpickler_methods,                  /*tp_methods*/
    0,                                  /*tp_members*/
    Unpickler_getsets,                  /*tp_getset*/
    0,                                  /*tp_base*/
    0,                                  /*tp_dict*/
    0,                                  /*tp_descr_get*/
    0,                                  /*tp_descr_set*/
    0,                                  /*tp_dictoffset*/
    (initproc)Unpickler_init,           /*tp_init*/
    PyType_GenericAlloc,                /*tp_alloc*/
    PyType_GenericNew,                  /*tp_new*/
    PyObject_GC_Del,                    /*tp_free*/
    0,                                  /*tp_is_gc*/
};
4572
4573static int
4574init_stuff(void)
4575{
4576 PyObject *copyreg;
4577
4578 copyreg = PyImport_ImportModule("copyreg");
4579 if (!copyreg)
4580 return -1;
4581
4582 dispatch_table = PyObject_GetAttrString(copyreg, "dispatch_table");
4583 if (!dispatch_table)
4584 goto error;
4585
4586 extension_registry = \
4587 PyObject_GetAttrString(copyreg, "_extension_registry");
4588 if (!extension_registry)
4589 goto error;
4590
4591 inverted_registry = PyObject_GetAttrString(copyreg, "_inverted_registry");
4592 if (!inverted_registry)
4593 goto error;
4594
4595 extension_cache = PyObject_GetAttrString(copyreg, "_extension_cache");
4596 if (!extension_cache)
4597 goto error;
4598
4599 Py_DECREF(copyreg);
4600
4601 empty_tuple = PyTuple_New(0);
4602 if (empty_tuple == NULL)
4603 return -1;
4604
4605 two_tuple = PyTuple_New(2);
4606 if (two_tuple == NULL)
4607 return -1;
4608 /* We use this temp container with no regard to refcounts, or to
4609 * keeping containees alive. Exempt from GC, because we don't
4610 * want anything looking at two_tuple() by magic.
4611 */
4612 PyObject_GC_UnTrack(two_tuple);
4613
4614 return 0;
4615
4616 error:
4617 Py_DECREF(copyreg);
4618 return -1;
4619}
4620
/* Module definition of _pickle: name, docstring and a size of -1; every
   remaining slot is left NULL, so the module defines no module-level
   functions here. */
static struct PyModuleDef _picklemodule = {
    PyModuleDef_HEAD_INIT,
    "_pickle",                  /* module name */
    pickle_module_doc,          /* module docstring */
    -1,                         /* module state size */
    NULL,
    NULL,
    NULL,
    NULL,
    NULL
};
4632
4633PyMODINIT_FUNC
4634PyInit__pickle(void)
4635{
4636 PyObject *m;
4637
4638 if (PyType_Ready(&Unpickler_Type) < 0)
4639 return NULL;
4640 if (PyType_Ready(&Pickler_Type) < 0)
4641 return NULL;
4642 if (PyType_Ready(&Pdata_Type) < 0)
4643 return NULL;
4644
4645 /* Create the module and add the functions. */
4646 m = PyModule_Create(&_picklemodule);
4647 if (m == NULL)
4648 return NULL;
4649
4650 if (PyModule_AddObject(m, "Pickler", (PyObject *)&Pickler_Type) < 0)
4651 return NULL;
4652 if (PyModule_AddObject(m, "Unpickler", (PyObject *)&Unpickler_Type) < 0)
4653 return NULL;
4654
4655 /* Initialize the exceptions. */
4656 PickleError = PyErr_NewException("_pickle.PickleError", NULL, NULL);
4657 if (PickleError == NULL)
4658 return NULL;
4659 PicklingError = \
4660 PyErr_NewException("_pickle.PicklingError", PickleError, NULL);
4661 if (PicklingError == NULL)
4662 return NULL;
4663 UnpicklingError = \
4664 PyErr_NewException("_pickle.UnpicklingError", PickleError, NULL);
4665 if (UnpicklingError == NULL)
4666 return NULL;
4667
4668 if (PyModule_AddObject(m, "PickleError", PickleError) < 0)
4669 return NULL;
4670 if (PyModule_AddObject(m, "PicklingError", PicklingError) < 0)
4671 return NULL;
4672 if (PyModule_AddObject(m, "UnpicklingError", UnpicklingError) < 0)
4673 return NULL;
4674
4675 if (init_stuff() < 0)
4676 return NULL;
4677
4678 return m;
4679}